Skip to content

Commit 541c681

Browse files
jmchilton and claude committed
Add pydantic schema validation lint step, fix runtime_inputs leak in converter
lint.py: lint_pydantic_validation() tries the strict model (extra=forbid) first and falls back to the lax model (extra=allow). Strict-only failures are reported as warnings; lax failures are reported as errors. It is called by the gxwf-lint CLI.
converter.py: runtime_inputs is a Format2 concept, so change get() to pop() so that it doesn't leak into native output.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6ee1366 commit 541c681

File tree

4 files changed

+148
-5
lines changed

4 files changed

+148
-5
lines changed

gxformat2/converter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ def transform_tool(context, step):
512512
connect = pop_connect_from_step_dict(step)
513513

514514
# TODO: handle runtime inputs and state together.
515-
runtime_inputs = step.get("runtime_inputs", [])
515+
runtime_inputs = step.pop("runtime_inputs", [])
516516
if "state" in step or runtime_inputs:
517517
encode = context.import_options.encode_tool_state_json
518518
encoder = context.import_options.native_state_encoder

gxformat2/lint.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,43 @@ def _lint_training(lint_context, workflow_dict):
179179
lint_context.warn("Empty workflow documentation (annotation or doc element)")
180180

181181

182+
def _format_schema_error(prefix, error):
    """Render one pydantic error-detail dict as a single lint message.

    ``error`` is one entry of ``ValidationError.errors()``; only its ``msg``
    and ``loc`` keys are read. ``loc`` is joined with `` -> `` to give a
    readable path into the workflow document.
    """
    location = " -> ".join(str(part) for part in error["loc"])
    if not location:
        # An empty loc means the failure is on the document as a whole
        # (e.g. it is not a mapping); avoid emitting a dangling " at ".
        return f"{prefix}: {error['msg']}"
    return f"{prefix}: {error['msg']} at {location}"


def lint_pydantic_validation(lint_context, workflow_dict, format2=False):
    """Validate workflow dict against pydantic schema models.

    Tries the strict model (extra=forbid) first. If strict validation fails,
    falls back to the lax model (extra=allow) to distinguish fundamental
    schema errors from problems only the strict model objects to (such as
    extra/unknown fields).

    :param lint_context: receives the findings through ``warn()`` and
        ``error()``.
    :param workflow_dict: the loaded workflow document to validate.
    :param format2: validate against the Format2 ``GalaxyWorkflow`` models
        when true, otherwise against the native ``NativeGalaxyWorkflow``
        models.

    Nothing is reported when strict validation passes. When only the strict
    model fails, each strict error is reported as a warning; when the lax
    model fails too, each lax error is reported as an error.
    """
    # Imported lazily so pydantic and the schema modules are only loaded
    # when this lint step actually runs.
    from pydantic import ValidationError

    if format2:
        from gxformat2.schema.gxformat2_strict import GalaxyWorkflow as StrictModel
        from gxformat2.schema.gxformat2 import GalaxyWorkflow as LaxModel
    else:
        from gxformat2.schema.native_strict import NativeGalaxyWorkflow as StrictModel
        from gxformat2.schema.native import NativeGalaxyWorkflow as LaxModel

    try:
        StrictModel.model_validate(workflow_dict)
    except ValidationError as e:
        strict_errors = e.errors()
    else:
        return  # strict passes — nothing to report

    # Strict failed — try lax to see if the core schema is valid.
    try:
        LaxModel.model_validate(workflow_dict)
    except ValidationError as e:
        # Lax also fails: fundamental schema errors.
        for error in e.errors():
            lint_context.error(_format_schema_error("Schema validation", error))
    else:
        # Lax passes: the document is only rejected by the strict model.
        for error in strict_errors:
            lint_context.warn(_format_schema_error("Schema validation (strict)", error))
217+
218+
182219
SKIP_DISCONNECTED_CHECK_TYPES = {"data_input", "data_collection_input", "parameter_input", "pause"}
183220

184221

@@ -339,6 +376,7 @@ def main(argv=None):
339376
lint_func = lint_format2 if is_format2 else lint_ga
340377
lint_context = LintContext(training_topic=args.training_topic)
341378
lint_func(lint_context, workflow_dict, path=path)
379+
lint_pydantic_validation(lint_context, workflow_dict, format2=is_format2)
342380
if not args.skip_best_practices:
343381
best_practices_func = lint_best_practices_format2 if is_format2 else lint_best_practices_ga
344382
best_practices_func(lint_context, workflow_dict)
@@ -370,4 +408,11 @@ def _parser():
370408
sys.exit(main())
371409

372410

373-
__all__ = ("main", "lint_format2", "lint_ga", "lint_best_practices_format2", "lint_best_practices_ga")
411+
__all__ = (
412+
"main",
413+
"lint_format2",
414+
"lint_ga",
415+
"lint_best_practices_format2",
416+
"lint_best_practices_ga",
417+
"lint_pydantic_validation",
418+
)

tests/test_lint.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,9 +265,14 @@ def test_lint_ga_unicycler_training():
265265
# no tags, fails linting
266266
assert main(["lint", SKIP_BP, "--training-topic", "assembly", os.path.join(TEST_PATH, "unicycler.ga")]) == 1
267267
# correct tag passes linting
268-
assert main(["lint", SKIP_BP, "--training-topic", "assembly", os.path.join(TEST_PATH, "unicycler-hacked-tags.ga")]) == 0
268+
assert (
269+
main(["lint", SKIP_BP, "--training-topic", "assembly", os.path.join(TEST_PATH, "unicycler-hacked-tags.ga")])
270+
== 0
271+
)
269272
# incorrect tag, fails linting
270-
assert main(["lint", SKIP_BP, "--training-topic", "mapping", os.path.join(TEST_PATH, "unicycler-hacked-tags.ga")]) == 1
273+
assert (
274+
main(["lint", SKIP_BP, "--training-topic", "mapping", os.path.join(TEST_PATH, "unicycler-hacked-tags.ga")]) == 1
275+
)
271276

272277

273278
def test_lint_ga_unicycler_missing_tools():

tests/test_lint_best_practices.py

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
"""Tests for best-practice linting checks."""
1+
"""Tests for best-practice linting and pydantic schema validation checks."""
22

33
from gxformat2.lint import (
44
lint_best_practices_format2,
55
lint_best_practices_ga,
6+
lint_pydantic_validation,
67
)
78
from gxformat2.linting import LintContext
89

@@ -466,3 +467,95 @@ def test_untyped_in_out(self):
466467
},
467468
)
468469
assert any("untyped parameter in the post-job actions" in m for m in ctx.warn_messages)
470+
471+
472+
# --- Pydantic schema validation ---
473+
474+
475+
class TestPydanticValidationNative:
476+
def test_valid_native(self):
477+
ctx = _lint_ctx()
478+
lint_pydantic_validation(
479+
ctx,
480+
{
481+
"a_galaxy_workflow": "true",
482+
"format-version": "0.1",
483+
"steps": {},
484+
},
485+
format2=False,
486+
)
487+
assert not ctx.error_messages
488+
assert not ctx.warn_messages
489+
490+
def test_missing_required_field(self):
491+
ctx = _lint_ctx()
492+
lint_pydantic_validation(ctx, {"steps": {}}, format2=False)
493+
assert any("Schema validation:" in m for m in ctx.error_messages)
494+
495+
def test_extra_field_strict_only(self):
496+
ctx = _lint_ctx()
497+
lint_pydantic_validation(
498+
ctx,
499+
{
500+
"a_galaxy_workflow": "true",
501+
"format-version": "0.1",
502+
"steps": {
503+
"0": {
504+
"id": 0,
505+
"type": "tool",
506+
"some_unknown_field": "value",
507+
}
508+
},
509+
},
510+
format2=False,
511+
)
512+
# Strict-only failure -> warnings, not errors
513+
assert not ctx.error_messages
514+
assert any("strict" in m for m in ctx.warn_messages)
515+
516+
def test_wrong_type(self):
517+
ctx = _lint_ctx()
518+
lint_pydantic_validation(
519+
ctx,
520+
{
521+
"a_galaxy_workflow": "true",
522+
"format-version": "0.1",
523+
"steps": "not a dict",
524+
},
525+
format2=False,
526+
)
527+
assert any("Schema validation:" in m for m in ctx.error_messages)
528+
529+
530+
class TestPydanticValidationFormat2:
    """Schema-validation lint checks for Format2 workflow dicts."""

    VALID_FORMAT2 = {
        "class": "GalaxyWorkflow",
        "inputs": {},
        "outputs": {},
        "steps": {},
    }

    @staticmethod
    def _validate(workflow):
        """Run the lint step in Format2 mode and return the populated context."""
        context = _lint_ctx()
        lint_pydantic_validation(context, workflow, format2=True)
        return context

    def test_valid_format2(self):
        context = self._validate(self.VALID_FORMAT2)
        assert not context.error_messages
        assert not context.warn_messages

    def test_missing_steps(self):
        # Same document as VALID_FORMAT2 but without its "steps" key.
        without_steps = {key: value for key, value in self.VALID_FORMAT2.items() if key != "steps"}
        context = self._validate(without_steps)
        assert [m for m in context.error_messages if "Schema validation:" in m]

    def test_extra_field_strict_only(self):
        with_extra = dict(self.VALID_FORMAT2)
        with_extra["some_unknown_field"] = "value"
        context = self._validate(with_extra)
        assert not context.error_messages
        assert [m for m in context.warn_messages if "strict" in m]

0 commit comments

Comments
 (0)