Add pydantic schema validation lint step, fix runtime_inputs leak in converter

jmchilton · claude · jmchilton · commit 541c6815c433 · 2026-03-22T23:53:49.000-04:00
lint.py: lint_pydantic_validation() tries strict model (extra=forbid) first,
falls back to lax (extra=allow). Strict-only failures are warnings, lax
failures are errors. Called by gxwf-lint CLI.

converter.py: runtime_inputs is a Format2 concept - change get() to pop() so
it doesn't leak into native output.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/gxformat2/converter.py b/gxformat2/converter.py
@@ -512,7 +512,7 @@ def transform_tool(context, step):
     connect = pop_connect_from_step_dict(step)
 
     # TODO: handle runtime inputs and state together.
-    runtime_inputs = step.get("runtime_inputs", [])
+    runtime_inputs = step.pop("runtime_inputs", [])
     if "state" in step or runtime_inputs:
         encode = context.import_options.encode_tool_state_json
         encoder = context.import_options.native_state_encoder
diff --git a/gxformat2/lint.py b/gxformat2/lint.py
@@ -179,6 +179,43 @@ def _lint_training(lint_context, workflow_dict):
         lint_context.warn("Empty workflow documentation (annotation or doc element)")
 
 
+def lint_pydantic_validation(lint_context, workflow_dict, format2=False):
+    """Validate workflow dict against pydantic schema models.
+
+    Tries strict model (extra=forbid) first. If strict fails, falls back to
+    the lax model (extra=allow) to distinguish fundamental type errors from
+    merely having extra/unknown fields.
+    """
+    from pydantic import ValidationError
+
+    if format2:
+        from gxformat2.schema.gxformat2_strict import GalaxyWorkflow as StrictModel
+        from gxformat2.schema.gxformat2 import GalaxyWorkflow as LaxModel
+    else:
+        from gxformat2.schema.native_strict import NativeGalaxyWorkflow as StrictModel
+        from gxformat2.schema.native import NativeGalaxyWorkflow as LaxModel
+
+    strict_errors = None
+    try:
+        StrictModel.model_validate(workflow_dict)
+        return  # strict passes — nothing to report
+    except ValidationError as e:
+        strict_errors = e.errors()
+
+    # Strict failed — try lax to see if the core schema is valid
+    try:
+        LaxModel.model_validate(workflow_dict)
+        # Lax passes: only extra/unknown fields caused strict failure
+        for error in strict_errors:
+            loc = " -> ".join(str(p) for p in error["loc"])
+            lint_context.warn(f"Schema validation (strict): {error['msg']} at {loc}")
+    except ValidationError as e:
+        # Lax also fails: fundamental schema errors
+        for error in e.errors():
+            loc = " -> ".join(str(p) for p in error["loc"])
+            lint_context.error(f"Schema validation: {error['msg']} at {loc}")
+
+
 SKIP_DISCONNECTED_CHECK_TYPES = {"data_input", "data_collection_input", "parameter_input", "pause"}
 
 
@@ -339,6 +376,7 @@ def main(argv=None):
     lint_func = lint_format2 if is_format2 else lint_ga
     lint_context = LintContext(training_topic=args.training_topic)
     lint_func(lint_context, workflow_dict, path=path)
+    lint_pydantic_validation(lint_context, workflow_dict, format2=is_format2)
     if not args.skip_best_practices:
         best_practices_func = lint_best_practices_format2 if is_format2 else lint_best_practices_ga
         best_practices_func(lint_context, workflow_dict)
@@ -370,4 +408,11 @@ def _parser():
     sys.exit(main())
 
 
-__all__ = ("main", "lint_format2", "lint_ga", "lint_best_practices_format2", "lint_best_practices_ga")
+__all__ = (
+    "main",
+    "lint_format2",
+    "lint_ga",
+    "lint_best_practices_format2",
+    "lint_best_practices_ga",
+    "lint_pydantic_validation",
+)
diff --git a/tests/test_lint.py b/tests/test_lint.py
@@ -265,9 +265,14 @@ def test_lint_ga_unicycler_training():
     # no tags, fails linting
     assert main(["lint", SKIP_BP, "--training-topic", "assembly", os.path.join(TEST_PATH, "unicycler.ga")]) == 1
     # correct tag passes linting
-    assert main(["lint", SKIP_BP, "--training-topic", "assembly", os.path.join(TEST_PATH, "unicycler-hacked-tags.ga")]) == 0
+    assert (
+        main(["lint", SKIP_BP, "--training-topic", "assembly", os.path.join(TEST_PATH, "unicycler-hacked-tags.ga")])
+        == 0
+    )
     # incorrect tag, fails linting
-    assert main(["lint", SKIP_BP, "--training-topic", "mapping", os.path.join(TEST_PATH, "unicycler-hacked-tags.ga")]) == 1
+    assert (
+        main(["lint", SKIP_BP, "--training-topic", "mapping", os.path.join(TEST_PATH, "unicycler-hacked-tags.ga")]) == 1
+    )
 
 
 def test_lint_ga_unicycler_missing_tools():
diff --git a/tests/test_lint_best_practices.py b/tests/test_lint_best_practices.py
@@ -1,8 +1,9 @@
-"""Tests for best-practice linting checks."""
+"""Tests for best-practice linting and pydantic schema validation checks."""
 
 from gxformat2.lint import (
     lint_best_practices_format2,
     lint_best_practices_ga,
+    lint_pydantic_validation,
 )
 from gxformat2.linting import LintContext
 
@@ -466,3 +467,95 @@ def test_untyped_in_out(self):
             },
         )
         assert any("untyped parameter in the post-job actions" in m for m in ctx.warn_messages)
+
+
+# --- Pydantic schema validation ---
+
+
+class TestPydanticValidationNative:
+    def test_valid_native(self):
+        ctx = _lint_ctx()
+        lint_pydantic_validation(
+            ctx,
+            {
+                "a_galaxy_workflow": "true",
+                "format-version": "0.1",
+                "steps": {},
+            },
+            format2=False,
+        )
+        assert not ctx.error_messages
+        assert not ctx.warn_messages
+
+    def test_missing_required_field(self):
+        ctx = _lint_ctx()
+        lint_pydantic_validation(ctx, {"steps": {}}, format2=False)
+        assert any("Schema validation:" in m for m in ctx.error_messages)
+
+    def test_extra_field_strict_only(self):
+        ctx = _lint_ctx()
+        lint_pydantic_validation(
+            ctx,
+            {
+                "a_galaxy_workflow": "true",
+                "format-version": "0.1",
+                "steps": {
+                    "0": {
+                        "id": 0,
+                        "type": "tool",
+                        "some_unknown_field": "value",
+                    }
+                },
+            },
+            format2=False,
+        )
+        # Strict-only failure -> warnings, not errors
+        assert not ctx.error_messages
+        assert any("strict" in m for m in ctx.warn_messages)
+
+    def test_wrong_type(self):
+        ctx = _lint_ctx()
+        lint_pydantic_validation(
+            ctx,
+            {
+                "a_galaxy_workflow": "true",
+                "format-version": "0.1",
+                "steps": "not a dict",
+            },
+            format2=False,
+        )
+        assert any("Schema validation:" in m for m in ctx.error_messages)
+
+
+class TestPydanticValidationFormat2:
+    VALID_FORMAT2 = {
+        "class": "GalaxyWorkflow",
+        "inputs": {},
+        "outputs": {},
+        "steps": {},
+    }
+
+    def test_valid_format2(self):
+        ctx = _lint_ctx()
+        lint_pydantic_validation(ctx, self.VALID_FORMAT2, format2=True)
+        assert not ctx.error_messages
+        assert not ctx.warn_messages
+
+    def test_missing_steps(self):
+        ctx = _lint_ctx()
+        lint_pydantic_validation(
+            ctx,
+            {"class": "GalaxyWorkflow", "inputs": {}, "outputs": {}},
+            format2=True,
+        )
+        assert any("Schema validation:" in m for m in ctx.error_messages)
+
+    def test_extra_field_strict_only(self):
+        ctx = _lint_ctx()
+        lint_pydantic_validation(
+            ctx,
+            {**self.VALID_FORMAT2, "some_unknown_field": "value"},
+            format2=True,
+        )
+        assert not ctx.error_messages
+        assert any("strict" in m for m in ctx.warn_messages)