galaxyproject
diff --git a/gxformat2/abstract.py b/gxformat2/abstract.py
Lines changed: 9 additions & 8 deletions
diff --git a/gxformat2/cytoscape/_builder.py b/gxformat2/cytoscape/_builder.py
Lines changed: 11 additions & 8 deletions
diff --git a/gxformat2/lint.py b/gxformat2/lint.py
Lines changed: 2 additions & 1 deletion
diff --git a/gxformat2/normalize.py b/gxformat2/normalize.py
Lines changed: 7 additions & 3 deletions
diff --git a/gxformat2/normalized/_conversion.py b/gxformat2/normalized/_conversion.py
Lines changed: 24 additions & 11 deletions
diff --git a/gxformat2/normalized/_format2.py b/gxformat2/normalized/_format2.py
Lines changed: 50 additions & 12 deletions
@@ -7,7 +7,7 @@
 from typing import Any
 
 from gxformat2.normalized import ensure_format2, NormalizedFormat2, NormalizedWorkflowStep
-from gxformat2.schema.gxformat2 import GalaxyType, WorkflowInputParameter, WorkflowOutputParameter, WorkflowStepOutput
+from gxformat2.schema.gxformat2 import BaseInputParameter, GalaxyType, WorkflowOutputParameter, WorkflowStepOutput
 from gxformat2.yaml import ordered_dump_to_path, ordered_load
 
 CWL_VERSION = "v1.2"
@@ -105,7 +105,7 @@ def _step_outputs_to_abstract(step: NormalizedWorkflowStep):
     return [out.id for out in step.out if out.id is not None]
 
 
-def _inputs_to_abstract(inputs: list[WorkflowInputParameter]):
+def _inputs_to_abstract(inputs: list[BaseInputParameter]):
     """Convert Format2 inputs to abstract CWL inputs."""
     abstract_inputs: dict[str, Any] = {}
     for inp in inputs:
@@ -114,8 +114,8 @@ def _inputs_to_abstract(inputs: list[WorkflowInputParameter]):
             continue
         input_def: dict[str, Any] = {}
 
-        # Convert type
-        cwl_type = _galaxy_type_to_cwl(inp.type_)
+        # Convert type (type_ lives on concrete subclasses, not BaseInputParameter)
+        cwl_type = _galaxy_type_to_cwl(getattr(inp, "type_", None))
         if inp.optional:
             cwl_type += "?"
         input_def["type"] = cwl_type
@@ -134,7 +134,7 @@ def _inputs_to_abstract(inputs: list[WorkflowInputParameter]):
     return abstract_inputs
 
 
-def _galaxy_type_to_cwl(galaxy_type: GalaxyType | list[GalaxyType] | None) -> str:
+def _galaxy_type_to_cwl(galaxy_type: GalaxyType | str | list[GalaxyType | str] | None) -> str:
     """Map a Galaxy/Format2 type to a CWL type string."""
     if galaxy_type is None:
         return "File"
@@ -144,12 +144,13 @@ def _galaxy_type_to_cwl(galaxy_type: GalaxyType | list[GalaxyType] | None) -> st
             if t != GalaxyType.null:
                 return _galaxy_type_to_cwl(t) + "[]"
         return "File"
-    if galaxy_type == GalaxyType.data:
+    type_str = galaxy_type.value if isinstance(galaxy_type, GalaxyType) else str(galaxy_type)
+    if type_str in ("data", "File"):
         return "File"
-    if galaxy_type == GalaxyType.collection:
+    if type_str == "collection":
         # TODO: handle nested collections, pairs, etc...
         return "File[]"
-    return galaxy_type.value
+    return type_str
 
 
 def _outputs_to_abstract(outputs: list[WorkflowOutputParameter]):
 
@@ -6,7 +6,7 @@
 from typing import Any
 
 from gxformat2.normalized import ensure_format2, NormalizedFormat2, NormalizedWorkflowStep
-from gxformat2.schema.gxformat2 import GalaxyWorkflow, WorkflowInputParameter
+from gxformat2.schema.gxformat2 import BaseInputParameter, GalaxyType, GalaxyWorkflow
 
 from .models import (
     CytoscapeEdge,
@@ -57,17 +57,20 @@ def _to_position(step_position, order_index: int) -> CytoscapePosition:
     return CytoscapePosition(x=int(step_position.left), y=int(step_position.top))
 
 
-def _input_type_str(inp: WorkflowInputParameter) -> str:
-    if inp.type_ is None:
+def _input_type_str(inp: BaseInputParameter) -> str:
+    # type_ lives on concrete subclasses, not BaseInputParameter
+    type_ = getattr(inp, "type_", None)
+    if type_ is None:
         return "input"
-    if isinstance(inp.type_, list):
-        if inp.type_:
-            return inp.type_[0].value + "[]"
+    if isinstance(type_, list):
+        if type_:
+            t = type_[0]
+            return (t.value if isinstance(t, GalaxyType) else str(t)) + "[]"
         return "input"
-    return inp.type_.value
+    return type_.value if isinstance(type_, GalaxyType) else str(type_)
 
 
-def _input_node(inp: WorkflowInputParameter, order_index: int) -> CytoscapeNode:
+def _input_node(inp: BaseInputParameter, order_index: int) -> CytoscapeNode:
     input_id = inp.id or str(order_index)
     type_str = _input_type_str(inp)
     return CytoscapeNode(
 
@@ -159,7 +159,8 @@ def _validate_input_types(lint_context: LintContext, nf2: NormalizedFormat2):
     for inp in nf2.inputs:
         if inp.default is None:
             continue
-        input_type = inp.type_
+        # type_ lives on concrete subclasses, not BaseInputParameter
+        input_type = getattr(inp, "type_", None)
         if isinstance(input_type, list):
             # Array type like [string] — skip default validation for now
             continue
 
@@ -14,7 +14,11 @@
 
 from gxformat2.normalized import ensure_format2, NormalizedFormat2, NormalizedNativeWorkflow, NormalizedWorkflowStep
 from gxformat2.options import ConversionOptions
-from gxformat2.schema.gxformat2 import GalaxyWorkflow, WorkflowInputParameter, WorkflowOutputParameter
+from gxformat2.schema.gxformat2 import (
+    BaseInputParameter,
+    GalaxyWorkflow,
+    WorkflowOutputParameter,
+)
 from gxformat2.schema.native import NativeGalaxyWorkflow
 
 # Any input ensure_format2 accepts
@@ -37,7 +41,7 @@ def steps(
     workflow_path: str | PathLike | None = None,
     options: ConversionOptions | None = None,
     expand: bool = False,
-) -> list[WorkflowInputParameter | NormalizedWorkflowStep]:
+) -> list[BaseInputParameter | NormalizedWorkflowStep]:
     """Return input parameters followed by steps as typed models."""
     nf2 = _ensure_format2(workflow_dict, workflow_path, options, expand)
     return list(nf2.inputs) + list(nf2.steps)
@@ -48,7 +52,7 @@ def inputs(
     workflow_path: str | PathLike | None = None,
     options: ConversionOptions | None = None,
     expand: bool = False,
-) -> list[WorkflowInputParameter]:
+) -> list[BaseInputParameter]:
     """Return normalized inputs as typed models."""
     nf2 = _ensure_format2(workflow_dict, workflow_path, options, expand)
     return list(nf2.inputs)
 
@@ -38,11 +38,13 @@
     MAX_EXPANSION_DEPTH,
 )
 from ..schema.gxformat2 import (
+    BaseInputParameter,
     CreatorOrganization,
     CreatorPerson,
     FrameComment,
     FreehandComment,
     GalaxyWorkflow,
+    input_parameter_class,
     MarkdownComment,
     Report,
 )
@@ -443,7 +445,7 @@ def _build_format2_workflow(
             label_map[str(key)] = f"{UNLABELED_STEP_PREFIX}{step.id}"
 
     # Separate inputs from non-input steps
-    input_params: list[WorkflowInputParameter] = []
+    input_params: list[BaseInputParameter] = []
     fmt2_steps: list[NormalizedWorkflowStep] = []
     labels = Labels()
 
@@ -491,7 +493,7 @@ def _build_format2_workflow(
     )
 
 
-def _build_input_param(step: NormalizedNativeStep) -> WorkflowInputParameter:
+def _build_input_param(step: NormalizedNativeStep) -> BaseInputParameter:
     step_id = step.label if step.label is not None else f"{UNLABELED_INPUT_PREFIX}{step.id}"
     tool_state = step.tool_state
     input_type = native_input_to_format2_type({"type": step.type_}, tool_state)
@@ -521,7 +523,12 @@ def _build_input_param(step: NormalizedNativeStep) -> WorkflowInputParameter:
     if step.position:
         kwargs["position"] = _convert_position(step.position)
 
-    return WorkflowInputParameter(**kwargs)
+    # Use the specific discriminated type when possible; fall back to
+    # WorkflowInputParameter for list types (multiple inputs) since the
+    # specific classes only accept scalar Literal type_ values.
+    if isinstance(input_type, list):
+        return WorkflowInputParameter(**kwargs)
+    return input_parameter_class(input_type)(**kwargs)
 
 
 def _build_format2_step(
@@ -1094,13 +1101,14 @@ def _build_native_workflow(
 
 
 def _build_input_step(
-    inp: WorkflowInputParameter,
+    inp: BaseInputParameter,
     order_index: int,
     ctx: _ConversionContext,
 ) -> NormalizedNativeStep:
     raw_label = inp.id or f"Input {order_index}"
     label = None if Labels.is_unlabeled(raw_label) else raw_label
-    input_type = inp.type_
+    # type_ lives on concrete subclasses, not BaseInputParameter
+    input_type = getattr(inp, "type_", None)
     if isinstance(input_type, list):
         if len(input_type) != 1:
             raise Exception("Only simple arrays of workflow inputs are currently supported")
@@ -1135,10 +1143,15 @@ def _build_input_step(
         tool_state["multiple"] = True
     if inp.optional is not None:
         tool_state["optional"] = inp.optional
-    if inp.format:
-        tool_state["format"] = inp.format
-    if inp.collection_type:
-        tool_state["collection_type"] = inp.collection_type
+    # getattr because inp is typed as BaseInputParameter but may be any subclass:
+    # format lives on BaseDataParameter, collection_type on WorkflowCollectionParameter
+    # and WorkflowInputParameter (catch-all). Non-data types (integer, text, etc.) lack these.
+    fmt = getattr(inp, "format", None)
+    if fmt:
+        tool_state["format"] = fmt
+    collection_type = getattr(inp, "collection_type", None)
+    if collection_type:
+        tool_state["collection_type"] = collection_type
     if inp.default is not None:
         tool_state["default"] = inp.default
 
@@ -1682,8 +1695,8 @@ def _expand_format2(wf: NormalizedFormat2, ctx: _ExpansionContext) -> ExpandedFo
         step_data = step.model_dump(by_alias=True, exclude={"run"})
         expanded_steps.append(ExpandedWorkflowStep(**step_data, run=expanded_run))
 
-    wf_data = wf.model_dump(by_alias=True, exclude={"steps"})
-    return ExpandedFormat2(**wf_data, steps=expanded_steps)
+    wf_data = wf.model_dump(by_alias=True, exclude={"steps", "inputs"})
+    return ExpandedFormat2(**wf_data, inputs=wf.inputs, steps=expanded_steps)
 
 
 def _expand_native(wf: NormalizedNativeWorkflow, ctx: _ExpansionContext) -> ExpandedNativeWorkflow:
 
@@ -15,15 +15,17 @@
 from pathlib import Path
 from typing import Any, Literal, NamedTuple, Union
 
-from pydantic import BaseModel, ConfigDict, Field, field_validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator, SerializeAsAny
 from typing_extensions import TypeAlias
 
 from gxformat2.schema.gxformat2 import (
+    BaseInputParameter,
     CreatorOrganization,
     CreatorPerson,
     FrameComment,
     FreehandComment,
     GalaxyWorkflow,
+    input_parameter_class,
     MarkdownComment,
     Report,
     StepPosition,
@@ -175,7 +177,7 @@ class NormalizedFormat2(_DictMixin, BaseModel):
     class_: Literal["GalaxyWorkflow"] = Field(default="GalaxyWorkflow", alias="class")
     label: str | None = Field(default=None)
     doc: str | None = Field(default=None, description="Annotation, joined if originally a list.")
-    inputs: list[WorkflowInputParameter] = Field(
+    inputs: list[SerializeAsAny[BaseInputParameter]] = Field(
         default_factory=list, description="Always a list, shorthands expanded."
     )
     outputs: list[WorkflowOutputParameter] = Field(default_factory=list, description="Always a list.")
@@ -257,6 +259,7 @@ def normalized_format2(
         if "steps" not in workflow:
             workflow = {**workflow, "steps": {}}
         workflow = _pre_clean_steps(workflow)
+        workflow = _pre_normalize_input_types(workflow)
         workflow = GalaxyWorkflow.model_validate(workflow)
     assert isinstance(workflow, GalaxyWorkflow)
     return _normalize_workflow(workflow)
@@ -310,20 +313,28 @@ def _normalize_input_type(value: Any) -> Any:
     return value
 
 
+def _validate_input_dict(d: dict[str, Any]) -> BaseInputParameter:
+    """Validate an input dict using the specific discriminated type."""
+    type_val = d.get("type")
+    if isinstance(type_val, list):
+        return WorkflowInputParameter.model_validate(d)
+    return input_parameter_class(type_val).model_validate(d)
+
+
 def _normalize_inputs(
-    inputs: list[WorkflowInputParameter] | dict[str, WorkflowInputParameter | str] | dict[str, Any],
-) -> list[WorkflowInputParameter]:
+    inputs: list[BaseInputParameter] | dict[str, BaseInputParameter | str] | dict[str, Any] | Any,
+) -> list[BaseInputParameter]:
     if isinstance(inputs, list):
-        result = []
+        result: list[BaseInputParameter] = []
         for inp in inputs:
-            if isinstance(inp, WorkflowInputParameter):
+            if isinstance(inp, BaseInputParameter):
                 result.append(inp)
             elif isinstance(inp, dict):
                 if "type" in inp:
                     inp = {**inp, "type": _normalize_input_type(inp["type"])}
-                result.append(WorkflowInputParameter.model_validate(inp))
+                result.append(_validate_input_dict(inp))
             else:
-                result.append(WorkflowInputParameter.model_validate(inp))
+                result.append(_validate_input_dict(inp))
         return result
 
     # Dict form — keys are ids, values are WorkflowInputParameter, type string, or dict
@@ -332,8 +343,8 @@ def _normalize_inputs(
         if isinstance(value, str):
             # Shorthand: input_name: "data"
             normalized_type = _normalize_input_type(value)
-            result.append(WorkflowInputParameter.model_validate({"id": key, "type": normalized_type}))
-        elif isinstance(value, WorkflowInputParameter):
+            result.append(input_parameter_class(normalized_type)(id=key, type_=normalized_type))
+        elif isinstance(value, BaseInputParameter):
             if value.id is None:
                 value = value.model_copy(update={"id": key})
             result.append(value)
@@ -344,9 +355,9 @@ def _normalize_inputs(
                 value = {**value, "type": _normalize_input_type(value["type"])}
             if "format" in value and isinstance(value["format"], str):
                 value = {**value, "format": [value["format"]]}
-            result.append(WorkflowInputParameter.model_validate(value))
+            result.append(_validate_input_dict(value))
         else:
-            result.append(WorkflowInputParameter(id=key))
+            result.append(input_parameter_class(None)(id=key))
     return result
 
 
@@ -377,6 +388,33 @@ def _normalize_outputs(
     return result
 
 
+def _pre_normalize_input_types(workflow: dict[str, Any]) -> dict[str, Any]:
+    """Normalize input type aliases (File → data, etc.) before discriminator runs.
+
+    The discriminated union on ``Process.inputs`` routes based on the raw
+    ``type`` field, so alias normalization must happen before model validation.
+    """
+    inputs = workflow.get("inputs")
+    if inputs is None:
+        return workflow
+
+    def norm_entry(entry: Any) -> Any:
+        if isinstance(entry, dict) and "type" in entry:
+            return {**entry, "type": _normalize_input_type(entry["type"])}
+        if isinstance(entry, str):
+            return _normalize_input_type(entry)
+        return entry
+
+    new_inputs: dict[str, Any] | list[Any]
+    if isinstance(inputs, dict):
+        new_inputs = {k: norm_entry(v) for k, v in inputs.items()}
+    elif isinstance(inputs, list):
+        new_inputs = [norm_entry(v) for v in inputs]
+    else:
+        return workflow
+    return {**workflow, "inputs": new_inputs}
+
+
 def _pre_clean_steps(workflow: dict[str, Any]) -> dict[str, Any]:
     """Resolve ``$link`` entries in step state dicts before model validation.