Skip to content

Commit 8120e27

Browse files
jmchilton and claude committed
Remove dead connect syntax support, drop pop_connect_from_step_dict
The `connect:` key on format2 steps was never in the schema and has zero usage in Galaxy, Planemo, or gxformat2 tests (only a 2016 slide deck). Remove handling from pre-clean and delete `pop_connect_from_step_dict` from model.py. Rewrite tests to verify `$link` resolution through normalization instead of old dict helpers.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3aa8092 commit 8120e27

File tree

3 files changed

+104
-122
lines changed

3 files changed

+104
-122
lines changed

gxformat2/model.py

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,43 +61,6 @@ def get_native_step_type(gxformat2_step_dict: dict) -> _NativeGalaxyStepType:
6161
return step_type
6262

6363

64-
def pop_connect_from_step_dict(step: dict) -> dict:
65-
"""Merge 'in' and 'connect' keys into a unified connection dict separated from state.
66-
67-
Meant to be used an initial processing step in reasoning about connections defined by the
68-
format2 step description.
69-
"""
70-
if "connect" not in step:
71-
step["connect"] = {}
72-
73-
connect = step["connect"]
74-
del step["connect"]
75-
76-
# handle CWL-style in dict connections.
77-
if "in" in step:
78-
step_in = step["in"]
79-
assert isinstance(step_in, dict)
80-
connection_keys = set()
81-
for key, value in step_in.items():
82-
# TODO: this can be a list right?
83-
if isinstance(value, dict) and "source" in value:
84-
value = value["source"]
85-
elif isinstance(value, dict) and "default" in value:
86-
continue
87-
elif isinstance(value, dict):
88-
raise KeyError(f"step input must define either source or default {value}")
89-
connect[key] = [value]
90-
connection_keys.add(key)
91-
92-
for key in connection_keys:
93-
del step_in[key]
94-
95-
if len(step_in) == 0:
96-
del step["in"]
97-
98-
return connect
99-
100-
10164
def setup_connected_values(value, key: str = "", append_to: Optional[dict[str, list]] = None) -> Any:
10265
"""Replace links with connected value."""
10366

gxformat2/normalized/_format2.py

Lines changed: 26 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -290,13 +290,11 @@ def _normalize_outputs(
290290

291291

292292
def _pre_clean_steps(workflow: dict[str, Any]) -> dict[str, Any]:
293-
"""Resolve non-schema connection conventions in step dicts before model validation.
293+
"""Resolve ``$link`` entries in step state dicts before model validation.
294294
295-
Handles two conventions not in the Format2 schema:
296-
- ``connect`` key on steps → merged into ``in`` as source references
297-
- ``$link`` entries in ``state`` → replaced with ConnectedValue, source added to ``in``
298-
299-
This runs on the raw dict so that model validation sees only schema-compliant data.
295+
``$link`` in ``state`` is a Format2 shorthand for connections embedded in
296+
tool state. This replaces them with ConnectedValue markers and adds the
297+
connection source to ``in``, so the model layer sees only schema-compliant data.
300298
"""
301299
steps = workflow.get("steps", {})
302300
if isinstance(steps, dict):
@@ -309,44 +307,36 @@ def _pre_clean_steps(workflow: dict[str, Any]) -> dict[str, Any]:
309307

310308

311309
def _pre_clean_step(step: dict[str, Any]) -> dict[str, Any]:
312-
"""Resolve connect and $link on a single step dict."""
313-
step = dict(step)
314-
in_dict: dict[str, Any] = dict(step.get("in", {})) if isinstance(step.get("in"), dict) else {}
315-
in_list: list | None = step.get("in") if isinstance(step.get("in"), list) else None
316-
extra_inputs: list[dict[str, Any]] = []
317-
318-
# Resolve connect key → in entries
319-
connect = step.pop("connect", None)
320-
if isinstance(connect, dict):
321-
for key, sources in connect.items():
322-
if in_list is not None:
323-
extra_inputs.append({"id": key, "source": sources})
324-
else:
325-
in_dict[key] = {"source": sources} if isinstance(sources, list) else sources
326-
327-
# Resolve $link in state → ConnectedValue + in entries
310+
"""Resolve $link in state on a single step dict."""
328311
state = step.get("state")
329-
if isinstance(state, dict):
330-
clean_state, link_connections = _resolve_links(state)
331-
step["state"] = clean_state
332-
for key, sources in link_connections.items():
333-
source = sources if len(sources) > 1 else sources[0]
334-
if in_list is not None:
335-
extra_inputs.append({"id": key, "source": source})
336-
else:
312+
if not isinstance(state, dict):
313+
# Recursively clean subworkflow runs even if no state
314+
run = step.get("run")
315+
if isinstance(run, dict) and run.get("class") == "GalaxyWorkflow":
316+
return {**step, "run": _pre_clean_steps(run)}
317+
return step
318+
319+
step = dict(step)
320+
clean_state, link_connections = _resolve_links(state)
321+
step["state"] = clean_state
322+
323+
if link_connections:
324+
in_val = step.get("in")
325+
if isinstance(in_val, list):
326+
extra = [{"id": k, "source": srcs if len(srcs) > 1 else srcs[0]} for k, srcs in link_connections.items()]
327+
step["in"] = in_val + extra
328+
else:
329+
in_dict = dict(in_val) if isinstance(in_val, dict) else {}
330+
for key, sources in link_connections.items():
331+
source = sources if len(sources) > 1 else sources[0]
337332
in_dict[key] = {"source": source} if isinstance(source, list) else source
333+
step["in"] = in_dict
338334

339335
# Recursively clean subworkflow runs
340336
run = step.get("run")
341337
if isinstance(run, dict) and run.get("class") == "GalaxyWorkflow":
342338
step["run"] = _pre_clean_steps(run)
343339

344-
# Write back in
345-
if in_list is not None:
346-
step["in"] = in_list + extra_inputs
347-
elif in_dict:
348-
step["in"] = in_dict
349-
350340
return step
351341

352342

tests/test_model_helpers.py

Lines changed: 78 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,78 @@
1-
from gxformat2.model import (
2-
pop_connect_from_step_dict,
3-
setup_connected_values,
4-
)
5-
6-
7-
def test_pop_connect():
8-
raw_step = {
9-
"in": {
10-
"bar": {
11-
"source": "foo/moo",
12-
},
13-
},
14-
}
15-
connect = pop_connect_from_step_dict(raw_step)
16-
assert connect["bar"] == ["foo/moo"]
17-
assert "in" not in raw_step
18-
19-
20-
def test_pop_connect_preserves_defaults():
21-
raw_step = {
22-
"in": {
23-
"bar": {
24-
"default": 7,
25-
},
26-
},
27-
}
28-
connect = pop_connect_from_step_dict(raw_step)
29-
assert "bar" not in connect
30-
assert "in" in raw_step
31-
32-
33-
def test_setup_connected_values():
34-
raw_state = {
35-
"input": {"$link": "moo/cow"},
36-
}
37-
connect = {}
38-
setup_connected_values(raw_state, append_to=connect)
39-
assert connect["input"][0] == "moo/cow"
40-
41-
42-
def test_setup_connected_values_in_array():
43-
raw_state = {
44-
"input": [{"$link": "moo/cow"}, {"$link": "moo/cow2"}],
45-
}
46-
connect = {}
47-
setup_connected_values(raw_state, append_to=connect)
48-
assert connect["input"][0] == "moo/cow"
49-
assert connect["input"][1] == "moo/cow2"
1+
"""Tests for connection resolution during normalization."""
2+
3+
from gxformat2.to_format2 import ensure_format2
4+
from gxformat2.yaml import ordered_load
5+
6+
7+
def test_link_in_state_resolved():
8+
"""$link in state → ConnectedValue in state + source in in_."""
9+
nf2 = ensure_format2(ordered_load("""
10+
class: GalaxyWorkflow
11+
inputs:
12+
moo: data
13+
steps:
14+
s1:
15+
tool_id: cat1
16+
state:
17+
input:
18+
$link: moo/cow
19+
"""))
20+
step = nf2.steps[0]
21+
assert step.state["input"] == {"__class__": "ConnectedValue"}
22+
sources = {si.id: si.source for si in step.in_}
23+
assert sources["input"] == "moo/cow"
24+
25+
26+
def test_link_array_in_state_resolved():
27+
"""Multiple $link in array → ConnectedValue markers + sources in in_."""
28+
nf2 = ensure_format2(ordered_load("""
29+
class: GalaxyWorkflow
30+
inputs:
31+
moo: data
32+
steps:
33+
s1:
34+
tool_id: cat1
35+
state:
36+
input:
37+
- $link: moo/cow
38+
- $link: moo/cow2
39+
"""))
40+
step = nf2.steps[0]
41+
assert step.state["input"] == [None, None]
42+
sources = {si.id: si.source for si in step.in_}
43+
assert "moo/cow" in sources["input"]
44+
assert "moo/cow2" in sources["input"]
45+
46+
47+
def test_in_source_preserved():
48+
"""Regular in sources pass through to in_."""
49+
nf2 = ensure_format2(ordered_load("""
50+
class: GalaxyWorkflow
51+
inputs:
52+
foo: data
53+
steps:
54+
s1:
55+
tool_id: cat1
56+
in:
57+
bar: foo/moo
58+
"""))
59+
step = nf2.steps[0]
60+
sources = {si.id: si.source for si in step.in_}
61+
assert sources["bar"] == "foo/moo"
62+
63+
64+
def test_in_default_preserved():
65+
"""in entries with default (no source) preserved."""
66+
nf2 = ensure_format2(ordered_load("""
67+
class: GalaxyWorkflow
68+
inputs: {}
69+
steps:
70+
s1:
71+
tool_id: cat1
72+
in:
73+
bar:
74+
default: 7
75+
"""))
76+
step = nf2.steps[0]
77+
defaults = {si.id: si.default for si in step.in_}
78+
assert defaults["bar"] == 7

0 commit comments

Comments
 (0)