Merge pull request #177 from jmchilton/mermaid_docs

jmchilton · web-flow · commit 9b0170d63323 · 2026-04-04T17:32:13.000-04:00
Mermaid docs
diff --git a/docs/_ext/examples_catalog.py b/docs/_ext/examples_catalog.py
@@ -11,6 +11,7 @@
 
 from gxformat2.cytoscape import cytoscape_elements
 from gxformat2.examples import EXAMPLES_DIR, load_catalog
+from gxformat2.mermaid import workflow_to_mermaid
 
 GITHUB_BASE = "https://github.com/galaxyproject/gxformat2/blob/main"
 
@@ -197,6 +198,11 @@ def _section(self, title, entries):
             if viz_node is not None:
                 entry_section += viz_node
 
+            # Mermaid diagram
+            mermaid_node = self._build_mermaid(entry)
+            if mermaid_node is not None:
+                entry_section += mermaid_node
+
             # Workflow source (collapsible)
             contents = entry.load_contents()
             if entry.format == "format2":
@@ -239,6 +245,23 @@ def _build_viz(self, entry):
         )
         return nodes.raw("", iframe_html, format="html")
 
+    def _build_mermaid(self, entry):
+        """Generate a mermaid diagram node, or None on failure.
+
+        The diagram is rendered from ``entry.path`` with frame comments
+        enabled and wrapped in a container carrying the ``toggle`` class
+        together with a "Mermaid diagram" caption.
+
+        Diagram generation is best-effort: any exception raised while
+        converting the workflow is logged as a warning and ``None`` is
+        returned so the rest of the catalog entry still renders.
+        """
+        try:
+            diagram = workflow_to_mermaid(entry.path, comments=True)
+        except Exception as exc:
+            # Keep the docs build going, but leave a trace instead of
+            # silently dropping the diagram.
+            import logging
+
+            logging.getLogger(__name__).warning("Skipping Mermaid diagram for %s: %s", entry.path, exc)
+            return None
+
+        # Imported lazily so the extension module can be imported without
+        # sphinxcontrib-mermaid unless a diagram is actually built.
+        from sphinxcontrib.mermaid import mermaid as mermaid_node
+
+        container = nodes.container(classes=["toggle"])
+        container += nodes.caption(text="Mermaid diagram")
+        node = mermaid_node()
+        node["code"] = diagram
+        node["options"] = {}
+        container += node
+        return container
+
     def _field(self, name, value):
         field = nodes.field()
         field += nodes.field_name(text=name)
diff --git a/docs/conf.py b/docs/conf.py
@@ -18,6 +18,7 @@
     "sphinx.ext.viewcode",
     "sphinxarg.ext",
     "sphinx_design",
+    "sphinxcontrib.mermaid",
     "examples_catalog",
 ]
 
diff --git a/gxformat2/mermaid/__init__.py b/gxformat2/mermaid/__init__.py
@@ -0,0 +1,10 @@
+"""Mermaid flowchart visualization for Galaxy workflows."""
+
+from ._builder import workflow_to_mermaid
+from ._cli import main, to_mermaid
+
+__all__ = (
+    "main",
+    "to_mermaid",
+    "workflow_to_mermaid",
+)
diff --git a/gxformat2/mermaid/_builder.py b/gxformat2/mermaid/_builder.py
@@ -0,0 +1,154 @@
+"""Build Mermaid flowchart diagrams from Galaxy workflows."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from gxformat2.normalized import ensure_format2, NormalizedFormat2
+from gxformat2.schema.gxformat2 import FrameComment, GalaxyWorkflow, WorkflowInputParameter
+
+# Standard Mermaid shape wrappers: (open, close) bracket pairs.
+#   >label]   = asymmetric / flag (inputs)
+#   {{label}} = hexagon (parameter inputs)
+#   [[label]] = subroutine (subworkflows)
+#   [label]   = rectangle (tool steps, default)
+SHAPE_INPUT = (">", "]")
+SHAPE_PARAM = ("{{", "}}")
+SHAPE_TOOL = ("[", "]")
+SHAPE_SUBWORKFLOW = ("[[", "]]")
+
+# Maps an input type string or a step type string to the shape wrapper used
+# for its node. Callers fall back to SHAPE_INPUT (inputs) or SHAPE_TOOL
+# (steps) for keys that are not listed here.
+STEP_TYPE_SHAPES = {
+    "data": SHAPE_INPUT,
+    "collection": SHAPE_INPUT,
+    "integer": SHAPE_PARAM,
+    "float": SHAPE_PARAM,
+    "text": SHAPE_PARAM,
+    "boolean": SHAPE_PARAM,
+    "color": SHAPE_PARAM,
+    "input": SHAPE_INPUT,
+    "tool": SHAPE_TOOL,
+    "subworkflow": SHAPE_SUBWORKFLOW,
+}
+
+# Prefix stripped from tool ids before they are used in fallback node labels.
+MAIN_TS_PREFIX = "toolshed.g2.bx.psu.edu/repos/"
+
+
+def _sanitize_label(label: str) -> str:
+    """Replace Mermaid-significant characters in *label* with entity codes.
+
+    Double quotes become ``#quot;``; each of ``()[]{}<>`` becomes
+    ``#<codepoint>;``.
+    """
+    # Insertion order matters only for readability here: none of the
+    # replacement strings contain a character that a later entry rewrites.
+    entities = {'"': "#quot;"}
+    entities.update({bracket: f"#{ord(bracket)};" for bracket in "()[]{}<>"})
+    for raw, entity in entities.items():
+        label = label.replace(raw, entity)
+    return label
+
+
+def _input_type_str(inp: WorkflowInputParameter) -> str:
+    """Return the type name of a workflow input as a plain string.
+
+    A missing type or an empty type list yields ``"input"``; a non-empty
+    list yields the ``value`` of its first element; anything else yields
+    its own ``value``.
+    """
+    declared = inp.type_
+    if isinstance(declared, list):
+        if not declared:
+            return "input"
+        # Only the first declared type decides the node shape.
+        declared = declared[0]
+    elif declared is None:
+        return "input"
+    return declared.value
+
+
+def _node_line(node_id: str, label: str, shape: tuple[str, str]) -> str:
+    """Format one Mermaid node declaration: id, opening bracket, quoted label, closing bracket."""
+    opener, closer = shape
+    return '{}{}"{}"{}'.format(node_id, opener, label, closer)
+
+
+def workflow_to_mermaid(
+    workflow: dict[str, Any] | str | Path | GalaxyWorkflow | NormalizedFormat2,
+    *,
+    comments: bool = False,
+) -> str:
+    """Render a Galaxy workflow as Mermaid ``graph LR`` flowchart text.
+
+    *workflow* may be a ``NormalizedFormat2`` instance, which is used as-is,
+    or anything else, which is passed through ``ensure_format2()``.
+
+    With *comments* set, every ``FrameComment`` that lists contained steps
+    becomes a Mermaid subgraph wrapping the nodes it references; all other
+    nodes are declared at the top level. Edges are emitted once per
+    (source node, target step) pair.
+
+    Returns the diagram lines joined with newlines (no trailing newline).
+    """
+    nf2 = workflow if isinstance(workflow, NormalizedFormat2) else ensure_format2(workflow)
+
+    lines = ["graph LR"]
+
+    # Inputs: map each input label to its node id and to its declaration.
+    input_node_ids: dict[str, str] = {}
+    input_decls: dict[str, str] = {}
+    for index, inp in enumerate(nf2.inputs):
+        key = inp.id or str(index)
+        node = f"input_{index}"
+        input_node_ids[key] = node
+        safe_key = _sanitize_label(key)
+        kind = _input_type_str(inp)
+        shape = STEP_TYPE_SHAPES.get(kind, SHAPE_INPUT)
+        input_decls[key] = _node_line(node, f"{safe_key}<br/><i>{kind}</i>", shape)
+
+    # Steps: same two mappings, keyed by label (falling back to id).
+    step_node_ids: dict[str, str] = {}
+    step_decls: dict[str, str] = {}
+    for index, step in enumerate(nf2.steps):
+        key = step.label or step.id
+        node = f"step_{index}"
+        step_node_ids[key] = node
+
+        short_tool = step.tool_id
+        if short_tool and short_tool.startswith(MAIN_TS_PREFIX):
+            short_tool = short_tool[len(MAIN_TS_PREFIX) :]
+
+        # Last-resort caption when the step has neither label nor id.
+        fallback = f"tool:{short_tool}" if short_tool else str(index)
+        caption = _sanitize_label(key or fallback)
+        kind = step.type_.value if step.type_ else "tool"
+        step_decls[key] = _node_line(node, caption, STEP_TYPE_SHAPES.get(kind, SHAPE_TOOL))
+
+    # Frame comments with contents, and the set of labels they claim.
+    frames: list[FrameComment] = []
+    framed: set[str] = set()
+    if comments:
+        frames = [c for c in nf2.comments if isinstance(c, FrameComment) and c.contains_steps]
+        framed = {str(ref) for frame in frames for ref in frame.contains_steps}
+
+    # Unframed nodes are declared at the top level: inputs first, then steps.
+    lines.extend(f"    {decl}" for key, decl in input_decls.items() if key not in framed)
+    lines.extend(f"    {decl}" for key, decl in step_decls.items() if key not in framed)
+
+    # Each frame becomes a subgraph holding the declarations it references.
+    for index, frame in enumerate(frames):
+        heading = _sanitize_label(frame.title or f"Group {index}")
+        lines.append(f'    subgraph sub_{index} ["{heading}"]')
+        for ref in frame.contains_steps or []:
+            key = str(ref)
+            # Inputs take precedence over steps when a label matches both.
+            decl = input_decls.get(key)
+            if decl is None:
+                decl = step_decls.get(key)
+            if decl is not None:
+                lines.append(f"        {decl}")
+        lines.append("    end")
+
+    # Edges, skipping duplicates and sources that resolve to no known node.
+    emitted: set[tuple[str, str]] = set()
+    for index, step in enumerate(nf2.steps):
+        target = f"step_{index}"
+        for step_input in step.in_:
+            raw = step_input.source
+            if raw is None:
+                continue
+            for source in raw if isinstance(raw, list) else [raw]:
+                origin_label = nf2.resolve_source(source).step_label
+                origin = input_node_ids.get(origin_label) or step_node_ids.get(origin_label)
+                if not origin:
+                    continue
+                edge = (origin, target)
+                if edge in emitted:
+                    continue
+                emitted.add(edge)
+                lines.append(f"    {origin} --> {target}")
+
+    return "\n".join(lines)
diff --git a/gxformat2/mermaid/_cli.py b/gxformat2/mermaid/_cli.py
@@ -0,0 +1,50 @@
+"""Command-line interface for Mermaid workflow diagram generation."""
+
+import sys
+
+from ._builder import workflow_to_mermaid
+
+SCRIPT_DESCRIPTION = """
+Convert a Galaxy workflow (Format 2 or native .ga) into a Mermaid flowchart
+diagram.
+
+Outputs Mermaid markdown to stdout by default, or to a file if an output
+path is provided. If the output path ends with .md, the diagram is wrapped
+in a fenced code block.
+"""
+
+
+def to_mermaid(workflow_path: str, output_path=None, *, comments: bool = False):
+    """Produce mermaid output for the supplied workflow path.
+
+    With no *output_path* the diagram is printed to stdout. Otherwise it is
+    written as UTF-8 to *output_path*, which may be a ``str`` or an
+    ``os.PathLike`` such as ``pathlib.Path``. When the path ends with
+    ``.md`` the diagram is wrapped in a fenced ``mermaid`` code block.
+
+    *comments* is forwarded to ``workflow_to_mermaid``.
+    """
+    diagram = workflow_to_mermaid(workflow_path, comments=comments)
+
+    if output_path is None:
+        print(diagram)
+        return
+
+    import os
+
+    # Normalize so pathlib.Path targets work with the suffix check below.
+    target = os.fspath(output_path)
+    if target.endswith(".md"):
+        content = f"```mermaid\n{diagram}\n```\n"
+    else:
+        content = diagram + "\n"
+
+    # Labels can contain non-ASCII text; do not depend on the locale encoding.
+    with open(target, "w", encoding="utf-8") as f:
+        f.write(content)
+
+
+def main(argv=None):
+    """Parse command-line arguments and emit the Mermaid diagram.
+
+    *argv* defaults to ``sys.argv[1:]`` when not supplied.
+    """
+    cli_args = sys.argv[1:] if argv is None else argv
+    options = _parser().parse_args(cli_args)
+    to_mermaid(options.input_path, options.output_path, comments=options.comments)
+
+
+def _parser():
+    """Build the argument parser: INPUT, optional OUTPUT, and ``--comments``."""
+    from argparse import ArgumentParser
+
+    cli = ArgumentParser(description=SCRIPT_DESCRIPTION)
+    cli.add_argument(
+        "input_path",
+        metavar="INPUT",
+        type=str,
+        help="input workflow path (.ga/gxwf.yml)",
+    )
+    # OUTPUT is optional; when omitted, output_path parses to None.
+    cli.add_argument(
+        "output_path",
+        metavar="OUTPUT",
+        type=str,
+        nargs="?",
+        help="output path (.mmd/.md)",
+    )
+    cli.add_argument(
+        "--comments",
+        action="store_true",
+        default=False,
+        help="render frame comments as subgraphs",
+    )
+    return cli
diff --git a/pyproject.toml b/pyproject.toml
@@ -40,6 +40,7 @@ gxwf-to-format2 = "gxformat2.export:main"
 gxwf-lint = "gxformat2.lint:main"
 gxwf-viz = "gxformat2.cytoscape:main"
 gxwf-abstract-export = "gxformat2.abstract:main"
+gxwf-mermaid = "gxformat2.mermaid:main"
 
 [project.urls]
 Homepage = "https://github.com/galaxyproject/gxformat2"
@@ -72,6 +73,7 @@ docs = [
     "sphinx-rtd-theme",
     "myst-parser",
     "sphinx-argparse",
+    "sphinxcontrib-mermaid",
 ]
 
 [tool.ruff]
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@`
`18`	`18`	`"sphinx.ext.viewcode",`
`19`	`19`	`"sphinxarg.ext",`
`20`	`20`	`"sphinx_design",`
	`21`	`+ "sphinxcontrib.mermaid",`
`21`	`22`	`"examples_catalog",`
`22`	`23`	`]`
`23`	`24`