Skip to content

Commit 08ad4a3

Browse files
refactor: resolve 32 SonarQube issues (S3776, S1172, S5855, S5857)
Reduce cognitive complexity across 7 files by extracting focused helper functions. No behavioral changes — only structural decomposition. ast_utils.py: Extract _extract_annotation_names_from_modifiers, _collect_csharp_attrs helpers. builder.py: Remove unused collection_id param. Extract _handle_file_level_edge and _handle_symbol_level_edge. rules.py: Remove unused content param from _include_decorators. schema.py: Fix redundant regex (S5855), reluctant quantifier (S5857). Extract ORM/Django/Prisma/LLM parsing helpers. tests.py: Replace detect_test_framework if/elif with lookup tuple. Extract per-language helpers for Java, Go, PHP, Ruby, C# extraction. ui.py: Split extract_ui_from_file into backend/JS/inference paths. Replace route dispatch with dict. Extract per-language route helpers. flows.py: Extract _kg_load_rule_candidate_docs and _kg_extract_rules_from_docs.
1 parent 39034ca commit 08ad4a3

7 files changed

Lines changed: 1153 additions & 909 deletions

File tree

apps/worker/contextmine_worker/flows.py

Lines changed: 49 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -928,57 +928,72 @@ async def _extract_schema_fallback(
928928
return aggregate_schema_extractions(extractions)
929929

930930

931+
async def _kg_load_rule_candidate_docs(
    session: object,
    source_uuid: object,
    changed_doc_ids: list[str] | None,
) -> list:
    """Load documents eligible for business rule extraction.

    Args:
        session: Active async DB session used to execute the query.
        source_uuid: Source to scan when no changed-document filter is given.
        changed_doc_ids: Optional list of document UUID strings; when
            non-empty, only these documents are loaded.

    Returns:
        List of ``(id, uri, content_markdown)`` rows.
    """
    import uuid as uuid_module

    from contextmine_core.models import Document

    # Build the shared projection once; the two modes differ only in the
    # WHERE clause (previously the whole SELECT was duplicated per branch).
    stmt = select(Document.id, Document.uri, Document.content_markdown)
    if changed_doc_ids:
        stmt = stmt.where(
            Document.id.in_([uuid_module.UUID(d) for d in changed_doc_ids])
        )
    else:
        stmt = stmt.where(Document.source_id == source_uuid)
    result = await session.execute(stmt)
    return result.all()
954+
955+
956+
async def _kg_extract_rules_from_docs(docs: list, research_llm: object) -> list:
    """Run rule extraction on a list of documents, filtering by language.

    Documents with no content, ignored repo paths, and files whose language
    cannot be detected are skipped. Per-file extraction failures are logged
    at debug level and never abort the loop (best-effort semantics).

    Args:
        docs: Rows of ``(doc_id, uri, content)``.
        research_llm: LLM handle passed through to the rule extractor.

    Returns:
        List of extraction results that produced at least one rule.
    """
    from contextmine_core.analyzer.extractors.rules import extract_rules_from_file
    from contextmine_core.treesitter.languages import detect_language

    extractions: list = []
    for _doc_id, uri, body in docs:
        if not body:
            continue
        path = _uri_to_file_path(uri)
        # Skip vendored/ignored paths and files we cannot parse.
        if _is_ignored_repo_path(path):
            continue
        if detect_language(path) is None:
            continue
        try:
            extracted = await extract_rules_from_file(path, body, research_llm)
            if extracted.rules:
                extractions.append(extracted)
        except Exception as e:
            logger.debug("Rule extraction failed for %s: %s", path, e)
    return extractions
975+
976+
931977
async def _kg_extract_business_rules(
932978
source_uuid: object,
933979
collection_uuid: object,
934980
changed_doc_ids: list[str] | None,
935981
research_llm: object,
936982
) -> int:
937983
"""Extract business rules from code files using LLM. Returns rules created count."""
938-
import uuid as uuid_module
939-
940-
from contextmine_core.analyzer.extractors.rules import (
941-
build_rules_graph,
942-
extract_rules_from_file,
943-
)
944-
from contextmine_core.models import Document
945-
from contextmine_core.treesitter.languages import detect_language
984+
from contextmine_core.analyzer.extractors.rules import build_rules_graph
946985

947986
if changed_doc_ids is not None and len(changed_doc_ids) == 0:
948987
if await _kg_has_business_rules(collection_uuid):
949988
logger.info("No changed documents and business rules exist - skipping extraction")
950989
return 0
951990
logger.info("No changed documents but no business rules found - running initial extraction")
952991

953-
all_extractions = []
954992
async with get_session() as session:
955-
if changed_doc_ids:
956-
result = await session.execute(
957-
select(Document.id, Document.uri, Document.content_markdown).where(
958-
Document.id.in_([uuid_module.UUID(d) for d in changed_doc_ids])
959-
)
960-
)
961-
else:
962-
result = await session.execute(
963-
select(Document.id, Document.uri, Document.content_markdown).where(
964-
Document.source_id == source_uuid
965-
)
966-
)
967-
docs = result.all()
993+
docs = await _kg_load_rule_candidate_docs(session, source_uuid, changed_doc_ids)
968994
logger.info("Extracting business rules from %d documents", len(docs))
969995

970-
for _doc_id, uri, content in docs:
971-
if not content:
972-
continue
973-
file_path = _uri_to_file_path(uri)
974-
if _is_ignored_repo_path(file_path) or detect_language(file_path) is None:
975-
continue
976-
try:
977-
rule_result = await extract_rules_from_file(file_path, content, research_llm)
978-
if rule_result.rules:
979-
all_extractions.append(rule_result)
980-
except Exception as e:
981-
logger.debug("Rule extraction failed for %s: %s", file_path, e)
996+
all_extractions = await _kg_extract_rules_from_docs(docs, research_llm)
982997

983998
if all_extractions:
984999
rule_stats = await build_rules_graph(session, collection_uuid, all_extractions)

packages/core/contextmine_core/analyzer/extractors/ast_utils.py

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,24 @@ def ruby_first_string_arg(content: str, call_node: Any) -> str | None:
120120

121121
def java_annotation_names(content: str, node: Any) -> list[str]:
    """Extract annotation names from a Java method/class node's modifiers.

    Looks at the node's parent for ``modifiers`` children and collects the
    lowercased annotation identifiers found there. Returns an empty list
    when the node has no parent.
    """
    parent = node.parent
    if parent is None:
        return []
    # Flatten annotation names across every "modifiers" child of the parent.
    return [
        name
        for child in parent.children
        if child.type == "modifiers"
        for name in _extract_annotation_names_from_modifiers(content, child)
    ]
131+
132+
133+
def _extract_annotation_names_from_modifiers(content: str, modifiers_node: Any) -> list[str]:
    """Extract annotation names from a Java modifiers node.

    Collects the identifier of each ``marker_annotation``/``annotation``
    child, normalized to stripped lowercase text.
    """
    annotation_types = {"marker_annotation", "annotation"}
    collected: list[str] = []
    for mod in modifiers_node.children:
        if mod.type not in annotation_types:
            continue
        name_node = first_child(mod, "identifier")
        # Only record annotations whose identifier node is present.
        if name_node:
            collected.append(node_text(content, name_node).strip().lower())
    return collected
135142

136143

@@ -144,24 +151,25 @@ def csharp_attribute_names(content: str, node: Any) -> set[str]:
144151
if child is node:
145152
break
146153
if child.type == "attribute_list":
147-
for attr in walk(child):
148-
if attr.type in {"identifier", "attribute"}:
149-
name = node_text(content, attr).strip().lower()
150-
if name.endswith("attribute"):
151-
name = name[: -len("attribute")]
152-
attrs.add(name)
154+
_collect_csharp_attrs(content, child, attrs)
153155
# Also check direct children
154156
for child in node.children:
155157
if child.type == "attribute_list":
156-
for attr in walk(child):
157-
if attr.type in {"identifier", "attribute"}:
158-
name = node_text(content, attr).strip().lower()
159-
if name.endswith("attribute"):
160-
name = name[: -len("attribute")]
161-
attrs.add(name)
158+
_collect_csharp_attrs(content, child, attrs)
162159
return attrs
163160

164161

162+
def _collect_csharp_attrs(content: str, attr_list_node: Any, attrs: set[str]) -> None:
    """Walk an attribute_list node and add normalized attribute names to the set.

    Names are stripped, lowercased, and have a trailing "attribute" suffix
    removed (C# convention: ``[FooAttribute]`` equals ``[Foo]``). Mutates
    *attrs* in place; returns nothing.
    """
    for attr in walk(attr_list_node):
        if attr.type in {"identifier", "attribute"}:
            normalized = node_text(content, attr).strip().lower()
            attrs.add(normalized.removesuffix("attribute"))
171+
172+
165173
# ---------------------------------------------------------------------------
166174
# JS/TS AST helpers shared between the tests and UI extractors
167175
# ---------------------------------------------------------------------------

packages/core/contextmine_core/analyzer/extractors/rules.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def _traverse_for_units(
246246
name = _get_node_name(content, node)
247247
if name:
248248
# Include preceding decorators/annotations
249-
actual_start = _include_decorators(content, node)
249+
actual_start = _include_decorators(node)
250250
actual_start_line = content[:actual_start].count("\n") + 1
251251
units.append(
252252
_CodeUnit(
@@ -265,7 +265,7 @@ def _traverse_for_units(
265265
_traverse_for_units(content, child, func_types, class_types, units, parent_name)
266266

267267

268-
def _include_decorators(content: str, node: Any) -> int:
268+
def _include_decorators(node: Any) -> int:
269269
"""Return the byte offset including any decorators/annotations before the node."""
270270
start = node.start_byte
271271
prev = node.prev_sibling

0 commit comments

Comments
 (0)