Skip to content

Commit 08ad4a3

Browse files
refactor: resolve 32 SonarQube issues (S3776, S1172, S5855, S5857)
Reduce cognitive complexity across 7 files by extracting focused helper functions. No behavioral changes — only structural decomposition. ast_utils.py: Extract _extract_annotation_names_from_modifiers, _collect_csharp_attrs helpers. builder.py: Remove unused collection_id param. Extract _handle_file_level_edge and _handle_symbol_level_edge. rules.py: Remove unused content param from _include_decorators. schema.py: Fix redundant regex (S5855), reluctant quantifier (S5857). Extract ORM/Django/Prisma/LLM parsing helpers. tests.py: Replace detect_test_framework if/elif with lookup tuple. Extract per-language helpers for Java, Go, PHP, Ruby, C# extraction. ui.py: Split extract_ui_from_file into backend/JS/inference paths. Replace route dispatch with dict. Extract per-language route helpers. flows.py: Extract _kg_load_rule_candidate_docs and _kg_extract_rules_from_docs.
1 parent 39034ca commit 08ad4a3

7 files changed

Lines changed: 1153 additions & 909 deletions

File tree

apps/worker/contextmine_worker/flows.py

Lines changed: 49 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -928,57 +928,72 @@ async def _extract_schema_fallback(
928928
return aggregate_schema_extractions(extractions)
929929

930930

931+
async def _kg_load_rule_candidate_docs(
    session: object,
    source_uuid: object,
    changed_doc_ids: list[str] | None,
) -> list:
    """Load documents eligible for business rule extraction.

    Args:
        session: Active async DB session used to execute the query.
        source_uuid: Source to scan when no changed-document filter is given.
        changed_doc_ids: Optional list of document UUID strings; when
            non-empty, only these documents are loaded.

    Returns:
        List of ``(id, uri, content_markdown)`` rows.
    """
    import uuid as uuid_module

    from contextmine_core.models import Document

    # Build the shared projection once; the two modes differ only in the
    # WHERE clause (previously the whole SELECT was duplicated per branch).
    stmt = select(Document.id, Document.uri, Document.content_markdown)
    if changed_doc_ids:
        stmt = stmt.where(
            Document.id.in_([uuid_module.UUID(d) for d in changed_doc_ids])
        )
    else:
        stmt = stmt.where(Document.source_id == source_uuid)
    result = await session.execute(stmt)
    return result.all()
954+
955+
956+
async def _kg_extract_rules_from_docs(docs: list, research_llm: object) -> list:
    """Run rule extraction on a list of documents, filtering by language.

    Documents with no content, ignored repo paths, and files whose language
    cannot be detected are skipped. Per-file extraction failures are logged
    at debug level and never abort the loop (best-effort semantics).

    Args:
        docs: Rows of ``(doc_id, uri, content)``.
        research_llm: LLM handle passed through to the rule extractor.

    Returns:
        List of extraction results that produced at least one rule.
    """
    from contextmine_core.analyzer.extractors.rules import extract_rules_from_file
    from contextmine_core.treesitter.languages import detect_language

    extractions: list = []
    for _doc_id, uri, body in docs:
        if not body:
            continue
        path = _uri_to_file_path(uri)
        # Skip vendored/ignored paths and files we cannot parse.
        if _is_ignored_repo_path(path):
            continue
        if detect_language(path) is None:
            continue
        try:
            extracted = await extract_rules_from_file(path, body, research_llm)
            if extracted.rules:
                extractions.append(extracted)
        except Exception as e:
            logger.debug("Rule extraction failed for %s: %s", path, e)
    return extractions
975+
976+
931977
async def _kg_extract_business_rules(
932978
source_uuid: object,
933979
collection_uuid: object,
934980
changed_doc_ids: list[str] | None,
935981
research_llm: object,
936982
) -> int:
937983
"""Extract business rules from code files using LLM. Returns rules created count."""
938-
import uuid as uuid_module
939-
940-
from contextmine_core.analyzer.extractors.rules import (
941-
build_rules_graph,
942-
extract_rules_from_file,
943-
)
944-
from contextmine_core.models import Document
945-
from contextmine_core.treesitter.languages import detect_language
984+
from contextmine_core.analyzer.extractors.rules import build_rules_graph
946985

947986
if changed_doc_ids is not None and len(changed_doc_ids) == 0:
948987
if await _kg_has_business_rules(collection_uuid):
949988
logger.info("No changed documents and business rules exist - skipping extraction")
950989
return 0
951990
logger.info("No changed documents but no business rules found - running initial extraction")
952991

953-
all_extractions = []
954992
async with get_session() as session:
955-
if changed_doc_ids:
956-
result = await session.execute(
957-
select(Document.id, Document.uri, Document.content_markdown).where(
958-
Document.id.in_([uuid_module.UUID(d) for d in changed_doc_ids])
959-
)
960-
)
961-
else:
962-
result = await session.execute(
963-
select(Document.id, Document.uri, Document.content_markdown).where(
964-
Document.source_id == source_uuid
965-
)
966-
)
967-
docs = result.all()
993+
docs = await _kg_load_rule_candidate_docs(session, source_uuid, changed_doc_ids)
968994
logger.info("Extracting business rules from %d documents", len(docs))
969995

970-
for _doc_id, uri, content in docs:
971-
if not content:
972-
continue
973-
file_path = _uri_to_file_path(uri)
974-
if _is_ignored_repo_path(file_path) or detect_language(file_path) is None:
975-
continue
976-
try:
977-
rule_result = await extract_rules_from_file(file_path, content, research_llm)
978-
if rule_result.rules:
979-
all_extractions.append(rule_result)
980-
except Exception as e:
981-
logger.debug("Rule extraction failed for %s: %s", file_path, e)
996+
all_extractions = await _kg_extract_rules_from_docs(docs, research_llm)
982997

983998
if all_extractions:
984999
rule_stats = await build_rules_graph(session, collection_uuid, all_extractions)

packages/core/contextmine_core/analyzer/extractors/ast_utils.py

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,24 @@ def ruby_first_string_arg(content: str, call_node: Any) -> str | None:
120120

121121
def java_annotation_names(content: str, node: Any) -> list[str]:
    """Extract annotation names from a Java method/class node's modifiers.

    Looks at the node's parent for ``modifiers`` children and collects the
    lowercased annotation identifiers found there. Returns an empty list
    when the node has no parent.
    """
    parent = node.parent
    if parent is None:
        return []
    # Flatten annotation names across every "modifiers" child of the parent.
    return [
        name
        for child in parent.children
        if child.type == "modifiers"
        for name in _extract_annotation_names_from_modifiers(content, child)
    ]
131+
132+
133+
def _extract_annotation_names_from_modifiers(content: str, modifiers_node: Any) -> list[str]:
    """Extract annotation names from a Java modifiers node.

    Collects the identifier of each ``marker_annotation``/``annotation``
    child, normalized to stripped lowercase text.
    """
    annotation_types = {"marker_annotation", "annotation"}
    collected: list[str] = []
    for mod in modifiers_node.children:
        if mod.type not in annotation_types:
            continue
        name_node = first_child(mod, "identifier")
        # Only record annotations whose identifier node is present.
        if name_node:
            collected.append(node_text(content, name_node).strip().lower())
    return collected
135142

136143

@@ -144,24 +151,25 @@ def csharp_attribute_names(content: str, node: Any) -> set[str]:
144151
if child is node:
145152
break
146153
if child.type == "attribute_list":
147-
for attr in walk(child):
148-
if attr.type in {"identifier", "attribute"}:
149-
name = node_text(content, attr).strip().lower()
150-
if name.endswith("attribute"):
151-
name = name[: -len("attribute")]
152-
attrs.add(name)
154+
_collect_csharp_attrs(content, child, attrs)
153155
# Also check direct children
154156
for child in node.children:
155157
if child.type == "attribute_list":
156-
for attr in walk(child):
157-
if attr.type in {"identifier", "attribute"}:
158-
name = node_text(content, attr).strip().lower()
159-
if name.endswith("attribute"):
160-
name = name[: -len("attribute")]
161-
attrs.add(name)
158+
_collect_csharp_attrs(content, child, attrs)
162159
return attrs
163160

164161

162+
def _collect_csharp_attrs(content: str, attr_list_node: Any, attrs: set[str]) -> None:
    """Walk an attribute_list node and add normalized attribute names to the set.

    Names are stripped, lowercased, and have a trailing "attribute" suffix
    removed (C# convention: ``[FooAttribute]`` equals ``[Foo]``). Mutates
    *attrs* in place; returns nothing.
    """
    for attr in walk(attr_list_node):
        if attr.type in {"identifier", "attribute"}:
            normalized = node_text(content, attr).strip().lower()
            attrs.add(normalized.removesuffix("attribute"))
171+
172+
165173
# ---------------------------------------------------------------------------
166174
# JS/TS AST helpers shared between the tests and UI extractors
167175
# ---------------------------------------------------------------------------

packages/core/contextmine_core/analyzer/extractors/rules.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def _traverse_for_units(
246246
name = _get_node_name(content, node)
247247
if name:
248248
# Include preceding decorators/annotations
249-
actual_start = _include_decorators(content, node)
249+
actual_start = _include_decorators(node)
250250
actual_start_line = content[:actual_start].count("\n") + 1
251251
units.append(
252252
_CodeUnit(
@@ -265,7 +265,7 @@ def _traverse_for_units(
265265
_traverse_for_units(content, child, func_types, class_types, units, parent_name)
266266

267267

268-
def _include_decorators(content: str, node: Any) -> int:
268+
def _include_decorators(node: Any) -> int:
269269
"""Return the byte offset including any decorators/annotations before the node."""
270270
start = node.start_byte
271271
prev = node.prev_sibling

0 commit comments

Comments
 (0)