-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathengine.py
More file actions
68 lines (55 loc) · 2.53 KB
/
engine.py
File metadata and controls
68 lines (55 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""Clinically-constrained validation engine with conflict resolution."""
from __future__ import annotations
import logging
from collections import defaultdict
import pandas as pd
from ..constraints.base import CandidateCorrection, ClinicalConstraint
from ..models import CorrectionRecord
# Module-level logger; it is named after this module (``__name__``) so that
# log records emitted here can be filtered by the module's import path.
LOGGER = logging.getLogger(__name__)
class ValidationEngine:
    """Runs constraints and resolves correction conflicts deterministically.

    Each constraint is applied to a copy of the input frame. The candidate
    corrections they return are grouped per ``(row_index, column)`` cell, one
    winner is selected per cell, and the winner is written to the copy when
    it differs from the value currently stored in that cell.
    """

    def __init__(self, constraints: list[ClinicalConstraint]) -> None:
        """Store the constraints; they are applied in list order."""
        self.constraints = constraints

    def validate_and_correct(
        self, df: pd.DataFrame
    ) -> tuple[pd.DataFrame, list[CorrectionRecord]]:
        """Apply constraints, resolve candidate conflicts, and return corrections.

        Args:
            df: Input frame. It is deep-copied; the engine itself only writes
                to the copy.

        Returns:
            A tuple of the corrected copy and one ``CorrectionRecord`` per
            cell whose value was actually changed.
        """
        working = df.copy(deep=True)

        # No correction is written until every constraint has been applied:
        # candidates are only collected here and resolved in the loop below.
        grouped: dict[tuple[int, str], list[CandidateCorrection]] = defaultdict(list)
        for constraint in self.constraints:
            for candidate in constraint.apply(working).candidates:
                grouped[(candidate.row_index, candidate.column)].append(candidate)

        logs: list[CorrectionRecord] = []
        for (row_idx, col), candidates in grouped.items():
            winner = self._select_winner(candidates)
            original = working.at[row_idx, col]
            # A plain ``==`` is not safe here: comparing with ``pd.NA`` yields
            # ``pd.NA`` (whose truth value raises TypeError) and NaN never
            # equals NaN, which would record a no-op "correction".
            if self._values_equal(original, winner.proposed_value):
                continue
            working.at[row_idx, col] = winner.proposed_value
            conflict_note = (
                " Conflict resolved by highest confidence."
                if len(candidates) > 1
                else ""
            )
            logs.append(
                CorrectionRecord(
                    row_index=row_idx,
                    column=col,
                    original_value=original,
                    corrected_value=winner.proposed_value,
                    constraint_name=winner.constraint_name,
                    rationale=f"{winner.rationale}{conflict_note}",
                    confidence=winner.confidence,
                )
            )
        LOGGER.info("Validation complete with %d accepted corrections", len(logs))
        return working, logs

    @staticmethod
    def _values_equal(left: object, right: object) -> bool:
        """Return whether two cell values are equal, missing-value aware.

        Two missing scalars (``None``, NaN, ``pd.NA``, ``pd.NaT``) compare
        equal to each other; a missing scalar never equals a present value.
        Everything else falls back to ``==``.
        """
        # ``pd.isna`` returns an array for list-likes, so only ask it about scalars.
        left_missing = pd.api.types.is_scalar(left) and pd.isna(left)
        right_missing = pd.api.types.is_scalar(right) and pd.isna(right)
        if left_missing or right_missing:
            return bool(left_missing and right_missing)
        return bool(left == right)

    @staticmethod
    def _select_winner(candidates: list[CandidateCorrection]) -> CandidateCorrection:
        """Deterministically pick correction candidate by confidence then name.

        Ordering is: highest ``confidence`` first, then ``constraint_name``
        ascending, then ``str(proposed_value)`` ascending. If candidates tie
        on all three, the one that appears first in ``candidates`` wins.

        Raises:
            ValueError: If ``candidates`` is empty.
        """
        return min(
            candidates,
            key=lambda c: (-c.confidence, c.constraint_name, str(c.proposed_value)),
        )