test: address AI findings in recent test changes (#1234)

mldangelo-oai · web-flow · commit 5aa48f27f116 · 2026-05-08T21:52:35.000-04:00
* test: address AI findings in recent test changes

* test: dedupe shared pickle child setup
diff --git a/packages/modelaudit-picklescan/tests/test_call_graph_import_statements.py b/packages/modelaudit-picklescan/tests/test_call_graph_import_statements.py
@@ -83,10 +83,10 @@ def test_wildcard_summary_and_analysis_share_module_parse(
     parse_calls = 0
     real_parse = call_graph.ast.parse
 
-    def tracking_parse(source: str, filename: str = "<unknown>") -> ast.Module:
+    def tracking_parse(source_code: str, filename: str = "<unknown>") -> ast.Module:
         nonlocal parse_calls
         parse_calls += 1
-        return real_parse(source, filename=filename)
+        return real_parse(source_code, filename=filename)
 
     monkeypatch.setattr(
         call_graph, "_resolve_module_source", lambda module_name: module_path if module_name == "module" else None
@@ -124,6 +124,21 @@ def _env_without_pythonpath() -> dict[str, str]:
     return {key: value for key, value in os.environ.items() if key != "PYTHONPATH"}
 
 
+def _pickle_exec_child_code(body: str) -> str:
+    """Return child-process source: a shared pickle-test preamble followed by ``body``.
+
+    The preamble imports ``pickle``, ``sys`` and ``Path``, binds ``module_dir``,
+    ``marker`` and ``payload`` from ``sys.argv[1]``..``sys.argv[3]`` (the payload
+    is hex-decoded), and puts ``module_dir`` first on ``sys.path``. ``body`` is
+    spliced in verbatim at column 0 of the generated script, so callers pass
+    unindented top-level statements.
+    """
+    return f"""
+import pickle
+import sys
+from pathlib import Path
+
+module_dir = Path(sys.argv[1])
+marker = Path(sys.argv[2])
+payload = bytes.fromhex(sys.argv[3])
+
+sys.path.insert(0, str(module_dir))
+{body}
+"""
+
+
 def test_iter_call_nodes_reuses_cached_walk(monkeypatch: pytest.MonkeyPatch) -> None:
     module = ast.parse(
         """
@@ -224,9 +239,12 @@ def counting_initial_parameter_controlled_names(
 def test_split_function_name_reuses_cached_resolution(monkeypatch: pytest.MonkeyPatch) -> None:
     analyze_calls: list[str] = []
 
-    def fake_analyze_module(module_name: str) -> object | None:
+    class _AnalyzedModule:
+        pass
+
+    def fake_analyze_module(module_name: str) -> _AnalyzedModule | None:
         analyze_calls.append(module_name)
-        return object() if module_name == "pkg.mod" else None
+        return _AnalyzedModule() if module_name == "pkg.mod" else None
 
     monkeypatch.setattr(call_graph, "_analyze_module", fake_analyze_module)
     call_graph._split_function_name.cache_clear()
@@ -1479,20 +1497,13 @@ def test_scan_bytes_ignores_uninvoked_nested_function_body_calls(
 
     assert report.verdict == SafetyVerdict.CLEAN
     assert not _has_critical_call_graph_finding(report, module_name, function_name, "subprocess.run")
-    child_code = """
-import pickle
-import sys
-from pathlib import Path
-
-module_dir = Path(sys.argv[1])
-marker = Path(sys.argv[2])
-payload = bytes.fromhex(sys.argv[3])
-
-sys.path.insert(0, str(module_dir))
+    child_code = _pickle_exec_child_code(
+        """
 pickle.loads(payload)
 if marker.exists():
     raise SystemExit("nested body unexpectedly executed")
 """
+    )
     result = _run_python_subprocess(
         [sys.executable, "-c", child_code, str(module_dir), str(marker), payload.hex()],
         cwd=tmp_path.parent,
@@ -1529,22 +1540,15 @@ def test_scan_bytes_does_not_treat_newobj_as_init_invocation(
 
     assert report.verdict == SafetyVerdict.CLEAN
     assert not _has_critical_call_graph_finding(report, module_name, "InitImports", "os.system")
-    child_code = """
-import pickle
-import sys
-from pathlib import Path
-
-module_dir = Path(sys.argv[1])
-marker = Path(sys.argv[2])
-payload = bytes.fromhex(sys.argv[3])
-
-sys.path.insert(0, str(module_dir))
+    child_code = _pickle_exec_child_code(
+        """
 result = pickle.loads(payload)
 if getattr(result, "value", None) != "safe":
     raise SystemExit(f"unexpected state: {result.__dict__!r}")
 if marker.exists():
     raise SystemExit("NEWOBJ unexpectedly executed __init__")
 """
+    )
     result = _run_python_subprocess(
         [sys.executable, "-c", child_code, str(module_dir), str(marker), payload.hex()],
         cwd=tmp_path.parent,
@@ -3642,7 +3646,7 @@ def test_scan_bytes_blocks_itertools_adapter_next_call_iterator_consumption_rce(
 
 
 @pytest.mark.parametrize(
-    ("payload", "values_literal", "expected_repr", "requires_python311"),
+    ("payload", "values_literal", "expected_repr", "requires_python_3_11_plus"),
     [
         (
             _builtins_help_call_iterator_stdlib_materializer_payload("array", "array", _unicode_operand("i"), b"h\x00"),
@@ -3859,7 +3863,7 @@ def test_scan_bytes_blocks_stdlib_eager_call_iterator_consumption_rce(
     payload: bytes,
     values_literal: str,
     expected_repr: str,
-    requires_python311: bool,
+    requires_python_3_11_plus: bool,
 ) -> None:
     module_dir = tmp_path / "modules"
     module_dir.mkdir()
@@ -3891,7 +3895,7 @@ def test_scan_bytes_blocks_stdlib_eager_call_iterator_consumption_rce(
     )
 
     assert not marker.exists()
-    if requires_python311 and sys.version_info < (3, 11):
+    if requires_python_3_11_plus and sys.version_info < (3, 11):
         return
     child_code = """
 import ast
@@ -3945,7 +3949,7 @@ def test_scan_bytes_blocks_stdlib_eager_call_iterator_consumption_rce(
 
 
 @pytest.mark.parametrize(
-    ("payload", "values_literal", "expected_repr", "requires_python311"),
+    ("payload", "values_literal", "expected_repr", "requires_python_3_11_plus"),
     [
         (
             _builtins_help_call_iterator_stdlib_materializer_payload(
@@ -3976,7 +3980,7 @@ def test_scan_bytes_blocks_weighted_statistics_call_iterator_consumption_rce(
     payload: bytes,
     values_literal: str,
     expected_repr: str,
-    requires_python311: bool,
+    requires_python_3_11_plus: bool,
 ) -> None:
     module_dir = tmp_path / "modules"
     module_dir.mkdir()
@@ -4008,7 +4012,7 @@ def test_scan_bytes_blocks_weighted_statistics_call_iterator_consumption_rce(
     )
 
     assert not marker.exists()
-    if requires_python311 and sys.version_info < (3, 11):
+    if requires_python_3_11_plus and sys.version_info < (3, 11):
         return
 
     child_code = """
diff --git a/tests/test_lazy_loading_integration.py b/tests/test_lazy_loading_integration.py
@@ -11,11 +11,15 @@
 from modelaudit import core
 from modelaudit.scanners import _registry
 
+# Upper bounds that the lazy-loading tests in this module assert against
+# len(_registry._loaded_scanners) after a single-file scan and after a
+# directory scan, and against the largest entry of loaded_counts in the
+# multiple-file-type test.
+MAX_SCANNERS_FOR_SINGLE_FILE_SCAN = 5
+MAX_SCANNERS_FOR_DIRECTORY_SCAN = 10
+MAX_SCANNERS_FOR_INCREMENTAL_SCAN = 15
+
 
 class TestCoreIntegration:
     """Test integration of lazy loading with core scanning functionality."""
 
-    def test_scan_file_uses_lazy_loading(self):
+    def test_scan_file_uses_lazy_loading(self) -> None:
         """Test that scan_file uses lazy loading correctly."""
         # Reset loaded scanners
         _registry._loaded_scanners.clear()
@@ -36,11 +40,11 @@ def test_scan_file_uses_lazy_loading(self):
 
                 # Should have loaded minimal scanners
                 loaded_count = len(_registry._loaded_scanners)
-                assert loaded_count <= 5  # Should be minimal
+                assert loaded_count <= MAX_SCANNERS_FOR_SINGLE_FILE_SCAN
             finally:
                 Path(f.name).unlink(missing_ok=True)
 
-    def test_scan_directory_uses_lazy_loading(self):
+    def test_scan_directory_uses_lazy_loading(self) -> None:
         """Test that directory scanning uses lazy loading efficiently."""
         _registry._loaded_scanners.clear()
 
@@ -61,7 +65,7 @@ def test_scan_directory_uses_lazy_loading(self):
 
             # Should have loaded only necessary scanners
             loaded_count = len(_registry._loaded_scanners)
-            assert loaded_count <= 10  # Should be reasonable
+            assert loaded_count <= MAX_SCANNERS_FOR_DIRECTORY_SCAN
 
     def test_preferred_scanner_lazy_loading(self, tmp_path: Path) -> None:
         """Test that preferred scanner detection uses lazy loading."""
@@ -79,7 +83,7 @@ def test_preferred_scanner_lazy_loading(self, tmp_path: Path) -> None:
         # Should have loaded pickle scanner
         assert "pickle" in _registry._loaded_scanners
 
-    def test_multiple_file_types_incremental_loading(self):
+    def test_multiple_file_types_incremental_loading(self) -> None:
         """Test that scanning multiple file types loads scanners incrementally."""
         _registry._loaded_scanners.clear()
 
@@ -106,13 +110,13 @@ def test_multiple_file_types_incremental_loading(self):
             # Should show incremental loading (or at least not loading everything at once)
             assert loaded_counts[0] > 0  # Some scanners loaded for first file
             # Later scans might load more, but shouldn't load everything
-            assert max(loaded_counts) <= 15  # Reasonable upper bound
+            assert max(loaded_counts) <= MAX_SCANNERS_FOR_INCREMENTAL_SCAN
 
 
 class TestPerformanceCharacteristics:
     """Test performance characteristics of lazy loading."""
 
-    def test_import_performance(self):
+    def test_import_performance(self) -> None:
         """Test that importing scanners is fast with lazy loading."""
         # This test measures import time
         start_time = time.time()
@@ -122,16 +126,18 @@ def test_import_performance(self):
 
         import_time = time.time() - start_time
 
-        # Should be much faster than 1 second (was 7+ seconds before)
+        # The historical eager-loading baseline was 7+ seconds; 1 second leaves
+        # room for local and CI variance while still catching a real regression.
         assert import_time < 1.0
 
         # Accessing the registry should also be fast
         start_time = time.time()
         _ = scanners.SCANNER_REGISTRY
         access_time = time.time() - start_time
 
-        # First access loads scanners, but should still be reasonable
-        assert access_time < 5.0  # Much better than 7+ seconds
+        # First access performs one-time lazy-loading work, so keep this looser
+        # than the import guard while still catching a return to the old 7+ second path.
+        assert access_time < 5.0
 
     def test_single_scanner_access_performance(self) -> None:
         """Test that accessing a single scanner is fast."""
diff --git a/tests/test_perf_workflow.py b/tests/test_perf_workflow.py
@@ -7,7 +7,17 @@
 
 
 def _load_perf_workflow() -> dict[str, Any]:
-    workflow_path = Path(__file__).resolve().parents[1] / ".github" / "workflows" / "perf.yml"
+    """Load ``.github/workflows/perf.yml`` from the nearest ancestor directory that has it.
+
+    Raises:
+        AssertionError: if no ancestor directory of this test file contains the
+            workflow file, or if the parsed YAML document is not a mapping.
+    """
+    # ``Path.parents`` already starts at the file's own directory, so it is the
+    # complete search list; walking it nearest-first keeps the closest match.
+    for candidate_root in Path(__file__).resolve().parents:
+        workflow_path = candidate_root / ".github" / "workflows" / "perf.yml"
+        if workflow_path.is_file():
+            break
+    else:
+        raise AssertionError("Could not locate .github/workflows/perf.yml from test file path")
     workflow = yaml.safe_load(workflow_path.read_text(encoding="utf-8"))
     assert isinstance(workflow, dict)
     return workflow
diff --git a/tests/test_security_enhancements.py b/tests/test_security_enhancements.py
@@ -19,18 +19,21 @@
 class TestJoblibScannerSecurity:
     """Test security enhancements for Joblib scanner."""
 
-    def test_compression_bomb_detection(self, tmp_path):
+    def test_compression_bomb_detection(self, tmp_path: Path) -> None:
         """Test that compression bombs are detected."""
         # Create a compression bomb (large data that compresses well)
         bomb_data = b"A" * (10 * 1024 * 1024)  # 10MB of 'A's
         compressed = zlib.compress(bomb_data, level=9)
+        max_decompression_ratio = 50.0
+        actual_ratio = len(bomb_data) / len(compressed)
+        assert actual_ratio > max_decompression_ratio
 
         # Write to a .joblib file
         joblib_file = tmp_path / "bomb.joblib"
         joblib_file.write_bytes(compressed)
 
         # Configure scanner with low compression ratio limit
-        config = {"max_decompression_ratio": 50.0}  # Lower than actual ratio
+        config = {"max_decompression_ratio": max_decompression_ratio}  # Lower than actual ratio
         scanner = JoblibScanner(config)
 
         result = scanner.scan(str(joblib_file))
@@ -151,11 +154,11 @@ def test_zip_format_joblib(self, tmp_path):
         # Should delegate to ZIP scanner and succeed
         assert result.success is True
 
-    def test_direct_pickle_joblib(self, tmp_path):
+    def test_direct_pickle_joblib(self, tmp_path: Path) -> None:
         """Test joblib files that are direct pickle (not compressed)."""
         # Create direct pickle data with pickle magic bytes
         data = {"test": "direct_pickle"}
-        pickled = pickle.dumps(data, protocol=2)  # Protocol 2 starts with 0x80
+        pickled = pickle.dumps(data, protocol=2)  # Protocol 2 starts with 0x80 0x02
 
         joblib_file = tmp_path / "direct.joblib"
         joblib_file.write_bytes(pickled)
@@ -204,7 +207,7 @@ def test_file_read_chunk_limit(self, tmp_path):
 class TestNumPyScannerSecurity:
     """Test security enhancements for NumPy scanner."""
 
-    def test_negative_dimension_rejection(self, tmp_path):
+    def test_negative_dimension_rejection(self, tmp_path: Path) -> None:
         """Test rejection of arrays with negative dimensions."""
         # We'll need to create a malformed numpy file manually
         # since numpy.save() won't create invalid files
@@ -219,8 +222,8 @@ def test_negative_dimension_rejection(self, tmp_path):
             header_len = len(header)
             f.write(header_len.to_bytes(2, "little"))
             f.write(header.encode("latin1"))
-            # Add some dummy data
-            f.write(b"\x00" * 1600)  # 20 * 8 bytes per float64
+            # The scanner should reject the invalid header before reading a full payload.
+            f.write(b"\x00")
 
         scanner = NumPyScanner()
         result = scanner.scan(str(npy_file))
@@ -273,7 +276,13 @@ def test_dimension_size_limit(self, tmp_path):
         assert len(size_issues) > 0
 
     def test_dangerous_dtype_reports_cve_info(self, tmp_path: Path) -> None:
-        """Object dtype arrays should scan successfully while emitting CVE-2019-6446 info."""
+        """Object dtype arrays should emit informational CVE-2019-6446 context.
+
+        CVE-2019-6446 concerns unsafe loading of NumPy object arrays when pickle
+        deserialization is allowed. Object dtypes can embed pickled Python objects,
+        so the scanner should surface that context while still allowing a clean file
+        to scan successfully.
+        """
         scanner = NumPyScanner()
         npy_file = tmp_path / "object_dtype.npy"
         np.save(npy_file, np.array([{"key": "value"}], dtype=object), allow_pickle=True)
@@ -363,23 +372,15 @@ def test_valid_numpy_array_still_works(self, tmp_path):
         assert "shape" in result.metadata
         assert "dtype" in result.metadata
 
-    def test_numpy_version_2_format(self, tmp_path):
+    def test_numpy_version_2_format(self, tmp_path: Path) -> None:
         """Test NumPy format version 2.0 handling."""
-        # Create array that will use version 2.0 format
-        # Use a very long array description to trigger v2.0 format
-
-        # Create a large 4D array that should trigger version 2.0
-        # due to large header size, not structured dtype
-        arr = np.zeros((100, 50, 20, 10), dtype=np.float64)
-
+        arr = np.zeros((2, 2), dtype=np.float64)
         npy_file = tmp_path / "version2.npy"
-        np.save(npy_file, arr)
+        with npy_file.open("wb") as file_obj:
+            np.lib.format.write_array(file_obj, arr, version=(2, 0))
+        assert npy_file.read_bytes()[6:8] == b"\x02\x00"
 
-        # Allow structured arrays for this test
-        config = {
-            "max_array_bytes": 10 * 1024 * 1024 * 1024,
-        }  # 10GB limit to allow large test array
-        scanner = NumPyScanner(config)
+        scanner = NumPyScanner()
         result = scanner.scan(str(npy_file))
 
         # Should succeed
diff --git a/tests/utils/file/test_advanced_file_handler.py b/tests/utils/file/test_advanced_file_handler.py
@@ -29,10 +29,10 @@ def scan(self, shard_path: str) -> ScanResult:
         return result
 
 
-class FailingShardScanner:
+class OperationalFailureScanner:
     """Scanner that simulates an operational shard scan failure."""
 
-    name = "failing_shard_scanner"
+    name = "operational_failure_scanner"
 
     def scan(self, shard_path: str) -> ScanResult:
         raise RuntimeError(f"cannot scan {Path(shard_path).name}")
@@ -297,6 +297,11 @@ def test_massive_file_without_bounded_support_fails_closed(
         class ScannerWithoutBoundedSupport:
             name = "test_scanner"
 
+            def scan(self, _file_path: str) -> ScanResult:
+                result = ScanResult(scanner_name=self.name)
+                result.finish(success=True)
+                return result
+
         handler = AdvancedFileHandler(str(model_path), ScannerWithoutBoundedSupport())
         result = handler.scan()
 
@@ -356,7 +361,7 @@ def test_parallel_shard_errors_mark_scan_inconclusive(self, tmp_path: Path) -> N
                 "total_shards": 1,
                 "total_size": shard_path.stat().st_size,
             },
-            FailingShardScanner,
+            OperationalFailureScanner,
         )
 
         result = handler.scan_shards()