@@ -20,6 +20,7 @@ def __init__(self, jsonl_dir="data", vec_db_path="./sqlite_data/vectordb.db"):
20
20
os .path .join (jsonl_dir , "archived.jsonl" ),
21
21
os .path .join (jsonl_dir , "deprecated.jsonl" ),
22
22
os .path .join (jsonl_dir , "malicious.jsonl" ),
23
+ os .path .join (jsonl_dir , "vulnerable.jsonl" ),
23
24
]
24
25
self .conn = self ._get_connection ()
25
26
Config .load () # Load the configuration
@@ -48,13 +49,41 @@ def setup_schema(self):
48
49
"""
49
50
)
50
51
52
+ # Table for packages that have at least one high or critical vulnerability
53
+ cursor .execute (
54
+ """
55
+ CREATE TABLE cve_packages (
56
+ name TEXT NOT NULL,
57
+ version TEXT NOT NULL,
58
+ type TEXT NOT NULL
59
+ )
60
+ """
61
+ )
62
+
51
63
# Create indexes for faster querying
52
64
cursor .execute ("CREATE INDEX IF NOT EXISTS idx_name ON packages(name)" )
53
65
cursor .execute ("CREATE INDEX IF NOT EXISTS idx_type ON packages(type)" )
54
66
cursor .execute ("CREATE INDEX IF NOT EXISTS idx_status ON packages(status)" )
67
+ cursor .execute ("CREATE INDEX IF NOT EXISTS idx_pkg_cve_name ON cve_packages(name)" )
68
+ cursor .execute ("CREATE INDEX IF NOT EXISTS idx_pkg_cve_type ON cve_packages(type)" )
69
+ cursor .execute ("CREATE INDEX IF NOT EXISTS idx_pkg_cve_version ON cve_packages(version)" )
55
70
56
71
self .conn .commit ()
57
72
73
async def process_cve_packages(self, package):
    """Record one vulnerable package in the ``cve_packages`` table.

    Reads the ``name``, ``version`` and ``type`` keys of ``package``,
    inserts them as a single row through ``self.conn`` and commits right
    away. A missing key raises ``KeyError`` before the INSERT is executed.
    """
    db_cursor = self.conn.cursor()

    # Column order must match the placeholder order in the statement below.
    row_values = tuple(package[column] for column in ("name", "version", "type"))
    insert_statement = """
        INSERT INTO cve_packages (name, version, type) VALUES (?, ?, ?)
    """

    db_cursor.execute(insert_statement, row_values)
    self.conn.commit()
86
+
58
87
async def process_package (self , package ):
59
88
vector_str = generate_vector_string (package )
60
89
vector = await self .inference_engine .embed (
@@ -101,14 +130,19 @@ async def add_data(self):
101
130
package ["status" ] = json_file .split ("/" )[- 1 ].split ("." )[0 ]
102
131
key = f"{ package ['name' ]} /{ package ['type' ]} "
103
132
104
- if key in existing_packages and existing_packages [key ] == {
105
- "status" : package ["status" ],
106
- "description" : package ["description" ],
107
- }:
108
- print ("Package already exists" , key )
109
- continue
110
-
111
- await self .process_package (package )
133
+ if package ["status" ] == "vulnerable" :
134
+ # Process vulnerable packages using the cve flow
135
+ await self .process_cve_packages (package )
136
+ else :
137
+ # For non-vulnerable packages, check for duplicates and process normally
138
+ if key in existing_packages and existing_packages [key ] == {
139
+ "status" : package ["status" ],
140
+ "description" : package ["description" ],
141
+ }:
142
+ print ("Package already exists" , key )
143
+ continue
144
+
145
+ await self .process_package (package )
112
146
113
147
async def run_import (self ):
114
148
self .setup_schema ()
0 commit comments