13
13
14
14
15
15
def setup_schema(client):
    """Ensure the "Package" collection exists on the given client.

    The collection is created only when ``client.collections.exists``
    reports that it is missing. An existing collection is left untouched
    (it is neither deleted nor recreated), so objects already stored in it
    are kept between runs.

    Args:
        client: Connected client object exposing ``collections.exists`` and
            ``collections.create``.
    """
    if not client.collections.exists("Package"):
        client.collections.create(
            "Package",
            # Every property is stored as plain text.
            properties=[
                Property(name="name", data_type=DataType.TEXT),
                Property(name="type", data_type=DataType.TEXT),
                Property(name="status", data_type=DataType.TEXT),
                Property(name="description", data_type=DataType.TEXT),
            ],
        )
27
26
28
27
29
28
def generate_vector_string (package ):
@@ -59,6 +58,17 @@ def generate_vector_string(package):
59
58
def add_data (client ):
60
59
collection = client .collections .get ("Package" )
61
60
61
+ # Read all existing packages from the DB; a package is only added if it is missing or its data differs.
62
+ existing_packages = list (collection .iterator ())
63
+ packages_dict = {}
64
+ for package in existing_packages :
65
+ key = package .properties ['name' ]+ "/" + package .properties ['type' ]
66
+ value = {
67
+ 'status' : package .properties ['status' ],
68
+ 'description' : package .properties ['description' ],
69
+ }
70
+ packages_dict [key ] = value
71
+
62
72
for json_file in json_files :
63
73
with open (json_file , 'r' ) as f :
64
74
print ("Adding data from" , json_file )
@@ -76,7 +86,15 @@ def add_data(client):
76
86
else :
77
87
package ['status' ] = 'unknown'
78
88
89
+ # check for the existing package and only add if different
90
+ key = package ['name' ]+ "/" + package ['type' ]
91
+ if key in packages_dict :
92
+ if packages_dict [key ]['status' ] == package ['status' ] and packages_dict [key ]['description' ] == package ['description' ]:
93
+ print ("Package already exists" , key )
94
+ continue
95
+
79
96
# prepare the object for embedding
97
+ print ("Generating data for" , key )
80
98
vector_str = generate_vector_string (package )
81
99
vector = generate_embeddings (vector_str )
82
100
0 commit comments