Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 9e3c20d

Browse files
committed
reuse packages if they exist
1 parent 88db14c commit 9e3c20d

File tree

2 files changed

+30
-12
lines changed

2 files changed

+30
-12
lines changed

.github/workflows/import_packages.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ jobs:
77
# This workflow contains a single job called "sync_db"
88
sync_db:
99
# The type of runner that the job will run on
10-
runs-on: [codegate-pipeline]
10+
runs-on: ubuntu-latest
1111

1212
# Steps represent a sequence of tasks that will be executed as part of the job
1313
steps:

scripts/import_packages.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,16 @@
1313

1414

1515
def setup_schema(client):
16-
if client.collections.exists("Package"):
17-
client.collections.delete("Package")
18-
client.collections.create(
19-
"Package",
20-
properties=[
21-
Property(name="name", data_type=DataType.TEXT),
22-
Property(name="type", data_type=DataType.TEXT),
23-
Property(name="status", data_type=DataType.TEXT),
24-
Property(name="description", data_type=DataType.TEXT),
25-
]
26-
)
16+
if not client.collections.exists("Package"):
17+
client.collections.create(
18+
"Package",
19+
properties=[
20+
Property(name="name", data_type=DataType.TEXT),
21+
Property(name="type", data_type=DataType.TEXT),
22+
Property(name="status", data_type=DataType.TEXT),
23+
Property(name="description", data_type=DataType.TEXT),
24+
]
25+
)
2726

2827

2928
def generate_vector_string(package):
@@ -59,6 +58,17 @@ def generate_vector_string(package):
5958
def add_data(client):
6059
collection = client.collections.get("Package")
6160

61+
# read all existing packages from the db; a package is only added if it is missing or its status/description differs
62+
existing_packages = list(collection.iterator())
63+
packages_dict = {}
64+
for package in existing_packages:
65+
key = package.properties['name']+"/"+package.properties['type']
66+
value = {
67+
'status': package.properties['status'],
68+
'description': package.properties['description'],
69+
}
70+
packages_dict[key] = value
71+
6272
for json_file in json_files:
6373
with open(json_file, 'r') as f:
6474
print("Adding data from", json_file)
@@ -76,7 +86,15 @@ def add_data(client):
7686
else:
7787
package['status'] = 'unknown'
7888

89+
# check for the existing package and only add if different
90+
key = package['name']+"/"+package['type']
91+
if key in packages_dict:
92+
if packages_dict[key]['status'] == package['status'] and packages_dict[key]['description'] == package['description']:
93+
print("Package already exists", key)
94+
continue
95+
7996
# prepare the object for embedding
97+
print("Generating data for", key)
8098
vector_str = generate_vector_string(package)
8199
vector = generate_embeddings(vector_str)
82100

0 commit comments

Comments
 (0)