Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 88db14c

Browse files
committed
changes from review
1 parent 002d470 commit 88db14c

File tree

4 files changed

+39
-14
lines changed

4 files changed

+39
-14
lines changed

.github/workflows/import_packages.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@ jobs:
77
# This workflow contains a single job called "sync_db"
88
sync_db:
99
# The type of runner that the job will run on
10-
runs-on: ubuntu-latest
10+
runs-on: [codegate-pipeline]
1111

1212
# Steps represent a sequence of tasks that will be executed as part of the job
1313
steps:
1414
- uses: actions/checkout@v3
15+
- uses: actions/setup-python@v5
16+
with:
17+
python-version: '3.12'
1518
- name: Install dependencies
1619
run: |
1720
python -m pip install --upgrade pip

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ description = "Generative AI CodeGen security gateway"
55
readme = "README.md"
66
authors = []
77
packages = [{include = "codegate", from = "src"}]
8-
requires-python = ">=3.10"
8+
requires-python = ">=3.11"
99

1010
[tool.poetry.dependencies]
1111
python = ">=3.11"

requirements.txt

Lines changed: 0 additions & 4 deletions
This file was deleted.

scripts/import_packages.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,23 +26,45 @@ def setup_schema(client):
2626
)
2727

2828

29+
# Human-readable description per package type. Entries carry no trailing
# punctuation so a status clause can be appended without doubling periods.
_PACKAGE_TYPE_DESCRIPTIONS = {
    "pypi": "is a Python package available on PyPI",
    "npm": "is a JavaScript package available on NPM",
    "go": "is a Go package",
    "crates": "is a Rust package available on Crates",
    "java": "is a Java package",
}

# Wording appended after "this package is found to be" for flagged statuses.
_PACKAGE_STATUS_WARNINGS = {
    "archived": "archived and no longer maintained",
    "deprecated": "deprecated and no longer recommended for use",
    "malicious": "malicious",
}

_PACKAGE_INFO_BASE_URL = "https://trustypkg.dev"


def generate_vector_string(package):
    """Build the sentence that is embedded for a package record.

    The result starts with the package name, followed by a short description
    of its type (when the type is recognised) and, for archived, deprecated
    or malicious packages, a warning plus a link with further details.

    Args:
        package: Mapping with a required ``"name"`` key and optional
            ``"type"`` and ``"status"`` keys.

    Returns:
        The text to embed. It has no trailing whitespace and no doubled
        periods, regardless of package type.

    Raises:
        KeyError: If ``package`` has no ``"name"`` key.
    """
    name = package["name"]
    package_type = package.get("type")

    description = _PACKAGE_TYPE_DESCRIPTIONS.get(package_type)
    if description is None:
        # Unrecognised type: nothing to describe and no details page to cite.
        vector_str = f"{name}"
        package_url = ""
    else:
        vector_str = f"{name} {description}"
        # The URL path segment is the same token as the package type.
        package_url = f"{_PACKAGE_INFO_BASE_URL}/{package_type}/{name}"

    warning = _PACKAGE_STATUS_WARNINGS.get(package.get("status"))
    if warning is None:
        # Any other status (e.g. 'unknown') adds no extra text.
        return vector_str

    vector_str += f". However, this package is found to be {warning}."
    if package_url:
        # Only cite a link when one exists, so the sentence never ends in a
        # dangling "refer to ".
        vector_str += f" For additional information refer to {package_url}"
    return vector_str
57+
58+
2959
def add_data(client):
3060
collection = client.collections.get("Package")
3161

3262
for json_file in json_files:
3363
with open(json_file, 'r') as f:
3464
print("Adding data from", json_file)
35-
counter = 0
3665
with collection.batch.dynamic() as batch:
3766
for line in f:
3867
package = json.loads(line)
39-
counter += 1
40-
if counter > 100:
41-
break
42-
43-
# prepare the object for embedding
44-
vector_str = f"{package['name']} {package['description']}"
45-
vector = generate_embeddings(vector_str)
4668

4769
# now add the status column
4870
if 'archived' in json_file:
@@ -54,6 +76,10 @@ def add_data(client):
5476
else:
5577
package['status'] = 'unknown'
5678

79+
# prepare the object for embedding
80+
vector_str = generate_vector_string(package)
81+
vector = generate_embeddings(vector_str)
82+
5783
batch.add_object(properties=package, vector=vector)
5884

5985

0 commit comments

Comments
 (0)