diff --git a/src/codegate/cli.py b/src/codegate/cli.py
index 86a4e6af..fdd5c4e0 100644
--- a/src/codegate/cli.py
+++ b/src/codegate/cli.py
@@ -11,7 +11,7 @@
 from codegate.config import Config, ConfigurationError
 from codegate.db.connection import init_db_sync
 from codegate.server import init_app
-from src.codegate.storage.utils import restore_storage_backup
+from codegate.storage.utils import restore_storage_backup
 
 
 def validate_port(ctx: click.Context, param: click.Parameter, value: int) -> int:
diff --git a/src/codegate/llm_utils/__init__.py b/src/codegate/llm_utils/__init__.py
index b37716f8..c688370e 100644
--- a/src/codegate/llm_utils/__init__.py
+++ b/src/codegate/llm_utils/__init__.py
@@ -1,4 +1,4 @@
-from src.codegate.llm_utils.extractor import PackageExtractor
-from src.codegate.llm_utils.llmclient import LLMClient
+from codegate.llm_utils.extractor import PackageExtractor
+from codegate.llm_utils.llmclient import LLMClient
 
 __all__ = ["LLMClient", "PackageExtractor"]
diff --git a/src/codegate/llm_utils/extractor.py b/src/codegate/llm_utils/extractor.py
index e50bdf5e..5635062a 100644
--- a/src/codegate/llm_utils/extractor.py
+++ b/src/codegate/llm_utils/extractor.py
@@ -4,6 +4,7 @@
 
 from codegate.config import Config
 from codegate.llm_utils.llmclient import LLMClient
+from codegate.storage import StorageEngine
 
 logger = structlog.get_logger("codegate")
 
@@ -13,6 +14,9 @@ class PackageExtractor:
     Utility class to extract package names from code or queries.
     """
 
+    def __init__(self):
+        self.storage_engine = StorageEngine()
+
     @staticmethod
     async def extract_packages(
         content: str,
diff --git a/src/codegate/pipeline/extract_snippets/output.py b/src/codegate/pipeline/extract_snippets/output.py
index 16e742dd..bf9243d6 100644
--- a/src/codegate/pipeline/extract_snippets/output.py
+++ b/src/codegate/pipeline/extract_snippets/output.py
@@ -8,6 +8,7 @@
 from codegate.pipeline.base import CodeSnippet, PipelineContext, PipelineSensitiveData
 from codegate.pipeline.extract_snippets.extract_snippets import extract_snippets
 from codegate.pipeline.output import OutputPipelineContext, OutputPipelineStep
+from codegate.storage import StorageEngine
 
 logger = structlog.get_logger("codegate")
 
@@ -16,7 +17,7 @@ class CodeCommentStep(OutputPipelineStep):
     """Pipeline step that adds comments after code blocks"""
 
     def __init__(self):
-        pass
+        self._storage_engine = StorageEngine()
 
     @property
     def name(self) -> str:
@@ -41,8 +42,7 @@ def _create_chunk(self, original_chunk: ModelResponse, content: str) -> ModelRes
             object="chat.completion.chunk",
         )
 
-    @staticmethod
-    async def _snippet_comment(snippet: CodeSnippet, secrets: PipelineSensitiveData) -> str:
+    async def _snippet_comment(self, snippet: CodeSnippet, secrets: PipelineSensitiveData) -> str:
         """Create a comment for a snippet"""
         snippet.libraries = await PackageExtractor.extract_packages(
             content=snippet.code,
@@ -52,14 +52,33 @@ async def _snippet_comment(snippet: CodeSnippet, secrets: PipelineSensitiveData)
             base_url=secrets.api_base,
         )
 
+        libobjects = await self._storage_engine.search_by_property("name", snippet.libraries)
+        logger.info(f"Found {len(libobjects)} libraries in the storage engine")
+
         libraries_text = ""
+        warnings = []
+
+        # Use snippet.libraries to generate a CSV list of libraries
         if snippet.libraries:
-            libraries_text = " ".join(f"`{lib}`" for lib in snippet.libraries)
-        if libraries_text:
-            comment = f"\nThe above code snippet uses the following libraries: {libraries_text}\n"
-        else:
-            comment = "\ncodegate didn't detect any libraries in the snippet\n"
-        comment += "\n"
+            libraries_text = ", ".join([f"`{lib}`" for lib in snippet.libraries])
+
+        for lib in libobjects:
+            lib_name = lib.properties["name"]
+            lib_status = lib.properties["status"]
+            lib_url = f"https://www.insight.stacklok.com/report/{lib.properties['type']}/{lib_name}"
+
+            warnings.append(
+                f"- The package `{lib_name}` is marked as **{lib_status}**.\n"
+                f"- More information: [{lib_url}]({lib_url})\n"
+            )
+
+        comment = ""
+        if libraries_text != "":
+            comment += f"\n\nCodegate detected the following libraries: {libraries_text}\n"
+
+        if warnings:
+            comment += "\n### 🚨 Warnings\n" + "\n".join(warnings) + "\n"
+
         return comment
 
     def _split_chunk_at_code_end(self, content: str) -> tuple[str, str]:
diff --git a/src/codegate/storage/storage_engine.py b/src/codegate/storage/storage_engine.py
index aa9d38b5..d4421117 100644
--- a/src/codegate/storage/storage_engine.py
+++ b/src/codegate/storage/storage_engine.py
@@ -1,8 +1,10 @@
+from typing import List
+
 import structlog
 import weaviate
 import weaviate.classes as wvc
 from weaviate.classes.config import DataType
-from weaviate.classes.query import MetadataQuery
+from weaviate.classes.query import Filter, MetadataQuery
 from weaviate.embedded import EmbeddedOptions
 
 from codegate.config import Config
@@ -88,6 +90,45 @@ def setup_schema(self, client):
                 )
                 logger.info(f"Weaviate schema for class {class_config['name']} setup complete.")
 
+    async def search_by_property(self, name: str, properties: List[str]) -> list[object]:
+        """
+        Fetch objects from the 'Package' collection, filtered on one property.
+
+        Args:
+            name: The 'Package' property to filter on.
+            properties: Accepted values; the filter is built with
+                `contains_any`, so matching one of them is enough.
+
+        Returns:
+            The objects of the query response, or an empty list when there
+            is nothing to look up, no client is available, or the query fails.
+        """
+        # Nothing to look up; truthiness also covers a caller passing None.
+        if not properties:
+            return []
+
+        # This is a property filter lookup, not a vector search.
+        weaviate_client = self.get_client(self.data_path)
+        if not weaviate_client:
+            logger.error("Could not find client, not returning results.")
+            return []
+
+        try:
+            weaviate_client.connect()
+            packages = weaviate_client.collections.get("Package")
+            response = packages.query.fetch_objects(
+                filters=Filter.by_property(name).contains_any(properties),
+            )
+
+            if not response:
+                return []
+            return response.objects
+        except Exception as e:
+            logger.error(f"An error occurred: {str(e)}")
+            return []
+        finally:
+            weaviate_client.close()
+
     async def search(self, query: str, limit=5, distance=0.3, packages=None) -> list[object]:
         """
         Search the 'Package' collection based on a query string.