|
| 1 | +import asyncio |
| 2 | +import datetime |
| 3 | +import uuid |
| 4 | +from pathlib import Path |
| 5 | +from typing import Optional |
| 6 | + |
| 7 | +import structlog |
| 8 | +from litellm import ChatCompletionRequest |
| 9 | +from sqlalchemy import create_engine, text |
| 10 | +from sqlalchemy.ext.asyncio import create_async_engine |
| 11 | + |
| 12 | +from codegate.db.models import Prompt |
| 13 | + |
| 14 | +logger = structlog.get_logger("codegate") |
| 15 | + |
| 16 | + |
| 17 | +class DbRecorder: |
| 18 | + |
| 19 | + def __init__(self, sqlite_path: Optional[str] = None): |
| 20 | + # Initialize SQLite database engine with proper async URL |
| 21 | + if not sqlite_path: |
| 22 | + current_dir = Path(__file__).parent |
| 23 | + self._db_path = (current_dir.parent.parent.parent / "codegate.db").absolute() |
| 24 | + else: |
| 25 | + self._db_path = Path(sqlite_path).absolute() |
| 26 | + |
| 27 | + logger.debug(f"Initializing DB from path: {self._db_path}") |
| 28 | + engine_dict = { |
| 29 | + "url": f"sqlite+aiosqlite:///{self._db_path}", |
| 30 | + "echo": True, # Set to False in production |
| 31 | + "isolation_level": "AUTOCOMMIT", # Required for SQLite |
| 32 | + } |
| 33 | + self._async_db_engine = create_async_engine(**engine_dict) |
| 34 | + self._db_engine = create_engine(**engine_dict) |
| 35 | + |
| 36 | + if not self.does_db_exist(): |
| 37 | + logger.info(f"Database does not exist at {self._db_path}. Creating..") |
| 38 | + asyncio.run(self.init_db()) |
| 39 | + |
| 40 | + def does_db_exist(self): |
| 41 | + return self._db_path.is_file() |
| 42 | + |
| 43 | + async def init_db(self): |
| 44 | + """Initialize the database with the schema.""" |
| 45 | + if self.does_db_exist(): |
| 46 | + logger.info("Database already exists. Skipping initialization.") |
| 47 | + return |
| 48 | + |
| 49 | + # Get the absolute path to the schema file |
| 50 | + current_dir = Path(__file__).parent |
| 51 | + schema_path = current_dir.parent.parent.parent / "sql" / "schema" / "schema.sql" |
| 52 | + |
| 53 | + if not schema_path.exists(): |
| 54 | + raise FileNotFoundError(f"Schema file not found at {schema_path}") |
| 55 | + |
| 56 | + # Read the schema |
| 57 | + with open(schema_path, "r") as f: |
| 58 | + schema = f.read() |
| 59 | + |
| 60 | + try: |
| 61 | + # Execute the schema |
| 62 | + async with self._async_db_engine.begin() as conn: |
| 63 | + # Split the schema into individual statements and execute each one |
| 64 | + statements = [stmt.strip() for stmt in schema.split(";") if stmt.strip()] |
| 65 | + for statement in statements: |
| 66 | + # Use SQLAlchemy text() to create executable SQL statements |
| 67 | + await conn.execute(text(statement)) |
| 68 | + finally: |
| 69 | + await self._async_db_engine.dispose() |
| 70 | + |
| 71 | + async def record_request( |
| 72 | + self, |
| 73 | + normalized_request: ChatCompletionRequest, |
| 74 | + is_fim_request: bool, |
| 75 | + provider_str: str |
| 76 | + ) -> Optional[Prompt]: |
| 77 | + # Extract system prompt and user prompt from the messages |
| 78 | + messages = normalized_request.get("messages", []) |
| 79 | + system_prompt = [] |
| 80 | + user_prompt = [] |
| 81 | + |
| 82 | + for msg in messages: |
| 83 | + if msg.get("role") == "system": |
| 84 | + system_prompt.append(msg.get("content")) |
| 85 | + elif msg.get("role") == "user": |
| 86 | + user_prompt.append(msg.get("content")) |
| 87 | + |
| 88 | + # If no user prompt found in messages, try to get from the prompt field |
| 89 | + # (for non-chat completions) |
| 90 | + if not user_prompt: |
| 91 | + prompt = normalized_request.get("prompt") |
| 92 | + if prompt: |
| 93 | + user_prompt.append(prompt) |
| 94 | + |
| 95 | + if not user_prompt: |
| 96 | + logger.warning("No user prompt found in request.") |
| 97 | + return None |
| 98 | + |
| 99 | + # Create a new prompt record |
| 100 | + prompt_params = Prompt( |
| 101 | + id=str(uuid.uuid4()), # Generate a new UUID for the prompt |
| 102 | + timestamp=datetime.datetime.now(datetime.timezone.utc), |
| 103 | + provider=provider_str, |
| 104 | + type="fim" if is_fim_request else "chat", |
| 105 | + user_prompt="<|>".join(user_prompt), |
| 106 | + system_prompt="<|>".join(system_prompt), |
| 107 | + ) |
| 108 | + # There is a `create_prompt` method in queries.py automatically generated by sqlc |
| 109 | + # However, the method is is buggy and doesn't work as expected. |
| 110 | + # Manually writing the SQL query to insert the prompt record. |
| 111 | + async with self._async_db_engine.begin() as conn: |
| 112 | + sql = text( |
| 113 | + """ |
| 114 | + INSERT INTO prompts (id, timestamp, provider, system_prompt, user_prompt, type) |
| 115 | + VALUES (:id, :timestamp, :provider, :system_prompt, :user_prompt, :type) |
| 116 | + RETURNING * |
| 117 | + """ |
| 118 | + ) |
| 119 | + result = await conn.execute(sql, prompt_params.model_dump()) |
| 120 | + row = result.first() |
| 121 | + if row is None: |
| 122 | + return None |
| 123 | + |
| 124 | + return Prompt( |
| 125 | + id=row.id, |
| 126 | + timestamp=row.timestamp, |
| 127 | + provider=row.provider, |
| 128 | + system_prompt=row.system_prompt, |
| 129 | + user_prompt=row.user_prompt, |
| 130 | + type=row.type |
| 131 | + ) |
| 132 | + |
| 133 | + |
| 134 | +def init_db_sync(): |
| 135 | + """DB will be initialized in the constructor in case it doesn't exist.""" |
| 136 | + DbRecorder() |
| 137 | + |
| 138 | + |
| 139 | +if __name__ == "__main__": |
| 140 | + init_db_sync() |
0 commit comments