Skip to content

Commit f8de187

Browse files
authored
Merge pull request #70 from aws-samples/spy_dev
add log store
2 parents b8eb86f + 7522c01 commit f8de187

File tree

6 files changed

+96
-19
lines changed

6 files changed

+96
-19
lines changed

application/api/schemas.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,7 @@ class SQLSearchResult(BaseModel):
6767

6868
class TaskSQLSearchResult(BaseModel):
6969
sub_task_query: str
70-
sql: str
71-
sql_data: list[Any]
72-
data_show_type: str
73-
sql_gen_process: str
74-
data_analyse: str
70+
sql_search_result: SQLSearchResult
7571

7672

7773
class KnowledgeSearchResult(BaseModel):

application/api/service.py

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from nlq.business.nlq_chain import NLQChain
99
from nlq.business.profile import ProfileManagement
1010
from nlq.business.vector_store import VectorStore
11+
from nlq.business.log_store import LogManagement
1112
from utils.apis import get_sql_result_tool
1213
from utils.database import get_db_url_dialect
1314
from nlq.business.suggested_question import SuggestedQuestionManagement as sqm
@@ -16,6 +17,7 @@
1617
generate_suggested_question, data_visualization
1718
from utils.opensearch import get_retrieve_opensearch
1819
from utils.text_search import normal_text_search, agent_text_search
20+
from utils.tool import generate_log_id, get_current_time
1921
from .schemas import Question, Answer, Example, Option, SQLSearchResult, AgentSearchResult, KnowledgeSearchResult, \
2022
TaskSQLSearchResult
2123
from .exception_handler import BizException
@@ -179,6 +181,10 @@ def ask(question: Question) -> Answer:
179181

180182
filter_deep_dive_sql_result = []
181183

184+
log_id = generate_log_id()
185+
current_time = get_current_time()
186+
log_info = ""
187+
182188
all_profiles = ProfileManagement.get_all_profiles_with_info()
183189
database_profile = all_profiles[selected_profile]
184190

@@ -188,7 +194,7 @@ def ask(question: Question) -> Answer:
188194
sql_gen_process="",
189195
data_analyse="")
190196

191-
agent_search_response = AgentSearchResult(agent_summary="", agent_sql_search_result=[], sub_search_task=[])
197+
agent_search_response = AgentSearchResult(agent_summary="", agent_sql_search_result=[])
192198

193199
knowledge_search_result = KnowledgeSearchResult(knowledge_response="")
194200

@@ -232,6 +238,8 @@ def ask(question: Question) -> Answer:
232238
answer = Answer(query=search_box, query_intent="reject_search", knowledge_search_result=knowledge_search_result,
233239
sql_search_result=sql_search_result, agent_search_result=agent_search_response,
234240
suggested_question=[])
241+
LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql="", query=search_box,
242+
intent="reject_search", log_info="", time_str=current_time)
235243
return answer
236244
elif search_intent_flag:
237245
normal_search_result = normal_text_search(search_box, model_type,
@@ -246,6 +254,10 @@ def ask(question: Question) -> Answer:
246254
knowledge_search_result=knowledge_search_result,
247255
sql_search_result=sql_search_result, agent_search_result=agent_search_response,
248256
suggested_question=[])
257+
258+
LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql="", query=search_box,
259+
intent="knowledge_search", log_info=knowledge_search_result.knowledge_response,
260+
time_str=current_time)
249261
return answer
250262

251263
else:
@@ -298,6 +310,12 @@ def ask(question: Question) -> Answer:
298310
# sql_search_result.sql_data = [list(search_intent_result["data"].columns)] + search_intent_result[
299311
# "data"].values.tolist()
300312

313+
log_info = search_intent_result["error_info"] + ";" + sql_search_result.data_analyse
314+
LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql=sql_search_result.sql, query=search_box,
315+
intent="normal_search",
316+
log_info=log_info,
317+
time_str=current_time)
318+
301319
answer = Answer(query=search_box, query_intent="normal_search", knowledge_search_result=knowledge_search_result,
302320
sql_search_result=sql_search_result, agent_search_result=agent_search_response,
303321
suggested_question=generate_suggested_question_list)
@@ -311,13 +329,30 @@ def ask(question: Question) -> Answer:
311329
orient='records')
312330
filter_deep_dive_sql_result.append(agent_search_result[i])
313331
each_task_sql_res = [list(each_task_res["data"].columns)] + each_task_res["data"].values.tolist()
332+
333+
model_select_type, show_select_data = data_visualization(model_type, agent_search_result[i]["query"],
334+
each_task_res["data"],
335+
database_profile['prompt_map'])
336+
337+
each_task_sql_response = agent_search_result[i]["response"]
338+
sub_task_sql_result = SQLSearchResult(sql_data=show_select_data, sql=each_task_res["sql"], data_show_type=model_select_type,
339+
sql_gen_process=each_task_sql_response,
340+
data_analyse="")
341+
314342
each_task_sql_search_result = TaskSQLSearchResult(sub_task_query=agent_search_result[i]["query"],
315-
sql_data=each_task_sql_res,
316-
sql=each_task_res["sql"], data_show_type="table",
317-
sql_gen_process="",
318-
data_analyse="")
343+
sql_search_result=sub_task_sql_result)
319344
agent_sql_search_result.append(each_task_sql_search_result)
345+
320346
sub_search_task.append(agent_search_result[i]["query"])
347+
log_info = ""
348+
else:
349+
log_info = agent_search_result[i]["query"] + "The SQL error Info: " + each_task_res["error_info"] + "。"
350+
log_id = generate_log_id()
351+
LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql=each_task_res["sql"],
352+
query=search_box + "; The sub task is " + agent_search_result[i]["query"],
353+
intent="agent_search",
354+
log_info=log_info,
355+
time_str=current_time)
321356
agent_data_analyse_result = data_analyse_tool(model_type, prompt_map, search_box,
322357
json.dumps(filter_deep_dive_sql_result, ensure_ascii=False),
323358
"agent")
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import logging
2+
3+
from nlq.data_access.dynamo_query_log import DynamoQueryLogDao
4+
5+
logger = logging.getLogger(__name__)
6+
7+
8+
class LogManagement:
9+
query_log_dao = DynamoQueryLogDao()
10+
11+
@classmethod
12+
def add_log_to_database(cls, log_id, profile_name, sql, query, intent, log_info, time_str):
13+
cls.query_log_dao.add_log(log_id, profile_name, sql, query, intent, log_info, time_str)

application/nlq/data_access/dynamo_query_log.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
DYNAMODB_AWS_REGION = os.environ.get('DYNAMODB_AWS_REGION')
1212

1313

14-
class DynamoQueryLog:
14+
class DynamoQueryLogEntity:
1515
def __init__(self, log_id, profile_name, sql, query, intent, log_info, time_str):
1616
self.log_id = log_id
1717
self.profile_name = profile_name
@@ -101,7 +101,14 @@ def create_table(self):
101101
raise
102102

103103
def add(self, entity):
104-
self.table.put_item(Item=entity.to_dict())
104+
try:
105+
self.table.put_item(Item=entity.to_dict())
106+
except Exception as e:
107+
logger.error("add log entity is error {}",e)
105108

106109
def update(self, entity):
107110
self.table.put_item(Item=entity.to_dict())
111+
112+
def add_log(self, log_id, profile_name, sql, query, intent, log_info, time_str):
113+
entity = DynamoQueryLogEntity(log_id, profile_name, sql, query, intent, log_info, time_str)
114+
self.add(entity)

application/utils/constant.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,5 @@
22
PROFILE_QUESTION_TABLE_NAME = 'NlqSuggestedQuestion'
33
DEFAULT_PROMPT_NAME = 'suggested_question_prompt_default'
44
ACTIVE_PROMPT_NAME = 'suggested_question_prompt_active'
5-
BEDROCK_MODEL_IDS = ['anthropic.claude-3-sonnet-20240229-v1:0', 'anthropic.claude-3-opus-20240229-v1:0',
6-
'anthropic.claude-3-haiku-20240307-v1:0', 'mistral.mixtral-8x7b-instruct-v0:1',
7-
'meta.llama3-70b-instruct-v1:0']
5+
BEDROCK_MODEL_IDS = ['anthropic.claude-3-sonnet-20240229-v1:0', 'anthropic.claude-3-haiku-20240307-v1:0',
6+
'mistral.mixtral-8x7b-instruct-v0:1', 'meta.llama3-70b-instruct-v1:0']

application/utils/tool.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,44 @@
11
import logging
2+
import time
3+
import random
4+
from datetime import datetime
25

36
logger = logging.getLogger(__name__)
47
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
58

69

7-
8-
9-
1010
def get_generated_sql(generated_sql_response):
1111
sql = ""
1212
try:
1313
return generated_sql_response.split("<sql>")[1].split("</sql>")[0]
1414
except IndexError:
1515
logger.error("No SQL found in the LLM's response")
1616
logger.error(generated_sql_response)
17-
return sql
17+
return sql
18+
19+
20+
def generate_log_id():
21+
# 获取当前时间戳,精确到微秒
22+
timestamp = int(time.time() * 1000000)
23+
# 添加随机数以增加唯一性
24+
random_part = random.randint(0, 9999)
25+
# 拼接时间戳和随机数生成logID
26+
log_id = f"{timestamp}{random_part:04d}"
27+
return log_id
28+
29+
30+
def get_current_time():
31+
# 获取当前时间
32+
now = datetime.now()
33+
# 格式化时间,包括毫秒部分
34+
# 注意:strftime默认不直接支持毫秒,需要单独处理
35+
formatted_time = now.strftime('%Y-%m-%d %H:%M:%S')
36+
return formatted_time
37+
38+
39+
def get_generated_sql_explain(generated_sql_response):
40+
index = generated_sql_response.find("</sql>")
41+
if index != -1:
42+
return generated_sql_response[index + len("</sql>"):]
43+
else:
44+
return generated_sql_response

0 commit comments

Comments
 (0)