Skip to content

Commit d4c784f

Browse files
authored
Merge pull request #68 from aws-samples/spy_dev
add delete same query
2 parents b788094 + da9f15e commit d4c784f

File tree

2 files changed

+41
-2
lines changed

2 files changed

+41
-2
lines changed

application/nlq/business/vector_store.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,20 +67,29 @@ def get_all_agent_cot_samples(cls, profile_name):
6767
def add_sample(cls, profile_name, question, answer):
    """Add a question/answer sample for a profile, replacing an identical one.

    The question is embedded once. That embedding is first handed to
    ``search_same_query`` (which deletes an already-stored match when it
    finds one) and is then passed on to ``opensearch_dao.add_sample``
    together with the index name 'uba'.

    Args:
        profile_name: Profile the sample is stored under.
        question: Sample question text; this is the text that gets embedded.
        answer: Answer forwarded to the DAO along with the question.
    """
    logger.info(f'add sample question: {question} to profile {profile_name}')
    embedding = cls.create_vector_embedding_with_bedrock(question)
    # search_same_query deletes the matching document as a side effect, so
    # by the time we log here the old sample has already been removed.
    has_same_sample = cls.search_same_query(profile_name, 1, 'uba', embedding)
    if has_same_sample:
        logger.info(f'deleted existing sample question: {question} from profile {profile_name}')
    if cls.opensearch_dao.add_sample('uba', profile_name, question, answer, embedding):
        logger.info('Sample added')
7275

7376
@classmethod
def add_entity_sample(cls, profile_name, entity, comment):
    """Add an entity sample for a profile, replacing an identical one.

    The entity text is embedded once. That embedding is first handed to
    ``search_same_query`` (which deletes an already-stored match when it
    finds one) and is then passed on to ``opensearch_dao.add_entity_sample``
    together with the index name 'uba_ner'.

    Args:
        profile_name: Profile the sample is stored under.
        entity: Entity text; this is the text that gets embedded.
        comment: Comment forwarded to the DAO along with the entity.
    """
    logger.info(f'add sample entity: {entity} to profile {profile_name}')
    embedding = cls.create_vector_embedding_with_bedrock(entity)
    # search_same_query deletes the matching document as a side effect, so
    # by the time we log here the old sample has already been removed.
    has_same_sample = cls.search_same_query(profile_name, 1, 'uba_ner', embedding)
    if has_same_sample:
        logger.info(f'deleted existing sample entity: {entity} from profile {profile_name}')
    if cls.opensearch_dao.add_entity_sample('uba_ner', profile_name, entity, comment, embedding):
        logger.info('Sample added')
7985

8086
@classmethod
def add_agent_cot_sample(cls, profile_name, entity, comment):
    """Add an agent sample query for a profile, replacing an identical one.

    The query text (passed in as ``entity``) is embedded once. That
    embedding is first handed to ``search_same_query`` (which deletes an
    already-stored match when it finds one) and is then passed on to
    ``opensearch_dao.add_agent_cot_sample`` together with the index name
    'uba_agent'.

    Args:
        profile_name: Profile the sample is stored under.
        entity: Query text; this is the text that gets embedded.
        comment: Comment forwarded to the DAO along with the query.
    """
    logger.info(f'add agent sample query: {entity} to profile {profile_name}')
    embedding = cls.create_vector_embedding_with_bedrock(entity)
    # search_same_query deletes the matching document as a side effect, so
    # by the time we log here the old sample has already been removed.
    has_same_sample = cls.search_same_query(profile_name, 1, 'uba_agent', embedding)
    if has_same_sample:
        logger.info(f'deleted existing agent sample query: {entity} from profile {profile_name}')
    if cls.opensearch_dao.add_agent_cot_sample('uba_agent', profile_name, entity, comment, embedding):
        logger.info('Sample added')
8695

@@ -124,3 +133,29 @@ def search_sample(cls, profile_name, top_k, index_name, query):
124133
logger.info(f'search sample question: {query} {index_name} from profile {profile_name}')
125134
sample_list = cls.opensearch_dao.search_sample(profile_name, top_k, index_name, query)
126135
return sample_list
136+
137+
@classmethod
def search_sample_with_embedding(cls, profile_name, top_k, index_name, query_embedding):
    """Search samples using an embedding the caller has already computed.

    All arguments are forwarded unchanged to
    ``opensearch_dao.search_sample_with_embedding`` and its result is
    returned as-is.
    """
    return cls.opensearch_dao.search_sample_with_embedding(
        profile_name, top_k, index_name, query_embedding
    )
141+
142+
@classmethod
def search_same_query(cls, profile_name, top_k, index_name, embedding):
    """Delete the stored sample that matches ``embedding``, if there is one.

    Runs an embedding search and inspects only the first hit. When that
    hit's ``_score`` is 1.0 (within a small tolerance) and ``index_name``
    is one of the known indexes, the hit's ``_id`` is passed to the
    matching delete method.

    Args:
        profile_name: Profile to search in and delete from.
        top_k: Number of hits requested from the search.
        index_name: 'uba', 'uba_ner' or 'uba_agent'; any other value
            never deletes anything.
        embedding: Query vector to compare against stored samples.

    Returns:
        True if a matching document was found and its delete method was
        called, False otherwise.
    """
    import math  # local import: the file's import block is not visible from here

    search_res = cls.search_sample_with_embedding(profile_name, top_k, index_name, embedding)
    if not search_res:
        return False

    # NOTE(review): assumes the first hit is the best-scoring one - confirm
    # against the DAO's query ordering.
    best_hit = search_res[0]
    similarity_score = best_hit["_score"]
    similarity_id = best_hit['_id']

    # The score is a computed float, so an identical vector can come back a
    # hair off 1.0; exact equality would silently miss those duplicates.
    if not math.isclose(similarity_score, 1.0, abs_tol=1e-6):
        return False

    delete_method_by_index = {
        "uba": "delete_sample",
        "uba_ner": "delete_entity_sample",
        "uba_agent": "delete_agent_cot_sample",
    }
    method_name = delete_method_by_index.get(index_name)
    if method_name is None:
        return False

    getattr(cls, method_name)(profile_name, similarity_id)
    return True

application/nlq/data_access/opensearch.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,10 @@ def delete_sample(self, index_name, profile_name, doc_id):
191191

192192
def search_sample(self, profile_name, top_k, index_name, query):
    """Embed ``query`` and search samples with the resulting vector.

    The embedding comes from ``create_vector_embedding_with_bedrock``; its
    'vector_field' entry is handed to ``search_sample_with_embedding``,
    whose result is returned unchanged.
    """
    embedded_query = create_vector_embedding_with_bedrock(query, index_name=index_name)
    query_vector = embedded_query['vector_field']
    return self.search_sample_with_embedding(profile_name, top_k, index_name, query_vector)
195+
196+
197+
def search_sample_with_embedding(self, profile_name, top_k, index_name, query_embedding):
194198
search_query = {
195199
"size": top_k, # Adjust the size as needed to retrieve more or fewer results
196200
"query": {
@@ -205,7 +209,7 @@ def search_sample(self, profile_name, top_k, index_name, query):
205209
"knn": {
206210
"vector_field": {
207211
# Make sure 'vector_field' is the name of your vector field in OpenSearch
208-
"vector": records_with_embedding['vector_field'],
212+
"vector": query_embedding,
209213
"k": top_k # Adjust k as needed to retrieve more or fewer nearest neighbors
210214
}
211215
}

0 commit comments

Comments
 (0)