@@ -94,6 +94,7 @@ class EnrichmentContext:
9494 input_source_type : Optional [str ] = None # 'ENTITY', 'URL', or 'TEXT_FROM_COLUMN' (where the input comes from)
9595 input_data : Optional [str ] = None # The actual data to use (e.g., "plaid.com" or text from another cell)
9696 plan : Optional [dict ] = None # Output of the Planner node: structured plan for what to do next
97+ custom_prompt : Optional [str ] = None # Custom prompt/instruction for AI agent
9798
9899 # Existing/legacy fields for enrichment
99100 search_result : Optional [dict ] = None # Results from Tavily search or other sources
@@ -108,23 +109,81 @@ def __init__(self, tavily_client, llm_provider: LLMProvider):
async def search_tavily(self, state: EnrichmentContext):
    """Run a Tavily web search for the current enrichment state.

    Builds a search query — preferring an explicit ``search_query`` from the
    Planner node's plan, otherwise composing one from the custom prompt,
    input data and/or target value — then executes the blocking Tavily
    client call in a worker thread so the event loop is not stalled.

    Args:
        state: Enrichment context; reads ``plan``, ``custom_prompt``,
            ``input_data``, ``target_value`` and ``column_name``.

    Returns:
        dict: ``{"search_result": <raw Tavily response dict>}`` to be merged
        into the graph state.

    Raises:
        Exception: re-raises any error from the Tavily client after logging.
    """
    try:
        # Keep the verbose state dump at DEBUG so INFO logs stay readable and
        # potentially sensitive cell data is not emitted by default; lazy
        # %-style args avoid formatting cost when DEBUG is disabled.
        logger.debug(
            "search_tavily state - column_name=%s target_value=%s "
            "input_source_type=%s input_data=%s custom_prompt=%s plan=%s",
            state.column_name, state.target_value, state.input_source_type,
            state.input_data, state.custom_prompt, state.plan,
        )

        # Prefer a custom search_query produced by the Planner node.
        query = None
        if state.plan and isinstance(state.plan, dict):
            query = state.plan.get("search_query")

        if not query:
            # Current year biases time-sensitive queries (CEO, leadership,
            # current status, ...) toward recent results.
            import datetime
            current_year = datetime.datetime.now().year

            # Canonical spellings for frequently-seen company-name variants;
            # a single dict lookup replaces the previous if/elif chain.
            canonical_names = {
                'linked in': 'LinkedIn', 'linkedin inc': 'LinkedIn',
                'linkedin corp': 'LinkedIn',
                'amazon inc': 'Amazon', 'amazon.com': 'Amazon',
                'amazon com': 'Amazon',
                'microsoft corp': 'Microsoft', 'microsoft inc': 'Microsoft',
                'msft': 'Microsoft',
                'google inc': 'Google', 'alphabet inc': 'Google',
                'alphabet': 'Google',
                'apple inc': 'Apple', 'apple computer': 'Apple',
                'meta': 'Meta', 'facebook inc': 'Meta',
                'meta platforms': 'Meta',
            }

            def normalize_company_name(company_name):
                """Map common company-name variants to a canonical form."""
                if not company_name:
                    return company_name
                return canonical_names.get(
                    company_name.lower().strip(), company_name.strip()
                )

            # Fallback query construction, in priority order:
            # custom prompt + input data, custom prompt + target value,
            # then a generic "<column> of <target>" pattern.
            if state.custom_prompt and state.input_data:
                normalized_company = normalize_company_name(state.input_data)
                query = f"{state.custom_prompt} {normalized_company} current {current_year}"
            elif state.custom_prompt:
                normalized_company = normalize_company_name(state.target_value)
                query = f"{state.custom_prompt} {normalized_company} current {current_year}"
            else:
                normalized_company = normalize_company_name(state.target_value)
                query = f"current {state.column_name} of {normalized_company} {current_year}"

        logger.info("FINAL SEARCH QUERY: %s", query)

        # Run the blocking Tavily search in a thread to avoid blocking the loop.
        # NOTE(review): per Tavily's API docs the `days` parameter only applies
        # when topic="news"; with auto_parameters=True it may be ignored or
        # rejected for general searches — confirm against the SDK version used.
        result = await asyncio.to_thread(
            lambda: self.tavily.search(
                query=query,
                auto_parameters=True,
                search_depth="advanced",
                max_results=5,
                include_raw_content=True,
                days=365,  # bias toward results from the last year
            )
        )
        # .get() avoids a KeyError when Tavily omits the auto_parameters echo.
        logger.debug("auto_parameters: %s", result.get("auto_parameters"))
        logger.debug("Tavily search result: %s", result)
        # Return the search result so it is merged into the graph state.
        return {"search_result": result}
    except Exception as e:
        logger.error("Error in search_tavily: %s", e)
        raise
@@ -147,22 +206,35 @@ async def extract_minimal_answer( self, state: EnrichmentContext ) -> dict:
147206 content = "\n \n ---\n \n " .join (result_contents )
148207 print (f"Content: { content } " )
149208 try :
209+ # Get current year for time-sensitive extraction
210+ import datetime
211+ current_year = datetime .datetime .now ().year
212+
150213 # Build the prompt for the LLM, instructing it to extract only the direct answer
151214 extraction_instructions = ""
152215 if state .plan and isinstance (state .plan , dict ):
153216 extraction_instructions = state .plan .get ("extraction_instructions" , "" )
154217 prompt = f"""
155- Extract the { state .column_name } of { state .target_value } from this search result:
218+ Extract the CURRENT { state .column_name } of { state .target_value } from this search result:
156219
157220 { content }
158221
159222 Rules:
160- 1. Provide ONLY the direct answer - no explanations
161- 2. Be concise
162- 3. If not found, respond \" Information not found\"
163- 4. No citations or references
223+ 1. PRIORITIZE the most recent information (look for dates from { current_year } or late 2023)
224+ 2. If there are multiple entries, choose the most recent one
225+ 3. For leadership positions (CEO, President, etc.), always look for "current" or most recent information
226+ 4. Provide ONLY the direct answer - no explanations
227+ 5. Be concise
228+ 6. If not found, respond \" Information not found\"
229+ 7. No citations or references
230+
231+ Example:
232+ Search result: "Amazon's CEO is Andy Jassy as of 2021. Previously, Jeff Bezos was CEO until July 2021."
233+ Direct Answer: Andy Jassy
234+
164235 { extraction_instructions }
165- Direct Answer:
236+
237+ Current { state .column_name } of { state .target_value } :
166238 """
167239 logger .info (f"Extracting answer for { state .target_value } " )
168240
@@ -181,18 +253,33 @@ async def generate_plan(self, state: EnrichmentContext) -> dict:
181253 Planner node: Uses the LLM to analyze the user's request and input data, and produces a structured plan.
182254 The plan determines the next action (e.g., analyze a URL, search the web, analyze text, etc.).
183255 """
256+ # Get custom prompt and context for better planning
257+ custom_prompt = state .custom_prompt
258+ context_info = ""
259+ if state .context_values :
260+ context_info = f"Additional context: { state .context_values } "
261+
262+ # Get current year for time-sensitive searches
263+ import datetime
264+ current_year = datetime .datetime .now ().year
265+
184266 # Compose a prompt for the LLM to analyze the user's intent and input
185267 prompt = f"""
186268 You are an AI research agent planner. Your job is to analyze the user's enrichment request and input data, and return a JSON plan for the next step.
187269
188270 User's column/question: { state .column_name }
189271 Input source type: { state .input_source_type }
190272 Input data: { state .input_data }
273+ Custom prompt/instruction: { custom_prompt or "None" }
274+ { context_info }
191275
192276 Instructions:
193277 - If the input source type is 'URL', and the question is about analyzing or extracting from that URL, set action to 'analyze_url' and include 'source_url' and 'extraction_instructions'.
194278 - If the input source type is 'TEXT_FROM_COLUMN', and the question is about analyzing or extracting from that text, set action to 'analyze_text' and include 'source_text' and 'extraction_instructions'.
195279 - If the input source type is 'ENTITY' or the question is general, set action to 'search_web' and include 'search_query' and 'extraction_instructions'.
280+ - When creating search_query, ALWAYS include "current { current_year } " or "latest" for time-sensitive information like CEO, leadership, current status, etc.
281+ - For leadership/CEO queries, include both "current" and "{ current_year } " in the search query.
282+ - If custom prompt is provided, incorporate it into the search query and extraction instructions.
196283 - Always include a concise 'extraction_instructions' field that tells the next node what to extract or analyze.
197284 - Respond ONLY with a valid JSON object, no explanation.
198285
@@ -205,8 +292,8 @@ async def generate_plan(self, state: EnrichmentContext) -> dict:
205292 or
206293 {{
207294 "action": "search_web",
208- "search_query": "CEO of Amazon",
209- "extraction_instructions": "Extract the name of the CEO."
295+ "search_query": "current CEO of Amazon { current_year } latest ",
296+ "extraction_instructions": "Extract the name of the current CEO as of { current_year } ."
210297 }}
211298 or
212299 {{
@@ -219,15 +306,17 @@ async def generate_plan(self, state: EnrichmentContext) -> dict:
219306 """
220307 try :
221308 # Use the LLM to generate the plan as a JSON string
309+ #plan_json = await self.llm.generate(prompt)
222310 plan_json = await self .llm .generate (prompt )
223311 import json
224312 plan = json .loads (plan_json )
225313 # Attach the plan to the state
226314 return {"plan" : plan }
227315 except Exception as e :
228316 logger .error (f"Error in generate_plan: { str (e )} " )
229- # Fallback: default to web search
230- return {"plan" : {"action" : "search_web" , "search_query" : state .column_name , "extraction_instructions" : f"Extract the answer to: { state .column_name } " }}
317+ # Fallback: default to web search with current year
318+ fallback_query = f"current { state .column_name } of { state .target_value } { current_year } "
319+ return {"plan" : {"action" : "search_web" , "search_query" : fallback_query , "extraction_instructions" : f"Extract the current answer to: { state .column_name } " }}
231320
232321 async def scrape_website (self , state : EnrichmentContext ):
233322 """
@@ -345,7 +434,10 @@ async def enrich_cell_with_graph(
345434 target_value : str ,
346435 context_values : Dict [str , str ],
347436 tavily_client ,
348- llm_provider : LLMProvider
437+ llm_provider : LLMProvider ,
438+ input_source_type : Optional [str ] = "ENTITY" ,
439+ input_data : Optional [str ] = None ,
440+ custom_prompt : Optional [str ] = None
349441) -> Dict :
350442 """Helper function to enrich a single cell using langgraph."""
351443 try :
@@ -357,6 +449,9 @@ async def enrich_cell_with_graph(
357449 column_name = column_name ,
358450 target_value = target_value ,
359451 context_values = context_values ,
452+ input_source_type = input_source_type ,
453+ input_data = input_data or target_value ,
454+ custom_prompt = custom_prompt ,
360455 search_result = None ,
361456 answer = None
362457 )
@@ -367,7 +462,7 @@ async def enrich_cell_with_graph(
367462 return result #, result['urls']
368463 except Exception as e :
369464 logger .error (f"Error in enrich_cell_with_graph: { str (e )} " )
370- return " Error during enrichment"
465+ return { "answer" : " Error during enrichment", "search_result" : None }
371466
372467# Example usage block for running the enrichment pipeline directly
373468if __name__ == "__main__" :
@@ -388,7 +483,7 @@ async def enrich_cell_with_graph(
388483 openai_client = AsyncOpenAI (api_key = os .getenv ("OPENAI_API_KEY" ))
389484 # Initialize Gemini model (Google Generative AI)
390485 gemini_model = GenerativeModel (model_name = "gemini-1.5-flash" )
391-
486+ #gemini_model = GenerativeModel(api_key=os.getenv("GEMINI_API_KEY"))
392487 # Example: Create OpenAI provider and pipeline
393488 openai_provider = OpenAIProvider (openai_client )
394489 pipeline_openai = EnrichmentPipeline (tavily_client , openai_provider )
0 commit comments