Skip to content
This repository was archived by the owner on Nov 23, 2025. It is now read-only.

Commit 257d8c2

Browse files
authored
Merge pull request #19 from pwnyprod/feature/adding-openai-provider
Feature/adding OpenAI provider and some refactoring
2 parents a39d2cf + 4c2958c commit 257d8c2

17 files changed

+855
-621
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ final_json.json
66
./app.db
77
./instance
88
app.db
9-
thumbnails
9+
thumbnails
10+
docker-compose.override.yml

docker-compose.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
version: "3"
2-
31
services:
42
app:
53
build: .
@@ -14,5 +12,8 @@ services:
1412
- LANGUAGE_CODE=de
1513
# your db secret (random string)
1614
- DB_SECRET=
15+
# the AI provider to use (duckai, openai) (defaults to duckai)
16+
# - AI_MODULE=openai
17+
# - OPENAI_API_KEY=
1718
volumes:
1819
- ./app.db:/app/app.db

main.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import argparse
22
import re
33
from dotenv import load_dotenv
4-
from scrapers.scrape_for_mealie import scrape_recipe_for_mealie
5-
from scrapers.scrape_for_tandoor import scrape_recipe_for_tandoor
4+
from scrapers.scraper_service import ScraperService
65

76
load_dotenv()
87

@@ -45,12 +44,19 @@ def main():
4544
if not is_valid_url(args.url, args.platform):
4645
raise ValueError("Invalid URL. Please provide a valid post URL.")
4746

48-
if args.mode == 'mealie' or args.mode == 'm':
49-
scrape_recipe_for_mealie(args.url, args.platform)
50-
elif args.mode == 'tandoor' or args.mode == 't':
51-
scrape_recipe_for_tandoor(args.url, args.platform)
52-
else:
47+
# Setze Provider-ENV entsprechend CLI-Mode
48+
import os
49+
mode_map = {
50+
'mealie': 'mealie',
51+
'm': 'mealie',
52+
'tandoor': 'tandoor',
53+
't': 'tandoor'
54+
}
55+
provider = mode_map.get(args.mode.lower())
56+
if not provider:
5357
raise ValueError("Invalid mode. Please specify either 'mealie'/'m' or 'tandoor'/'t'")
58+
os.environ['RECIPE_PROVIDER'] = provider
59+
ScraperService.scrape_recipe(args.url, args.platform)
5460

5561
if __name__ == '__main__':
5662
main()

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
openai>=1.0.0
12
aiodns==3.2.0
23
aiohappyeyeballs==2.4.4
34
aiohttp==3.11.11
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from abc import ABC, abstractmethod
2+
3+
class AIModuleInterface(ABC):
    """Contract that every AI backend used by the scrapers has to fulfil.

    A backend is first primed with the recipe text and is then asked a series
    of prompts about it. The return conventions described below are the ones
    followed by the ChatGPT and DuckAI implementations that accompany this
    interface; the interface itself does not enforce them.
    """

    @abstractmethod
    def initialize_chat(self, context):
        """Give the backend the recipe text to use for the following prompts.

        Args:
            context: The recipe text (e.g. a post caption).

        Returns:
            The accompanying implementations return True on success and
            False when the backend could not be primed.
        """

    @abstractmethod
    def send_raw_prompt(self, prompt):
        """Send ``prompt`` to the backend and return its unparsed answer.

        Args:
            prompt: The text to send.

        Returns:
            The raw answer; the accompanying implementations return a string,
            or None when sending failed.
        """

    @abstractmethod
    def send_json_prompt(self, prompt):
        """Send ``prompt`` and return the JSON document found in the answer.

        Args:
            prompt: The text to send.

        Returns:
            The decoded JSON value, or None when the answer contained no
            usable JSON.
        """

    @abstractmethod
    def get_number_of_steps(self, caption=None):
        """Ask the backend how many steps the recipe has.

        Args:
            caption: Optional recipe text. Whether it is used is up to the
                implementation.

        Returns:
            The step count as an int, or None when it could not be determined.
        """

    @abstractmethod
    def process_recipe_part(self, part, mode="", step_number=None, context=None):
        """Have the backend fill in one part of a recipe JSON template.

        Args:
            part: The JSON template to complete.
            mode: Which kind of part is being filled; the accompanying
                implementations recognise "step", "info", "ingredients",
                "name", "nutrition" and "instructions", and fall back to a
                generic prompt for anything else.
            step_number: Number of the step to fill in; passing a value
                selects the step prompt regardless of ``mode``.
            context: Optional extra recipe context for the prompt.

        Returns:
            The decoded JSON answer, or None when no valid answer came back.
        """

scrapers/ai_modules/chat_gpt.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import os
2+
import json
3+
import re
4+
import openai
5+
from .ai_module_interface import AIModuleInterface
6+
7+
class ChatGPTModule(AIModuleInterface):
    """AI backend that answers recipe prompts through the OpenAI chat API.

    The recipe text is kept in ``self.context`` and is sent as a system
    message with every request, so each call is self-contained (no chat
    history is kept between calls).
    """

    # Every prompt has the same frame: optional context, this header, the
    # mode-specific rules, then the language and the JSON template.
    _JSON_ONLY_HEADER = "Please respond ONLY with a valid JSON code block (```json ... ```).\n"

    # Mode-specific rules for every mode except "step" (which needs the
    # step number interpolated, see _step_instructions).
    _MODE_INSTRUCTIONS = {
        "info": (
            "Fill out the fields: 'author', 'description', 'recipeYield', 'prepTime', 'cooktime'.\n"
            "- 'prepTime' and 'cooktime' format: PT1H for one hour, PT15M for 15 minutes.\n"
        ),
        "ingredients": (
            "Append the ingredients to the 'recipeIngredient' list. One ingredient per line.\n"
        ),
        "name": (
            "Fill out the field 'name' with a short, clear recipe name.\n"
        ),
        "nutrition": (
            "Fill out the fields: 'calories' and 'fatContent' as strings.\n"
        ),
        "instructions": (
            "Write the instruction as one long string. No string separation, just one long text! Don't add ingredients here.\n"
        ),
    }
    _DEFAULT_INSTRUCTIONS = "Fill out the specified sections of the document.\n"

    # JSON inside a ```json fenced block, and the widest {...} span as fallback.
    _FENCED_JSON_RE = re.compile(r"```json\s*(.*?)```", re.DOTALL)
    _BARE_OBJECT_RE = re.compile(r"({.*})", re.DOTALL)
    _FIRST_NUMBER_RE = re.compile(r"\d+")

    _MAX_STEP_COUNT_ATTEMPTS = 3

    def __init__(self, api_key=None, model="gpt-5"):
        """Store the credentials and model to use.

        Args:
            api_key: OpenAI API key; falls back to the ``OPENAI_API_KEY``
                environment variable when omitted.
            model: Name of the chat model to request.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.model = model
        # NOTE: this sets the key on the openai module itself, so it is
        # shared by everything in the process that uses the module-level API.
        openai.api_key = self.api_key
        self.context = None

    def initialize_chat(self, context):
        """Remember ``context`` as the recipe text for later prompts.

        Returns:
            Always True; nothing is sent to the API at this point.
        """
        self.context = context
        return True

    def send_raw_prompt(self, prompt):
        """Send ``prompt`` as a user message and return the model's reply text.

        When a context has been set it is sent first as a system message.

        Returns:
            The content of the first choice's message as returned by the API.
        """
        messages = []
        if self.context:
            messages.append({"role": "system", "content": f"Recipe context: {self.context}"})
        messages.append({"role": "user", "content": prompt})
        response = openai.chat.completions.create(
            model=self.model,
            messages=messages
        )
        return response.choices[0].message.content

    def send_json_prompt(self, prompt):
        """Send ``prompt`` and decode the JSON contained in the reply.

        A ```json fenced block is tried first; the widest ``{...}`` span of
        the reply is tried next (also when the fenced block does not parse).

        Returns:
            The decoded JSON value, or None when the reply is empty or
            contains nothing that parses as JSON.
        """
        raw = self.send_raw_prompt(prompt)
        # The reply content can be None/empty; the regex search would raise
        # TypeError on None, so bail out early.
        if not raw:
            print("[DEBUG] No valid JSON found in response.")
            return None

        candidates = []
        fenced = self._FENCED_JSON_RE.search(raw)
        if fenced:
            candidates.append(fenced.group(1))
        bare = self._BARE_OBJECT_RE.search(raw)
        if bare:
            candidates.append(bare.group(1))

        for candidate in candidates:
            if not candidate.strip():
                continue
            try:
                return json.loads(candidate)
            except ValueError as e:  # json.JSONDecodeError is a ValueError
                print(f"[DEBUG] JSON parsing error: {e}")

        print("[DEBUG] No valid JSON found in response.")
        return None

    def get_number_of_steps(self, caption=None):
        """Ask the model how many steps the recipe has.

        Args:
            caption: Recipe text to use as context. When omitted, the context
                set by an earlier ``initialize_chat`` call is kept instead of
                being wiped.

        Returns:
            The first integer found in the reply, or None when no reply
            contained a number after the maximum number of attempts.
        """
        if caption is not None:
            self.initialize_chat(caption)
        prompt = (
            "How many steps are in this recipe? Respond only with a single integer. "
            "Do not include any explanation, text, units, or formatting. Only reply with the number."
        )
        for _ in range(self._MAX_STEP_COUNT_ATTEMPTS):
            raw = self.send_raw_prompt(prompt)
            # A reply that is only a number and a reply with a number buried
            # in text both resolve to the first run of digits.
            found = self._FIRST_NUMBER_RE.search(raw or "")
            if found:
                return int(found.group())
        print(f"[DEBUG] Failed to extract number of steps after {self._MAX_STEP_COUNT_ATTEMPTS} attempts.")
        return None

    @staticmethod
    def _step_instructions(step_number):
        """Return the prompt rules for filling in a single recipe step."""
        return (
            f"Fill out the following fields for step {step_number} of the recipe: 'name', 'instruction', 'ingredients', 'time', 'order', 'show_as_header', 'show_ingredients_table'.\n"
            f"- 'name' should be the step number, e.g. 'name': '{step_number}.'\n"
            f"- 'instruction' should be a clear, short description of the step.\n"
            f"- 'ingredients' should be a list of ingredient objects (max 3 per step).\n"
            f"- 'amount' must be a whole number or decimal, NOT a fraction.\n"
            f"- Do NOT repeat ingredients from previous steps.\n"
            f"- Example format: ```json {{'name': '1.', 'instruction': 'Chop onions.', 'ingredients': [{{'food': {{'name': 'onion'}}, 'amount': '1', ...}}], 'time': 5, 'order': 1, 'show_as_header': false, 'show_ingredients_table': true}}```\n"
        )

    def process_recipe_part(self, part, mode="", step_number=None, context=None):
        """Build the prompt for one part of the recipe and return the JSON reply.

        Args:
            part: The JSON template the model should complete.
            mode: "step", "info", "ingredients", "name", "nutrition" or
                "instructions"; anything else yields a generic prompt.
            step_number: Step to fill in. Any non-None value selects the step
                prompt, whatever ``mode`` says.
            context: Extra context prepended to the prompt; a dict is
                serialised as JSON, anything else is inserted as text.

        Returns:
            The decoded JSON reply, or None when the reply held no valid JSON.
        """
        context_str = ""
        if context:
            if isinstance(context, dict):
                context_str = f"Recipe context (JSON): {json.dumps(context, ensure_ascii=False)}\n"
            else:
                context_str = f"Recipe context: {context}\n"

        if mode == "step" or step_number is not None:
            instructions = self._step_instructions(step_number)
        else:
            instructions = self._MODE_INSTRUCTIONS.get(mode, self._DEFAULT_INSTRUCTIONS)

        prompt = (
            f"{context_str}"
            f"{self._JSON_ONLY_HEADER}"
            f"{instructions}"
            f"Language: {os.getenv('LANGUAGE_CODE', 'en')}\n"
            f"JSON template: {part}"
        )
        return self.send_json_prompt(prompt)

scrapers/ai_modules/duck_ai.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import os
2+
import re
3+
import json
4+
from bs4 import BeautifulSoup
5+
from logs import setup_logging
6+
from selenium.webdriver.common.by import By
7+
from selenium.webdriver.common.keys import Keys
8+
from selenium.webdriver.support import expected_conditions as EC
9+
from selenium.webdriver.support.ui import WebDriverWait
10+
from .ai_module_interface import AIModuleInterface
11+
12+
class DuckAIModule(AIModuleInterface):
    """AI backend that drives a chat web page through a Selenium browser.

    Prompts are typed into the page's prompt textarea and answers are read
    back out of the page source.
    """

    # Locators for the chat page.
    _PROMPT_BOX = (By.XPATH, "//textarea[@name='user-prompt']")
    _SUBMIT_DISABLED = (By.XPATH, "//button[@type='submit' and @disabled]")
    # NOTE(review): presumably the icon shown while an answer is being
    # generated; confirm against the live page.
    _BUSY_INDICATOR = (By.XPATH, "//button//rect[@width='10' and @height='10']")

    # CSS class of the divs that hold the answers. This looks like a
    # generated class name, so it may change when the page is redeployed.
    _RESPONSE_DIV_CLASS = 'VrBPSncUavA1d7C9kAc5'

    # Text appended after the shared prompt prefix for each mode other than
    # "step" (which needs the step number interpolated).
    _MODE_SUFFIXES = {
        "info": " Only fill out author, description, recipeYield, prepTime and cooktime. The cooktime and pretime should have the format e.g. PT1H for one hour or PT15M for 15 Minutes.",
        "ingredients": " Append the ingredients to the 'recipeIngredient' list. One ingredient per line.",
        "name": " Keep the name of the recipe short.",
        "nutrition": " Only fill out calories and fatContent with a string.",
        "instructions": " Write the instruction as one long string. No string separation, just one long text! Don't add ingredients here. JSON FORMAT IN CODE WINDOW!",
    }
    _DEFAULT_SUFFIX = ". Only complete the specified sections of the document. Ensure the response is formatted as a JSON code block enclosed in triple backticks (```json)."

    def __init__(self, browser):
        """Keep the Selenium ``browser`` to drive and set up the logger."""
        self.browser = browser
        self.logger = setup_logging("duck_ai")

    def initialize_chat(self, caption):
        """Send the recipe text to the chat as context for later prompts.

        Returns:
            True once the context message has been processed, False when
            anything went wrong (the error is logged).
        """
        self.logger.info("Initializing chat with recipe context...")
        try:
            textarea = WebDriverWait(self.browser, 10).until(
                EC.presence_of_element_located(self._PROMPT_BOX)
            )
            context_prompt = f"I'm going to ask you questions about this recipe. Please use this recipe information as context for all your responses: {caption}"
            textarea.send_keys(context_prompt)
            textarea.send_keys(Keys.RETURN)
            # Wait for the submit button to become disabled, then for the
            # busy indicator to disappear again.
            WebDriverWait(self.browser, 60).until(EC.presence_of_element_located(self._SUBMIT_DISABLED))
            WebDriverWait(self.browser, 60).until_not(EC.presence_of_element_located(self._BUSY_INDICATOR))
            self.logger.info("Chat initialized successfully with recipe context")
            return True
        except Exception as e:
            self.logger.error(f"Failed to initialize chat: {e}", exc_info=True)
            return False

    def send_raw_prompt(self, prompt):
        """Type ``prompt`` into the chat and return the resulting page source.

        Returns:
            The full HTML of the page after the prompt box is clickable
            again, or None when anything went wrong (the error is logged).
        """
        self.logger.info(f"Sending raw prompt: {prompt[:50]}...")
        try:
            textarea = WebDriverWait(self.browser, 15).until(
                EC.presence_of_element_located(self._PROMPT_BOX)
            )
            WebDriverWait(self.browser, 15).until(
                EC.element_to_be_clickable(self._PROMPT_BOX)
            )
            textarea.clear()
            textarea.send_keys(prompt)
            textarea.send_keys(Keys.RETURN)
            # NOTE(review): unlike initialize_chat this only waits for the
            # prompt box to be clickable; confirm that this cannot fire
            # before the answer has finished rendering.
            WebDriverWait(self.browser, 60).until(
                EC.element_to_be_clickable(self._PROMPT_BOX)
            )
            self.logger.info("Response generation completed")
            return self.browser.page_source
        except Exception as e:
            self.logger.error(f"Failed to send prompt: {e}", exc_info=True)
            return None

    def extract_json_from_response(self, response):
        """Decode the last ``<code class="language-json">`` block in the HTML.

        Args:
            response: Page HTML as returned by ``send_raw_prompt``.

        Returns:
            The decoded JSON value, or None when ``response`` is empty, has
            no JSON code block, or the block does not parse.
        """
        if not response:
            return None
        try:
            soup = BeautifulSoup(response, 'html.parser')
            code_blocks = soup.find_all('code', {'class': 'language-json'})
            if not code_blocks:
                self.logger.warning("No JSON code block found in the response")
                return None
            # The page holds the whole conversation; the newest answer is last.
            return json.loads(code_blocks[-1].get_text())
        except Exception as e:
            self.logger.error(f"Failed to extract JSON: {e}", exc_info=True)
            return None

    def send_json_prompt(self, prompt):
        """Send ``prompt`` and return the JSON found in the answer, or None."""
        response = self.send_raw_prompt(prompt)
        return self.extract_json_from_response(response)

    def _parse_step_count(self, response):
        """Return the first integer in the last answer of ``response``.

        Logs a warning naming the missing piece and returns None when the
        answer cannot be located or holds no number.
        """
        soup = BeautifulSoup(response, 'html.parser')
        response_divs = soup.find_all('div', {'class': self._RESPONSE_DIV_CLASS})
        if not response_divs:
            self.logger.warning("No response divs found")
            return None
        paragraph = response_divs[-1].find('p')
        if not paragraph:
            self.logger.warning("No paragraph found in response")
            return None
        text = paragraph.get_text().strip()
        numbers = re.findall(r'\d+', text)
        if not numbers:
            self.logger.warning(f"No number found in response: {text}")
            return None
        return int(numbers[0])

    def get_number_of_steps(self, caption=None):
        """Ask the chat how many steps the recipe has.

        Args:
            caption: Accepted for interface compatibility; not used here.

        Returns:
            The step count as an int, or None when it could not be
            determined (the reason is logged).
        """
        self.logger.info("Getting number of recipe steps...")
        try:
            prompt = "How many steps are in this recipe? Please respond with only a number."
            response = self.send_raw_prompt(prompt)
            number_of_steps = self._parse_step_count(response) if response else None
            if number_of_steps is not None:
                self.logger.info(f"Found {number_of_steps} steps in the recipe")
                return number_of_steps
            self.logger.warning("Could not determine number of steps")
            return None
        except Exception as e:
            self.logger.error(f"Error in get_number_of_steps: {e}", exc_info=True)
            return None

    def process_recipe_part(self, part, mode="", step_number=None, context=None):
        """Build the prompt for one part of the recipe and return the JSON reply.

        Args:
            part: The JSON template the chat should complete.
            mode: "step", "info", "ingredients", "name", "nutrition" or
                "instructions"; anything else yields a generic prompt.
            step_number: Step to fill in. Any non-None value selects the step
                prompt, whatever ``mode`` says.
            context: Accepted for interface compatibility; not used here.

        Returns:
            The decoded JSON reply, or None when no valid (non-empty) reply
            came back or an error occurred (both are logged).
        """
        try:
            prefix = f"Write your Response in the language {os.getenv('LANGUAGE_CODE', 'en')}. Please fill out this JSON document {part}"
            if mode == "step" or step_number is not None:
                suffix = f". Only complete the specified sections. Only complete step {step_number} of the recipe. If the step has more than 3 ingredients, only complete the first 3 and finish the JSON object. The name of the step should be the step number e.g. 'name': '{step_number}.'. Only include the current instruction description in the instruction field. The amount value of the ingredient can only be a whole number or a decimal NOT A FRACTION (convert it to a decimal). If an ingredient has already been mentioned in a previous step, do not include it again as an ingredient in this step. Respond with a JSON code block enclosed in triple backticks (```json)."
            else:
                suffix = self._MODE_SUFFIXES.get(mode, self._DEFAULT_SUFFIX)
            result = self.send_json_prompt(prefix + suffix)
            if not result:
                self.logger.warning(f"No valid response for {mode or 'general'} data")
                return None
            self.logger.info(f"{mode or 'General'} data processed successfully")
            return result
        except Exception as e:
            self.logger.error(f"Error processing {mode or 'recipe part'}: {e}", exc_info=True)
            return None

0 commit comments

Comments
 (0)