Skip to content
This repository was archived by the owner on Nov 23, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ final_json.json
./app.db
./instance
app.db
thumbnails
thumbnails
docker-compose.override.yml
5 changes: 3 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
version: "3"

services:
app:
build: .
Expand All @@ -14,5 +12,8 @@ services:
- LANGUAGE_CODE=de
# your db secret (random string)
- DB_SECRET=
# the AI provider to use (duckai, openai) (defaults to duckai)
# - AI_MODULE=openai
# - OPENAI_API_KEY=
volumes:
- ./app.db:/app/app.db
20 changes: 13 additions & 7 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import argparse
import re
from dotenv import load_dotenv
from scrapers.scrape_for_mealie import scrape_recipe_for_mealie
from scrapers.scrape_for_tandoor import scrape_recipe_for_tandoor
from scrapers.scraper_service import ScraperService

load_dotenv()

Expand Down Expand Up @@ -45,12 +44,19 @@ def main():
if not is_valid_url(args.url, args.platform):
raise ValueError("Invalid URL. Please provide a valid post URL.")

if args.mode == 'mealie' or args.mode == 'm':
scrape_recipe_for_mealie(args.url, args.platform)
elif args.mode == 'tandoor' or args.mode == 't':
scrape_recipe_for_tandoor(args.url, args.platform)
else:
# Set the provider environment variable according to the CLI mode
import os
mode_map = {
'mealie': 'mealie',
'm': 'mealie',
'tandoor': 'tandoor',
't': 'tandoor'
}
provider = mode_map.get(args.mode.lower())
if not provider:
raise ValueError("Invalid mode. Please specify either 'mealie'/'m' or 'tandoor'/'t'")
os.environ['RECIPE_PROVIDER'] = provider
ScraperService.scrape_recipe(args.url, args.platform)

if __name__ == '__main__':
main()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
openai>=1.0.0
aiodns==3.2.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.11
Expand Down
22 changes: 22 additions & 0 deletions scrapers/ai_modules/ai_module_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from abc import ABC, abstractmethod

class AIModuleInterface(ABC):
    """Common contract for the AI backends in this package.

    A backend receives the recipe text once as context and is then asked a
    series of prompts about it. The return conventions described on each
    method are the ones followed by the implementations shipped alongside
    this interface; all of them signal failure by returning a falsy value
    rather than by raising.
    """

    @abstractmethod
    def initialize_chat(self, context):
        """Make ``context`` (the recipe text) available to later prompts.

        Implementations return True on success and False on failure.
        """

    @abstractmethod
    def send_raw_prompt(self, prompt):
        """Send ``prompt`` to the backend and return its unparsed response.

        The shape of the raw response is backend-specific; it may be None
        when no response could be obtained.
        """

    @abstractmethod
    def send_json_prompt(self, prompt):
        """Send ``prompt`` and return the JSON document found in the reply.

        Implementations return the parsed JSON value, or None when the reply
        contains no parseable JSON.
        """

    @abstractmethod
    def get_number_of_steps(self, caption=None):
        """Ask the backend how many steps the recipe has.

        ``caption`` is optional recipe text; how it is used is up to the
        backend. Implementations return an int, or None when no number could
        be extracted from the reply.
        """

    @abstractmethod
    def process_recipe_part(self, part, mode="", step_number=None, context=None):
        """Ask the backend to fill out the JSON template ``part``.

        ``mode`` selects the kind of prompt to build, ``step_number``
        identifies the step for per-step prompts, and ``context`` is optional
        extra recipe context. Implementations return the parsed JSON value,
        or None on failure.
        """
141 changes: 141 additions & 0 deletions scrapers/ai_modules/chat_gpt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import os
import json
import re
import openai
from .ai_module_interface import AIModuleInterface

class ChatGPTModule(AIModuleInterface):
    """AI backend that answers recipe prompts through the OpenAI chat API.

    The recipe context is stored on the instance and sent as a system
    message with every request. Each request is built from scratch (system
    message plus one user message), so no conversation history is carried
    from one prompt to the next.
    """

    # Opening instruction shared by every JSON-producing prompt.
    _JSON_ONLY = "Please respond ONLY with a valid JSON code block (```json ... ```).\n"

    # Mode-specific instruction lines for process_recipe_part. The "step"
    # mode is built separately because it interpolates the step number.
    _MODE_INSTRUCTIONS = {
        "info": (
            "Fill out the fields: 'author', 'description', 'recipeYield', 'prepTime', 'cooktime'.\n"
            "- 'prepTime' and 'cooktime' format: PT1H for one hour, PT15M for 15 minutes.\n"
        ),
        "ingredients": "Append the ingredients to the 'recipeIngredient' list. One ingredient per line.\n",
        "name": "Fill out the field 'name' with a short, clear recipe name.\n",
        "nutrition": "Fill out the fields: 'calories' and 'fatContent' as strings.\n",
        "instructions": (
            "Write the instruction as one long string. No string separation, just one long text! "
            "Don't add ingredients here.\n"
        ),
    }

    # Instructions used when the mode is empty or not recognised.
    _DEFAULT_INSTRUCTIONS = "Fill out the specified sections of the document.\n"

    def __init__(self, api_key=None, model="gpt-5"):
        """Configure the OpenAI client.

        Args:
            api_key: API key to use; falls back to the OPENAI_API_KEY
                environment variable when omitted.
            model: Name of the chat model to query.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.model = model
        # The module-level openai client used below reads its key from here.
        openai.api_key = self.api_key
        self.context = None

    def initialize_chat(self, context):
        """Store ``context`` so it accompanies every later prompt.

        Returns:
            Always True; nothing is sent to the API at this point.
        """
        self.context = context
        return True

    def send_raw_prompt(self, prompt):
        """Send ``prompt`` as a user message and return the reply text.

        The stored recipe context, if any, is sent first as a system message.

        Returns:
            The content of the first choice, which may be None if the API
            returns a message without text content.
        """
        messages = []
        if self.context:
            messages.append({"role": "system", "content": f"Recipe context: {self.context}"})
        messages.append({"role": "user", "content": prompt})
        response = openai.chat.completions.create(
            model=self.model,
            messages=messages
        )
        return response.choices[0].message.content

    def send_json_prompt(self, prompt):
        """Send ``prompt`` and parse the JSON contained in the reply.

        A ```json fenced block is preferred; otherwise the span from the
        first ``{`` to the last ``}`` in the reply is tried.

        Returns:
            The parsed JSON value, or None when the reply is empty, contains
            no JSON candidate, or the candidate does not parse.
        """
        # `or ""` guards against a None reply, which re.search cannot handle.
        raw = self.send_raw_prompt(prompt) or ""
        match = (
            re.search(r"```json\s*(.*?)```", raw, re.DOTALL)
            or re.search(r"({.*})", raw, re.DOTALL)
        )
        candidate = match.group(1) if match else ""
        if not candidate:
            print("[DEBUG] No valid JSON found in response.")
            return None
        try:
            return json.loads(candidate)
        except (ValueError, RecursionError) as e:
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested replies.
            print(f"[DEBUG] JSON parsing error: {e}")
            return None

    def get_number_of_steps(self, caption=None):
        """Ask the model how many steps the recipe has.

        Args:
            caption: Recipe text to use as context. When omitted, the context
                previously set through initialize_chat is kept, so the
                question still refers to a recipe.

        Returns:
            The first integer found in the reply, or None if no attempt
            produced a number.
        """
        # Only replace the stored context when a caption is actually given;
        # overwriting it with None would leave the model without a recipe.
        if caption is not None:
            self.initialize_chat(caption)
        prompt = (
            "How many steps are in this recipe? Respond only with a single integer. "
            "Do not include any explanation, text, units, or formatting. Only reply with the number."
        )
        max_attempts = 3
        for _ in range(max_attempts):
            raw = self.send_raw_prompt(prompt)
            # The first run of digits covers both a bare number and a number
            # embedded in surrounding text.
            number = re.search(r"\d+", raw or "")
            if number:
                return int(number.group())
        print(f"[DEBUG] Failed to extract number of steps after {max_attempts} attempts.")
        return None

    @staticmethod
    def _format_context(context):
        """Return the context preamble for a prompt, or '' without context."""
        if not context:
            return ""
        if isinstance(context, dict):
            return f"Recipe context (JSON): {json.dumps(context, ensure_ascii=False)}\n"
        return f"Recipe context: {context}\n"

    @staticmethod
    def _step_instructions(step_number):
        """Return the instruction lines for filling out one recipe step."""
        return (
            f"Fill out the following fields for step {step_number} of the recipe: 'name', 'instruction', 'ingredients', 'time', 'order', 'show_as_header', 'show_ingredients_table'.\n"
            f"- 'name' should be the step number, e.g. 'name': '{step_number}.'\n"
            f"- 'instruction' should be a clear, short description of the step.\n"
            f"- 'ingredients' should be a list of ingredient objects (max 3 per step).\n"
            f"- 'amount' must be a whole number or decimal, NOT a fraction.\n"
            f"- Do NOT repeat ingredients from previous steps.\n"
            f"- Example format: ```json {{'name': '1.', 'instruction': 'Chop onions.', 'ingredients': [{{'food': {{'name': 'onion'}}, 'amount': '1', ...}}], 'time': 5, 'order': 1, 'show_as_header': false, 'show_ingredients_table': true}}```\n"
        )

    def process_recipe_part(self, part, mode="", step_number=None, context=None):
        """Ask the model to fill out the JSON template ``part``.

        Args:
            part: JSON template to be completed; embedded in the prompt.
            mode: One of "step", "info", "ingredients", "name", "nutrition",
                "instructions"; anything else selects a generic prompt.
            step_number: Step to complete. Any non-None value selects the
                step prompt regardless of ``mode``.
            context: Optional extra context (dict or text) placed at the top
                of the prompt, in addition to the stored chat context.

        Returns:
            The parsed JSON reply, or None if none could be extracted.
        """
        if mode == "step" or step_number is not None:
            instructions = self._step_instructions(step_number)
        else:
            instructions = self._MODE_INSTRUCTIONS.get(mode, self._DEFAULT_INSTRUCTIONS)
        prompt = (
            f"{self._format_context(context)}"
            f"{self._JSON_ONLY}"
            f"{instructions}"
            f"Language: {os.getenv('LANGUAGE_CODE', 'en')}\n"
            f"JSON template: {part}"
        )
        return self.send_json_prompt(prompt)
131 changes: 131 additions & 0 deletions scrapers/ai_modules/duck_ai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import os
import re
import json
from bs4 import BeautifulSoup
from logs import setup_logging
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from .ai_module_interface import AIModuleInterface

class DuckAIModule(AIModuleInterface):
    """AI backend that drives a chat web page through a Selenium browser.

    Prompts are typed into the page's prompt textarea and responses are read
    back by parsing the page source.
    """

    # Locator of the textarea prompts are typed into.
    _PROMPT_BOX = (By.XPATH, "//textarea[@name='user-prompt']")
    # Submit button while it carries the `disabled` attribute.
    _DISABLED_SUBMIT = (By.XPATH, "//button[@type='submit' and @disabled]")
    # 10x10 rect inside a button — presumably the "stop generating" icon;
    # verify against the live page.
    _STOP_ICON = (By.XPATH, "//button//rect[@width='10' and @height='10']")

    # Text appended after the JSON template for each non-step mode.
    _MODE_SUFFIXES = {
        "info": " Only fill out author, description, recipeYield, prepTime and cooktime. The cooktime and pretime should have the format e.g. PT1H for one hour or PT15M for 15 Minutes.",
        "ingredients": " Append the ingredients to the 'recipeIngredient' list. One ingredient per line.",
        "name": " Keep the name of the recipe short.",
        "nutrition": " Only fill out calories and fatContent with a string.",
        "instructions": " Write the instruction as one long string. No string separation, just one long text! Don't add ingredients here. JSON FORMAT IN CODE WINDOW!",
    }
    # Suffix used when the mode is empty or not recognised.
    _DEFAULT_SUFFIX = ". Only complete the specified sections of the document. Ensure the response is formatted as a JSON code block enclosed in triple backticks (```json)."

    def __init__(self, browser):
        """Keep the Selenium ``browser`` and set up the module logger."""
        self.browser = browser
        self.logger = setup_logging("duck_ai")

    def initialize_chat(self, caption):
        """Send the recipe ``caption`` to the chat as context for later prompts.

        Returns:
            True once the page has finished responding, False if any step
            raised (the error is logged).
        """
        self.logger.info("Initializing chat with recipe context...")
        try:
            prompt_box = WebDriverWait(self.browser, 10).until(
                EC.presence_of_element_located(self._PROMPT_BOX)
            )
            prompt_box.send_keys(
                f"I'm going to ask you questions about this recipe. Please use this recipe information as context for all your responses: {caption}"
            )
            prompt_box.send_keys(Keys.RETURN)
            # Wait for the disabled submit button to appear, then for the
            # stop icon to be gone.
            long_wait_start = WebDriverWait(self.browser, 60)
            long_wait_start.until(EC.presence_of_element_located(self._DISABLED_SUBMIT))
            long_wait_end = WebDriverWait(self.browser, 60)
            long_wait_end.until_not(EC.presence_of_element_located(self._STOP_ICON))
            self.logger.info("Chat initialized successfully with recipe context")
            return True
        except Exception as e:
            self.logger.error(f"Failed to initialize chat: {e}", exc_info=True)
            return False

    def send_raw_prompt(self, prompt):
        """Type ``prompt`` into the chat and return the resulting page source.

        Returns:
            The browser's page source once the prompt box is clickable again,
            or None if any step raised (the error is logged).
        """
        self.logger.info(f"Sending raw prompt: {prompt[:50]}...")
        try:
            prompt_box = WebDriverWait(self.browser, 15).until(
                EC.presence_of_element_located(self._PROMPT_BOX)
            )
            WebDriverWait(self.browser, 15).until(
                EC.element_to_be_clickable(self._PROMPT_BOX)
            )
            prompt_box.clear()
            prompt_box.send_keys(prompt)
            prompt_box.send_keys(Keys.RETURN)
            # The prompt box becoming clickable again is taken as the signal
            # that generation has finished.
            WebDriverWait(self.browser, 60).until(
                EC.element_to_be_clickable(self._PROMPT_BOX)
            )
            self.logger.info("Response generation completed")
            return self.browser.page_source
        except Exception as e:
            self.logger.error(f"Failed to send prompt: {e}", exc_info=True)
            return None

    def extract_json_from_response(self, response):
        """Parse the last JSON code block found in the page source ``response``.

        Returns:
            The parsed JSON value, or None when ``response`` is empty, holds
            no ``code.language-json`` element, or parsing fails.
        """
        if not response:
            return None
        try:
            json_blocks = BeautifulSoup(response, 'html.parser').find_all(
                'code', {'class': 'language-json'}
            )
            if not json_blocks:
                self.logger.warning("No JSON code block found in the response")
                return None
            # The most recent answer is the last block on the page.
            return json.loads(json_blocks[-1].get_text())
        except Exception as e:
            self.logger.error(f"Failed to extract JSON: {e}", exc_info=True)
            return None

    def send_json_prompt(self, prompt):
        """Send ``prompt`` and return the JSON parsed from the reply, or None."""
        page_html = self.send_raw_prompt(prompt)
        return self.extract_json_from_response(page_html)

    def get_number_of_steps(self, caption=None):
        """Ask the chat how many steps the recipe has.

        ``caption`` is accepted for interface compatibility and is not used;
        the question relies on the context already present in the chat.

        Returns:
            The first integer in the first paragraph of the last response
            container, or None if it could not be determined.
        """
        self.logger.info("Getting number of recipe steps...")
        try:
            page_html = self.send_raw_prompt(
                "How many steps are in this recipe? Please respond with only a number."
            )
            if page_html:
                # Response containers are located by a generated CSS class
                # name copied from the page.
                answers = BeautifulSoup(page_html, 'html.parser').find_all(
                    'div', {'class': 'VrBPSncUavA1d7C9kAc5'}
                )
                paragraph = answers[-1].find('p') if answers else None
                if not answers:
                    problem = "No response divs found"
                elif not paragraph:
                    problem = "No paragraph found in response"
                else:
                    text = paragraph.get_text().strip()
                    first_number = re.search(r'\d+', text)
                    if first_number:
                        number_of_steps = int(first_number.group())
                        self.logger.info(f"Found {number_of_steps} steps in the recipe")
                        return number_of_steps
                    problem = f"No number found in response: {text}"
                self.logger.warning(problem)
            self.logger.warning("Could not determine number of steps")
            return None
        except Exception as e:
            self.logger.error(f"Error in get_number_of_steps: {e}", exc_info=True)
            return None

    def process_recipe_part(self, part, mode="", step_number=None, context=None):
        """Ask the chat to fill out the JSON template ``part``.

        Args:
            part: JSON template to be completed; embedded in the prompt.
            mode: One of "step", "info", "ingredients", "name", "nutrition",
                "instructions"; anything else selects a generic prompt.
            step_number: Step to complete. Any non-None value selects the
                step prompt regardless of ``mode``.
            context: Accepted for interface compatibility; not used here.

        Returns:
            The parsed JSON reply, or None when the reply is empty, missing,
            or an error occurred (which is logged).
        """
        try:
            prompt = f"Write your Response in the language {os.getenv('LANGUAGE_CODE', 'en')}. Please fill out this JSON document {part}"
            if mode == "step" or step_number is not None:
                prompt += f". Only complete the specified sections. Only complete step {step_number} of the recipe. If the step has more than 3 ingredients, only complete the first 3 and finish the JSON object. The name of the step should be the step number e.g. 'name': '{step_number}.'. Only include the current instruction description in the instruction field. The amount value of the ingredient can only be a whole number or a decimal NOT A FRACTION (convert it to a decimal). If an ingredient has already been mentioned in a previous step, do not include it again as an ingredient in this step. Respond with a JSON code block enclosed in triple backticks (```json)."
            else:
                prompt += self._MODE_SUFFIXES.get(mode, self._DEFAULT_SUFFIX)
            result = self.send_json_prompt(prompt)
            if not result:
                self.logger.warning(f"No valid response for {mode if mode else 'general'} data")
                return None
            self.logger.info(f"{mode if mode else 'General'} data processed successfully")
            return result
        except Exception as e:
            self.logger.error(f"Error processing {mode if mode else 'recipe part'}: {e}", exc_info=True)
            return None
Loading