doen1el · doen1el · Nov 18, 2025 · Sep 18, 2025 · Sep 18, 2025 · Sep 18, 2025
diff --git a/.gitignore b/.gitignore
@@ -6,4 +6,5 @@ final_json.json
 ./app.db
 ./instance
 app.db
-thumbnails
+thumbnails
+docker-compose.override.yml
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,5 +1,3 @@
-version: "3"
-
 services:
   app:
     build: .
@@ -14,5 +12,8 @@ services:
       - LANGUAGE_CODE=de
       # your db secret (random string)
       - DB_SECRET=
+      # the AI provider to use (duckai, openai) (defaults to duckai)
+      # - AI_MODULE=openai
+      # - OPENAI_API_KEY=
     volumes:
       - ./app.db:/app/app.db
diff --git a/main.py b/main.py
@@ -1,8 +1,7 @@
 import argparse
 import re
 from dotenv import load_dotenv
-from scrapers.scrape_for_mealie import scrape_recipe_for_mealie
-from scrapers.scrape_for_tandoor import scrape_recipe_for_tandoor
+from scrapers.scraper_service import ScraperService
 
 load_dotenv()
 
@@ -45,12 +44,19 @@ def main():
     if not is_valid_url(args.url, args.platform):
         raise ValueError("Invalid URL. Please provide a valid post URL.")
 
-    if args.mode == 'mealie' or args.mode == 'm':
-        scrape_recipe_for_mealie(args.url, args.platform)
-    elif args.mode == 'tandoor' or args.mode == 't':
-        scrape_recipe_for_tandoor(args.url, args.platform)
-    else:
+    # Set the provider environment variable according to the CLI mode
+    import os
+    mode_map = {
+        'mealie': 'mealie',
+        'm': 'mealie',
+        'tandoor': 'tandoor',
+        't': 'tandoor'
+    }
+    provider = mode_map.get(args.mode.lower())
+    if not provider:
         raise ValueError("Invalid mode. Please specify either 'mealie'/'m' or 'tandoor'/'t'")
+    os.environ['RECIPE_PROVIDER'] = provider
+    ScraperService.scrape_recipe(args.url, args.platform)
 
 if __name__ == '__main__':
     main()
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
+openai>=1.0.0
 aiodns==3.2.0
 aiohappyeyeballs==2.4.4
 aiohttp==3.11.11

diff --git a/scrapers/ai_modules/ai_module_interface.py b/scrapers/ai_modules/ai_module_interface.py
@@ -0,0 +1,22 @@
+from abc import ABC, abstractmethod
+
+class AIModuleInterface(ABC):
+	"""Abstract contract shared by all AI backend modules."""
+
+	@abstractmethod
+	def initialize_chat(self, context):
+		"""Hand the recipe ``context`` to the backend for use by later prompts."""
+
+	@abstractmethod
+	def send_raw_prompt(self, prompt):
+		"""Send ``prompt`` to the backend and return its unparsed response."""
+
+	@abstractmethod
+	def send_json_prompt(self, prompt):
+		"""Send ``prompt`` and return the JSON data parsed from the response."""
+
+	@abstractmethod
+	def get_number_of_steps(self, caption=None):
+		"""Ask the backend how many steps the recipe has and return the count."""
+
+	@abstractmethod
+	def process_recipe_part(self, part, mode="", step_number=None, context=None):
+		"""Have the backend fill in the JSON template ``part`` for ``mode``."""
diff --git a/scrapers/ai_modules/chat_gpt.py b/scrapers/ai_modules/chat_gpt.py
@@ -0,0 +1,141 @@
+import os
+import json
+import re
+import openai
+from .ai_module_interface import AIModuleInterface
+
+class ChatGPTModule(AIModuleInterface):
+    """AI module that answers recipe prompts through the OpenAI chat API.
+
+    Every request is built from scratch in ``send_raw_prompt``; the context
+    stored by ``initialize_chat`` is re-sent as a system message each time.
+    """
+
+    # How often get_number_of_steps re-asks before giving up.
+    MAX_STEP_COUNT_ATTEMPTS = 3
+
+    # Opening instruction shared by every JSON prompt.
+    _JSON_ONLY = "Please respond ONLY with a valid JSON code block (```json ... ```).\n"
+
+    # Mode-specific instruction text used by process_recipe_part. The "step"
+    # mode is built separately because it embeds the step number.
+    _MODE_INSTRUCTIONS = {
+        "info": (
+            "Fill out the fields: 'author', 'description', 'recipeYield', 'prepTime', 'cooktime'.\n"
+            "- 'prepTime' and 'cooktime' format: PT1H for one hour, PT15M for 15 minutes.\n"
+        ),
+        "ingredients": (
+            "Append the ingredients to the 'recipeIngredient' list. One ingredient per line.\n"
+        ),
+        "name": (
+            "Fill out the field 'name' with a short, clear recipe name.\n"
+        ),
+        "nutrition": (
+            "Fill out the fields: 'calories' and 'fatContent' as strings.\n"
+        ),
+        "instructions": (
+            "Write the instruction as one long string. No string separation, just one long text! Don't add ingredients here.\n"
+        ),
+    }
+
+    # Instruction text for any mode not listed above.
+    _DEFAULT_INSTRUCTIONS = "Fill out the specified sections of the document.\n"
+
+    def __init__(self, api_key=None, model="gpt-5"):
+        """Store the credentials and the model name.
+
+        Args:
+            api_key: OpenAI API key; falls back to the ``OPENAI_API_KEY``
+                environment variable when omitted.
+            model: Name of the chat model to query.
+        """
+        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+        self.model = model
+        # send_raw_prompt goes through the module-level openai client, which
+        # reads the key from this global.
+        openai.api_key = self.api_key
+        self.context = None
+
+    def initialize_chat(self, context):
+        """Remember ``context`` for all following prompts and return ``True``."""
+        self.context = context
+        return True
+
+    def send_raw_prompt(self, prompt):
+        """Send ``prompt`` to the model and return the text of its first choice.
+
+        The stored recipe context, if any, is prepended as a system message.
+
+        Returns:
+            The message content of the first choice. This may be ``None``
+            when the model returns no text content.
+        """
+        messages = []
+        if self.context:
+            messages.append({"role": "system", "content": f"Recipe context: {self.context}"})
+        messages.append({"role": "user", "content": prompt})
+        response = openai.chat.completions.create(
+            model=self.model,
+            messages=messages
+        )
+        return response.choices[0].message.content
+
+    def send_json_prompt(self, prompt):
+        """Send ``prompt`` and return the JSON value found in the response.
+
+        A fenced ```json block is preferred; otherwise the widest ``{...}``
+        span in the response is tried.
+
+        Returns:
+            The parsed JSON value, or ``None`` when the response is empty,
+            contains no JSON candidate, or the candidate does not parse.
+        """
+        raw = self.send_raw_prompt(prompt)
+        match = None
+        # The content can be None/empty; the regex calls would raise on None.
+        if raw:
+            match = (
+                re.search(r"```json\s*(.*?)```", raw, re.DOTALL)
+                or re.search(r"({.*})", raw, re.DOTALL)
+            )
+        payload = match.group(1) if match else None
+        if not payload:
+            print("[DEBUG] No valid JSON found in response.")
+            return None
+        try:
+            return json.loads(payload)
+        except ValueError as e:  # json.JSONDecodeError is a ValueError
+            print(f"[DEBUG] JSON parsing error: {e}")
+            return None
+
+    def get_number_of_steps(self, caption=None):
+        """Ask the model how many steps the recipe has.
+
+        Args:
+            caption: Optional recipe text. When given it replaces the stored
+                context; when omitted the context set through
+                ``initialize_chat`` is kept.
+
+        Returns:
+            The first integer found in the model's answer, or ``None`` if no
+            attempt produced a number.
+        """
+        # Only overwrite the context when a caption was actually passed;
+        # otherwise a plain get_number_of_steps() call would erase the recipe
+        # the question is about.
+        if caption is not None:
+            self.initialize_chat(caption)
+        prompt = (
+            "How many steps are in this recipe? Respond only with a single integer. "
+            "Do not include any explanation, text, units, or formatting. Only reply with the number."
+        )
+        for _ in range(self.MAX_STEP_COUNT_ATTEMPTS):
+            raw = self.send_raw_prompt(prompt)
+            if not raw:
+                continue
+            # A bare number and a number embedded in text are both covered by
+            # taking the first run of digits.
+            found = re.search(r"\d+", raw)
+            if found:
+                return int(found.group())
+        print(
+            "[DEBUG] Failed to extract number of steps after "
+            f"{self.MAX_STEP_COUNT_ATTEMPTS} attempts."
+        )
+        return None
+
+    @staticmethod
+    def _format_context(context):
+        """Return the context line that opens a prompt, or ``""`` without context."""
+        if not context:
+            return ""
+        if isinstance(context, dict):
+            return f"Recipe context (JSON): {json.dumps(context, ensure_ascii=False)}\n"
+        return f"Recipe context: {context}\n"
+
+    @staticmethod
+    def _step_instructions(step_number):
+        """Return the instruction text for filling out a single recipe step."""
+        return (
+            f"Fill out the following fields for step {step_number} of the recipe: 'name', 'instruction', 'ingredients', 'time', 'order', 'show_as_header', 'show_ingredients_table'.\n"
+            f"- 'name' should be the step number, e.g. 'name': '{step_number}.'\n"
+            f"- 'instruction' should be a clear, short description of the step.\n"
+            f"- 'ingredients' should be a list of ingredient objects (max 3 per step).\n"
+            f"- 'amount' must be a whole number or decimal, NOT a fraction.\n"
+            f"- Do NOT repeat ingredients from previous steps.\n"
+            f"- Example format: ```json {{'name': '1.', 'instruction': 'Chop onions.', 'ingredients': [{{'food': {{'name': 'onion'}}, 'amount': '1', ...}}], 'time': 5, 'order': 1, 'show_as_header': false, 'show_ingredients_table': true}}```\n"
+        )
+
+    def process_recipe_part(self, part, mode="", step_number=None, context=None):
+        """Ask the model to fill in the JSON template ``part``.
+
+        Args:
+            part: JSON template embedded at the end of the prompt.
+            mode: One of "step", "info", "ingredients", "name", "nutrition",
+                "instructions"; any other value yields a generic prompt.
+            step_number: Step to complete. A non-``None`` value selects the
+                step prompt regardless of ``mode``.
+            context: Optional extra context (dict or text) placed at the top
+                of the prompt.
+
+        Returns:
+            The parsed JSON answer, or ``None`` if none could be extracted.
+        """
+        if mode == "step" or step_number is not None:
+            instructions = self._step_instructions(step_number)
+        else:
+            instructions = self._MODE_INSTRUCTIONS.get(mode, self._DEFAULT_INSTRUCTIONS)
+        prompt = (
+            f"{self._format_context(context)}"
+            f"{self._JSON_ONLY}"
+            f"{instructions}"
+            f"Language: {os.getenv('LANGUAGE_CODE', 'en')}\n"
+            f"JSON template: {part}"
+        )
+        return self.send_json_prompt(prompt)
diff --git a/scrapers/ai_modules/duck_ai.py b/scrapers/ai_modules/duck_ai.py
@@ -0,0 +1,131 @@
+import os
+import re
+import json
+from bs4 import BeautifulSoup
+from logs import setup_logging
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+from .ai_module_interface import AIModuleInterface
+
+class DuckAIModule(AIModuleInterface):
+	"""AI module that drives a chat UI inside a Selenium-controlled browser."""
+
+	# Locator of the chat input box used by every interaction with the page.
+	_PROMPT_BOX = (By.XPATH, "//textarea[@name='user-prompt']")
+
+	# Prompt endings for the simple modes of process_recipe_part, as
+	# (mode, ending) pairs; the step and fallback endings are built inline.
+	_MODE_ENDINGS = (
+		("info", " Only fill out author, description, recipeYield, prepTime and cooktime. The cooktime and pretime should have the format e.g. PT1H for one hour or PT15M for 15 Minutes."),
+		("ingredients", " Append the ingredients to the 'recipeIngredient' list. One ingredient per line."),
+		("name", " Keep the name of the recipe short."),
+		("nutrition", " Only fill out calories and fatContent with a string."),
+		("instructions", " Write the instruction as one long string. No string separation, just one long text! Don't add ingredients here. JSON FORMAT IN CODE WINDOW!"),
+	)
+
+	def __init__(self, browser):
+		"""Keep the Selenium ``browser`` and create the module logger."""
+		self.logger = setup_logging("duck_ai")
+		self.browser = browser
+
+	def initialize_chat(self, caption):
+		"""Submit ``caption`` as the recipe context of the chat.
+
+		Returns ``True`` once the page has gone through its waiting states,
+		``False`` if anything fails or times out (the error is logged).
+		"""
+		self.logger.info("Initializing chat with recipe context...")
+		try:
+			input_box = WebDriverWait(self.browser, 10).until(
+				EC.presence_of_element_located(self._PROMPT_BOX)
+			)
+			context_prompt = f"I'm going to ask you questions about this recipe. Please use this recipe information as context for all your responses: {caption}"
+			input_box.send_keys(context_prompt)
+			input_box.send_keys(Keys.RETURN)
+			# First wait for a disabled submit button to appear, then for the
+			# button containing the 10x10 rect to disappear (presumably the
+			# "generating" indicator - confirm against the page).
+			disabled_submit = (By.XPATH, "//button[@type='submit' and @disabled]")
+			rect_button = (By.XPATH, "//button//rect[@width='10' and @height='10']")
+			WebDriverWait(self.browser, 60).until(EC.presence_of_element_located(disabled_submit))
+			WebDriverWait(self.browser, 60).until_not(EC.presence_of_element_located(rect_button))
+			self.logger.info("Chat initialized successfully with recipe context")
+			return True
+		except Exception as e:
+			self.logger.error(f"Failed to initialize chat: {e}", exc_info=True)
+			return False
+
+	def send_raw_prompt(self, prompt):
+		"""Type ``prompt`` into the chat and return the resulting page source.
+
+		Returns the HTML of the whole page once the input box is clickable
+		again, or ``None`` if any step fails (the error is logged).
+		"""
+		self.logger.info(f"Sending raw prompt: {prompt[:50]}...")
+		try:
+			short_wait = WebDriverWait(self.browser, 15)
+			input_box = short_wait.until(EC.presence_of_element_located(self._PROMPT_BOX))
+			WebDriverWait(self.browser, 15).until(EC.element_to_be_clickable(self._PROMPT_BOX))
+			input_box.clear()
+			input_box.send_keys(prompt)
+			input_box.send_keys(Keys.RETURN)
+			# The input box becoming clickable again is taken as the signal
+			# that the answer is complete.
+			WebDriverWait(self.browser, 60).until(EC.element_to_be_clickable(self._PROMPT_BOX))
+			self.logger.info("Response generation completed")
+			return self.browser.page_source
+		except Exception as e:
+			self.logger.error(f"Failed to send prompt: {e}", exc_info=True)
+			return None
+
+	def extract_json_from_response(self, response):
+		"""Parse the last ``language-json`` code block of the HTML ``response``.
+
+		Returns the decoded JSON value, or ``None`` when the response is empty,
+		holds no such block, or the block cannot be parsed.
+		"""
+		if not response:
+			return None
+		try:
+			json_blocks = BeautifulSoup(response, 'html.parser').find_all('code', {'class': 'language-json'})
+			if not json_blocks:
+				self.logger.warning("No JSON code block found in the response")
+				return None
+			return json.loads(json_blocks[-1].get_text())
+		except Exception as e:
+			self.logger.error(f"Failed to extract JSON: {e}", exc_info=True)
+			return None
+
+	def send_json_prompt(self, prompt):
+		"""Send ``prompt`` and return the JSON extracted from the page, if any."""
+		return self.extract_json_from_response(self.send_raw_prompt(prompt))
+
+	def _parse_step_count(self, page_html):
+		"""Read the step count from the last response block in ``page_html``.
+
+		Logs a warning naming the missing piece and returns ``None`` when no
+		count can be found.
+		"""
+		answer_divs = BeautifulSoup(page_html, 'html.parser').find_all('div', {'class': 'VrBPSncUavA1d7C9kAc5'})
+		if not answer_divs:
+			self.logger.warning("No response divs found")
+			return None
+		paragraph = answer_divs[-1].find('p')
+		if not paragraph:
+			self.logger.warning("No paragraph found in response")
+			return None
+		text = paragraph.get_text().strip()
+		numbers = re.findall(r'\d+', text)
+		if not numbers:
+			self.logger.warning(f"No number found in response: {text}")
+			return None
+		number_of_steps = int(numbers[0])
+		self.logger.info(f"Found {number_of_steps} steps in the recipe")
+		return number_of_steps
+
+	def get_number_of_steps(self, caption=None):
+		"""Ask the chat for the number of recipe steps.
+
+		``caption`` is accepted for interface compatibility and is not used
+		here. Returns the first integer in the last answer, or ``None`` when
+		it cannot be determined.
+		"""
+		self.logger.info("Getting number of recipe steps...")
+		try:
+			prompt = "How many steps are in this recipe? Please respond with only a number."
+			response = self.send_raw_prompt(prompt)
+			step_count = self._parse_step_count(response) if response else None
+			if step_count is not None:
+				return step_count
+			self.logger.warning("Could not determine number of steps")
+			return None
+		except Exception as e:
+			self.logger.error(f"Error in get_number_of_steps: {e}", exc_info=True)
+			return None
+
+	def process_recipe_part(self, part, mode="", step_number=None, context=None):
+		"""Ask the chat to fill in the JSON document ``part``.
+
+		A non-``None`` ``step_number`` (or ``mode == "step"``) selects the
+		single-step prompt; otherwise ``mode`` picks the prompt ending, with a
+		generic ending for unknown modes. ``context`` is not used here.
+		Returns the parsed JSON answer, or ``None`` on any failure.
+		"""
+		try:
+			opening = f"Write your Response in the language {os.getenv('LANGUAGE_CODE', 'en')}. Please fill out this JSON document {part}"
+			if mode == "step" or step_number is not None:
+				ending = f". Only complete the specified sections. Only complete step {step_number} of the recipe. If the step has more than 3 ingredients, only complete the first 3 and finish the JSON object. The name of the step should be the step number e.g. 'name': '{step_number}.'. Only include the current instruction description in the instruction field. The amount value of the ingredient can only be a whole number or a decimal NOT A FRACTION (convert it to a decimal). If an ingredient has already been mentioned in a previous step, do not include it again as an ingredient in this step. Respond with a JSON code block enclosed in triple backticks (```json)."
+			else:
+				ending = next(
+					(text for key, text in self._MODE_ENDINGS if mode == key),
+					". Only complete the specified sections of the document. Ensure the response is formatted as a JSON code block enclosed in triple backticks (```json).",
+				)
+			result = self.send_json_prompt(opening + ending)
+			if not result:
+				self.logger.warning(f"No valid response for {mode or 'general'} data")
+				return None
+			self.logger.info(f"{mode or 'General'} data processed successfully")
+			return result
+		except Exception as e:
+			self.logger.error(f"Error processing {mode or 'recipe part'}: {e}", exc_info=True)
+			return None