# Content Aggregator
# Content Aggregator #16 - workflow file for this run

# Runs the summarizer (main.py), uploads the files matching outputs/*.json and
# outputs/*.txt as a build artifact, then opens a GitHub issue whose body is
# produced by scripts/create_issue_body.sh.
name: Content Aggregator

on:
  # Allows the workflow to be started manually.
  workflow_dispatch:
  schedule:
    # Daily at 08:20 (GitHub evaluates cron schedules in UTC).
    - cron: '20 8 * * *'

jobs:
  summarize:
    runs-on: ubuntu-latest
    timeout-minutes: 120  # 2 hour timeout
    # Least-privilege scopes for GITHUB_TOKEN: checkout reads the repository,
    # `gh issue create` writes issues.
    # NOTE(review): scripts/create_issue_body.sh is not visible here; if it
    # calls other GitHub APIs with GH_TOKEN, add the scopes it needs.
    permissions:
      contents: read
      issues: write
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'

      - name: Install system dependencies
        run: |
          # TODO: check if all dependencies are indeed needed
          sudo apt-get update
          sudo apt-get install -y \
            python3-dev \
            libxml2-dev \
            libxslt-dev \
            chromium-chromedriver \
            jq

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Install Playwright browsers
        # Only Chromium is downloaded, so only its OS dependencies are needed.
        run: playwright install --with-deps chromium

      - name: Run summarizer
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          GEMINI_MODEL_SUMMARIZE: ${{ secrets.GEMINI_MODEL_SUMMARIZE }}
          GEMINI_MODEL_DATE_EXTRACT: ${{ secrets.GEMINI_MODEL_DATE_EXTRACT }}
          # Environment values are passed as strings, so keep them quoted.
          DEBUG: "false"
          ARTICLES_LIMIT: "500"
        run: python main.py

      - name: Upload output files
        uses: actions/upload-artifact@v4
        with:
          name: summaries
          path: |
            outputs/*.json
            outputs/*.txt
          retention-days: 1

      - name: Create GitHub Issue
        if: success()  # Only run if previous steps succeeded
        env:
          # The GitHub CLI reads its credentials from GH_TOKEN.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          FILE_NAME="issue.md"
          ./scripts/create_issue_body.sh "$FILE_NAME"
          # Echo the generated body into the job log for inspection.
          cat "$FILE_NAME"
          # Create issue using GitHub CLI
          # NOTE(review): the `automated` label is presumably expected to
          # exist in the repository already - confirm.
          gh issue create \
            --title "Daily Content Summary $(date +'%Y-%m-%d')" \
            --body-file "$FILE_NAME" \
            --label automated