# Online search for vulns (#38)
# NOTE: the hosting UI flagged this file as containing hidden or bidirectional
# Unicode text that may be interpreted or compiled differently than it appears.
# Review it in an editor that reveals hidden Unicode characters.
name: Online search for vulns

# Triggers: one scheduled run per day (cron '42 8 * * *') plus manual dispatch.
# The manual inputs below are absent on scheduled runs; the steps that consume
# them supply their own fallbacks.
on:
  schedule:
    - cron: '42 8 * * *'
  workflow_dispatch:
    inputs:
      model:
        description: 'Claude model to use (cron runs default to Sonnet)'
        required: false
        type: choice
        default: claude-sonnet-4-6
        options:
          - claude-sonnet-4-6
          - claude-opus-4-7
          - claude-haiku-4-5-20251001
      window_hours:
        description: 'Lookback window in hours (cron runs use 25)'
        required: false
        type: string
        default: '25'
      reconsider_age_days:
        description: 'Only reconsider backlog entries last reviewed ≥ N days ago (0 = all, default 7)'
        required: false
        type: string
        default: '7'
# Scopes granted to the workflow token; anything not listed here is not granted.
permissions:
  contents: read
  actions: read  # needed to list/download previous run artifacts
  id-token: write  # needed by claude-code-action for OIDC auth
# All runs share a single concurrency group; starting a new run cancels any
# run of this group that is still in progress.
concurrency:
  group: vuln-watch
  cancel-in-progress: true
jobs:
  # Single job: restore the previous state artifact, fetch + diff the sources,
  # optionally classify with Claude, then merge and publish state + report.
  watch:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      # The scripts driving this workflow live on the `vuln-watch` branch so
      # they don't clutter master (which is what ships to production). The
      # workflow file itself MUST stay on the default branch, as GitHub only
      # honors `schedule:` triggers on the default branch.
      - name: Checkout vuln-watch branch (scripts + prompt)
        uses: actions/checkout@v5
        with:
          ref: vuln-watch
          fetch-depth: 1
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install Python dependencies
        run: python -m pip install --quiet feedparser

      # ---- Load previous state ---------------------------------------------
      # Find the most recent successful run of THIS workflow (other than the
      # current one) and pull its `vuln-watch-state` artifact. On the very
      # first run there will be none — that's fine, we start empty.
      # The `--status=success` filter is what excludes the current run, which
      # is still in progress while this step executes.
      - name: Find previous successful run id
        id: prev
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Target the repository explicitly instead of relying on the git
          # remote of whatever checkout happens to be in the workspace.
          GH_REPO: ${{ github.repository }}
          # Passed through the environment rather than expanded by `${{ }}`
          # inside the script, so the value is never parsed as shell source.
          WORKFLOW_NAME: ${{ github.workflow }}
        run: |
          set -e
          run_id=$(gh run list \
            --workflow="$WORKFLOW_NAME" \
            --status=success \
            --limit 1 \
            --json databaseId \
            --jq '.[0].databaseId // empty')
          echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
          if [ -n "$run_id" ]; then
            echo "Found previous successful run: $run_id"
          else
            echo "No previous successful run — starting from empty state."
          fi

      - name: Download previous state artifact
        if: steps.prev.outputs.run_id != ''
        uses: actions/download-artifact@v5
        continue-on-error: true  # tolerate retention expiry
        with:
          name: vuln-watch-state
          path: state/
          run-id: ${{ steps.prev.outputs.run_id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      # ---- Fetch + diff (token-free; runs every time) ---------------------
      # Performs conditional GETs (ETag / If-Modified-Since) against every
      # source, parses RSS/Atom/HTML, dedups against state.seen + state.aliases,
      # applies the time-window filter, and emits new_items.json.
      # Updates state.sources (HTTP cache metadata + per-source high-water
      # marks) in place so the cache survives even when Claude doesn't run.
      - name: Fetch + diff all sources
        id: diff
        env:
          # NOTE(review): `run_started_at` does not appear among the documented
          # `github` context properties — confirm this expression is non-empty
          # at runtime. The same expression feeds SCAN_DATE in the classify and
          # merge steps below.
          SCAN_DATE: ${{ github.run_started_at }}
          # Cron runs have no `inputs` context, so the fallback kicks in.
          WINDOW_HOURS: ${{ inputs.window_hours || '25' }}
          RECONSIDER_AGE_DAYS: ${{ inputs.reconsider_age_days || '7' }}
        run: python -m scripts.vuln_watch.fetch_and_diff

      # ---- Fetch checker code so Claude can grep it for coverage ---------
      # The orphan vuln-watch branch has none of the actual checker code,
      # so we pull the `test` branch (the dev branch where coded-but-
      # unreleased CVE checks live) into ./checker/. The prompt tells
      # Claude this is the canonical source of truth for "is CVE-X already
      # implemented?". Only fetched on days with something to classify.
      - name: Checkout checker code (test branch) for coverage grep
        if: steps.diff.outputs.new_count != '0' || steps.diff.outputs.reconsider_count != '0'
        uses: actions/checkout@v5
        with:
          ref: test
          path: checker
          fetch-depth: 1
          persist-credentials: false

      # ---- Classify new items with Claude (skipped when nothing is new) ---
      # Model selection: a manual workflow_dispatch run picks from a dropdown
      # (defaulting to Sonnet). Scheduled cron runs have no `inputs` context,
      # so the `|| 'claude-sonnet-4-6'` fallback kicks in — cron always uses
      # Sonnet to keep the daily cost floor low.
      - name: Run classifier with Claude
        id: classify
        if: steps.diff.outputs.new_count != '0' || steps.diff.outputs.reconsider_count != '0'
        uses: anthropics/claude-code-action@v1
        env:
          SCAN_DATE: ${{ github.run_started_at }}
        with:
          prompt: |
            Read the full task instructions from scripts/daily_vuln_watch_prompt.md
            and execute them end-to-end. Your input is new_items.json (already
            deduped, windowed, and pre-filtered — do NOT re-fetch sources).
            Write the three watch_${TODAY}_*.md files and classifications.json.
            Use $SCAN_DATE as the canonical timestamp.
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          # model + tool allowlist pass through claude_args (v1 dropped the
          # dedicated `model:` and `allowed_tools:` inputs). Job-level
          # `timeout-minutes: 20` above bounds total runtime.
          # NOTE(review): new_items.json is built from external feeds, i.e.
          # untrusted text; Bash + WebFetch give the model a wide surface —
          # confirm this allowlist is intentional.
          claude_args: |
            --model ${{ inputs.model || 'claude-sonnet-4-6' }}
            --allowedTools "Read,Write,Edit,Bash,Grep,Glob,WebFetch"

      # Runs even after a failure, but only when the classify step actually
      # reported an execution file.
      - name: Upload Claude execution log
        if: ${{ always() && steps.classify.outputs.execution_file != '' }}
        uses: actions/upload-artifact@v5
        with:
          name: claude-execution-log-${{ github.run_id }}
          path: ${{ steps.classify.outputs.execution_file }}
          retention-days: 30
          if-no-files-found: warn

      # ---- Merge classifications back into state --------------------------
      # Also writes stub watch_*.md files if the classify step was skipped, so
      # the report artifact is consistent across runs.
      - name: Merge classifications into state
        if: always()
        env:
          SCAN_DATE: ${{ github.run_started_at }}
        run: python -m scripts.vuln_watch.merge_state

      # Fixed artifact name: this is what "Download previous state artifact"
      # looks up on the next run. A missing state file fails the step.
      - name: Upload new state artifact
        if: always()
        uses: actions/upload-artifact@v5
        with:
          name: vuln-watch-state
          path: state/seen.json
          retention-days: 90
          if-no-files-found: error

      # Per-run report bundle; missing files only produce a warning.
      - name: Upload daily report
        if: always()
        uses: actions/upload-artifact@v5
        with:
          name: vuln-watch-report-${{ github.run_id }}
          path: |
            watch_*.md
            current_toimplement.md
            current_tocheck.md
            new_items.json
            classifications.json
          retention-days: 90
          if-no-files-found: warn