Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions .github/workflows/validate_apis_free_tier.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
name: "Validate API health and free-tier"

# Runs on manual dispatch and on a weekly cron schedule (03:00 every Monday).
on:
  workflow_dispatch:
  schedule:
    - cron: '0 3 * * 1'

env:
  # File whose API table rows are validated and pruned.
  FILENAME: README.md

jobs:
  validate_apis:
    name: "Check API links and free-tier signal"
    runs-on: ubuntu-latest
    # Write access is requested because the last step pushes a branch and
    # opens a pull request.
    permissions:
      contents: write
      pull-requests: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: python -m pip install -r scripts/requirements.txt

      - name: Remove stale or non-free API rows
        id: validate_api_health
        # NOTE(review): continue-on-error keeps the job green even when the
        # script crashes; the PR step below is then skipped silently.
        continue-on-error: true
        env:
          # Launching the script by path puts scripts/validate (not scripts/)
          # on sys.path, so package-style `validate.*` imports would not
          # resolve without this. Assumes api_health.py imports that way,
          # as the sibling validators do - confirm.
          PYTHONPATH: ${{ github.workspace }}/scripts
        run: python scripts/validate/api_health.py "${FILENAME}" --write

      - name: Create pull request for removals
        if: steps.validate_api_health.outcome == 'success'
        uses: peter-evans/create-pull-request@v7
        with:
          commit-message: "chore: remove APIs failing health/free-tier checks"
          branch: chore/auto-prune-apis
          title: "chore: remove APIs failing health/free-tier checks"
          body: |
            Automated cleanup generated by `validate_apis_free_tier` workflow.

            This PR removes README entries where:
            - the API documentation link check failed, or
            - no free-tier signal could be detected from README metadata.

            Please review all removals before merging.
          delete-branch: true
64 changes: 64 additions & 0 deletions scripts/tests/test_validate_api_health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-

import unittest
from unittest.mock import patch

from validate.api_health import ApiEntry
from validate.api_health import has_free_tier_signal
from validate.api_health import find_entries_to_remove
from validate.api_health import parse_api_entries
from validate.api_health import remove_entries_from_lines


class TestValidateApis(unittest.TestCase):
    """Unit tests for the API health / free-tier validation helpers."""

    def test_parse_api_entries(self):
        # Two well-formed table rows followed by a row with no markdown link.
        readme_rows = [
            '| [Cat Facts](https://example.com/cats) | Free cat facts | No | Yes | Yes |',
            '| [Dog Facts](https://example.com/dogs) | Premium data only | `apiKey` | Yes | Yes |',
            '| Not an API line |',
        ]

        parsed = parse_api_entries(readme_rows)

        self.assertEqual(len(parsed), 2)
        first = parsed[0]
        self.assertEqual(first.title, 'Cat Facts')
        self.assertEqual(first.link, 'https://example.com/cats')
        self.assertEqual(first.auth, 'no')

    def test_has_free_tier_signal(self):
        # auth == 'no' is expected to count as free regardless of description.
        open_access = ApiEntry(1, '', 'A', 'Paid service', 'no', 'https://example.com')
        # A free-tier phrase in the description is expected to count as free.
        described_free = ApiEntry(2, '', 'B', 'This API has a free tier for hobby usage', 'apikey', 'https://example.com')
        # Neither signal is present here.
        paid_only = ApiEntry(3, '', 'C', 'Enterprise pricing only', 'apikey', 'https://example.com')

        self.assertTrue(has_free_tier_signal(open_access))
        self.assertTrue(has_free_tier_signal(described_free))
        self.assertFalse(has_free_tier_signal(paid_only))

    @patch('validate.api_health.check_if_link_is_working', return_value=(True, 'ERR'))
    def test_find_entries_to_remove_counts_link_failures(self, _link_check):
        # The patched link checker reports an error for every URL, so both
        # entries are expected to be flagged and counted as link failures.
        candidates = [
            ApiEntry(1, '', 'A', 'Free tier', 'apikey', 'https://example.com/a'),
            ApiEntry(2, '', 'B', 'Paid only', 'apikey', 'https://example.com/b'),
        ]

        flagged, summary, failed_links = find_entries_to_remove(candidates)

        self.assertEqual(len(flagged), 2)
        self.assertEqual(len(summary), 2)
        self.assertEqual(failed_links, 2)

    def test_remove_entries_from_lines(self):
        original = ['line 1', 'line 2', 'line 3']
        # Only the entry's line number (2) identifies the row to drop.
        doomed = [ApiEntry(2, 'line 2', 'X', 'Y', 'no', 'https://example.com')]

        remaining = remove_entries_from_lines(original, doomed)

        self.assertEqual(remaining, ['line 1', 'line 3'])


if __name__ == '__main__':
    # Run this module's tests when it is executed directly as a script.
    unittest.main()
48 changes: 48 additions & 0 deletions scripts/tests/test_validate_apis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-

import unittest

from validate.apis import ApiEntry
from validate.apis import has_free_tier_signal
from validate.apis import parse_api_entries
from validate.apis import remove_entries_from_lines


class TestValidateApis(unittest.TestCase):
    """Unit tests for the helpers exposed by ``validate.apis``."""

    def test_parse_api_entries(self):
        # Two well-formed table rows followed by a row with no markdown link.
        readme_rows = [
            '| [Cat Facts](https://example.com/cats) | Free cat facts | No | Yes | Yes |',
            '| [Dog Facts](https://example.com/dogs) | Premium data only | `apiKey` | Yes | Yes |',
            '| Not an API line |',
        ]

        parsed = parse_api_entries(readme_rows)

        self.assertEqual(len(parsed), 2)
        first = parsed[0]
        self.assertEqual(first.title, 'Cat Facts')
        self.assertEqual(first.link, 'https://example.com/cats')
        self.assertEqual(first.auth, 'no')

    def test_has_free_tier_signal(self):
        # auth == 'no' counts as free regardless of the description text.
        open_access = ApiEntry(1, '', 'A', 'Paid service', 'no', 'https://example.com')
        # A free-tier phrase in the description counts even with an API key.
        described_free = ApiEntry(2, '', 'B', 'This API has a free tier for hobby usage', 'apikey', 'https://example.com')
        # Neither signal is present here.
        paid_only = ApiEntry(3, '', 'C', 'Enterprise pricing only', 'apikey', 'https://example.com')

        self.assertTrue(has_free_tier_signal(open_access))
        self.assertTrue(has_free_tier_signal(described_free))
        self.assertFalse(has_free_tier_signal(paid_only))

    def test_remove_entries_from_lines(self):
        original = ['line 1', 'line 2', 'line 3']
        # Only the entry's line number (2) identifies the row to drop.
        doomed = [ApiEntry(2, 'line 2', 'X', 'Y', 'no', 'https://example.com')]

        remaining = remove_entries_from_lines(original, doomed)

        self.assertEqual(remaining, ['line 1', 'line 3'])


if __name__ == '__main__':
    # Run this module's tests when it is executed directly as a script.
    unittest.main()
64 changes: 64 additions & 0 deletions scripts/validate/api_health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-

import unittest
from unittest.mock import patch

from validate.api_health import ApiEntry
from validate.api_health import has_free_tier_signal
from validate.api_health import find_entries_to_remove
from validate.api_health import parse_api_entries
from validate.api_health import remove_entries_from_lines


class TestValidateApis(unittest.TestCase):
    """Unit tests for the API health / free-tier validation helpers."""

    def test_parse_api_entries(self):
        # Two well-formed table rows followed by a row with no markdown link.
        readme_rows = [
            '| [Cat Facts](https://example.com/cats) | Free cat facts | No | Yes | Yes |',
            '| [Dog Facts](https://example.com/dogs) | Premium data only | `apiKey` | Yes | Yes |',
            '| Not an API line |',
        ]

        parsed = parse_api_entries(readme_rows)

        self.assertEqual(len(parsed), 2)
        first = parsed[0]
        self.assertEqual(first.title, 'Cat Facts')
        self.assertEqual(first.link, 'https://example.com/cats')
        self.assertEqual(first.auth, 'no')

    def test_has_free_tier_signal(self):
        # auth == 'no' is expected to count as free regardless of description.
        open_access = ApiEntry(1, '', 'A', 'Paid service', 'no', 'https://example.com')
        # A free-tier phrase in the description is expected to count as free.
        described_free = ApiEntry(2, '', 'B', 'This API has a free tier for hobby usage', 'apikey', 'https://example.com')
        # Neither signal is present here.
        paid_only = ApiEntry(3, '', 'C', 'Enterprise pricing only', 'apikey', 'https://example.com')

        self.assertTrue(has_free_tier_signal(open_access))
        self.assertTrue(has_free_tier_signal(described_free))
        self.assertFalse(has_free_tier_signal(paid_only))

    @patch('validate.api_health.check_if_link_is_working', return_value=(True, 'ERR'))
    def test_find_entries_to_remove_counts_link_failures(self, _link_check):
        # The patched link checker reports an error for every URL, so both
        # entries are expected to be flagged and counted as link failures.
        candidates = [
            ApiEntry(1, '', 'A', 'Free tier', 'apikey', 'https://example.com/a'),
            ApiEntry(2, '', 'B', 'Paid only', 'apikey', 'https://example.com/b'),
        ]

        flagged, summary, failed_links = find_entries_to_remove(candidates)

        self.assertEqual(len(flagged), 2)
        self.assertEqual(len(summary), 2)
        self.assertEqual(failed_links, 2)

    def test_remove_entries_from_lines(self):
        original = ['line 1', 'line 2', 'line 3']
        # Only the entry's line number (2) identifies the row to drop.
        doomed = [ApiEntry(2, 'line 2', 'X', 'Y', 'no', 'https://example.com')]

        remaining = remove_entries_from_lines(original, doomed)

        self.assertEqual(remaining, ['line 1', 'line 3'])


if __name__ == '__main__':
    # Run this module's tests when it is executed directly as a script.
    unittest.main()
157 changes: 157 additions & 0 deletions scripts/validate/apis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-

"""Validate API entries from README and optionally remove stale entries.

This script checks each API table row in README.md for:
1) Documentation/API link availability
2) A best-effort free-tier signal

If `--write` is enabled, rows that fail either check are removed from the file.
"""

from __future__ import annotations

import argparse
import re
from dataclasses import dataclass
from pathlib import Path
from typing import List, Sequence, Tuple

from validate.links import check_if_link_is_working

# Lower-case phrases that has_free_tier_signal() searches for as plain
# substrings of an entry's lower-cased description.
# NOTE(review): because matching is by substring, the bare 'free' entry
# already covers every longer 'free ...' phrase, and short words such as
# 'open' or 'public' also match inside longer words - confirm that this
# breadth is intended.
FREE_TIER_KEYWORDS = (
    'free tier',
    'free plan',
    'free to use',
    'free api',
    'free',
    'public',
    'open',
    'no auth',
    'no api key',
    'without api key',
)


@dataclass
class ApiEntry:
    """A single API row extracted from the README table."""

    # 1-based position of the row within the sequence of lines that was parsed.
    line_number: int
    # The row text as it appeared in the input, before any stripping.
    raw_line: str
    # Text found between the square brackets of the first column.
    title: str
    # Second table column, whitespace-stripped.
    description: str
    # Third table column with backticks/spaces removed and lower-cased.
    auth: str
    # http(s) URL found between the parentheses of the first column.
    link: str


def parse_api_entries(lines: Sequence[str]) -> List[ApiEntry]:
    """Build an ``ApiEntry`` for every table row in ``lines`` that names an API.

    A row is accepted when, once stripped, it starts with ``| [``, splits on
    ``|`` into at least seven segments, and its first column contains both a
    bracketed title and a parenthesised http(s) link. Every other line is
    skipped silently.

    Args:
        lines: The README content, one element per line.

    Returns:
        The parsed entries in input order; ``line_number`` is the 1-based
        position of the row within ``lines``.
    """
    title_pattern = re.compile(r'\[(.*?)\]')
    link_pattern = re.compile(r'\((https?://[^)\s]+)\)')
    parsed: List[ApiEntry] = []

    for position, original in enumerate(lines, start=1):
        row = original.strip()
        if not row.startswith('| ['):
            continue

        cells = [cell.strip() for cell in row.split('|')]
        if len(cells) < 7:
            continue

        # cells[0] is the empty text in front of the row's leading pipe.
        name_cell, summary, auth_cell = cells[1], cells[2], cells[3]

        found_title = title_pattern.search(name_cell)
        found_link = link_pattern.search(name_cell)
        if not found_title or not found_link:
            continue

        parsed.append(
            ApiEntry(
                line_number=position,
                raw_line=original,
                title=found_title.group(1).strip(),
                description=summary,
                # Normalise values such as `apiKey` to a bare lower-case word.
                auth=auth_cell.strip('` ').lower(),
                link=found_link.group(1).strip(),
            )
        )

    return parsed


def has_free_tier_signal(entry: ApiEntry) -> bool:
    """Guess from README metadata alone whether ``entry`` is usable for free.

    Returns True when the entry's auth value is ``'no'`` or when its
    lower-cased description contains any ``FREE_TIER_KEYWORDS`` phrase as a
    substring; otherwise False.
    """
    lowered = entry.description.lower()
    return entry.auth == 'no' or any(
        marker in lowered for marker in FREE_TIER_KEYWORDS
    )


def find_entries_to_remove(entries: Sequence[ApiEntry]) -> Tuple[List[ApiEntry], List[str]]:
    """Select the entries that fail the link check or the free-tier check.

    Every entry's link is passed to ``check_if_link_is_working``, whose first
    return value is treated as an error flag and whose second is quoted in
    the report as the error detail.

    Returns:
        A pair ``(flagged, summary)``: the entries to drop, and one
        human-readable line per flagged entry listing its failure reasons.
    """
    flagged: List[ApiEntry] = []
    summary: List[str] = []

    for candidate in entries:
        broken, detail = check_if_link_is_working(candidate.link)
        looks_free = has_free_tier_signal(candidate)

        problems: List[str] = []
        if broken:
            problems.append(f'link check failed ({detail})')
        if not looks_free:
            problems.append('no free-tier signal')

        if not problems:
            continue

        flagged.append(candidate)
        summary.append(f'line {candidate.line_number} - {candidate.title}: {", ".join(problems)}')

    return flagged, summary


def remove_entries_from_lines(lines: Sequence[str], entries_to_remove: Sequence[ApiEntry]) -> List[str]:
    """Return ``lines`` without the rows occupied by ``entries_to_remove``.

    Rows are matched purely by each entry's 1-based ``line_number``; the
    input sequence itself is left untouched.
    """
    doomed = frozenset(entry.line_number for entry in entries_to_remove)

    survivors: List[str] = []
    for number, text in enumerate(lines, start=1):
        if number not in doomed:
            survivors.append(text)
    return survivors


def main(readme_path: str, write_changes: bool) -> int:
    """Validate the API rows of a README and optionally prune failing rows.

    Args:
        readme_path: Path of the README file to check.
        write_changes: When True, rows flagged for removal are deleted from
            the file in place; otherwise the file is only read.

    Returns:
        Always 0; flagged rows are reported on stdout, not via the exit code.
    """
    path = Path(readme_path)

    # newline='' turns off newline translation and keepends=True retains each
    # line's own terminator, so rows that are kept can be written back
    # byte-for-byte (CRLF files, a missing final newline and in-line
    # separators such as form feeds all survive the round trip).
    with path.open(encoding='utf-8', newline='') as handle:
        lines = handle.read().splitlines(keepends=True)

    entries = parse_api_entries(lines)
    to_remove, report = find_entries_to_remove(entries)

    print(f'Checked {len(entries)} API entries.')

    if not to_remove:
        print('No entries flagged for removal.')
        return 0

    print(f'Flagged {len(to_remove)} entries for removal:')
    for item in report:
        print(f'- {item}')

    if write_changes:
        new_lines = remove_entries_from_lines(lines, to_remove)
        # Lines still carry their terminators, so a plain concatenation
        # reproduces the original file minus the removed rows.
        with path.open('w', encoding='utf-8', newline='') as handle:
            handle.write(''.join(new_lines))
        print(f'Updated {readme_path} by removing {len(to_remove)} rows.')

    return 0


if __name__ == '__main__':
    # Command-line entry point: parse the arguments and exit with the status
    # code returned by main().
    cli = argparse.ArgumentParser(
        description='Check API rows and remove rows that fail link/free-tier checks.',
    )
    cli.add_argument('filename', help='README file to validate')
    cli.add_argument('--write', action='store_true', help='Apply removals directly to file')
    options = cli.parse_args()

    raise SystemExit(main(options.filename, options.write))