|
| 1 | +from json import dump, load |
| 2 | +from os import environ, walk |
| 3 | +from pathlib import Path |
| 4 | +from subprocess import run |
| 5 | +from typing import Dict |
| 6 | + |
| 7 | +from google.genai import Client |
| 8 | + |
| 9 | + |
| 10 | +gemini = Client(api_key=environ["GEMINI_API_KEY"]) |
| 11 | + |
| 12 | + |
| 13 | +def _is_ignored(root: Path, target: Path, ignored: list[Path]) -> bool: |
| 14 | + """Check if Git is ignoring the path.""" |
| 15 | + # Ignore Git's directory itself. |
| 16 | + if str(target).lower().endswith(".git"): |
| 17 | + return True |
| 18 | + # Check if this path matches something in ``.gitignore``. |
| 19 | + command = ["git", "-C", str(root), "check-ignore", str(target)] |
| 20 | + result = run(command, capture_output=True, text=True) |
| 21 | + return str(target) in result.stdout |
| 22 | + |
| 23 | + |
| 24 | +def _is_in_ignored_dir(target: Path, ignored: list[Path]): |
| 25 | + """Check if this path is in an ignored directory.""" |
| 26 | + for maybe_parent_dir in ignored: |
| 27 | + if str(maybe_parent_dir) in str(target): |
| 28 | + return True |
| 29 | + return False |
| 30 | + |
| 31 | + |
def collect(root: Path) -> tuple[list[Path], int]:
    """Collect all paths in the repository.

    Walks ``root`` top-down, skips everything Git ignores, and asks Gemini
    for the token count of each file that can be read as UTF-8 text.

    Args:
        root: Directory to walk; also the directory Git is run from.

    Returns:
        A ``(paths, tokens)`` tuple. ``paths`` holds every non-ignored file
        found, including files that could not be decoded as text. ``tokens``
        is the summed token count of the files that could be decoded.
    """
    paths: list[Path] = []
    # Everything found to be ignored so far; only handed on to
    # ``_is_ignored``, whose signature expects it.
    ignored: list[Path] = []
    tokens = 0
    for current_working_dir, dirs, files in walk(root):
        cwd = Path(current_working_dir)
        if _is_ignored(Path(root), cwd, ignored):
            print(f"ignoring dir: {str(cwd)}")
            ignored.append(cwd)
            # Prune in place: ``walk`` then never descends into this
            # directory, so its subtree is neither traversed nor reported.
            dirs.clear()
            continue
        for file in files:
            path = cwd / file
            if _is_ignored(Path(root), path, ignored):
                print(f"ignoring file: {str(path)}")
                ignored.append(path)
                continue
            paths.append(path)
            try:
                # Decode explicitly as UTF-8 so the outcome does not depend
                # on the platform's locale encoding.
                contents = path.read_text(encoding="utf-8")
            except UnicodeDecodeError:
                # Not text (e.g. a binary file): it stays in ``paths`` but
                # contributes no tokens.
                continue
            print(f"counting tokens: {str(path)}")
            response = gemini.models.count_tokens(
                model="gemini-2.5-flash", contents=contents
            )
            # NOTE(review): ``total_tokens`` appears to be optional in the
            # SDK's response type; treat a missing value as zero -- confirm.
            tokens += response.total_tokens or 0
    return paths, tokens
| 64 | + |
| 65 | + |
def main():
    """Scan the current directory and print the file and token totals."""
    collected, token_total = collect(Path("."))
    separator = "*" * 80
    for line in (
        separator,
        f"file count: {len(collected)}",
        f"tokens: {token_total}",
    ):
        print(line)
| 72 | + |
| 73 | + |
| 74 | +if __name__ == "__main__": |
| 75 | + main() |
0 commit comments