Skip to content

Commit 359db71

Browse files
author
Kayce Basques
committed
Add token count post
1 parent 095e778 commit 359db71

File tree

4 files changed

+87
-1
lines changed

4 files changed

+87
-1
lines changed

BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ sphinx_docs_library(
4343
"ai/agents/agents.png",
4444
"ai/agents/index.rst",
4545
"ai/index.rst",
46+
"ai/tokens.py",
47+
"ai/tokens.rst",
4648
"analytics/index.rst",
4749
"analytics/sheets.rst",
4850
"index.rst",

ai/index.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ Artificial intelligence
66
:hidden:
77
:maxdepth: 1
88

9-
agents/index
9+
agents/index
10+
tokens

ai/tokens.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from json import dump, load
2+
from os import environ, walk
3+
from pathlib import Path
4+
from subprocess import run
5+
from typing import Dict
6+
7+
from google.genai import Client
8+
9+
10+
# Module-level Gemini API client, used by ``collect`` to count tokens.
# The API key is read from the ``GEMINI_API_KEY`` environment variable when
# this module is loaded; if the variable is unset, the lookup raises
# ``KeyError`` before anything else runs.
gemini = Client(api_key=environ["GEMINI_API_KEY"])
11+
12+
13+
def _is_ignored(root: Path, target: Path, ignored: list[Path]) -> bool:
14+
"""Check if Git is ignoring the path."""
15+
# Ignore Git's directory itself.
16+
if str(target).lower().endswith(".git"):
17+
return True
18+
# Check if this path matches something in ``.gitignore``.
19+
command = ["git", "-C", str(root), "check-ignore", str(target)]
20+
result = run(command, capture_output=True, text=True)
21+
return str(target) in result.stdout
22+
23+
24+
def _is_in_ignored_dir(target: Path, ignored: list[Path]):
25+
"""Check if this path is in an ignored directory."""
26+
for maybe_parent_dir in ignored:
27+
if str(maybe_parent_dir) in str(target):
28+
return True
29+
return False
30+
31+
32+
def collect(root: Path) -> tuple[list[Path], int]:
    """Collect all paths in the repository and count their tokens.

    Walks ``root``, skips everything Git ignores, and asks the Gemini API for
    a token count of each remaining file that can be read as UTF-8 text.

    Args:
        root: Directory to walk.

    Returns:
        A ``(paths, tokens)`` tuple. ``paths`` holds every non-ignored file,
        including files that could not be decoded as text; ``tokens`` is the
        sum of the token counts of the files that were decoded.
    """
    paths: list[Path] = []
    ignored: list[Path] = []
    tokens = 0
    for current_working_dir, dirs, files in walk(root):
        cwd = Path(current_working_dir)
        if _is_in_ignored_dir(cwd, ignored):
            print(f"ignoring dir: {str(cwd)}")
            # Prune in place so ``walk`` does not descend any further.
            dirs.clear()
            continue
        if _is_ignored(Path(root), cwd, ignored):
            print(f"ignoring dir: {str(cwd)}")
            ignored.append(cwd)
            # Nothing below an ignored directory is tracked by Git, so there
            # is no reason to visit its subdirectories (``.git`` especially).
            dirs.clear()
            continue
        for file in files:
            path = cwd / file
            if _is_ignored(Path(root), path, ignored):
                print(f"ignoring file: {str(path)}")
                ignored.append(path)
                continue
            paths.append(path)
            # Decode explicitly as UTF-8: with a locale-dependent default,
            # some single-byte encodings "successfully" decode binary files.
            with open(path, "r", encoding="utf-8") as f:
                try:
                    contents = f.read()
                except UnicodeDecodeError:
                    # Not text (or not UTF-8): keep the path, skip counting.
                    continue
            print(f"counting tokens: {str(path)}")
            response = gemini.models.count_tokens(
                model="gemini-2.5-flash", contents=contents
            )
            # NOTE(review): ``total_tokens`` looks optional in the SDK's
            # response type; treat a missing value as zero. Confirm.
            tokens += response.total_tokens or 0
    return (paths, tokens)
64+
65+
66+
def main():
    """Scan the current directory and print the file and token totals."""
    collected, total = collect(Path("."))
    # Visual separator between the per-file progress log and the summary.
    separator = "*" * 80
    print(separator)
    print(f"file count: {len(collected)}")
    print(f"tokens: {total}")
72+
73+
74+
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

ai/tokens.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
================================
2+
Git-aware token count for a repo
3+
================================
4+
5+
"Git-aware" means that this script ignores files that match a ``.gitignore`` pattern.
6+
7+
.. literalinclude:: ./tokens.py
8+
:language: py

0 commit comments

Comments
 (0)