Skip to content

Commit a5e98d5

Browse files
authored
fix(v0.4.2): code-fence truncation + changelog page (#71)
Fixes two user-reported issues: 1) "Full Directory Tree is not being displayed properly" — the converter's truncate_chars / truncate_lines cut content mid-code-block and left the opening ``` without a matching close fence. The markdown parser then swallowed everything below as one giant code block. Fixed by counting unbalanced fences in the kept portion and injecting a closing fence before the truncation marker. Also hardened parse_jsonl to survive corrupt UTF-8 bytes (errors="replace") and stray non-dict records that used to crash filter_records. 2) "add changelog page" — CHANGELOG.md now renders as a first-class page at /changelog.html with its own nav link, narrow reading column, keep-a-changelog typography, and the same theme/print styles as the rest of the wiki. Test delta: +5 truncation tests, +5 changelog page tests. 163 passing total (was 158). 30 previously-mangled session files regenerated via llmwiki sync.
1 parent f57a764 commit a5e98d5

5 files changed

Lines changed: 231 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ Versions below 1.0 are pre-production — API and file formats may change.
1111
### Added
1212

1313
- **Session metrics frontmatter** (#63) — converter now emits five new keys per session as JSON inline: `tool_counts`, `token_totals` (input / cache_creation / cache_read / output), `turn_count`, `hour_buckets` (UTC-normalised ISO-hour → activity count), and `duration_seconds`. Foundation for the v0.8 visualization stack (#64 heatmap / #65 tool chart / #66 token card). Stdlib-only; byte-identical on re-run. 24 new tests.
14+
- **Changelog page** (#72) — `CHANGELOG.md` now renders as a first-class page at `site/changelog.html` with a nav-bar link, narrow reading column, keep-a-changelog typography, and the same theme/print styles as the rest of the wiki.
15+
16+
### Fixed
17+
18+
- **Code-fence truncation eating pages** (#72) — `truncate_chars` / `truncate_lines` used to cut content mid-code-block, leaving the opening ` ``` ` without a closing fence. The markdown parser then swallowed everything that followed as one giant block (user-visible example: the "Full Directory Tree" section on subagent pages). Fixed by counting unbalanced fences in the kept portion and injecting a closing fence before the truncation marker. 5 new tests; 30 previously-mangled session files regenerated.
19+
- **Sync crash on corrupt JSONL bytes** (#72) — a single stray non-UTF-8 byte in a session transcript used to abort the entire `llmwiki sync` run with `UnicodeDecodeError`. `parse_jsonl` now opens with `errors="replace"` and silently drops non-dict records (rare stray scalars from partial writes that previously crashed `filter_records` with `AttributeError`).
1420

1521
## [0.4.0] — 2026-04-08
1622

llmwiki/build.py

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ def link(href: str, label: str, key: str) -> str:
337337
{link("index.html", "Home", "home")}
338338
{link("projects/index.html", "Projects", "projects")}
339339
{link("sessions/index.html", "Sessions", "sessions")}
340+
{link("changelog.html", "Changelog", "changelog")}
340341
<button class="nav-search-btn" id="open-palette" aria-label="Open command palette">
341342
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="11" cy="11" r="8"/><line x1="21" y1="21" x2="16.65" y2="16.65"/></svg>
342343
<span>Search</span>
@@ -839,6 +840,61 @@ def render_index(
839840
return out_path
840841

841842

843+
# ─── changelog page ────────────────────────────────────────────────────────
844+
845+
def render_changelog(out_dir: Path) -> Optional[Path]:
    """Render ``CHANGELOG.md`` (repo root) to ``<out_dir>/changelog.html``.

    The changelog's leading H1 is dropped because the hero already prints the
    page title; the remaining markdown is converted with ``md_to_html`` and
    wrapped in the narrow-column article layout.

    Args:
        out_dir: Directory the page is written into. It must already exist.

    Returns:
        The path of the written ``changelog.html``, or ``None`` when
        ``REPO_ROOT / "CHANGELOG.md"`` is not a regular file.
    """
    src = REPO_ROOT / "CHANGELOG.md"
    # is_file() rather than exists(): a directory with that name cannot be read.
    if not src.is_file():
        return None
    # "utf-8-sig" reads plain UTF-8 as well, but also drops a leading BOM that
    # would otherwise hide the H1 from the startswith("# ") check below.
    raw = src.read_text(encoding="utf-8-sig")

    # Strip the top H1 ("# Changelog") so the title is not rendered twice.
    # Leading blank lines are skipped first, so a changelog that starts with
    # an empty line still loses its H1.
    body_md = raw
    lines = raw.splitlines()
    first = next((i for i, ln in enumerate(lines) if ln.strip()), None)
    if first is not None and lines[first].lstrip().startswith("# "):
        body_md = "\n".join(lines[first + 1:]).lstrip("\n")

    content_html = md_to_html(body_md)

    # NOTE(review): the closing </main> has no opener in this fragment;
    # presumably nav_bar()/hero() emit it — confirm against those helpers.
    body = f"""<section class="section changelog-body">
<div class="container narrow">
<article class="article">
{content_html}
</article>
</div>
</section>
</main>
"""

    page = (
        page_head(
            "Changelog — LLM Wiki",
            "Release notes for llmwiki — features, fixes, and version history.",
            css_prefix="",
        )
        + nav_bar("changelog", link_prefix="")
        + hero(
            "Changelog",
            "Every release, every fix. Keep-a-changelog format, semver.",
        )
        + body
        + page_foot(js_prefix="")
    )

    out_path = out_dir / "changelog.html"
    out_path.write_text(page, encoding="utf-8")
    return out_path
896+
897+
842898
# ─── search index ──────────────────────────────────────────────────────────
843899

844900
def build_search_index(
@@ -1182,6 +1238,22 @@ def build_search_index(
11821238
.footer { padding: 32px 0; border-top: 1px solid var(--border); margin-top: 48px; background: var(--bg-alt); }
11831239
.footer p { font-size: 0.85rem; color: var(--text-muted); text-align: center; }
11841240
1241+
/* Changelog page — narrow reading column + keep-a-changelog typography */
1242+
.container.narrow { max-width: 760px; }
1243+
.changelog-body { padding: 40px 0 64px; }
1244+
.changelog-body .article h2 { margin-top: 48px; padding-bottom: 8px; border-bottom: 1px solid var(--border); font-size: 1.5rem; }
1245+
.changelog-body .article h2:first-child { margin-top: 0; }
1246+
.changelog-body .article h3 { margin-top: 28px; font-size: 1.1rem; color: var(--text-secondary); text-transform: uppercase; letter-spacing: 0.04em; }
1247+
.changelog-body .article h4 { margin-top: 20px; font-size: 0.98rem; }
1248+
.changelog-body .article ul { margin: 12px 0 20px; padding-left: 22px; }
1249+
.changelog-body .article li { margin: 6px 0; line-height: 1.6; }
1250+
.changelog-body .article li > code,
1251+
.changelog-body .article p > code { font-size: 0.86rem; padding: 1px 6px; background: var(--bg-code); border-radius: 4px; }
1252+
.changelog-body .article p { line-height: 1.7; }
1253+
.changelog-body .article a { color: var(--accent); }
1254+
.changelog-body .article hr { margin: 36px 0; border: 0; border-top: 1px solid var(--border); }
1255+
.changelog-body .article blockquote { margin: 16px 0; padding: 8px 16px; border-left: 3px solid var(--accent); color: var(--text-secondary); background: var(--bg-alt); border-radius: 0 4px 4px 0; }
1256+
11851257
/* v0.4: Related pages panel */
11861258
.related-pages { margin-top: 48px; padding-top: 24px; border-top: 1px solid var(--border); }
11871259
.related-pages h3 { font-size: 1.05rem; color: var(--text-secondary); margin-bottom: 12px; }
@@ -2243,7 +2315,11 @@ def build_site(
22432315
render_projects_index(groups, out_dir)
22442316
render_sessions_index(sources, groups, out_dir)
22452317
render_index(groups, sources, out_dir, synthesis=synthesis)
2246-
print(" wrote index.html, projects/index.html, sessions/index.html")
2318+
cl_path = render_changelog(out_dir)
2319+
print(
2320+
" wrote index.html, projects/index.html, sessions/index.html"
2321+
+ (", changelog.html" if cl_path else "")
2322+
)
22472323

22482324
# Search index
22492325
idx_path = build_search_index(sources, groups, out_dir)

llmwiki/convert.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -197,15 +197,23 @@ def __len__(self) -> int:
197197
def parse_jsonl(path: Path) -> list[dict[str, Any]]:
    """Read a JSONL file and return its dict-shaped records in file order.

    Parsing is best-effort so one damaged transcript cannot abort a sync:
    undecodable bytes are replaced, blank or unparseable lines are skipped,
    and records that are not JSON objects are dropped.

    Args:
        path: Location of the ``.jsonl`` file.

    Returns:
        The parsed dict records. An unreadable or missing file yields
        whatever was collected before the ``OSError`` (possibly ``[]``).
    """
    out: list[dict[str, Any]] = []
    try:
        # ``errors="replace"`` lets us survive the occasional corrupt byte in a
        # session transcript (e.g. a truncated UTF-8 sequence from a killed
        # tool) instead of raising UnicodeDecodeError.
        with path.open(encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except (ValueError, RecursionError):
                    # ValueError covers json.JSONDecodeError (its subclass)
                    # and the plain ValueError raised for over-long integer
                    # literals; RecursionError covers absurdly deep nesting.
                    continue
                # Only keep dict-shaped records. JSONL files occasionally
                # contain stray scalars (e.g. numbers, strings) from partial
                # writes, which callers expecting dicts cannot handle.
                if isinstance(rec, dict):
                    out.append(rec)
    except OSError:
        # Deliberate best-effort: return what was read so far.
        pass
    return out
@@ -283,10 +291,26 @@ def __call__(self, text: str) -> str:
283291
return text
284292

285293

294+
def _close_open_fence(text: str) -> str:
    """Append a closing code fence when ``text`` ends inside a fenced block.

    Truncation can cut a tool result in the middle of a fenced code block;
    without a closing fence a markdown parser treats the rest of the page as
    code (see #72). Instead of counting fence lines, this walks the lines and
    tracks whether a block is open, so that:

    * a block opened with four or more backticks, or with ``~~~``, is closed
      with the same marker rather than a fixed triple backtick;
    * a line such as "```python" inside an open block is not taken for a
      closing fence (closing fences carry no info string);
    * a backtick line with further backticks after the run, such as
      "```x``` text", is treated as inline code, not as an opening fence.

    Args:
        text: Markdown text, typically the kept part of a truncated string.

    Returns:
        ``text`` unchanged when every fence is closed; otherwise ``text``
        followed by a newline and a fence that repeats the opener's
        indentation and marker run.
    """
    open_fence = ""   # marker run of the block currently open, "" if none
    open_indent = ""  # leading whitespace of that opening fence line
    for line in text.splitlines():
        stripped = line.lstrip()
        marker = stripped[:1]
        if marker not in ("`", "~"):
            continue
        run_len = len(stripped) - len(stripped.lstrip(marker))
        if run_len < 3:
            continue
        rest = stripped[run_len:]
        if open_fence:
            # A closer uses the same character, is at least as long as the
            # opener, and has nothing but whitespace after the run.
            if (
                marker == open_fence[0]
                and run_len >= len(open_fence)
                and not rest.strip()
            ):
                open_fence = ""
        elif marker == "~" or "`" not in rest:
            open_fence = marker * run_len
            open_indent = line[: len(line) - len(stripped)]
    if open_fence:
        return f"{text}\n{open_indent}{open_fence}"
    return text
307+
308+
286309
def truncate_chars(text: str, max_chars: int) -> str:
    """Cap ``text`` at ``max_chars`` characters and note how many were cut.

    Empty text, or text no longer than ``max_chars``, is returned as is.
    Otherwise the kept prefix goes through ``_close_open_fence`` so a cut
    inside a code block does not leave a dangling fence, and a
    "…(truncated, N more chars)" line is appended.
    """
    if text and len(text) > max_chars:
        dropped = len(text) - max_chars
        head = _close_open_fence(text[:max_chars])
        return head + f"\n…(truncated, {dropped} more chars)"
    return text
290314

291315

292316
def truncate_lines(text: str, max_lines: int) -> str:
@@ -295,7 +319,7 @@ def truncate_lines(text: str, max_lines: int) -> str:
295319
lines = text.splitlines()
296320
if len(lines) <= max_lines:
297321
return text
298-
kept = "\n".join(lines[:max_lines])
322+
kept = _close_open_fence("\n".join(lines[:max_lines]))
299323
return kept + f"\n…(truncated, {len(lines) - max_lines} more lines)"
300324

301325

tests/test_changelog_page.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Tests for the first-class /changelog page (#72).
2+
3+
``llmwiki.build.render_changelog`` reads ``CHANGELOG.md`` at the repo root and
4+
renders it to ``site/changelog.html``. These tests pin the contract so a
5+
future refactor doesn't silently drop the page or mangle its layout.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
from pathlib import Path
11+
12+
import pytest
13+
14+
from llmwiki.build import render_changelog
15+
16+
17+
@pytest.fixture
def tmp_out(tmp_path: Path) -> Path:
    """Provide a freshly created ``site`` directory under pytest's tmp_path."""
    site_dir = tmp_path / "site"
    site_dir.mkdir()
    return site_dir
22+
23+
24+
def test_render_changelog_writes_file(tmp_out: Path):
    """render_changelog returns the path of an existing changelog.html."""
    written = render_changelog(tmp_out)
    assert written is not None
    assert written.exists()
    assert written.name == "changelog.html"
29+
30+
31+
def test_render_changelog_contains_hero_and_nav(tmp_out: Path):
    """The page shows the title once and marks a nav link as active."""
    render_changelog(tmp_out)
    page = tmp_out.joinpath("changelog.html").read_text(encoding="utf-8")
    # Hero title is present.
    assert "Changelog" in page
    # A nav link carries the active marker.
    assert 'class="active"' in page
    # The markdown "# Changelog" H1 is stripped before rendering, so the
    # heading may appear exactly once in the output.
    assert page.count("<h1>Changelog</h1>") == 1
41+
42+
43+
def test_render_changelog_renders_markdown_headings(tmp_out: Path):
    """Keep-a-changelog section headings reach the page as <h2> elements."""
    render_changelog(tmp_out)
    page = tmp_out.joinpath("changelog.html").read_text(encoding="utf-8")
    # "Unreleased" also matches the bracketed "[Unreleased]" form, so one
    # substring check covers both spellings.
    assert "Unreleased" in page
    assert "<h2" in page
49+
50+
51+
def test_render_changelog_returns_none_when_missing(tmp_path: Path, monkeypatch):
    """With no CHANGELOG.md under REPO_ROOT, nothing is rendered."""
    import llmwiki.build as build_mod

    site_dir = tmp_path / "site"
    site_dir.mkdir()
    bare_repo = tmp_path / "empty_repo"
    bare_repo.mkdir()
    # Redirect REPO_ROOT to a directory that has no CHANGELOG.md.
    monkeypatch.setattr(build_mod, "REPO_ROOT", bare_repo)

    assert render_changelog(site_dir) is None
    assert not (site_dir / "changelog.html").exists()
62+
63+
64+
def test_render_changelog_is_well_formed_html(tmp_out: Path):
    """The output is a complete HTML document using the narrow column."""
    render_changelog(tmp_out)
    page = tmp_out.joinpath("changelog.html").read_text(encoding="utf-8")
    assert page.startswith("<!DOCTYPE html>")
    assert page.rstrip().endswith("</html>")
    # The narrow reading-column class must be applied so the changelog does
    # not span the full content width.
    assert "container narrow" in page

tests/test_convert.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,54 @@ def test_truncate_lines_long():
4343
assert "truncated" in out
4444

4545

46+
# ─── #72: code-fence balance preservation ───────────────────────────────
47+
# When truncate_chars / truncate_lines cuts mid-code-block, the opening
48+
# ``` must get a matching close fence so downstream markdown parsers
49+
# don't consume the entire rest of the page.
50+
51+
52+
def test_truncate_chars_closes_open_fence():
    """A char-truncated open code block gets a matching closing fence."""
    src = "```\nline1\nline2\nline3\nline4\nline5\nline6\nline7\n"
    result = truncate_chars(src, 20)
    # Fence lines in the result must pair up: the original opener plus the
    # injected closer.
    fence_total = sum(
        1 for row in result.splitlines() if row.lstrip().startswith("```")
    )
    assert fence_total % 2 == 0
    assert fence_total >= 2
    assert "truncated" in result
60+
61+
62+
def test_truncate_lines_closes_open_fence():
    """A line-truncated open code block gets a matching closing fence."""
    src = "```\nroot/\n├── a\n├── b\n├── c\n├── d\n"
    result = truncate_lines(src, 3)
    fence_total = sum(
        1 for row in result.splitlines() if row.lstrip().startswith("```")
    )
    assert fence_total % 2 == 0
    assert "truncated" in result
68+
69+
70+
def test_truncate_chars_balanced_fence_unchanged():
    """An already closed block must not receive an extra fence."""
    src = "```\nshort\n```\nmore text that pushes over the char budget"
    result = truncate_chars(src, 20)
    fence_rows = [
        row for row in result.splitlines() if row.lstrip().startswith("```")
    ]
    # Exactly the original opener and closer; no phantom third fence.
    assert len(fence_rows) == 2
77+
78+
79+
def test_truncate_chars_no_fence_no_change():
    """Fence-free text is truncated without any injected fence."""
    plain = "a" * 100
    result = truncate_chars(plain, 10)
    assert "```" not in result
84+
85+
86+
def test_truncate_chars_fenced_lang_marker():
    """An opener with a language tag (```python) is still recognised."""
    src = "```python\n" + "x = 1\n" * 50
    result = truncate_chars(src, 30)
    fence_total = sum(
        1 for row in result.splitlines() if row.lstrip().startswith("```")
    )
    assert fence_total % 2 == 0
91+
assert len(fences) % 2 == 0
92+
93+
4694
def test_redactor_username_in_path():
4795
config = {"redaction": {"real_username": "alice", "replacement_username": "USER", "extra_patterns": []}}
4896
r = Redactor(config)

0 commit comments

Comments
 (0)