-
Notifications
You must be signed in to change notification settings - Fork 153
fix config caching #1129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix config caching #1129
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
import {createHash} from "node:crypto"; | ||
import {existsSync, readFileSync} from "node:fs"; | ||
import {stat} from "node:fs/promises"; | ||
import op from "node:path"; | ||
|
@@ -8,7 +9,7 @@ import type MarkdownIt from "markdown-it"; | |
import {LoaderResolver} from "./dataloader.js"; | ||
import {visitMarkdownFiles} from "./files.js"; | ||
import {formatIsoDate, formatLocaleDate} from "./format.js"; | ||
import {createMarkdownIt, parseMarkdown} from "./markdown.js"; | ||
import {createMarkdownIt, parseMarkdownMetadata} from "./markdown.js"; | ||
import {isAssetPath, parseRelativeUrl, resolvePath} from "./path.js"; | ||
import {resolveTheme} from "./theme.js"; | ||
|
||
|
@@ -88,18 +89,29 @@ export async function readDefaultConfig(root?: string): Promise<Config> { | |
return normalizeConfig(await importConfig(tsPath), root); | ||
} | ||
|
||
let cachedPages: {key: string; pages: Page[]} | null = null; | ||
|
||
function readPages(root: string, md: MarkdownIt): Page[] { | ||
const pages: Page[] = []; | ||
const files: {file: string; source: string}[] = []; | ||
const hash = createHash("sha256"); | ||
for (const file of visitMarkdownFiles(root)) { | ||
if (file === "index.md" || file === "404.md") continue; | ||
const source = readFileSync(join(root, file), "utf8"); | ||
const parsed = parseMarkdown(source, {path: file, md}); | ||
files.push({file, source}); | ||
hash.update(file).update(source); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also tried hashing the mtime of the file instead of the source, which is slightly faster in the cached case. But I don’t think it’s fast enough that it matters, and this felt simpler. |
||
} | ||
const key = hash.digest("hex"); | ||
if (cachedPages?.key === key) return cachedPages.pages; | ||
const pages: Page[] = []; | ||
for (const {file, source} of files) { | ||
const parsed = parseMarkdownMetadata(source, {path: file, md}); | ||
if (parsed?.data?.draft) continue; | ||
const name = basename(file, ".md"); | ||
const page = {path: join("/", dirname(file), name), name: parsed.title ?? "Untitled"}; | ||
if (name === "index") pages.unshift(page); | ||
else pages.push(page); | ||
} | ||
cachedPages = {key, pages}; | ||
return pages; | ||
} | ||
|
||
|
@@ -109,7 +121,15 @@ export function setCurrentDate(date = new Date()): void { | |
currentDate = date; | ||
} | ||
|
||
// The config is used as a cache key for other operations; for example the pages | ||
// are used as a cache key for search indexing and the previous & next links in | ||
// the footer. When given the same spec (because import returned the same | ||
// module), we want to return the same Config instance. | ||
const configCache = new WeakMap<any, Config>(); | ||
|
||
export function normalizeConfig(spec: any = {}, defaultRoot = "docs"): Config { | ||
const cachedConfig = configCache.get(spec); | ||
if (cachedConfig) return cachedConfig; | ||
Comment on lines
+131
to
+132
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you prefer this pattern over:
For performance maybe? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For TypeScript. (Though, it could be faster, too.) Previously you had declared the When we declare the cache as ![]() TypeScript isn’t smart enough to know that if |
||
let { | ||
root = defaultRoot, | ||
output = "dist", | ||
|
@@ -168,6 +188,7 @@ export function normalizeConfig(spec: any = {}, defaultRoot = "docs"): Config { | |
}; | ||
if (pages === undefined) Object.defineProperty(config, "pages", {get: () => readPages(root, md)}); | ||
if (sidebar === undefined) Object.defineProperty(config, "sidebar", {get: () => config.pages.length > 0}); | ||
configCache.set(spec, config); | ||
return config; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,11 @@ export function* visitMarkdownFiles(root: string): Generator<string> { | |
export function* visitFiles(root: string): Generator<string> { | ||
const visited = new Set<number>(); | ||
const queue: string[] = [(root = normalize(root))]; | ||
try { | ||
visited.add(statSync(join(root, ".observablehq")).ino); | ||
} catch { | ||
// ignore the .observablehq directory, if it exists | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I noticed that if you have a |
||
} | ||
for (const path of queue) { | ||
const status = statSync(path); | ||
if (status.isDirectory()) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,8 +9,8 @@ import {parseMarkdown} from "./markdown.js"; | |
import {faint, strikethrough} from "./tty.js"; | ||
|
||
// Avoid reindexing too often in preview. | ||
const indexCache = new WeakMap(); | ||
const reindexingDelay = 10 * 60 * 1000; // 10 minutes | ||
const indexCache = new WeakMap<Config["pages"], {json: string; freshUntil: number}>(); | ||
const reindexDelay = 10 * 60 * 1000; // 10 minutes | ||
|
||
export interface SearchIndexEffects { | ||
logger: Logger; | ||
|
@@ -19,17 +19,18 @@ export interface SearchIndexEffects { | |
const defaultEffects: SearchIndexEffects = {logger: console}; | ||
|
||
const indexOptions = { | ||
fields: ["title", "text", "keywords"], | ||
fields: ["title", "text", "keywords"], // fields to return with search results | ||
storeFields: ["title"], | ||
processTerm(term) { | ||
return term.match(/\p{N}/gu)?.length > 6 ? null : term.slice(0, 15).toLowerCase(); // fields to return with search results | ||
processTerm(term: string) { | ||
return (term.match(/\p{N}/gu)?.length ?? 0) > 6 ? null : term.slice(0, 15).toLowerCase(); | ||
} | ||
}; | ||
|
||
export async function searchIndex(config: Config, effects = defaultEffects): Promise<string> { | ||
const {root, pages, search, md} = config; | ||
if (!search) return "{}"; | ||
if (indexCache.has(config) && indexCache.get(config).freshUntil > +new Date()) return indexCache.get(config).json; | ||
const cached = indexCache.get(pages); | ||
if (cached && cached.freshUntil > Date.now()) return cached.json; | ||
|
||
// Get all the listed pages (which are indexed by default) | ||
const pagePaths = new Set(["/index"]); | ||
|
@@ -84,7 +85,7 @@ export async function searchIndex(config: Config, effects = defaultEffects): Pro | |
) | ||
); | ||
|
||
indexCache.set(config, {json, freshUntil: +new Date() + reindexingDelay}); | ||
indexCache.set(pages, {json, freshUntil: Date.now() + reindexDelay}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The cache is keyed by the |
||
return json; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ import {normalizeConfig as config, mergeToc, readConfig, setCurrentDate} from ". | |
import {LoaderResolver} from "../src/dataloader.js"; | ||
|
||
describe("readConfig(undefined, root)", () => { | ||
before(() => setCurrentDate(new Date("2024-01-11T01:02:03"))); | ||
before(() => setCurrentDate(new Date("2024-01-10T16:00:00"))); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change is to match the build tests — since the config is now cached, we want the footer to have the same data, so the behavior of the tests is the same independent of order. |
||
it("imports the config file at the specified root", async () => { | ||
const {md, loaders, ...config} = await readConfig(undefined, "test/input/build/config"); | ||
assert(md instanceof MarkdownIt); | ||
|
@@ -28,7 +28,7 @@ describe("readConfig(undefined, root)", () => { | |
head: "", | ||
header: "", | ||
footer: | ||
'Built with <a href="https://observablehq.com/" target="_blank">Observable</a> on <a title="2024-01-11T01:02:03">Jan 11, 2024</a>.', | ||
'Built with <a href="https://observablehq.com/" target="_blank">Observable</a> on <a title="2024-01-10T16:00:00">Jan 10, 2024</a>.', | ||
deploy: { | ||
workspace: "acme", | ||
project: "bi" | ||
|
@@ -54,7 +54,7 @@ describe("readConfig(undefined, root)", () => { | |
head: "", | ||
header: "", | ||
footer: | ||
'Built with <a href="https://observablehq.com/" target="_blank">Observable</a> on <a title="2024-01-11T01:02:03">Jan 11, 2024</a>.', | ||
'Built with <a href="https://observablehq.com/" target="_blank">Observable</a> on <a title="2024-01-10T16:00:00">Jan 10, 2024</a>.', | ||
deploy: null, | ||
search: false | ||
}); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I’m only storing a single value (the most recent value) in the cache for
readPages
, just to keep things simple. I don’t expect that there would be more than one set of pages needed simultaneously.