diff --git a/edge-middleware/agent-analytics-starter/.env.example b/edge-middleware/agent-analytics-starter/.env.example new file mode 100644 index 0000000000..56afbc1130 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/.env.example @@ -0,0 +1,9 @@ +# PostHog project API key — the public key used by the JS SDK. Find it at: +# https://app.posthog.com/project/settings +NEXT_PUBLIC_POSTHOG_KEY= + +# PostHog ingestion host. Use: +# https://us.i.posthog.com — PostHog US cloud +# https://eu.i.posthog.com — PostHog EU cloud +# https://svc.yoursite.com — your own reverse-proxy (dodges ad-blockers) +NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com diff --git a/edge-middleware/agent-analytics-starter/.eslintrc.json b/edge-middleware/agent-analytics-starter/.eslintrc.json new file mode 100644 index 0000000000..a2569c2c7c --- /dev/null +++ b/edge-middleware/agent-analytics-starter/.eslintrc.json @@ -0,0 +1,4 @@ +{ + "root": true, + "extends": "next/core-web-vitals" +} diff --git a/edge-middleware/agent-analytics-starter/.gitignore b/edge-middleware/agent-analytics-starter/.gitignore new file mode 100644 index 0000000000..bab93402c8 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/.gitignore @@ -0,0 +1,11 @@ +node_modules +.next +out +dist +.DS_Store +*.log +.env +.env.local +.env*.local +.vercel +.turbo diff --git a/edge-middleware/agent-analytics-starter/.npmrc b/edge-middleware/agent-analytics-starter/.npmrc new file mode 100644 index 0000000000..2d087c8dd0 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/.npmrc @@ -0,0 +1,2 @@ +# Enabled to avoid deps failing to use next@canary in the monorepo. 
+legacy-peer-deps=true diff --git a/edge-middleware/agent-analytics-starter/LICENSE b/edge-middleware/agent-analytics-starter/LICENSE new file mode 100644 index 0000000000..67c36bf9ad --- /dev/null +++ b/edge-middleware/agent-analytics-starter/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Apideck + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/edge-middleware/agent-analytics-starter/README.md b/edge-middleware/agent-analytics-starter/README.md new file mode 100644 index 0000000000..548312faea --- /dev/null +++ b/edge-middleware/agent-analytics-starter/README.md @@ -0,0 +1,181 @@ +--- +name: Agent Analytics Starter +slug: agent-analytics-starter +description: Track AI agent traffic (ClaudeBot, GPTBot, Perplexity, and 20+ more) in PostHog via middleware. Serves clean Markdown to agents on the same URLs — @apideck/agent-analytics wired in. 
+framework: Next.js +useCase: Edge Middleware +css: Plain CSS +deployUrl: https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fapideck-libraries%2Fagent-analytics-nextjs-starter&env=NEXT_PUBLIC_POSTHOG_KEY,NEXT_PUBLIC_POSTHOG_HOST&envDescription=PostHog%20project%20API%20key%20and%20host&envLink=https%3A%2F%2Fgithub.com%2Fapideck-libraries%2Fagent-analytics-nextjs-starter%23environment-variables&project-name=agent-analytics-starter&repository-name=agent-analytics-starter +demoUrl: https://agent-analytics-nextjs-starter.vercel.app +relatedTemplates: + - bot-protection-datadome + - bot-protection-botd +--- + +
+ +Agent Analytics — see the agents your JavaScript can't. + +# Agent Analytics — Next.js starter + +### Next.js 15 starter that tracks AI agent traffic in PostHog — drop in your API key, deploy, watch ClaudeBot show up in your dashboard. + +**One-click deploy.** Sample `/docs/` routes that serve as HTML to browsers and **clean Markdown to AI agents**. Every Markdown fetch fires a `doc_view` event with `is_ai_bot`, `source`, and `user_agent` — ready to segment in PostHog. + +[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fapideck-libraries%2Fagent-analytics-nextjs-starter&env=NEXT_PUBLIC_POSTHOG_KEY,NEXT_PUBLIC_POSTHOG_HOST&envDescription=PostHog%20project%20API%20key%20and%20host&envLink=https%3A%2F%2Fgithub.com%2Fapideck-libraries%2Fagent-analytics-nextjs-starter%23environment-variables&project-name=agent-analytics-starter&repository-name=agent-analytics-starter) + +[**Live demo**](https://agent-analytics-nextjs-starter.vercel.app) · [**@apideck/agent-analytics**](https://github.com/apideck-libraries/agent-analytics) · [**The pattern, explained**](https://addyosmani.com/blog/agentic-engine-optimization/) + +
+ +--- + +## What this template does + +AI crawlers don't run JavaScript — so your client-side analytics never see them. This template closes that gap: + +1. **`middleware.ts`** uses [`@apideck/agent-analytics`](https://www.npmjs.com/package/@apideck/agent-analytics) to detect 20+ known AI bots (ClaudeBot, GPTBot, PerplexityBot, Google-Extended, Applebot-Extended, Bytespider, Cursor, Windsurf, and more) and capture a `doc_view` event in PostHog on every Markdown serve. +2. **`/docs/` routes** are served as clean Markdown when an agent asks (via `.md` suffix, `Accept: text/markdown`, or a known bot UA) — otherwise HTML. Same URL, two representations. +3. **Every Markdown response** carries `Content-Signal`, `Vary: accept`, and `x-markdown-tokens` headers so agents can budget context before parsing. + +Runs on Vercel's Fluid Compute. Zero infrastructure to manage; events land in PostHog seconds after deploy. + +## Quick start + +### 1. Deploy + +Click the Deploy button above, or: + +```bash +npx create-next-app --example https://github.com/apideck-libraries/agent-analytics-nextjs-starter my-app +cd my-app +vercel --prod +``` + +### 2. Set env vars + +In the Vercel deploy prompt (or your project's env settings): + +| Variable | Required | Example | +| -------------------------- | ------------------------- | --------------------------------------------------------------------------------- | +| `NEXT_PUBLIC_POSTHOG_KEY` | yes | `phc_xxxxxxxx` — from PostHog project settings | +| `NEXT_PUBLIC_POSTHOG_HOST` | no (defaults to US cloud) | `https://us.i.posthog.com`, `https://eu.i.posthog.com`, or your own reverse-proxy | + +If `NEXT_PUBLIC_POSTHOG_KEY` is absent the middleware silently no-ops — nothing breaks, events just don't flow. + +### 3. Verify + +```bash +# From your local terminal, pointed at the deployment: +curl -A "ClaudeBot/1.0 probe-$(date +%s)" https://<your-deployment>.vercel.app/docs/intro +``` + +Open PostHog → Activity and filter events by `event = doc_view`. 
You should see one event with `is_ai_bot: true`, `source: ua-rewrite`, and the probe UA you sent. + +--- + +## How it works + +``` + Agent / Browser middleware.ts PostHog +──────────────── ──────────────────────────────────────── ────────── + │ │ + │ GET /docs/intro │ + │ Accept: text/markdown (or .md suffix, or AI-bot UA) │ + ├──────────────────────┐ │ + │ ▼ │ + │ markdownServeDecision(req) → reason │ + │ │ │ + │ ┌────────────────┴───────────────┐ │ + │ ▼ ▼ │ + │ trackDocView(req, { NextResponse.rewrite( │ + │ analytics, req.nextUrl → /md/... │ + │ source: reason, ) │ + │ properties: {...} │ + │ }) ────fire-and-forget──────keepalive fetch──►───────────► │ + │ │ + │ ◄──── 200 text/markdown ──────────────── │ + │ Content-Signal, x-markdown-tokens │ + │ │ +``` + +Key properties: + +- **Fire-and-forget** — the capture is non-blocking. `keepalive: true` lets it survive after the response returns. +- **No person profiles** — `$process_person_profile: false` tells PostHog not to create one per unique bot fingerprint. +- **Stable anon distinct_id** — djb2 hash of `ip:ua` collapses repeat fetches from the same agent into one visitor. + +## Structure + +``` +. +├── middleware.ts # The star of the show +├── app/ +│ ├── layout.tsx +│ ├── page.tsx # Landing page with probe instructions +│ ├── globals.css +│ └── docs/[slug]/page.tsx # Human-facing docs (HTML) +├── public/ +│ ├── md/docs/ +│ │ ├── intro.md # Agent-facing Markdown mirror +│ │ └── usage.md +│ └── llms.txt # Agent-friendly site index +├── .env.example +├── package.json +└── README.md +``` + +## Customising + +**Add a new `/docs/` page**: + +1. Create `public/md/docs/.md` — what agents see. +2. Add an entry to the `DOCS` object in `app/docs/[slug]/page.tsx` — what browsers see. +3. Update `public/llms.txt` so agents can discover it. + +**Extend the mirror to cover other routes** (e.g. 
`/blog/*`, `/guides/*`): + +Edit `resolveMirrorPath` in `middleware.ts`: + +```ts +function resolveMirrorPath(pathname: string): string | null { + if (pathname.startsWith('/docs/')) return `/md${pathname}.md` + if (pathname.startsWith('/blog/')) return `/md${pathname}.md` // ← add this + return null +} +``` + +Then create matching `public/md/blog/*.md` files. + +**Swap analytics backends** — replace the PostHog adapter with a webhook, Mixpanel, or your own callback: + +```ts +import { trackDocView, webhookAnalytics } from '@apideck/agent-analytics' + +const analytics = webhookAnalytics({ + url: 'https://collector.example.com/events', + headers: { Authorization: `Bearer ${process.env.COLLECTOR_TOKEN}` }, +}) +``` + +Any `{ capture(event) }` object is a valid adapter. + +## Environment variables + + + +| Variable | Description | +| -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `NEXT_PUBLIC_POSTHOG_KEY` | PostHog project API key (the public key used by the JS SDK). Find it under _Project settings → Project API Key_. | +| `NEXT_PUBLIC_POSTHOG_HOST` | PostHog ingestion host. Defaults to `https://us.i.posthog.com`. Set to `https://eu.i.posthog.com` for EU cloud, or your own reverse-proxy domain (e.g. `https://svc.example.com`) to dodge ad-blockers. 
| + +## Learn more + +- [`@apideck/agent-analytics` on GitHub](https://github.com/apideck-libraries/agent-analytics) — the library powering this template +- [Agentic Engine Optimization](https://addyosmani.com/blog/agentic-engine-optimization/) — the case for agent-ready docs sites +- [contentsignals.org](https://contentsignals.org) — the `Content-Signal` header spec +- [developers.apideck.com](https://developers.apideck.com) — the production docs site this pattern was extracted from + +## License + +MIT © Apideck diff --git a/edge-middleware/agent-analytics-starter/app/docs/[slug]/page.tsx b/edge-middleware/agent-analytics-starter/app/docs/[slug]/page.tsx new file mode 100644 index 0000000000..e91daef53d --- /dev/null +++ b/edge-middleware/agent-analytics-starter/app/docs/[slug]/page.tsx @@ -0,0 +1,66 @@ +import Link from 'next/link' +import { notFound } from 'next/navigation' + +const DOCS: Record = { + intro: { + title: 'Intro', + intro: + 'This page is served as HTML for browsers, and as clean Markdown for AI agents. The switch happens in middleware.ts — no framework changes, no JSX-in-Markdown dance.', + }, + usage: { + title: 'Usage', + intro: + 'Spoof any of ClaudeBot, GPTBot, PerplexityBot, Google-Extended, Applebot-Extended, Cursor, or Windsurf as the User-Agent and you\u2019ll receive the Markdown mirror. A `doc_view` event fires into PostHog on every Markdown fetch.', + }, +} + +export function generateStaticParams() { + return Object.keys(DOCS).map((slug) => ({ slug })) +} + +export default async function DocPage({ + params, +}: { + params: Promise<{ slug: string }> +}) { + const { slug } = await params + const doc = DOCS[slug] + if (!doc) notFound() + + return ( +
+

+ ← Back +

+ human view (HTML) +

{doc.title}

+

{doc.intro}

+ +

See the agent view

+

+ Same URL, append .md or send{' '} + Accept: text/markdown: +

+
+        {`curl /docs/${slug}.md
+curl -H "Accept: text/markdown" /docs/${slug}
+curl -A "ClaudeBot/1.0" /docs/${slug}`}
+      
+ +

+ The agent gets the pre-built Markdown in{' '} + public/md/docs/{slug}.md, with{' '} + Content-Type: text/markdown, Content-Signal, + and x-markdown-tokens headers. A doc_view{' '} + event is captured in PostHog with source ={' '} + md-suffix, accept-header, or{' '} + ua-rewrite depending on how the agent arrived. +

+ +
+ Edit app/docs/[slug]/page.tsx to customize this template, + and public/md/docs/*.md to change what agents see. +
+
+ ) +} diff --git a/edge-middleware/agent-analytics-starter/app/globals.css b/edge-middleware/agent-analytics-starter/app/globals.css new file mode 100644 index 0000000000..365d15bb6f --- /dev/null +++ b/edge-middleware/agent-analytics-starter/app/globals.css @@ -0,0 +1,139 @@ +:root { + color-scheme: light dark; + --fg: #0a0a0a; + --bg: #ffffff; + --muted: #6b7280; + --border: #e5e7eb; + --accent: #2563eb; + --code-bg: #f6f8fa; +} + +@media (prefers-color-scheme: dark) { + :root { + --fg: #f5f5f5; + --bg: #0a0a0a; + --muted: #9ca3af; + --border: #27272a; + --accent: #60a5fa; + --code-bg: #111113; + } +} + +* { + box-sizing: border-box; +} + +html, +body { + margin: 0; + padding: 0; + background: var(--bg); + color: var(--fg); + font-family: + ui-sans-serif, + system-ui, + -apple-system, + 'Segoe UI', + Roboto, + sans-serif; + line-height: 1.55; + -webkit-font-smoothing: antialiased; +} + +main { + max-width: 760px; + margin: 0 auto; + padding: 4rem 1.5rem 6rem; +} + +h1 { + font-size: 2.25rem; + font-weight: 600; + letter-spacing: -0.02em; + margin: 0 0 0.5rem; +} + +h2 { + font-size: 1.25rem; + font-weight: 600; + letter-spacing: -0.01em; + margin: 2.5rem 0 0.75rem; +} + +p, +li { + color: var(--fg); +} + +.muted { + color: var(--muted); +} + +a { + color: var(--accent); + text-decoration: none; + border-bottom: 1px solid color-mix(in oklab, var(--accent) 40%, transparent); +} +a:hover { + border-bottom-color: var(--accent); +} + +pre { + background: var(--code-bg); + border: 1px solid var(--border); + border-radius: 8px; + padding: 1rem 1.25rem; + overflow-x: auto; + font-size: 0.85rem; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + line-height: 1.5; +} + +code { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 0.88em; + background: var(--code-bg); + border: 1px solid var(--border); + border-radius: 4px; + padding: 0.1em 0.35em; +} + +pre code { + background: transparent; + border: 0; + padding: 0; +} + +.badge { + 
display: inline-block; + padding: 0.15rem 0.6rem; + font-size: 0.72rem; + font-weight: 600; + letter-spacing: 0.04em; + text-transform: uppercase; + border-radius: 999px; + background: color-mix(in oklab, var(--accent) 12%, transparent); + color: var(--accent); + border: 1px solid color-mix(in oklab, var(--accent) 25%, transparent); +} + +ul { + padding-left: 1.25rem; +} +li { + margin: 0.2rem 0; +} + +hr { + border: 0; + border-top: 1px solid var(--border); + margin: 2.5rem 0; +} + +footer { + margin-top: 4rem; + padding-top: 1.5rem; + border-top: 1px solid var(--border); + font-size: 0.85rem; + color: var(--muted); +} diff --git a/edge-middleware/agent-analytics-starter/app/layout.tsx b/edge-middleware/agent-analytics-starter/app/layout.tsx new file mode 100644 index 0000000000..059aba2315 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/app/layout.tsx @@ -0,0 +1,22 @@ +import type { Metadata, Viewport } from 'next' +import type { ReactNode } from 'react' +import './globals.css' + +export const metadata: Metadata = { + title: 'Agent Analytics Starter', + description: + 'Next.js starter that tracks AI agent traffic (ClaudeBot, GPTBot, Perplexity, and more) in PostHog via @apideck/agent-analytics.', +} + +export const viewport: Viewport = { + width: 'device-width', + initialScale: 1, +} + +export default function RootLayout({ children }: { children: ReactNode }) { + return ( + + {children} + + ) +} diff --git a/edge-middleware/agent-analytics-starter/app/page.tsx b/edge-middleware/agent-analytics-starter/app/page.tsx new file mode 100644 index 0000000000..a960c22eb6 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/app/page.tsx @@ -0,0 +1,93 @@ +import Link from 'next/link' + +export default function Home() { + return ( +
+ agent-analytics starter +

See the agents your JavaScript can't.

+

+ This page was just tracked. If your NEXT_PUBLIC_POSTHOG_KEY{' '} + is set and an AI bot fetches it, you'll see a doc_view{' '} + event land in PostHog within seconds — with is_ai_bot: true + , the agent's UA, the path, and a stable anonymous distinct id. +

+ +

Try it

+

Spoof a bot UA against this deployment and watch PostHog:

+
+        {`# From the command line:
+curl -A "ClaudeBot/1.0" https://<your-deployment>.vercel.app/
+
+# Then in PostHog → Activity, filter events by:
+#   event = doc_view  AND  is_ai_bot = true`}
+      
+ +

Markdown mirror demo

+

+ The /docs/ routes are served as Markdown when an agent asks + (either via Accept: text/markdown, a .md{' '} + suffix, or a known AI-bot user agent). +

+
    +
  • + /docs/intro — human view (HTML) +
  • +
  • + /docs/intro.md — agent view (Markdown) +
  • +
+
+        {`# Same page, two representations:
+curl https://<your-deployment>.vercel.app/docs/intro.md
+curl -H "Accept: text/markdown" https://<your-deployment>.vercel.app/docs/intro
+curl -A "ClaudeBot/1.0" https://<your-deployment>.vercel.app/docs/intro`}
+      
+ +

What's in this template

+
    +
  • + middleware.ts — fires trackDocView on every + Markdown serve, plus the full Markdown-mirror rewrite logic +
  • +
  • + public/md/docs/ — pre-built Markdown files served to + agents +
  • +
  • + app/docs/[slug]/page.tsx — human-facing rendering of the + same content +
  • +
+ +

Next steps

+
    +
  • + Set NEXT_PUBLIC_POSTHOG_KEY in your Vercel project env + vars +
  • +
  • + Edit the resolveMirrorPath function in{' '} + middleware.ts to cover your own content routes +
  • +
  • + Add Markdown files to public/md/ to match +
  • +
  • + Browse the{' '} + + @apideck/agent-analytics docs + {' '} + for webhook / Mixpanel / Amplitude adapters +
  • +
+ + +
+ ) +} diff --git a/edge-middleware/agent-analytics-starter/middleware.ts b/edge-middleware/agent-analytics-starter/middleware.ts new file mode 100644 index 0000000000..e225461d48 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/middleware.ts @@ -0,0 +1,117 @@
+import { NextResponse, type NextRequest } from 'next/server'
+import { trackVisit, posthogAnalytics } from '@apideck/agent-analytics'
+import {
+  markdownServeDecision,
+  markdownHeaders,
+  synthesizeMarkdownPointer,
+} from '@apideck/agent-analytics/markdown'
+
+// Optional canonical origin used in synthesized pointer documents, with any
+// trailing slashes removed. When NEXT_PUBLIC_SITE_ORIGIN is unset or empty the
+// origin of the incoming request is used instead, so a deployment never hands
+// agents links that point at localhost.
+const CONFIGURED_ORIGIN = (process.env.NEXT_PUBLIC_SITE_ORIGIN || '').replace(
+  /\/+$/,
+  ''
+)
+
+// Analytics adapter built from the PostHog env vars. An empty string is passed
+// as the key when NEXT_PUBLIC_POSTHOG_KEY is not configured.
+const analytics = posthogAnalytics({
+  apiKey: process.env.NEXT_PUBLIC_POSTHOG_KEY || '',
+  host: process.env.NEXT_PUBLIC_POSTHOG_HOST,
+})
+
+/**
+ * Map a public URL path to its pre-built Markdown file under /md/.
+ *
+ * Extend this as you add more content — any path not covered here gets a
+ * synthesized pointer document so the Accept: text/markdown contract holds
+ * site-wide.
+ *
+ * @param pathname - Path to look up; the middleware passes `decision.strippedPath`.
+ * @returns The `/md/...` path to rewrite to, or `null` when no mirror exists.
+ */
+function resolveMirrorPath(pathname: string): string | null {
+  // Drop trailing slashes so `/docs/intro/` maps to the same file as
+  // `/docs/intro`, and a bare `/docs/` falls through to the pointer document
+  // instead of rewriting to the non-existent `/md/docs/.md`.
+  const normalized = pathname.replace(/\/+$/, '')
+  if (normalized.startsWith('/docs/')) return `/md${normalized}.md`
+  return null
+}
+
+/**
+ * Serve Markdown to agents and pass every other request through.
+ *
+ * When `markdownServeDecision` returns a decision, the visit is tracked and
+ * the request is either rewritten to the pre-built mirror or answered with a
+ * synthesized pointer document.
+ *
+ * @param req - The incoming request.
+ * @returns A pass-through, a rewrite to `/md/...`, or a 200 pointer response.
+ */
+export function middleware(req: NextRequest) {
+  const { pathname } = req.nextUrl
+
+  // Requests for the mirror files themselves and for /_next/ pass straight through.
+  if (pathname.startsWith('/md/') || pathname.startsWith('/_next/')) {
+    return NextResponse.next()
+  }
+
+  const decision = markdownServeDecision(req)
+
+  // Plain HTML response — nothing special to do.
+  if (!decision) {
+    return NextResponse.next()
+  }
+
+  // Track every Markdown fetch with the source label (ua-rewrite,
+  // md-suffix, accept-header). Both synchronous throws and rejected
+  // promises are swallowed here — analytics can never break the response.
+  try {
+    void Promise.resolve(
+      trackVisit(req, {
+        analytics,
+        source: decision.reason,
+        properties: { site: 'starter' },
+      })
+    ).catch(() => {})
+  } catch {
+    // Deliberately ignored: tracking is best-effort.
+  }
+
+  const target = resolveMirrorPath(decision.strippedPath)
+  if (target) {
+    const url = req.nextUrl.clone()
+    url.pathname = target
+    const response = NextResponse.rewrite(url)
+    for (const [k, v] of Object.entries(markdownHeaders())) {
+      response.headers.set(k, v)
+    }
+    return response
+  }
+
+  // No mirror for this path — return a pointer document so agents get
+  // something parseable instead of HTML or a 404.
+  const origin = CONFIGURED_ORIGIN || req.nextUrl.origin
+  const body = synthesizeMarkdownPointer({
+    origin,
+    pathname: decision.strippedPath,
+    llmsTxtUrl: `${origin}/llms.txt`,
+  })
+  return new NextResponse(body, {
+    status: 200,
+    // Token hint is a rough estimate: one token per four characters.
+    headers: markdownHeaders({ tokens: Math.ceil(body.length / 4) }),
+  })
+}
+
+// NOTE(review): `txt` is not in the excluded extension list, so /llms.txt
+// passes through this middleware — confirm markdownServeDecision leaves it
+// alone for AI-bot user agents.
+export const config = {
+  matcher: [
+    '/((?!_next/|md/|api/|favicon\\.ico|.*\\.(?:js|mjs|css|png|jpe?g|gif|svg|webp|avif|ico|woff2?|ttf|eot|map|json|xml)).*)',
+  ],
+}
diff --git a/edge-middleware/agent-analytics-starter/next-env.d.ts b/edge-middleware/agent-analytics-starter/next-env.d.ts new file mode 100644 index 0000000000..830fb594ca --- /dev/null +++ b/edge-middleware/agent-analytics-starter/next-env.d.ts @@ -0,0 +1,6 @@ +/// +/// +/// + +// NOTE: This file should not be edited +// see https://nextjs.org/docs/app/api-reference/config/typescript for more information. 
diff --git a/edge-middleware/agent-analytics-starter/next.config.js b/edge-middleware/agent-analytics-starter/next.config.js new file mode 100644 index 0000000000..767719fc4f --- /dev/null +++ b/edge-middleware/agent-analytics-starter/next.config.js @@ -0,0 +1,4 @@ +/** @type {import('next').NextConfig} */ +const nextConfig = {} + +module.exports = nextConfig diff --git a/edge-middleware/agent-analytics-starter/package.json b/edge-middleware/agent-analytics-starter/package.json new file mode 100644 index 0000000000..fd1261228d --- /dev/null +++ b/edge-middleware/agent-analytics-starter/package.json @@ -0,0 +1,25 @@ +{ + "name": "agent-analytics-starter", + "version": "0.1.0", + "private": true, + "license": "MIT", + "description": "Next.js starter that tracks AI agent traffic (ClaudeBot, GPTBot, PerplexityBot, and more) in PostHog via @apideck/agent-analytics.", + "scripts": { + "dev": "next dev", + "build": "next build", + "start": "next start", + "lint": "next lint" + }, + "dependencies": { + "@apideck/agent-analytics": "^0.3.0", + "next": "^15.0.0", + "react": "^19.0.0", + "react-dom": "^19.0.0" + }, + "devDependencies": { + "@types/node": "^22.0.0", + "@types/react": "^19.0.0", + "@types/react-dom": "^19.0.0", + "typescript": "^5.6.0" + } +} diff --git a/edge-middleware/agent-analytics-starter/public/hero.svg b/edge-middleware/agent-analytics-starter/public/hero.svg new file mode 100644 index 0000000000..edff2c2cd5 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/public/hero.svg @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @APIDECK/AGENT-ANALYTICS + + + + See the agents your + JavaScript can't. + + + Drop-in Next.js middleware that tracks ClaudeBot, GPTBot, + Perplexity, and 20+ AI crawlers in PostHog — or any analytics. 
+ + + + ClaudeBot + GPTBot + PerplexityBot + Google-Extended + Cursor + + + + Windsurf + Applebot + Meta-ExternalAgent + Bytespider + + 12 + + + + + + + + + + middleware.ts + + + + import { + trackDocView, posthogAnalytics + } from '@apideck/agent-analytics' + + const analytics = posthogAnalytics({ + apiKey: process.env.POSTHOG_KEY + }) + + export function middleware(req) { + void trackDocView(req, { analytics }) + return NextResponse.next() + } + + + + + + + zero deps + · node 18+ · edge · bun + · postHog · webhook · custom + · markdown mirror helpers + + + diff --git a/edge-middleware/agent-analytics-starter/public/llms.txt b/edge-middleware/agent-analytics-starter/public/llms.txt new file mode 100644 index 0000000000..0139314216 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/public/llms.txt @@ -0,0 +1,14 @@ +# Agent Analytics Starter + +> A Next.js starter that tracks AI agent traffic in PostHog via @apideck/agent-analytics. +> Every page is reachable as clean Markdown at the same URL; every Markdown fetch fires a doc_view event. + +## Docs + +- [Intro](/docs/intro.md): How the Markdown mirror works and how agents ask for it +- [Usage](/docs/usage.md): Full request lifecycle, event shape, and customisation steps + +## Related + +- [@apideck/agent-analytics on GitHub](https://github.com/apideck-libraries/agent-analytics): The library that powers this template +- [Agentic Engine Optimization](https://addyosmani.com/blog/agentic-engine-optimization/): The case for agent-ready docs diff --git a/edge-middleware/agent-analytics-starter/public/md/docs/intro.md b/edge-middleware/agent-analytics-starter/public/md/docs/intro.md new file mode 100644 index 0000000000..dc2443e8b1 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/public/md/docs/intro.md @@ -0,0 +1,36 @@ +# Intro + +This page is served as **clean Markdown** when an AI agent asks for it. 
+ +## How to ask for it + +Any of the following produce this Markdown response: + +```bash +# .md suffix +curl https://your-deployment.vercel.app/docs/intro.md + +# Accept header +curl -H "Accept: text/markdown" https://your-deployment.vercel.app/docs/intro + +# AI-bot user agent +curl -A "ClaudeBot/1.0" https://your-deployment.vercel.app/docs/intro +``` + +## What the middleware does + +1. Detects the request as agent-bound (by suffix, header, or UA). +2. Rewrites the route to `/md/docs/intro.md` under the hood — same URL, different body. +3. Sets `Content-Type: text/markdown`, `Content-Signal: search=yes, ai-input=yes, ai-train=no`, and `x-markdown-tokens` on the response. +4. Fires a `doc_view` event to PostHog (fire-and-forget, `keepalive: true`). + +## Agent-friendly defaults + +- No sidebar, footer, or JS bundle — just content. +- `Content-Signal` mirrors `robots.txt` so policy is consistent whether the agent reads it or not. +- `x-markdown-tokens` gives agents a context-budget hint before they parse the body. +- `distinct_id` in PostHog is a hash of `ip:ua` — same agent collapses into one anon visitor, no person profile is created. + +## Next + +See [/docs/usage](/docs/usage.md) for a walkthrough of the full request flow. diff --git a/edge-middleware/agent-analytics-starter/public/md/docs/usage.md b/edge-middleware/agent-analytics-starter/public/md/docs/usage.md new file mode 100644 index 0000000000..61aa8a4720 --- /dev/null +++ b/edge-middleware/agent-analytics-starter/public/md/docs/usage.md @@ -0,0 +1,65 @@ +# Usage + +Full request lifecycle when an agent hits this deployment. 
+ +## Detection triggers + +The middleware recognises three ways an agent can ask for Markdown: + +| Trigger | Example | `source` property on the event | +| ------------------------------ | --------------------------------------------- | ------------------------------ | +| Known AI-bot UA on any URL | `curl -A "ClaudeBot/1.0" /docs/intro` | `ua-rewrite` | +| `.md` suffix on the URL | `curl /docs/intro.md` | `md-suffix` | +| `Accept: text/markdown` header | `curl -H "Accept: text/markdown" /docs/intro` | `accept-header` | + +URLs without a pre-built mirror (anything outside `/docs/`) get a synthesised pointer document instead of a 404, so the contract still holds — `source` becomes `accept-header-synthesized`. + +## Event shape + +Every Markdown fetch fires one event into PostHog: + +```jsonc +{ + "event": "doc_view", + "distinct_id": "anon_7f3a1b2c", + "timestamp": "2026-04-19T08:30:00.000Z", + "properties": { + "$process_person_profile": false, + "$current_url": "https://your-deployment.vercel.app/docs/usage", + "path": "/docs/usage", + "user_agent": "ClaudeBot/1.0 (+https://claude.ai/bot)", + "is_ai_bot": true, + "referer": "https://claude.ai/", + "source": "ua-rewrite", + "site": "starter", + }, +} +``` + +## Customising the content + +1. **Add a slug**: create `public/md/docs/<slug>.md` with the Markdown you want agents to receive. +2. **Add a human view**: add an entry to the `DOCS` object in `app/docs/[slug]/page.tsx` so browsers get a matching HTML page. +3. **Extend coverage**: edit `resolveMirrorPath` in `middleware.ts` to cover paths outside `/docs/` (e.g. `/guides/`, `/blog/`). Paths under `/api/` are excluded by the `matcher` in `middleware.ts`, so covering them also requires editing `config.matcher`. 
+ +## Configuring PostHog + +Set these environment variables in your Vercel project: + +- `NEXT_PUBLIC_POSTHOG_KEY` — your project API key from PostHog settings +- `NEXT_PUBLIC_POSTHOG_HOST` — either `https://us.i.posthog.com`, `https://eu.i.posthog.com`, or your own reverse-proxy domain + +If `NEXT_PUBLIC_POSTHOG_KEY` is absent the middleware silently no-ops on capture — no errors, responses unaffected. + +## Swapping backends + +Replace the PostHog adapter in `middleware.ts` with any backend by importing `webhookAnalytics` or `customAnalytics` from `@apideck/agent-analytics`: + +```ts +import { webhookAnalytics } from '@apideck/agent-analytics' + +const analytics = webhookAnalytics({ + url: 'https://collector.example.com/events', + headers: { Authorization: `Bearer ${process.env.COLLECTOR_TOKEN}` }, +}) +``` diff --git a/edge-middleware/agent-analytics-starter/tsconfig.json b/edge-middleware/agent-analytics-starter/tsconfig.json new file mode 100644 index 0000000000..afedc7443c --- /dev/null +++ b/edge-middleware/agent-analytics-starter/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "target": "ES2022", + "lib": ["dom", "dom.iterable", "esnext"], + "allowJs": true, + "skipLibCheck": true, + "strict": true, + "noEmit": true, + "esModuleInterop": true, + "module": "esnext", + "moduleResolution": "bundler", + "resolveJsonModule": true, + "isolatedModules": true, + "jsx": "preserve", + "incremental": true, + "plugins": [{ "name": "next" }], + "paths": { "@/*": ["./*"] } + }, + "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], + "exclude": ["node_modules"] +} diff --git a/edge-middleware/agent-analytics-starter/turbo.json b/edge-middleware/agent-analytics-starter/turbo.json new file mode 100644 index 0000000000..7492a7f2cd --- /dev/null +++ b/edge-middleware/agent-analytics-starter/turbo.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://turborepo.com/schema.json", + "pipeline": { + "build": { + "outputs": [".next/**", 
"!.next/cache/**"] + }, + "lint": {} + } +}