Skip to content

Commit 2933f89

Browse files
authored
feat(route/thinkingmachines): add news route for Thinking Machines Lab (#21609)
* route(thinkingmachines): add news route for Thinking Machines Lab

  Add route for Thinking Machines Lab (thinkingmachines.ai) news page. Founded by Mira Murati (ex-OpenAI CTO), the lab publishes news about their AI research and products.

  Closes #0
* fix: use import type and sort imports for oxlint
* fix: correct import sort order per simple-import-sort
* fix: strip title/author/pubDate heading from description, remove unnecessary try/catch

  Address review feedback:
  - Remove .post-heading block from description (title, author, pubDate have dedicated fields)
  - Remove try/catch wrapper (requests do not throw errors)
* fix: remove article fallback and strip paginator from description
1 parent d8eaee5 commit 2933f89

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import type { Namespace } from '@/types';
2+
3+
/** Namespace metadata (display name and site host) for the Thinking Machines Lab routes. */
export const namespace: Namespace = { name: 'Thinking Machines Lab', url: 'thinkingmachines.ai' };
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import { load } from 'cheerio';
2+
3+
import type { Route } from '@/types';
4+
import cache from '@/utils/cache';
5+
import ofetch from '@/utils/ofetch';
6+
import { parseDate } from '@/utils/parse-date';
7+
8+
/**
 * Route definition for the Thinking Machines Lab news feed.
 *
 * Served at `/news` (example: `/thinkingmachines/news`) and backed by `handler`.
 */
export const route: Route = {
    path: '/news',
    name: 'News',
    url: 'thinkingmachines.ai/news',
    maintainers: ['w3nhao'],
    example: '/thinkingmachines/news',
    categories: ['programming'],
    // No configuration, headless browser, or anti-crawler handling is declared for this route.
    features: { requireConfig: false, requirePuppeteer: false, antiCrawler: false },
    // Both the slash-less and trailing-slash forms of the news URL map to this route.
    radar: [{ source: ['thinkingmachines.ai/news', 'thinkingmachines.ai/news/'], target: '/news' }],
    handler,
};
28+
29+
/**
 * Resolves an anchor's `href` against the page it was found on.
 *
 * @param href - Raw `href` attribute value (may be absolute, root-relative, or relative).
 * @param pageUrl - URL of the page containing the anchor; used as the resolution base.
 * @returns The absolute URL, or `undefined` when `href` is missing/blank or cannot be parsed.
 */
function resolveArticleLink(href: string | undefined, pageUrl: string): string | undefined {
    const trimmed = href?.trim();
    if (!trimmed) {
        return undefined;
    }
    try {
        return new URL(trimmed, pageUrl).href;
    } catch {
        // Malformed href: treat the anchor as unusable instead of failing the whole feed.
        return undefined;
    }
}

/**
 * Builds the Thinking Machines Lab news feed.
 *
 * Fetches the news listing page, reads title, date text and link from every
 * `main li a` anchor, then loads each article (through `cache.tryGet`, keyed by
 * the article link) and uses the remaining HTML of its `<main>` element as the
 * item description.
 *
 * @returns Feed data with a fixed title, the listing URL as link, and one item per article.
 */
async function handler() {
    const baseUrl = 'https://thinkingmachines.ai';
    const listUrl = `${baseUrl}/news/`;

    const response = await ofetch(listUrl);
    const $ = load(response);

    const items = $('main li a')
        .toArray()
        .flatMap((el) => {
            const $el = $(el);
            const title = $el.find('.post-title').text().trim();
            const dateStr = $el.find('time.desktop-time').text().trim();
            // Resolve before filtering so anchors without a usable href are dropped
            // rather than silently linking to the site root.
            const link = resolveArticleLink($el.attr('href'), listUrl);

            return title && link ? [{ title, dateStr, link }] : [];
        });

    const fullItems = await Promise.all(
        items.map((item) =>
            cache.tryGet(item.link, async () => {
                const articleResponse = await ofetch(item.link);
                const $article = load(articleResponse);

                // Remove non-content elements
                $article('nav, footer, header, script, style').remove();
                // Remove heading (title, author, pubDate) and paginator
                $article('.post-heading, #post-prev-link, #post-next-link').remove();

                const description = $article('main').html()?.trim() ?? '';

                return {
                    title: item.title,
                    link: item.link,
                    // Leave pubDate unset when the listing has no date text, instead of
                    // handing an empty string to parseDate.
                    pubDate: item.dateStr ? parseDate(item.dateStr, 'MMM D, YYYY') : undefined,
                    description,
                };
            })
        )
    );

    return {
        title: 'Thinking Machines Lab - News',
        link: listUrl,
        item: fullItems,
    };
}

0 commit comments

Comments
 (0)