-
Notifications
You must be signed in to change notification settings - Fork 9.5k
feat(route): Add Archdaily Search Support #21511
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 8 commits
a686fc5
dbce373
976c6c8
63bfdf6
41656ee
dbd5515
8ba793a
31231d0
02f633a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| import type { Namespace } from '@/types'; | ||
|
|
||
/**
 * Namespace metadata shared by every route registered under `/archdaily`.
 */
export const namespace: Namespace = {
    name: 'ArchDaily',
    url: 'archdaily.com',
    // Declares the namespace language explicitly (the site is the English-language ArchDaily domain).
    lang: 'en',
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,222 @@ | ||
| import { load } from 'cheerio'; | ||
|
|
||
| import type { Route } from '@/types'; | ||
| import cache from '@/utils/cache'; | ||
| import got from '@/utils/got'; | ||
| import ofetch from '@/utils/ofetch'; | ||
| import { parseDate } from '@/utils/parse-date'; | ||
|
|
||
/**
 * Route definition for ArchDaily search feeds.
 *
 * `category` selects the search scope and `search` is the free-text query;
 * both are path parameters consumed by `handler`.
 */
export const route: Route = {
    path: '/search/:category/:search',
    categories: ['journal'],
    example: '/archdaily/search/projects/Urban Design',
    parameters: {
        category: 'The category to search in, including "all", "projects", "products", "folders", "articles", "competitions" and "events"',
        search: 'The search query',
    },
    features: {
        requireConfig: false,
    },
    radar: [
        {
            // Matches the `/search/<category>/text/<query>` page shape that the handler itself
            // requests, so both target parameters can be filled from the URL path.
            source: ['www.archdaily.com/search/:category/text/:search'],
            target: '/search/:category/:search',
        },
        {
            // Category-only pages: `:search` is not part of this path and cannot be derived from it.
            source: ['www.archdaily.com/search/:category'],
            target: '/search/:category/:search',
        },
    ],
    name: 'Search',
    maintainers: ['Friday_Anubis'],
    handler,
};
|
|
||
/**
 * Builds the ArchDaily search feed for one category and query.
 *
 * `competitions` and `events` are scraped from the HTML search listing, and each
 * hit is enriched (through the cache) with metadata from its detail page. Every
 * other category is read from the site's search API.
 *
 * @param ctx - Request context; `category` and `search` are read from its route parameters.
 * @returns Feed data with `title`, `link`, `description` and `item`.
 */
async function handler(ctx) {
    const { category, search } = ctx.req.param();

    const baseUrl = 'https://www.archdaily.com';
    const allowedCategories = new Set(['all', 'projects', 'products', 'folders', 'articles', 'competitions', 'events']);
    // Unknown categories fall back to the site-wide search instead of failing.
    const finalCategory = allowedCategories.has(category) ? category : 'all';

    if (finalCategory === 'competitions' || finalCategory === 'events') {
        const pageCategory = finalCategory;
        const listingUrl = `${baseUrl}/search/${pageCategory}/text/${encodeURIComponent(search)}`;
        const { data: response } = await got(listingUrl);
        const $ = load(response);

        // Links already emitted, so a result listed twice yields a single item.
        const seen = new Set<string>();
        const list: Array<{ title: string; link: string; image?: string; guid: string }> = [];

        for (const node of $('li.afd-search-list__item').toArray()) {
            const element = $(node);
            const href = element.find('a.afd-search-list__link').first().attr('href');
            const imageElement = element.find('img.afd-search-list__img').first();

            // Fall back to the thumbnail's alt text when the heading is empty.
            const title = element.find('h2.afd-search-list__title').first().text().trim() || imageElement.attr('alt')?.trim();
            if (!href || !title) {
                continue;
            }

            const link = normalizeArchdailyLink(href, baseUrl);
            // Keep only on-site detail pages; links back into the search UI are not results.
            if (!link || !link.startsWith('https://www.archdaily.com/') || link.includes('/search/') || seen.has(link)) {
                continue;
            }
            seen.add(link);

            const articleId = getArchdailyArticleId(link);

            list.push({
                title,
                link,
                image: normalizeImageUrl(imageElement.attr('src')),
                // Prefer a guid built from the numeric id; otherwise derive it from the link.
                guid: articleId ? `archdaily-${pageCategory}-${articleId}` : `${pageCategory}:${link}`,
            });
        }

        const items = await Promise.all(
            list.map((entry) =>
                cache.tryGet(entry.guid, async () => {
                    const detail = await getCompetitionMeta(entry.link);

                    return {
                        title: entry.title,
                        link: entry.link,
                        guid: entry.guid,
                        description: `${entry.image ? `<img src="${entry.image}"><br>` : ''}${detail.description ?? ''}`,
                        pubDate: detail.pubDate,
                    };
                })
            )
        );

        return {
            title: `ArchDaily - ${search} in ${pageCategory}`,
            link: listingUrl,
            description: `Search results for "${search}" in ${pageCategory} on ArchDaily`,
            item: items,
        };
    }

    type FeedItem = {
        title: string;
        link: string;
        guid?: string;
        description: string;
        pubDate?: Date;
        author?: string;
        category?: string[];
    };

    const response = await ofetch<{ results?: any[] }>(`${baseUrl}/search/api/v1/us/${finalCategory}?q=${encodeURIComponent(search)}`);

    const seenOutput = new Set<string>();
    const dedupedItems: FeedItem[] = [];

    for (const result of response?.results ?? []) {
        const title = result?.title || result?.name;
        const link = result?.url;
        if (!title || !link) {
            continue;
        }

        let feedItem: FeedItem;
        if (finalCategory === 'folders') {
            const images = (result?.images ?? []).map((image) => normalizeImageUrl(image)).filter(Boolean);
            const author = result?.user?.slug_name;
            const profileUrl = result?.user?.profile_url;

            feedItem = {
                title: author ? `${title} by ${author}` : title,
                link,
                guid: result?.id ? `archdaily-folder-${result.id}` : undefined,
                // The title and update date are carried by `title` / `pubDate`; they are not repeated here.
                description: [profileUrl ? `<p><a href="${profileUrl}">Uploader Profile</a></p>` : undefined, images.map((image) => `<img src="${image}">`).join('<br>')].filter(Boolean).join(''),
                pubDate: result?.last_update ? parseDate(result.last_update) : undefined,
                author,
                category: ['folders', title],
            };
        } else {
            const image = normalizeImageUrl(result?.featured_images?.url_large || result?.featured_images?.url_medium || result?.featured_images?.url_small);

            feedItem = {
                title,
                link,
                description: `${image ? `<img src="${image}"><br>` : ''}${result?.meta_description ?? ''}`,
                pubDate: result?.publication_date ? parseDate(result.publication_date) : undefined,
                author: result?.author?.name,
                category: (result?.tags ?? []).map((tag) => tag?.name).filter(Boolean),
            };
        }

        // De-duplicate on guid when present, otherwise on the link.
        const key = feedItem.guid || feedItem.link;
        if (seenOutput.has(key)) {
            continue;
        }
        seenOutput.add(key);
        dedupedItems.push(feedItem);
    }

    return {
        title: `ArchDaily - ${search}${finalCategory === 'all' ? '' : ` in ${finalCategory}`}`,
        link: `${baseUrl}/search/${finalCategory}?q=${encodeURIComponent(search)}`,
        description: `Search results for "${search}" on ArchDaily`,
        item: dedupedItems,
    };
}
|
|
||
/**
 * Normalizes an image URL taken from scraped markup or an API payload.
 *
 * Protocol-relative URLs (`//host/path`) are given an explicit `https:` scheme;
 * any other non-empty string is returned trimmed.
 *
 * @param url - Raw URL. Callers pass values from untyped payloads, so anything
 *   that is not a string is treated as missing rather than throwing.
 * @returns The normalized URL, or `undefined` when there is nothing usable.
 */
function normalizeImageUrl(url?: string): string | undefined {
    if (typeof url !== 'string') {
        return undefined;
    }

    const trimmed = url.trim();
    if (!trimmed) {
        return undefined;
    }

    return trimmed.startsWith('//') ? `https:${trimmed}` : trimmed;
}
|
|
||
/**
 * Turns an href from an ArchDaily page into a canonical absolute link with the
 * query string and fragment removed.
 *
 * Accepted inputs:
 * - root-relative paths (`/123/slug`), resolved by prefixing `baseUrl`;
 * - protocol-relative or absolute `https://www.archdaily.com/...` URLs.
 *
 * @param url - The raw href.
 * @param baseUrl - Origin prepended to root-relative paths (no trailing slash).
 * @returns The normalized link, or `undefined` for off-site, unsupported or
 *   unparsable hrefs.
 */
function normalizeArchdailyLink(url: string, baseUrl: string): string | undefined {
    const archdailyPrefix = 'https://www.archdaily.com/';
    const href = url.trim();

    let absolute: string;
    if (href.startsWith('//')) {
        // Protocol-relative: this names a host, so it must not be glued onto baseUrl as a path.
        absolute = `https:${href}`;
        if (!absolute.startsWith(archdailyPrefix)) {
            return undefined;
        }
    } else if (href.startsWith('/')) {
        absolute = `${baseUrl}${href}`;
    } else if (href.startsWith(archdailyPrefix)) {
        absolute = href;
    } else {
        return undefined;
    }

    try {
        const parsed = new URL(absolute);
        parsed.search = '';
        parsed.hash = '';
        return parsed.toString();
    } catch {
        // A single malformed href should drop that entry, not abort the whole feed.
        return undefined;
    }
}
|
|
||
/**
 * Extracts the numeric id that forms the first path segment of an ArchDaily link
 * (for example `https://www.archdaily.com/1012345/some-slug`).
 *
 * The id is recognised whether it is followed by a slash, a query string, a
 * fragment, or the end of the link.
 *
 * @param link - Absolute ArchDaily URL.
 * @returns The id as a string of digits, or `undefined` when the first path
 *   segment is not purely numeric.
 */
function getArchdailyArticleId(link: string): string | undefined {
    return /archdaily\.com\/(\d+)(?:[/?#]|$)/.exec(link)?.[1];
}
|
|
||
/**
 * Fetches a detail page and extracts its publication date and summary text.
 *
 * @param link - Absolute URL of the detail page.
 * @returns `pubDate` (parsed, or `undefined` when no timestamp is found) and
 *   `description` (the page's meta description, possibly `undefined`).
 */
async function getCompetitionMeta(link: string) {
    const { data } = await got(link);
    const $ = load(data);

    // Prefer the `article:published_time` meta tag; JSON-LD is consulted only when it is absent.
    // NOTE(review): a collaborator considered the JSON-LD scan more general than the sampled
    // pages require ("unnecessary toArray/map/flatMap/find").
    const publishedTime =
        $('meta[property="article:published_time"]').attr('content') ??
        findJsonLdDatePublished(
            $('script[type="application/ld+json"]')
                .toArray()
                .map((script) => $(script).text())
        );

    // NOTE(review): a collaborator asked to return the full article from the page fetched here
    // instead of the meta summary. TODO: needs the article body selector.
    const description = $('meta[property="og:description"]').attr('content') || $('meta[name="description"]').attr('content');

    return {
        pubDate: publishedTime ? parseDate(publishedTime) : undefined,
        description,
    };
}

/** Returns the first non-empty string `datePublished` found in the given JSON-LD texts. */
function findJsonLdDatePublished(jsonTexts: string[]): string | undefined {
    for (const jsonText of jsonTexts) {
        let parsed: unknown;
        try {
            parsed = JSON.parse(jsonText);
        } catch {
            // Not valid JSON: ignore this script and try the next one.
            continue;
        }

        // A JSON-LD script may hold one object or an array of them.
        const entries: unknown[] = Array.isArray(parsed) ? parsed : [parsed];
        for (const entry of entries) {
            if (typeof entry !== 'object' || entry === null) {
                continue;
            }
            const value = (entry as Record<string, unknown>).datePublished;
            // Only strings are handed on to the date parser.
            if (typeof value === 'string' && value) {
                return value;
            }
        }
    }
    return undefined;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do not place `title` and `pubDate` in `description`. They belong to their dedicated fields.