-
Notifications
You must be signed in to change notification settings - Fork 9.5k
feat(route): Add Archdaily Search Support #21511
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
a686fc5
dbce373
976c6c8
63bfdf6
41656ee
dbd5515
8ba793a
31231d0
02f633a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| import type { Namespace } from '@/types'; | ||
|
|
||
/** Namespace metadata for the ArchDaily routes: the display name and the site host. */
export const namespace: Namespace = {
    name: 'ArchDaily',
    url: 'archdaily.com',
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,227 @@ | ||
| import { load } from 'cheerio'; | ||
|
|
||
| import type { DataItem, Route } from '@/types'; | ||
| import cache from '@/utils/cache'; | ||
| import got from '@/utils/got'; | ||
| import ofetch from '@/utils/ofetch'; | ||
| import { parseDate } from '@/utils/parse-date'; | ||
|
|
||
/** Origin of the ArchDaily site; every request URL in this module is built from it. */
const baseUrl = 'https://www.archdaily.com';
/** Prefix (the origin plus a trailing slash) that a normalised link must start with to be kept. */
const archdailyArticlePrefix = `${baseUrl}/`;
/** Search categories the route accepts; the handler treats anything else as "all". */
const allowedCategories = new Set<string>(['all', 'projects', 'products', 'folders', 'articles', 'competitions', 'events']);
|
|
||
/**
 * Route descriptor for the ArchDaily search feed.
 *
 * The path takes a `category` (one of the values listed in `parameters.category`)
 * and a free-text `search` query; `handler` produces the feed.
 */
export const route: Route = {
    path: '/search/:category/:search',
    categories: ['journal'],
    example: '/archdaily/search/projects/Urban Design',
    parameters: {
        category: 'The category to search in, including "all", "projects", "products", "folders", "articles", "competitions" and "events"',
        search: 'The search query',
    },
    features: {
        requireConfig: false,
    },
    // NOTE(review): the target references `:search`, but the source pattern only
    // captures `:category` — confirm the radar rule can actually resolve it.
    radar: [
        {
            source: ['www.archdaily.com/search/:category'],
            target: '/search/:category/:search',
        },
    ],
    name: 'Search',
    maintainers: ['Friday_Anubis'],
    handler,
};
|
|
||
/**
 * Builds the ArchDaily search feed for the requested category and query.
 *
 * "competitions" and "events" are scraped from the HTML search listing, and each
 * hit's own page is fetched (through the cache, keyed by guid) for its description
 * and publication date. Every other category is read from the JSON search endpoint.
 * A category that is not in `allowedCategories` is treated as "all".
 *
 * @param ctx Request context; `category` and `search` are read from `ctx.req.param()`.
 * @returns Feed object with `title`, `link`, `description` and `item`.
 */
async function handler(ctx) {
    const { category, search } = ctx.req.param();
    const finalCategory = allowedCategories.has(category) ? category : 'all';
    const encodedSearch = encodeURIComponent(search);

    if (finalCategory === 'competitions' || finalCategory === 'events') {
        const listingUrl = `${baseUrl}/search/${finalCategory}/text/${encodedSearch}`;
        const { data: listingHtml } = await got(listingUrl);
        const $ = load(listingHtml);

        // Links already emitted, so a result that appears twice in the listing is kept once.
        const visitedLinks = new Set<string>();
        const entries: Array<{ title: string; link: string; image: string | undefined; guid: string }> = [];

        for (const node of $('li.afd-search-list__item').toArray()) {
            const row = $(node);
            const href = row.find('a.afd-search-list__link').first().attr('href');
            const thumbnail = row.find('img.afd-search-list__img').first();
            // The image alt text stands in for the title when the heading is empty.
            const title = row.find('h2.afd-search-list__title').first().text().trim() || thumbnail.attr('alt')?.trim();
            if (!href || !title) {
                continue;
            }

            const link = normalizeArchdailyLink(href, baseUrl);
            if (!link) {
                continue;
            }
            // Keep only ArchDaily pages that are not themselves search pages and were not seen yet.
            if (!link.startsWith(archdailyArticlePrefix) || link.includes('/search/') || visitedLinks.has(link)) {
                continue;
            }
            visitedLinks.add(link);

            const articleId = getArchdailyArticleId(link);
            entries.push({
                title,
                link,
                image: normalizeImageUrl(thumbnail.attr('src')),
                guid: articleId ? `archdaily-${finalCategory}-${articleId}` : `${finalCategory}:${link}`,
            });
        }

        const items: DataItem[] = await Promise.all(
            entries.map((entry) =>
                cache.tryGet(entry.guid, async () => {
                    const { description, pubDate } = await getCompetitionMeta(entry.link);
                    const thumbnailHtml = entry.image ? `<img src="${entry.image}"><br>` : '';

                    return {
                        title: entry.title,
                        link: entry.link,
                        guid: entry.guid,
                        description: `${thumbnailHtml}${description ?? ''}`,
                        pubDate,
                    };
                })
            )
        );

        return {
            title: `ArchDaily - ${search} in ${finalCategory}`,
            link: listingUrl,
            description: `Search results for "${search}" in ${finalCategory} on ArchDaily`,
            item: items,
        };
    }

    const payload = await ofetch<{ results?: any[] }>(`${baseUrl}/search/api/v1/us/${finalCategory}?q=${encodedSearch}`);

    // NOTE(review): a reviewer called the `flatMap` originally used here unnecessary
    // ("Return an object and use map instead"). This loop skips results that lack a
    // title or URL explicitly, which is what the `flatMap` was doing.
    const items: DataItem[] = [];
    for (const result of payload?.results ?? []) {
        const title = result?.title || result?.name;
        const link = result?.url;
        if (!title || !link) {
            continue;
        }

        if (finalCategory === 'folders') {
            const images = (result?.images ?? []).map((image) => normalizeImageUrl(image)).filter(Boolean);
            const author = result?.user?.slug_name;
            const profileUrl = result?.user?.profile_url;
            const folderTitle = author ? `${title} by ${author}` : title;

            // NOTE(review): a reviewer comment attached to the title and "Updated" lines
            // below begins "Do not place …" but is truncated in this capture — confirm
            // what was meant to be removed from the description.
            const descriptionParts = [`<p><strong>${folderTitle}</strong></p>`];
            if (result?.last_update) {
                descriptionParts.push(`<p>Updated: ${result.last_update}</p>`);
            }
            if (profileUrl) {
                descriptionParts.push(`<p><a href="${profileUrl}">Uploader Profile</a></p>`);
            }
            const gallery = images.map((image) => `<img src="${image}">`).join('<br>');
            if (gallery) {
                descriptionParts.push(gallery);
            }

            items.push({
                title: folderTitle,
                link,
                guid: result?.id ? `archdaily-folder-${result.id}` : undefined,
                description: descriptionParts.join(''),
                pubDate: result?.last_update ? parseDate(result.last_update) : undefined,
                author,
                category: ['folders', title],
            });
            continue;
        }

        // Prefer the largest featured image that is available.
        const featured = result?.featured_images;
        const image = normalizeImageUrl(featured?.url_large || featured?.url_medium || featured?.url_small);
        const tagNames = (result?.tags ?? []).map((tag) => tag?.name).filter(Boolean);

        items.push({
            title,
            link,
            description: `${image ? `<img src="${image}"><br>` : ''}${result?.meta_description ?? ''}`,
            pubDate: result?.publication_date ? parseDate(result.publication_date) : undefined,
            author: result?.author?.name,
            category: tagNames,
        });
    }

    // Drop repeats, identifying each item by its guid when present and by its link otherwise.
    const emittedKeys = new Set<string>();
    const uniqueItems: DataItem[] = [];
    for (const candidate of items) {
        const key = candidate.guid || candidate.link;
        if (key && !emittedKeys.has(key)) {
            emittedKeys.add(key);
            uniqueItems.push(candidate);
        }
    }

    const scopeSuffix = finalCategory === 'all' ? '' : ` in ${finalCategory}`;
    return {
        title: `ArchDaily - ${search}${scopeSuffix}`,
        link: `${baseUrl}/search/${finalCategory}?q=${encodedSearch}`,
        description: `Search results for "${search}" on ArchDaily`,
        item: uniqueItems,
    };
}
|
|
||
/**
 * Makes a protocol-relative image URL absolute.
 *
 * @param url Image URL as found in the page or API payload; may be missing.
 * @returns `undefined` for a missing or empty URL, the URL prefixed with `https:`
 *          when it starts with `//`, and the URL unchanged otherwise.
 */
function normalizeImageUrl(url?: string) {
    if (!url) {
        return undefined;
    }
    return url.startsWith('//') ? `https:${url}` : url;
}
|
|
||
/**
 * Turns an href taken from a search listing into an absolute ArchDaily URL with
 * its query string and fragment removed.
 *
 * @param url Raw href: root-relative (`/path`), protocol-relative (`//host/path`) or absolute.
 * @param baseUrl Origin that root-relative hrefs are resolved against.
 * @returns The normalised URL, or `undefined` when the href is neither root-relative
 *          nor an ArchDaily URL.
 */
function normalizeArchdailyLink(url: string, baseUrl: string): string | undefined {
    // A protocol-relative href also starts with "/", so it has to be handled before the
    // root-relative case; otherwise it would be glued onto baseUrl as "<baseUrl>//host/path".
    if (url.startsWith('//')) {
        const absolute = `https:${url}`;
        return absolute.startsWith(archdailyArticlePrefix) ? normalizeUrlWithoutSearchAndHash(absolute) : undefined;
    }
    if (url.startsWith('/')) {
        return normalizeUrlWithoutSearchAndHash(`${baseUrl}${url}`);
    }
    if (url.startsWith(archdailyArticlePrefix)) {
        return normalizeUrlWithoutSearchAndHash(url);
    }
    return undefined;
}
|
|
||
/**
 * Serialises a URL with its query string and fragment cleared.
 *
 * @param input Absolute URL; `new URL` throws if it cannot be parsed.
 * @returns The URL's `href` after `search` and `hash` have been emptied.
 */
function normalizeUrlWithoutSearchAndHash(input: string) {
    // Object.assign goes through the URL setters, clearing `search` first and then `hash`.
    const { href } = Object.assign(new URL(input), { search: '', hash: '' });
    return href;
}
|
Comment on lines
+181
to
+195
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unnecessary normalisation. None of the links from […] have search queries or hashes in their URLs.
||
|
|
||
/**
 * Extracts the numeric id segment that directly follows the host in an ArchDaily link.
 *
 * @param link URL to inspect.
 * @returns The digits between `archdaily.com/` and the next `/`, or `undefined`
 *          when the link does not have that shape.
 */
function getArchdailyArticleId(link: string) {
    return /archdaily\.com\/(\d+)\//.exec(link)?.[1];
}
|
|
||
/**
 * Fetches a competition/event page and reads its publication date and summary
 * from the page metadata.
 *
 * @param link Absolute URL of the page to fetch.
 * @returns `pubDate` parsed from the `article:published_time` meta tag or, failing
 *          that, from the first JSON-LD entry carrying `datePublished` (`undefined`
 *          when neither exists), and `description` taken from the `og:description`
 *          or `description` meta tag.
 */
async function getCompetitionMeta(link: string) {
    const { data } = await got(link);
    const $ = load(data);

    // `||` rather than `??`: an empty `content` attribute must fall through to the
    // JSON-LD lookup, the same way the description lookup below falls through.
    //
    // NOTE(review): a reviewer called the toArray/map/flatMap/find chain unnecessary
    // ("All of the pages in the examples provided have only 1 …" — the rest of the
    // comment is truncated in this capture).
    const publishedTime =
        $('meta[property="article:published_time"]').attr('content') ||
        $('script[type="application/ld+json"]')
            .toArray()
            .map((script) => $(script).text())
            .map((jsonText) => {
                try {
                    return JSON.parse(jsonText);
                } catch {
                    // A script whose JSON does not parse is ignored; it becomes an
                    // `undefined` entry that the `find` below skips.
                    return;
                }
            })
            .flatMap((entry) => (Array.isArray(entry) ? entry : [entry]))
            .find((entry) => entry && typeof entry === 'object' && entry.datePublished)?.datePublished;

    // NOTE(review): a reviewer asked for the full article to be returned here, since
    // the page has already been fetched ("return it ( …" — truncated in this capture).
    const description = $('meta[property="og:description"]').attr('content') || $('meta[name="description"]').attr('content');

    return {
        pubDate: publishedTime ? parseDate(publishedTime) : undefined,
        description,
    };
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unnecessary use of `flatMap`. Return an object and use `map` instead.