Skip to content

Commit 27af44b

Browse files
jsha authored and syphar committed
Set X-Robots-Tag: noindex for nonlatest rustdoc
We want /latest/ URLs to be indexed, but not /1.2.3/ URLs, because the latter compete for pagerank with /latest/ and aren't usually what people want. This replaces the previous canonical URL header. Canonical URLs were doing some good, but did not work in all cases. In particular, if some older version of a crate's documentation was very different than the latest version, Google would not accept the canonicalization. This would sometimes result in the old version still showing up in the search results instead of /latest/.
1 parent 3b56279 commit 27af44b

File tree

2 files changed

+13
-89
lines changed

2 files changed

+13
-89
lines changed

src/test/fakes.rs

-5
Original file line number | Diff line number | Diff line change
@@ -246,11 +246,6 @@ impl<'a> FakeRelease<'a> {
246246
self
247247
}
248248

249-
pub(crate) fn documentation_url(mut self, documentation: Option<String>) -> Self {
250-
self.package.documentation = documentation;
251-
self
252-
}
253-
254249
/// Returns the release_id
255250
pub(crate) fn create(mut self) -> Result<i32> {
256251
use std::fs;

src/web/rustdoc.rs

+13-84
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@ use crate::{
1313
encode_url_path,
1414
error::{AxumNope, AxumResult},
1515
file::File,
16-
headers::CanonicalUrl,
1716
match_version_axum,
1817
metrics::RenderingTimesRecorder,
1918
page::TemplateData,
@@ -25,8 +24,7 @@ use anyhow::{anyhow, Context as _};
2524
use axum::{
2625
extract::{Extension, Path, Query},
2726
http::{StatusCode, Uri},
28-
response::{Html, IntoResponse, Response as AxumResponse},
29-
TypedHeader,
27+
response::{AppendHeaders, Html, IntoResponse, Response as AxumResponse},
3028
};
3129
use lol_html::errors::RewritingError;
3230
use once_cell::sync::Lazy;
@@ -289,7 +287,6 @@ pub(crate) async fn rustdoc_redirector_handler(
289287
#[derive(Debug, Clone, Serialize)]
290288
struct RustdocPage {
291289
latest_path: String,
292-
canonical_url: CanonicalUrl,
293290
permalink_path: String,
294291
latest_version: String,
295292
target: String,
@@ -315,7 +312,6 @@ impl RustdocPage {
315312
file_path: &str,
316313
) -> AxumResult<AxumResponse> {
317314
let is_latest_url = self.is_latest_url;
318-
let canonical_url = self.canonical_url.clone();
319315

320316
// Build the page of documentation
321317
let ctx = tera::Context::from_serialize(self).context("error creating tera context")?;
@@ -336,9 +332,10 @@ impl RustdocPage {
336332
result => result.context("error rewriting HTML")?,
337333
};
338334

335+
let robots = if is_latest_url { "" } else { "noindex" };
339336
Ok((
340337
StatusCode::OK,
341-
TypedHeader(canonical_url),
338+
AppendHeaders([("X-Robots-Tag", robots)]),
342339
Extension(if is_latest_url {
343340
CachePolicy::ForeverInCdn
344341
} else {
@@ -640,18 +637,6 @@ pub(crate) async fn rustdoc_html_server_handler(
640637
params.name, target_redirect, query_string
641638
);
642639

643-
// Set the canonical URL for search engines to the `/latest/` page on docs.rs.
644-
// Note: The URL this points to may not exist. For instance, if we're rendering
645-
// `struct Foo` in version 0.1.0 of a crate, and version 0.2.0 of that crate removes
646-
// `struct Foo`, this will point at a 404. That's fine: search engines will crawl
647-
// the target and will not canonicalize to a URL that doesn't exist.
648-
// Don't include index.html in the canonical URL.
649-
let canonical_url = CanonicalUrl::from_path(format!(
650-
"/{}/latest/{}",
651-
params.name,
652-
inner_path.replace("index.html", ""),
653-
));
654-
655640
metrics
656641
.recently_accessed_releases
657642
.record(krate.crate_id, krate.release_id, target);
@@ -671,7 +656,6 @@ pub(crate) async fn rustdoc_html_server_handler(
671656
move || {
672657
Ok(RustdocPage {
673658
latest_path,
674-
canonical_url,
675659
permalink_path,
676660
latest_version,
677661
target,
@@ -2394,90 +2378,35 @@ mod test {
23942378
}
23952379

23962380
#[test]
2397-
fn canonical_url() {
2381+
fn noindex_nonlatest() {
23982382
wrapper(|env| {
23992383
env.fake_release()
2400-
.name("dummy-dash")
2401-
.version("0.1.0")
2402-
.documentation_url(Some("http://example.com".to_string()))
2403-
.rustdoc_file("dummy_dash/index.html")
2404-
.create()?;
2405-
2406-
let utf8_filename = "序.html";
2407-
env.fake_release()
2408-
.name("dummy-docs")
2409-
.version("0.1.0")
2410-
.documentation_url(Some("https://docs.rs/foo".to_string()))
2411-
.rustdoc_file("dummy_docs/index.html")
2412-
.rustdoc_file(&format!("dummy_docs/{utf8_filename}"))
2413-
.create()?;
2414-
2415-
env.fake_release()
2416-
.name("dummy-nodocs")
2384+
.name("dummy")
24172385
.version("0.1.0")
2418-
.documentation_url(None)
2419-
.rustdoc_file("dummy_nodocs/index.html")
2420-
.rustdoc_file("dummy_nodocs/struct.Foo.html")
2386+
.rustdoc_file("dummy/index.html")
24212387
.create()?;
24222388

24232389
let web = env.frontend();
24242390

24252391
assert!(web
2426-
.get("/dummy-dash/0.1.0/dummy_dash/")
2392+
.get("/dummy/0.1.0/dummy/")
24272393
.send()?
24282394
.headers()
2429-
.get("link")
2395+
.get("x-robots-tag")
24302396
.unwrap()
24312397
.to_str()
24322398
.unwrap()
2433-
.contains("rel=\"canonical\""),);
2399+
.contains("noindex"));
24342400

2435-
assert_eq!(
2436-
web.get("/dummy-docs/0.1.0/dummy_docs/")
2437-
.send()?
2438-
.headers()
2439-
.get("link")
2440-
.unwrap()
2441-
.to_str()
2442-
.unwrap(),
2443-
"<https://docs.rs/dummy-docs/latest/dummy_docs/>; rel=\"canonical\""
2444-
);
2445-
2446-
assert_eq!(
2447-
web.get(&format!("/dummy-docs/0.1.0/dummy_docs/{utf8_filename}"))
2448-
.send()?
2449-
.headers()
2450-
.get("link")
2451-
.unwrap()
2452-
.to_str()
2453-
.unwrap(),
2454-
"<https://docs.rs/dummy-docs/latest/dummy_docs/%E5%BA%8F.html>; rel=\"canonical\"",
2455-
);
2456-
2457-
assert!(web
2458-
.get("/dummy-nodocs/0.1.0/dummy_nodocs/")
2401+
assert!(!web
2402+
.get("/dummy/latest/dummy/")
24592403
.send()?
24602404
.headers()
2461-
.get("link")
2405+
.get("x-robots-tag")
24622406
.unwrap()
24632407
.to_str()
24642408
.unwrap()
2465-
.contains(
2466-
"<https://docs.rs/dummy-nodocs/latest/dummy_nodocs/>; rel=\"canonical\""
2467-
),);
2468-
2469-
assert_eq!(
2470-
web
2471-
.get("/dummy-nodocs/0.1.0/dummy_nodocs/struct.Foo.html")
2472-
.send()?
2473-
.headers()
2474-
.get("link")
2475-
.unwrap()
2476-
.to_str()
2477-
.unwrap(),
2478-
"<https://docs.rs/dummy-nodocs/latest/dummy_nodocs/struct.Foo.html>; rel=\"canonical\"",
2479-
);
2480-
2409+
.contains("noindex"));
24812410
Ok(())
24822411
})
24832412
}

0 commit comments

Comments
 (0)