From baa0f648c4756c72b6ce3b939fb943796e0bddb0 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 28 Oct 2022 21:54:09 -0700 Subject: [PATCH 1/2] rustdoc: compute maximum Levenshtein distance based on the query The heuristic is pretty close to the name resolver. Fixes #103357 --- src/librustdoc/html/static/js/search.js | 147 ++++++++++++++---------- src/test/rustdoc-js-std/typed-query.js | 3 - src/test/rustdoc-js/doc-alias.js | 2 - 3 files changed, 84 insertions(+), 68 deletions(-) diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index d04ec357c40ac..b571d24a13d65 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -112,7 +112,6 @@ function levenshtein(s1, s2) { } function initSearch(rawSearchIndex) { - const MAX_LEV_DISTANCE = 3; const MAX_RESULTS = 200; const NO_TYPE_FILTER = -1; /** @@ -888,13 +887,13 @@ function initSearch(rawSearchIndex) { * @param {QueryElement} elem - The element from the parsed query. * @param {integer} defaultLev - This is the value to return in case there are no generics. * - * @return {integer} - Returns the best match (if any) or `MAX_LEV_DISTANCE + 1`. + * @return {integer} - Returns the best match (if any) or `maxLevDistance + 1`. */ - function checkGenerics(row, elem, defaultLev) { + function checkGenerics(row, elem, defaultLev, maxLevDistance) { if (row.generics.length === 0) { - return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1; + return elem.generics.length === 0 ? defaultLev : maxLevDistance + 1; } else if (row.generics.length > 0 && row.generics[0].name === null) { - return checkGenerics(row.generics[0], elem, defaultLev); + return checkGenerics(row.generics[0], elem, defaultLev, maxLevDistance); } // The names match, but we need to be sure that all generics kinda // match as well. @@ -905,8 +904,8 @@ function initSearch(rawSearchIndex) { elem_name = entry.name; if (elem_name === "") { // Pure generic, needs to check into it. - if (checkGenerics(entry, elem, MAX_LEV_DISTANCE + 1) !== 0) { - return MAX_LEV_DISTANCE + 1; + if (checkGenerics(entry, elem, maxLevDistance + 1, maxLevDistance) !== 0) { + return maxLevDistance + 1; } continue; } @@ -933,7 +932,7 @@ function initSearch(rawSearchIndex) { } } if (match === null) { - return MAX_LEV_DISTANCE + 1; + return maxLevDistance + 1; } elems[match] -= 1; if (elems[match] === 0) { @@ -942,7 +941,7 @@ function initSearch(rawSearchIndex) { } return 0; } - return MAX_LEV_DISTANCE + 1; + return maxLevDistance + 1; } /** @@ -954,10 +953,10 @@ function initSearch(rawSearchIndex) { * * @return {integer} - Returns a Levenshtein distance to the best match. */ - function checkIfInGenerics(row, elem) { - let lev = MAX_LEV_DISTANCE + 1; + function checkIfInGenerics(row, elem, maxLevDistance) { + let lev = maxLevDistance + 1; for (const entry of row.generics) { - lev = Math.min(checkType(entry, elem, true), lev); + lev = Math.min(checkType(entry, elem, true, maxLevDistance), lev); if (lev === 0) { break; } @@ -974,15 +973,15 @@ function initSearch(rawSearchIndex) { * @param {boolean} literalSearch * * @return {integer} - Returns a Levenshtein distance to the best match. If there is - * no match, returns `MAX_LEV_DISTANCE + 1`. + * no match, returns `maxLevDistance + 1`. */ - function checkType(row, elem, literalSearch) { + function checkType(row, elem, literalSearch, maxLevDistance) { if (row.name === null) { // This is a pure "generic" search, no need to run other checks. if (row.generics.length > 0) { - return checkIfInGenerics(row, elem); + return checkIfInGenerics(row, elem, maxLevDistance); } - return MAX_LEV_DISTANCE + 1; + return maxLevDistance + 1; } let lev = levenshtein(row.name, elem.name); @@ -996,9 +995,9 @@ function initSearch(rawSearchIndex) { return 0; } } - return MAX_LEV_DISTANCE + 1; + return maxLevDistance + 1; } else if (elem.generics.length > 0) { - return checkGenerics(row, elem, MAX_LEV_DISTANCE + 1); + return checkGenerics(row, elem, maxLevDistance + 1, maxLevDistance); } return 0; } else if (row.generics.length > 0) { @@ -1008,22 +1007,22 @@ function initSearch(rawSearchIndex) { } // The name didn't match so we now check if the type we're looking for is inside // the generics! - lev = checkIfInGenerics(row, elem); + lev = checkIfInGenerics(row, elem, maxLevDistance); // Now whatever happens, the returned distance is "less good" so we should mark // it as such, and so we add 0.5 to the distance to make it "less good". return lev + 0.5; - } else if (lev > MAX_LEV_DISTANCE) { + } else if (lev > maxLevDistance) { // So our item's name doesn't match at all and has generics. // // Maybe it's present in a sub generic? For example "f>>()", if we're // looking for "B", we'll need to go down. - return checkIfInGenerics(row, elem); + return checkIfInGenerics(row, elem, maxLevDistance); } else { // At this point, the name kinda match and we have generics to check, so // let's go! - const tmp_lev = checkGenerics(row, elem, lev); - if (tmp_lev > MAX_LEV_DISTANCE) { - return MAX_LEV_DISTANCE + 1; + const tmp_lev = checkGenerics(row, elem, lev, maxLevDistance); + if (tmp_lev > maxLevDistance) { + return maxLevDistance + 1; } // We compute the median value of both checks and return it. return (tmp_lev + lev) / 2; @@ -1031,7 +1030,7 @@ function initSearch(rawSearchIndex) { } else if (elem.generics.length > 0) { // In this case, we were expecting generics but there isn't so we simply reject this // one. - return MAX_LEV_DISTANCE + 1; + return maxLevDistance + 1; } // No generics on our query or on the target type so we can return without doing // anything else. @@ -1046,23 +1045,26 @@ function initSearch(rawSearchIndex) { * @param {integer} typeFilter * * @return {integer} - Returns a Levenshtein distance to the best match. If there is no - * match, returns `MAX_LEV_DISTANCE + 1`. + * match, returns `maxLevDistance + 1`. */ - function findArg(row, elem, typeFilter) { - let lev = MAX_LEV_DISTANCE + 1; + function findArg(row, elem, typeFilter, maxLevDistance) { + let lev = maxLevDistance + 1; if (row && row.type && row.type.inputs && row.type.inputs.length > 0) { for (const input of row.type.inputs) { if (!typePassesFilter(typeFilter, input.ty)) { continue; } - lev = Math.min(lev, checkType(input, elem, parsedQuery.literalSearch)); + lev = Math.min( + lev, + checkType(input, elem, parsedQuery.literalSearch, maxLevDistance) + ); if (lev === 0) { return 0; } } } - return parsedQuery.literalSearch ? MAX_LEV_DISTANCE + 1 : lev; + return parsedQuery.literalSearch ? maxLevDistance + 1 : lev; } /** @@ -1073,10 +1075,10 @@ function initSearch(rawSearchIndex) { * @param {integer} typeFilter * * @return {integer} - Returns a Levenshtein distance to the best match. If there is no - * match, returns `MAX_LEV_DISTANCE + 1`. + * match, returns `maxLevDistance + 1`. */ - function checkReturned(row, elem, typeFilter) { - let lev = MAX_LEV_DISTANCE + 1; + function checkReturned(row, elem, typeFilter, maxLevDistance) { + let lev = maxLevDistance + 1; if (row && row.type && row.type.output.length > 0) { const ret = row.type.output; @@ -1084,20 +1086,23 @@ function initSearch(rawSearchIndex) { if (!typePassesFilter(typeFilter, ret_ty.ty)) { continue; } - lev = Math.min(lev, checkType(ret_ty, elem, parsedQuery.literalSearch)); + lev = Math.min( + lev, + checkType(ret_ty, elem, parsedQuery.literalSearch, maxLevDistance) + ); if (lev === 0) { return 0; } } } - return parsedQuery.literalSearch ? MAX_LEV_DISTANCE + 1 : lev; + return parsedQuery.literalSearch ? maxLevDistance + 1 : lev; } - function checkPath(contains, ty) { + function checkPath(contains, ty, maxLevDistance) { if (contains.length === 0) { return 0; } - let ret_lev = MAX_LEV_DISTANCE + 1; + let ret_lev = maxLevDistance + 1; const path = ty.path.split("::"); if (ty.parent && ty.parent.name) { @@ -1107,7 +1112,7 @@ function initSearch(rawSearchIndex) { const length = path.length; const clength = contains.length; if (clength > length) { - return MAX_LEV_DISTANCE + 1; + return maxLevDistance + 1; } for (let i = 0; i < length; ++i) { if (i + clength > length) { @@ -1117,7 +1122,7 @@ function initSearch(rawSearchIndex) { let aborted = false; for (let x = 0; x < clength; ++x) { const lev = levenshtein(path[i + x], contains[x]); - if (lev > MAX_LEV_DISTANCE) { + if (lev > maxLevDistance) { aborted = true; break; } @@ -1222,7 +1227,7 @@ function initSearch(rawSearchIndex) { * following condition: * * * If it is a "literal search" (`parsedQuery.literalSearch`), then `lev` must be 0. - * * If it is not a "literal search", `lev` must be <= `MAX_LEV_DISTANCE`. + * * If it is not a "literal search", `lev` must be <= `maxLevDistance`. * * The `results` map contains information which will be used to sort the search results: * @@ -1237,8 +1242,8 @@ function initSearch(rawSearchIndex) { * @param {integer} index * @param {integer} lev */ - function addIntoResults(results, fullId, id, index, lev) { - if (lev === 0 || (!parsedQuery.literalSearch && lev <= MAX_LEV_DISTANCE)) { + function addIntoResults(results, fullId, id, index, lev, maxLevDistance) { + if (lev === 0 || (!parsedQuery.literalSearch && lev <= maxLevDistance)) { if (results[fullId] !== undefined) { const result = results[fullId]; if (result.dontValidate || result.lev <= lev) { @@ -1275,7 +1280,8 @@ function initSearch(rawSearchIndex) { elem, results_others, results_in_args, - results_returned + results_returned, + maxLevDistance ) { if (!row || (filterCrates !== null && row.crate !== filterCrates)) { return; @@ -1283,11 +1289,11 @@ function initSearch(rawSearchIndex) { let lev, lev_add = 0, index = -1; const fullId = row.id; - const in_args = findArg(row, elem, parsedQuery.typeFilter); - const returned = checkReturned(row, elem, parsedQuery.typeFilter); + const in_args = findArg(row, elem, parsedQuery.typeFilter, maxLevDistance); + const returned = checkReturned(row, elem, parsedQuery.typeFilter, maxLevDistance); - addIntoResults(results_in_args, fullId, pos, index, in_args); - addIntoResults(results_returned, fullId, pos, index, returned); + addIntoResults(results_in_args, fullId, pos, index, in_args, maxLevDistance); + addIntoResults(results_returned, fullId, pos, index, returned, maxLevDistance); if (!typePassesFilter(parsedQuery.typeFilter, row.ty)) { return; @@ -1296,7 +1302,7 @@ function initSearch(rawSearchIndex) { if (parsedQuery.literalSearch) { if (searchWord === elem.name) { - addIntoResults(results_others, fullId, pos, -1, 0); + addIntoResults(results_others, fullId, pos, -1, 0, maxLevDistance); } return; } @@ -1304,15 +1310,15 @@ function initSearch(rawSearchIndex) { // No need to check anything else if it's a "pure" generics search. if (elem.name.length === 0) { if (row.type !== null) { - lev = checkGenerics(row.type, elem, MAX_LEV_DISTANCE + 1); - addIntoResults(results_others, fullId, pos, index, lev); + lev = checkGenerics(row.type, elem, maxLevDistance + 1, maxLevDistance); + addIntoResults(results_others, fullId, pos, index, lev, maxLevDistance); } return; } if (elem.fullPath.length > 1) { - lev = checkPath(elem.pathWithoutLast, row); - if (lev > MAX_LEV_DISTANCE || (parsedQuery.literalSearch && lev !== 0)) { + lev = checkPath(elem.pathWithoutLast, row, maxLevDistance); + if (lev > maxLevDistance || (parsedQuery.literalSearch && lev !== 0)) { return; } else if (lev > 0) { lev_add = lev / 10; @@ -1333,7 +1339,7 @@ function initSearch(rawSearchIndex) { } } lev += lev_add; - if (lev > MAX_LEV_DISTANCE) { + if (lev > maxLevDistance) { return; } else if (index !== -1 && elem.fullPath.length < 2) { lev -= 1; @@ -1341,7 +1347,7 @@ function initSearch(rawSearchIndex) { if (lev < 0) { lev = 0; } - addIntoResults(results_others, fullId, pos, index, lev); + addIntoResults(results_others, fullId, pos, index, lev, maxLevDistance); } /** @@ -1353,7 +1359,7 @@ function initSearch(rawSearchIndex) { * @param {integer} pos - Position in the `searchIndex`. * @param {Object} results */ - function handleArgs(row, pos, results) { + function handleArgs(row, pos, results, maxLevDistance) { if (!row || (filterCrates !== null && row.crate !== filterCrates)) { return; } @@ -1365,7 +1371,7 @@ function initSearch(rawSearchIndex) { function checkArgs(elems, callback) { for (const elem of elems) { // There is more than one parameter to the query so all checks should be "exact" - const lev = callback(row, elem, NO_TYPE_FILTER); + const lev = callback(row, elem, NO_TYPE_FILTER, maxLevDistance); if (lev <= 1) { nbLev += 1; totalLev += lev; @@ -1386,12 +1392,21 @@ function initSearch(rawSearchIndex) { return; } const lev = Math.round(totalLev / nbLev); - addIntoResults(results, row.id, pos, 0, lev); + addIntoResults(results, row.id, pos, 0, lev, maxLevDistance); } function innerRunQuery() { let elem, i, nSearchWords, in_returned, row; + let queryLen = 0; + for (const elem of parsedQuery.elems) { + queryLen += elem.name.length; + } + for (const elem of parsedQuery.returned) { + queryLen += elem.name.length; + } + const maxLevDistance = Math.ceil(queryLen / 3); + if (parsedQuery.foundElems === 1) { if (parsedQuery.elems.length === 1) { elem = parsedQuery.elems[0]; @@ -1404,7 +1419,8 @@ function initSearch(rawSearchIndex) { elem, results_others, results_in_args, - results_returned + results_returned, + maxLevDistance ); } } else if (parsedQuery.returned.length === 1) { @@ -1412,13 +1428,18 @@ function initSearch(rawSearchIndex) { elem = parsedQuery.returned[0]; for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { row = searchIndex[i]; - in_returned = checkReturned(row, elem, parsedQuery.typeFilter); - addIntoResults(results_others, row.id, i, -1, in_returned); + in_returned = checkReturned( + row, + elem, + parsedQuery.typeFilter, + maxLevDistance + ); + addIntoResults(results_others, row.id, i, -1, in_returned, maxLevDistance); } } } else if (parsedQuery.foundElems > 0) { for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { - handleArgs(searchIndex[i], i, results_others); + handleArgs(searchIndex[i], i, results_others, maxLevDistance); } } } @@ -1456,7 +1477,7 @@ function initSearch(rawSearchIndex) { * * @return {boolean} - Whether the result is valid or not */ - function validateResult(name, path, keys, parent) { + function validateResult(name, path, keys, parent, maxLevDistance) { if (!keys || !keys.length) { return true; } @@ -1471,7 +1492,7 @@ function initSearch(rawSearchIndex) { (parent !== undefined && parent.name !== undefined && parent.name.toLowerCase().indexOf(key) > -1) || // lastly check to see if the name was a levenshtein match - levenshtein(name, key) <= MAX_LEV_DISTANCE)) { + levenshtein(name, key) <= maxLevDistance)) { return false; } } diff --git a/src/test/rustdoc-js-std/typed-query.js b/src/test/rustdoc-js-std/typed-query.js index 25efbad269540..ee124fd78a9dc 100644 --- a/src/test/rustdoc-js-std/typed-query.js +++ b/src/test/rustdoc-js-std/typed-query.js @@ -10,8 +10,5 @@ const EXPECTED = { { 'path': 'std', 'name': 'println' }, { 'path': 'std', 'name': 'eprintln' }, { 'path': 'std::pin', 'name': 'pin' }, - { 'path': 'std::future', 'name': 'join' }, - { 'path': 'std', 'name': 'line' }, - { 'path': 'std', 'name': 'write' }, ], }; diff --git a/src/test/rustdoc-js/doc-alias.js b/src/test/rustdoc-js/doc-alias.js index 7bb0cbe388fee..62c8e7a74b940 100644 --- a/src/test/rustdoc-js/doc-alias.js +++ b/src/test/rustdoc-js/doc-alias.js @@ -1,5 +1,3 @@ -// exact-check - const QUERY = [ 'StructItem', 'StructFieldItem', From cc8155fde94b53d9e63677e46b4da6973467eb02 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 28 Oct 2022 23:07:36 -0700 Subject: [PATCH 2/2] rustdoc: update test case to deal with "coo" only accepting dist=1 --- src/test/rustdoc-gui/src/test_docs/lib.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/test/rustdoc-gui/src/test_docs/lib.rs b/src/test/rustdoc-gui/src/test_docs/lib.rs index fdf97e492aa2b..b6648358b5ad7 100644 --- a/src/test/rustdoc-gui/src/test_docs/lib.rs +++ b/src/test/rustdoc-gui/src/test_docs/lib.rs @@ -334,7 +334,7 @@ pub mod details { pub mod doc_block_table { pub trait DocBlockTableTrait { - fn func(); + fn foo(); } /// Struct doc. @@ -350,7 +350,7 @@ pub mod doc_block_table { /// | header1 | header2 | /// |--------------------------|--------------------------| /// | Lorem Ipsum, Lorem Ipsum | Lorem Ipsum, Lorem Ipsum | - fn func() { + fn foo() { println!(); } } @@ -416,3 +416,16 @@ pub trait TraitWithoutGenerics { fn foo(); } + +pub mod search_results { + + pub struct SearchResults { + pub foo: i32, + } + + #[macro_export] + macro_rules! foo { + () => {}; + } + +}