Skip to content

Commit 28ae6f5

Browse files
committed
auto merge of #15385 : jroweboy/rust/master, r=brson
This makes the docs search function more forgiving of spelling mistakes. It uses a dynamic programming algorithm (Levenshtein distance) to compute the minimum number of single-character changes required to turn the search string into each string in the search index. If the number of changes is no greater than a threshold (currently defined as 3), then that index entry is included in the results, since the search string may be a misspelling of it. Any results returned by the algorithm are sorted by distance and are ranked lower than results that are partial or exact matches (i.e. the matches returned by the original search algorithm). Additionally, the for loops in this file used three different ways to increment (`i += 1`, `i++`, and `++i`), so I standardized them all to `++i`. As an example, consider searching for the word `String` and accidentally typing in `Strnig`. The old system would return no results because it is a misspelling, but the Levenshtein distance between these two inputs is only two, which means that this will return `String` as a result. Additionally, it will return a few other results, such as `strong` and `StdRng`, because these are also similar to `Strnig`. Because of the ranking system, though, this change should be unobtrusive to anyone who spells the words correctly, as those matches are still ranked ahead of any Levenshtein results.
2 parents 7c28dd0 + c614510 commit 28ae6f5

File tree

1 file changed

+89
-44
lines changed

1 file changed

+89
-44
lines changed

src/librustdoc/html/static/main.js

+89-44
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
}
6262
$('#' + from)[0].scrollIntoView();
6363
$('.line-numbers span').removeClass('line-highlighted');
64-
for (i = from; i <= to; i += 1) {
64+
for (i = from; i <= to; ++i) {
6565
$('#' + i).addClass('line-highlighted');
6666
}
6767
}
@@ -102,7 +102,7 @@
102102
stripped = '',
103103
len = rootPath.match(/\.\.\//g).length + 1;
104104

105-
for (i = 0; i < len; i += 1) {
105+
for (i = 0; i < len; ++i) {
106106
match = url.match(/\/[^\/]*$/);
107107
if (i < len - 1) {
108108
stripped = match[0] + stripped;
@@ -114,9 +114,47 @@
114114

115115
document.location.href = url;
116116
});
117+
/**
118+
* A function to compute the Levenshtein distance between two strings
119+
* Licensed under the Creative Commons Attribution-ShareAlike 3.0 Unported
120+
* Full License can be found at http://creativecommons.org/licenses/by-sa/3.0/legalcode
121+
* This code is an unmodified version of the code written by Marco de Wit
122+
* and was found at http://stackoverflow.com/a/18514751/745719
123+
*/
124+
var levenshtein = (function() {
125+
var row2 = [];
126+
return function(s1, s2) {
127+
if (s1 === s2) {
128+
return 0;
129+
} else {
130+
var s1_len = s1.length, s2_len = s2.length;
131+
if (s1_len && s2_len) {
132+
var i1 = 0, i2 = 0, a, b, c, c2, row = row2;
133+
while (i1 < s1_len)
134+
row[i1] = ++i1;
135+
while (i2 < s2_len) {
136+
c2 = s2.charCodeAt(i2);
137+
a = i2;
138+
++i2;
139+
b = i2;
140+
for (i1 = 0; i1 < s1_len; ++i1) {
141+
c = a + (s1.charCodeAt(i1) !== c2 ? 1 : 0);
142+
a = row[i1];
143+
b = b < a ? (b < c ? b + 1 : c) : (a < c ? a + 1 : c);
144+
row[i1] = b;
145+
}
146+
}
147+
return b;
148+
} else {
149+
return s1_len + s2_len;
150+
}
151+
}
152+
};
153+
})();
117154

118155
function initSearch(rawSearchIndex) {
119156
var currentResults, index, searchIndex;
157+
var MAX_LEV_DISTANCE = 3;
120158
var params = getQueryStringParams();
121159

122160
// Populate search bar with query string search term when provided,
@@ -143,7 +181,7 @@
143181
split = valLower.split("::");
144182

145183
//remove empty keywords
146-
for (var j = 0; j < split.length; j++) {
184+
for (var j = 0; j < split.length; ++j) {
147185
split[j].toLowerCase();
148186
if (split[j] === "") {
149187
split.splice(j, 1);
@@ -156,7 +194,7 @@
156194
val.charAt(val.length - 1) === val.charAt(0))
157195
{
158196
val = val.substr(1, val.length - 2);
159-
for (var i = 0; i < nSearchWords; i += 1) {
197+
for (var i = 0; i < nSearchWords; ++i) {
160198
if (searchWords[i] === val) {
161199
// filter type: ... queries
162200
if (typeFilter < 0 || typeFilter === searchIndex[i].ty) {
@@ -170,15 +208,31 @@
170208
} else {
171209
// gather matching search results up to a certain maximum
172210
val = val.replace(/\_/g, "");
173-
for (var i = 0; i < split.length; i++) {
174-
for (var j = 0; j < nSearchWords; j += 1) {
211+
for (var i = 0; i < split.length; ++i) {
212+
for (var j = 0; j < nSearchWords; ++j) {
213+
var lev_distance;
175214
if (searchWords[j].indexOf(split[i]) > -1 ||
176215
searchWords[j].indexOf(val) > -1 ||
177216
searchWords[j].replace(/_/g, "").indexOf(val) > -1)
178217
{
179218
// filter type: ... queries
180219
if (typeFilter < 0 || typeFilter === searchIndex[j].ty) {
181-
results.push({id: j, index: searchWords[j].replace(/_/g, "").indexOf(val)});
220+
results.push({
221+
id: j,
222+
index: searchWords[j].replace(/_/g, "").indexOf(val),
223+
lev: 0,
224+
});
225+
}
226+
} else if (
227+
(lev_distance = levenshtein(searchWords[j], val)) <=
228+
MAX_LEV_DISTANCE) {
229+
if (typeFilter < 0 || typeFilter === searchIndex[j].ty) {
230+
results.push({
231+
id: j,
232+
index: 0,
233+
// we want lev results to go lower than others
234+
lev: lev_distance,
235+
});
182236
}
183237
}
184238
if (results.length === max) {
@@ -189,7 +243,7 @@
189243
}
190244

191245
var nresults = results.length;
192-
for (var i = 0; i < nresults; i += 1) {
246+
for (var i = 0; i < nresults; ++i) {
193247
results[i].word = searchWords[results[i].id];
194248
results[i].item = searchIndex[results[i].id] || {};
195249
}
@@ -201,6 +255,12 @@
201255
results.sort(function(aaa, bbb) {
202256
var a, b;
203257

258+
// Sort by non levenshtein results and then levenshtein results by the distance
259+
// (less changes required to match means higher rankings)
260+
a = (aaa.lev);
261+
b = (bbb.lev);
262+
if (a !== b) return a - b;
263+
204264
// sort by crate (non-current crate goes later)
205265
a = (aaa.item.crate !== window.currentCrate);
206266
b = (bbb.item.crate !== window.currentCrate);
@@ -258,7 +318,7 @@
258318
results[i].id = -1;
259319
}
260320
}
261-
for (var i = 0; i < results.length; i++) {
321+
for (var i = 0; i < results.length; ++i) {
262322
var result = results[i],
263323
name = result.item.name.toLowerCase(),
264324
path = result.item.path.toLowerCase(),
@@ -288,38 +348,23 @@
288348
* @return {[boolean]} [Whether the result is valid or not]
289349
*/
290350
function validateResult(name, path, keys, parent) {
291-
//initially valid
292-
var validate = true;
293-
//if there is a parent, then validate against parent
294-
if (parent !== undefined) {
295-
for (var i = 0; i < keys.length; i++) {
296-
// if previous keys are valid and current key is in the
297-
// path, name or parent
298-
if ((validate) &&
299-
(name.toLowerCase().indexOf(keys[i]) > -1 ||
300-
path.toLowerCase().indexOf(keys[i]) > -1 ||
301-
parent.name.toLowerCase().indexOf(keys[i]) > -1))
302-
{
303-
validate = true;
304-
} else {
305-
validate = false;
306-
}
307-
}
308-
} else {
309-
for (var i = 0; i < keys.length; i++) {
310-
// if previous keys are valid and current key is in the
311-
// path, name
312-
if ((validate) &&
313-
(name.toLowerCase().indexOf(keys[i]) > -1 ||
314-
path.toLowerCase().indexOf(keys[i]) > -1))
315-
{
316-
validate = true;
317-
} else {
318-
validate = false;
319-
}
351+
for (var i=0; i < keys.length; ++i) {
352+
// each check is for validation so we negate the conditions and invalidate
353+
if (!(
354+
// check for an exact name match
355+
name.toLowerCase().indexOf(keys[i]) > -1 ||
356+
// then an exact path match
357+
path.toLowerCase().indexOf(keys[i]) > -1 ||
358+
// next if there is a parent, check for exact parent match
359+
(parent !== undefined &&
360+
parent.name.toLowerCase().indexOf(keys[i]) > -1) ||
361+
// lastly check to see if the name was a levenshtein match
362+
levenshtein(name.toLowerCase(), keys[i]) <=
363+
MAX_LEV_DISTANCE)) {
364+
return false;
320365
}
321366
}
322-
return validate;
367+
return true;
323368
}
324369

325370
function getQuery() {
@@ -499,7 +544,7 @@
499544

500545
resultIndex = execQuery(query, 20000, index);
501546
len = resultIndex.length;
502-
for (i = 0; i < len; i += 1) {
547+
for (i = 0; i < len; ++i) {
503548
if (resultIndex[i].id > -1) {
504549
obj = searchIndex[resultIndex[i].id];
505550
filterdata.push([obj.name, obj.ty, obj.path, obj.desc]);
@@ -571,7 +616,7 @@
571616
// faster analysis operations
572617
var len = items.length;
573618
var lastPath = "";
574-
for (var i = 0; i < len; i += 1) {
619+
for (var i = 0; i < len; ++i) {
575620
var rawRow = items[i];
576621
var row = {crate: crate, ty: rawRow[0], name: rawRow[1],
577622
path: rawRow[2] || lastPath, desc: rawRow[3],
@@ -643,7 +688,7 @@
643688
crates.push(crate);
644689
}
645690
crates.sort();
646-
for (var i = 0; i < crates.length; i++) {
691+
for (var i = 0; i < crates.length; ++i) {
647692
var klass = 'crate';
648693
if (crates[i] == window.currentCrate) {
649694
klass += ' current';
@@ -660,10 +705,10 @@
660705
window.register_implementors = function(imp) {
661706
var list = $('#implementors-list');
662707
var libs = Object.getOwnPropertyNames(imp);
663-
for (var i = 0; i < libs.length; i++) {
708+
for (var i = 0; i < libs.length; ++i) {
664709
if (libs[i] == currentCrate) continue;
665710
var structs = imp[libs[i]];
666-
for (var j = 0; j < structs.length; j++) {
711+
for (var j = 0; j < structs.length; ++j) {
667712
var code = $('<code>').append(structs[j]);
668713
$.each(code.find('a'), function(idx, a) {
669714
var href = $(a).attr('href');

0 commit comments

Comments
 (0)