Make public API, docs algorithm-agnostic

jhpratt · jhpratt · commit 378c4ab9ab1f · 2023-02-19T04:11:10.000Z
diff --git a/compiler/rustc_ast_lowering/src/item.rs b/compiler/rustc_ast_lowering/src/item.rs
@@ -13,7 +13,7 @@ use rustc_hir::def_id::{LocalDefId, CRATE_DEF_ID};
 use rustc_hir::PredicateOrigin;
 use rustc_index::vec::{Idx, IndexVec};
 use rustc_middle::ty::{DefIdTree, ResolverAstLowering, TyCtxt};
-use rustc_span::lev_distance::find_best_match_for_name;
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::source_map::DesugaringKind;
 use rustc_span::symbol::{kw, sym, Ident};
 use rustc_span::{Span, Symbol};
diff --git a/compiler/rustc_hir_analysis/src/astconv/errors.rs b/compiler/rustc_hir_analysis/src/astconv/errors.rs
@@ -6,7 +6,7 @@ use rustc_hir as hir;
 use rustc_hir::def_id::DefId;
 use rustc_middle::ty;
 use rustc_session::parse::feature_err;
-use rustc_span::lev_distance::find_best_match_for_name;
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::symbol::{sym, Ident};
 use rustc_span::{Span, Symbol, DUMMY_SP};
 
diff --git a/compiler/rustc_hir_analysis/src/astconv/mod.rs b/compiler/rustc_hir_analysis/src/astconv/mod.rs
@@ -34,8 +34,8 @@ use rustc_middle::ty::DynKind;
 use rustc_middle::ty::GenericParamDefKind;
 use rustc_middle::ty::{self, Const, DefIdTree, IsSuggestable, Ty, TyCtxt, TypeVisitable};
 use rustc_session::lint::builtin::{AMBIGUOUS_ASSOCIATED_ITEMS, BARE_TRAIT_OBJECTS};
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::edition::Edition;
-use rustc_span::lev_distance::find_best_match_for_name;
 use rustc_span::symbol::{kw, Ident, Symbol};
 use rustc_span::{sym, Span, DUMMY_SP};
 use rustc_target::spec::abi;
diff --git a/compiler/rustc_hir_typeck/src/expr.rs b/compiler/rustc_hir_typeck/src/expr.rs
@@ -45,8 +45,8 @@ use rustc_middle::ty::subst::SubstsRef;
 use rustc_middle::ty::{self, AdtKind, Ty, TypeVisitable};
 use rustc_session::errors::ExprParenthesesNeeded;
 use rustc_session::parse::feature_err;
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::hygiene::DesugaringKind;
-use rustc_span::lev_distance::find_best_match_for_name;
 use rustc_span::source_map::{Span, Spanned};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use rustc_target::spec::abi::Abi::RustIntrinsic;
diff --git a/compiler/rustc_hir_typeck/src/method/probe.rs b/compiler/rustc_hir_typeck/src/method/probe.rs
@@ -25,8 +25,8 @@ use rustc_middle::ty::{InternalSubsts, SubstsRef};
 use rustc_session::lint;
 use rustc_span::def_id::DefId;
 use rustc_span::def_id::LocalDefId;
-use rustc_span::lev_distance::{
-    find_best_match_for_name_with_substrings, lev_distance_with_substrings,
+use rustc_span::edit_distance::{
+    edit_distance_with_substrings, find_best_match_for_name_with_substrings,
 };
 use rustc_span::symbol::sym;
 use rustc_span::{symbol::Ident, Span, Symbol, DUMMY_SP};
@@ -70,7 +70,7 @@ struct ProbeContext<'a, 'tcx> {
     impl_dups: FxHashSet<DefId>,
 
     /// When probing for names, include names that are close to the
-    /// requested name (by Levenshtein distance)
+    /// requested name (by edit distance)
     allow_similar_names: bool,
 
     /// Some(candidate) if there is a private candidate
@@ -1794,7 +1794,7 @@ impl<'a, 'tcx> ProbeContext<'a, 'tcx> {
 
     /// Similarly to `probe_for_return_type`, this method attempts to find the best matching
     /// candidate method where the method name may have been misspelled. Similarly to other
-    /// Levenshtein based suggestions, we provide at most one such suggestion.
+    /// edit-distance-based suggestions, we provide at most one such suggestion.
     fn probe_for_similar_candidate(&mut self) -> Result<Option<ty::AssocItem>, MethodError<'tcx>> {
         debug!("probing for method names similar to {:?}", self.method_name);
 
@@ -2052,8 +2052,11 @@ impl<'a, 'tcx> ProbeContext<'a, 'tcx> {
                         if self.matches_by_doc_alias(x.def_id) {
                             return true;
                         }
-                        match lev_distance_with_substrings(name.as_str(), x.name.as_str(), max_dist)
-                        {
+                        match edit_distance_with_substrings(
+                            name.as_str(),
+                            x.name.as_str(),
+                            max_dist,
+                        ) {
                             Some(d) => d > 0,
                             None => false,
                         }
diff --git a/compiler/rustc_hir_typeck/src/method/suggest.rs b/compiler/rustc_hir_typeck/src/method/suggest.rs
@@ -31,7 +31,7 @@ use rustc_middle::ty::{self, DefIdTree, GenericArgKind, Ty, TyCtxt, TypeVisitabl
 use rustc_middle::ty::{IsSuggestable, ToPolyTraitRef};
 use rustc_span::symbol::{kw, sym, Ident};
 use rustc_span::Symbol;
-use rustc_span::{lev_distance, source_map, ExpnKind, FileName, MacroKind, Span};
+use rustc_span::{edit_distance, source_map, ExpnKind, FileName, MacroKind, Span};
 use rustc_trait_selection::traits::error_reporting::on_unimplemented::OnUnimplementedNote;
 use rustc_trait_selection::traits::error_reporting::on_unimplemented::TypeErrCtxtExt as _;
 use rustc_trait_selection::traits::query::evaluate_obligation::InferCtxtExt as _;
@@ -1014,7 +1014,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
         // that had unsatisfied trait bounds
         if unsatisfied_predicates.is_empty() && rcvr_ty.is_enum() {
             let adt_def = rcvr_ty.ty_adt_def().expect("enum is not an ADT");
-            if let Some(suggestion) = lev_distance::find_best_match_for_name(
+            if let Some(suggestion) = edit_distance::find_best_match_for_name(
                 &adt_def.variants().iter().map(|s| s.name).collect::<Vec<_>>(),
                 item_name.name,
                 None,
diff --git a/compiler/rustc_hir_typeck/src/pat.rs b/compiler/rustc_hir_typeck/src/pat.rs
@@ -14,8 +14,8 @@ use rustc_infer::infer::type_variable::{TypeVariableOrigin, TypeVariableOriginKi
 use rustc_middle::middle::stability::EvalResult;
 use rustc_middle::ty::{self, Adt, BindingMode, Ty, TypeVisitable};
 use rustc_session::lint::builtin::NON_EXHAUSTIVE_OMITTED_PATTERNS;
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::hygiene::DesugaringKind;
-use rustc_span::lev_distance::find_best_match_for_name;
 use rustc_span::source_map::{Span, Spanned};
 use rustc_span::symbol::{kw, sym, Ident};
 use rustc_span::{BytePos, DUMMY_SP};
diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs
@@ -14,8 +14,8 @@ use rustc_session::filesearch::sysroot_candidates;
 use rustc_session::lint::{self, BuiltinLintDiagnostics, LintBuffer};
 use rustc_session::parse::CrateConfig;
 use rustc_session::{early_error, filesearch, output, Session};
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::edition::Edition;
-use rustc_span::lev_distance::find_best_match_for_name;
 use rustc_span::source_map::FileLoader;
 use rustc_span::symbol::{sym, Symbol};
 use session::CompilerIO;
diff --git a/compiler/rustc_lint/src/context.rs b/compiler/rustc_lint/src/context.rs
@@ -39,7 +39,7 @@ use rustc_middle::ty::{self, print::Printer, subst::GenericArg, RegisteredTools,
 use rustc_session::lint::{BuiltinLintDiagnostics, LintExpectationId};
 use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId};
 use rustc_session::Session;
-use rustc_span::lev_distance::find_best_match_for_name;
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::symbol::{sym, Ident, Symbol};
 use rustc_span::{BytePos, Span};
 use rustc_target::abi;
diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs
@@ -19,8 +19,8 @@ use rustc_errors::{
     struct_span_err, Applicability, DiagnosticBuilder, ErrorGuaranteed, IntoDiagnostic, PResult,
     StashKey,
 };
+use rustc_span::edit_distance::edit_distance;
 use rustc_span::edition::Edition;
-use rustc_span::lev_distance::lev_distance;
 use rustc_span::source_map::{self, Span};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use rustc_span::DUMMY_SP;
@@ -459,7 +459,8 @@ impl<'a> Parser<'a> {
                 // Maybe the user misspelled `macro_rules` (issue #91227)
                 if self.token.is_ident()
                     && path.segments.len() == 1
-                    && lev_distance("macro_rules", &path.segments[0].ident.to_string(), 3).is_some()
+                    && edit_distance("macro_rules", &path.segments[0].ident.to_string(), 3)
+                        .is_some()
                 {
                     err.span_suggestion(
                         path.span,
diff --git a/compiler/rustc_resolve/src/diagnostics.rs b/compiler/rustc_resolve/src/diagnostics.rs
@@ -21,9 +21,9 @@ use rustc_session::lint::builtin::ABSOLUTE_PATHS_NOT_STARTING_WITH_CRATE;
 use rustc_session::lint::builtin::MACRO_EXPANDED_MACRO_EXPORTS_ACCESSED_BY_ABSOLUTE_PATHS;
 use rustc_session::lint::BuiltinLintDiagnostics;
 use rustc_session::Session;
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::edition::Edition;
 use rustc_span::hygiene::MacroKind;
-use rustc_span::lev_distance::find_best_match_for_name;
 use rustc_span::source_map::SourceMap;
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use rustc_span::{BytePos, Span, SyntaxContext};
diff --git a/compiler/rustc_resolve/src/imports.rs b/compiler/rustc_resolve/src/imports.rs
@@ -21,8 +21,8 @@ use rustc_middle::span_bug;
 use rustc_middle::ty;
 use rustc_session::lint::builtin::{PUB_USE_OF_PRIVATE_EXTERN_CRATE, UNUSED_IMPORTS};
 use rustc_session::lint::BuiltinLintDiagnostics;
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::hygiene::LocalExpnId;
-use rustc_span::lev_distance::find_best_match_for_name;
 use rustc_span::symbol::{kw, Ident, Symbol};
 use rustc_span::Span;
 
diff --git a/compiler/rustc_resolve/src/late/diagnostics.rs b/compiler/rustc_resolve/src/late/diagnostics.rs
@@ -25,9 +25,9 @@ use rustc_middle::ty::DefIdTree;
 use rustc_session::lint;
 use rustc_session::parse::feature_err;
 use rustc_session::Session;
+use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::edition::Edition;
 use rustc_span::hygiene::MacroKind;
-use rustc_span::lev_distance::find_best_match_for_name;
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use rustc_span::{BytePos, Span};
 
@@ -542,7 +542,7 @@ impl<'a: 'ast, 'ast, 'tcx> LateResolutionVisitor<'a, '_, 'ast, 'tcx> {
             }
         }
 
-        // Try Levenshtein algorithm.
+        // Try finding a suitable replacement.
         let typo_sugg =
             self.lookup_typo_candidate(path, source.namespace(), is_expected).to_opt_suggestion();
         if path.len() == 1 && self.self_type_is_available() {
@@ -770,7 +770,7 @@ impl<'a: 'ast, 'ast, 'tcx> LateResolutionVisitor<'a, '_, 'ast, 'tcx> {
                 _ => {}
             }
 
-            // If the trait has a single item (which wasn't matched by Levenshtein), suggest it
+            // If the trait has a single item (which wasn't matched by the typo lookup), suggest it
             let suggestion = self.get_single_associated_item(&path, &source, is_expected);
             if !self.r.add_typo_suggestion(err, suggestion, ident_span) {
                 fallback = !self.let_binding_suggestion(err, ident_span);
diff --git a/compiler/rustc_span/src/edit_distance.rs b/compiler/rustc_span/src/edit_distance.rs
@@ -1,22 +1,26 @@
-//! Damerau-Levenshtein distances.
+//! Edit distances.
 //!
-//! The [Damerau-Levenshtein distance] is a metric for measuring the difference between two strings.
-//! This implementation is a restricted version of the algorithm, as it does not permit modifying
-//! characters that have already been transposed.
+//! The [edit distance] is a metric for measuring the difference between two strings.
 //!
-//! [Damerau-Levenshtein distance]: https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
+//! [edit distance]: https://en.wikipedia.org/wiki/Edit_distance
+
+// The current implementation is the restricted Damerau-Levenshtein algorithm. It is restricted
+// because it does not permit modifying characters that have already been transposed. The specific
+// algorithm should not matter to the caller of the methods, which is why it is not noted in the
+// documentation.
 
 use crate::symbol::Symbol;
 use std::{cmp, mem};
 
 #[cfg(test)]
 mod tests;
 
-/// Finds the restricted Damerau-Levenshtein distance between two strings. Characters that have
-/// already been transposed may not be modified.
+/// Finds the [edit distance] between two strings.
+///
+/// Returns `None` if the distance exceeds the limit.
 ///
-/// Returns None if the distance exceeds the limit.
-pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
+/// [edit distance]: https://en.wikipedia.org/wiki/Edit_distance
+pub fn edit_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
     let mut a = &a.chars().collect::<Vec<_>>()[..];
     let mut b = &b.chars().collect::<Vec<_>>()[..];
 
@@ -95,29 +99,29 @@ pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
 }
 
 /// Provides a word similarity score between two words that accounts for substrings being more
-/// meaningful than a typical Levenshtein distance. The lower the score, the closer the match.
-/// 0 is an identical match.
+/// meaningful than a typical edit distance. The lower the score, the closer the match. 0 is an
+/// identical match.
 ///
-/// Uses the Levenshtein distance between the two strings and removes the cost of the length
-/// difference. If this is 0 then it is either a substring match or a full word match, in the
-/// substring match case we detect this and return `1`. To prevent finding meaningless substrings,
-/// eg. "in" in "shrink", we only perform this subtraction of length difference if one of the words
-/// is not greater than twice the length of the other. For cases where the words are close in size
-/// but not an exact substring then the cost of the length difference is discounted by half.
+/// Uses the edit distance between the two strings and removes the cost of the length difference.
+/// If this is 0 then it is either a substring match or a full word match; in the substring match
+/// case we detect this and return `1`. To prevent finding meaningless substrings, e.g. "in" in
+/// "shrink", we only perform this subtraction of length difference if one of the words is not
+/// greater than twice the length of the other. For cases where the words are close in size but not
+/// an exact substring then the cost of the length difference is discounted by half.
 ///
 /// Returns `None` if the distance exceeds the limit.
-pub fn lev_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> {
+pub fn edit_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> {
     let n = a.chars().count();
     let m = b.chars().count();
 
     // Check one isn't less than half the length of the other. If this is true then there is a
     // big difference in length.
     let big_len_diff = (n * 2) < m || (m * 2) < n;
     let len_diff = if n < m { m - n } else { n - m };
-    let lev = lev_distance(a, b, limit + len_diff)?;
+    let distance = edit_distance(a, b, limit + len_diff)?;
 
     // This is the crux, subtracting length difference means exact substring matches will now be 0
-    let score = lev - len_diff;
+    let score = distance - len_diff;
 
     // If the score is 0 but the words have different lengths then it's a substring match not a full
     // word match
@@ -136,12 +140,12 @@ pub fn lev_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<us
 
 /// Finds the best match for given word in the given iterator where substrings are meaningful.
 ///
-/// A version of [`find_best_match_for_name`] that uses [`lev_distance_with_substrings`] as the score
-/// for word similarity. This takes an optional distance limit which defaults to one-third of the
-/// given word.
+/// A version of [`find_best_match_for_name`] that uses [`edit_distance_with_substrings`] as the
+/// score for word similarity. This takes an optional distance limit which defaults to one-third of
+/// the given word.
 ///
-/// Besides the modified Levenshtein, we use case insensitive comparison to improve accuracy
-/// on an edge case with a lower(upper)case letters mismatch.
+/// We use case-insensitive comparison to improve accuracy on an edge case with a lower(upper)case
+/// letter mismatch.
 pub fn find_best_match_for_name_with_substrings(
     candidates: &[Symbol],
     lookup: Symbol,
@@ -156,8 +160,8 @@ pub fn find_best_match_for_name_with_substrings(
 /// an optional limit for the maximum allowable edit distance, which defaults
 /// to one-third of the given word.
 ///
-/// Besides Levenshtein, we use case insensitive comparison to improve accuracy
-/// on an edge case with a lower(upper)case letters mismatch.
+/// We use case-insensitive comparison to improve accuracy on an edge case with a lower(upper)case
+/// letter mismatch.
 pub fn find_best_match_for_name(
     candidates: &[Symbol],
     lookup: Symbol,
@@ -178,7 +182,7 @@ fn find_best_match_for_name_impl(
 
     // Priority of matches:
     // 1. Exact case insensitive match
-    // 2. Levenshtein distance match
+    // 2. Edit distance match
     // 3. Sorted word match
     if let Some(c) = candidates.iter().find(|c| c.as_str().to_uppercase() == lookup_uppercase) {
         return Some(*c);
@@ -188,9 +192,9 @@ fn find_best_match_for_name_impl(
     let mut best = None;
     for c in candidates {
         match if use_substring_score {
-            lev_distance_with_substrings(lookup, c.as_str(), dist)
+            edit_distance_with_substrings(lookup, c.as_str(), dist)
         } else {
-            lev_distance(lookup, c.as_str(), dist)
+            edit_distance(lookup, c.as_str(), dist)
         } {
             Some(0) => return Some(*c),
             Some(d) => {
diff --git a/compiler/rustc_span/src/edit_distance/tests.rs b/compiler/rustc_span/src/edit_distance/tests.rs
@@ -1,40 +1,40 @@
 use super::*;
 
 #[test]
-fn test_lev_distance() {
+fn test_edit_distance() {
     // Test bytelength agnosticity
     for c in (0..char::MAX as u32).filter_map(char::from_u32).map(|i| i.to_string()) {
-        assert_eq!(lev_distance(&c[..], &c[..], usize::MAX), Some(0));
+        assert_eq!(edit_distance(&c[..], &c[..], usize::MAX), Some(0));
     }
 
     let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
     let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
     let c = "Mary häd ä little lämb\n\nLittle lämb\n";
-    assert_eq!(lev_distance(a, b, usize::MAX), Some(1));
-    assert_eq!(lev_distance(b, a, usize::MAX), Some(1));
-    assert_eq!(lev_distance(a, c, usize::MAX), Some(2));
-    assert_eq!(lev_distance(c, a, usize::MAX), Some(2));
-    assert_eq!(lev_distance(b, c, usize::MAX), Some(1));
-    assert_eq!(lev_distance(c, b, usize::MAX), Some(1));
+    assert_eq!(edit_distance(a, b, usize::MAX), Some(1));
+    assert_eq!(edit_distance(b, a, usize::MAX), Some(1));
+    assert_eq!(edit_distance(a, c, usize::MAX), Some(2));
+    assert_eq!(edit_distance(c, a, usize::MAX), Some(2));
+    assert_eq!(edit_distance(b, c, usize::MAX), Some(1));
+    assert_eq!(edit_distance(c, b, usize::MAX), Some(1));
 }
 
 #[test]
-fn test_lev_distance_limit() {
-    assert_eq!(lev_distance("abc", "abcd", 1), Some(1));
-    assert_eq!(lev_distance("abc", "abcd", 0), None);
-    assert_eq!(lev_distance("abc", "xyz", 3), Some(3));
-    assert_eq!(lev_distance("abc", "xyz", 2), None);
+fn test_edit_distance_limit() {
+    assert_eq!(edit_distance("abc", "abcd", 1), Some(1));
+    assert_eq!(edit_distance("abc", "abcd", 0), None);
+    assert_eq!(edit_distance("abc", "xyz", 3), Some(3));
+    assert_eq!(edit_distance("abc", "xyz", 2), None);
 }
 
 #[test]
 fn test_method_name_similarity_score() {
-    assert_eq!(lev_distance_with_substrings("empty", "is_empty", 1), Some(1));
-    assert_eq!(lev_distance_with_substrings("shrunk", "rchunks", 2), None);
-    assert_eq!(lev_distance_with_substrings("abc", "abcd", 1), Some(1));
-    assert_eq!(lev_distance_with_substrings("a", "abcd", 1), None);
-    assert_eq!(lev_distance_with_substrings("edf", "eq", 1), None);
-    assert_eq!(lev_distance_with_substrings("abc", "xyz", 3), Some(3));
-    assert_eq!(lev_distance_with_substrings("abcdef", "abcdef", 2), Some(0));
+    assert_eq!(edit_distance_with_substrings("empty", "is_empty", 1), Some(1));
+    assert_eq!(edit_distance_with_substrings("shrunk", "rchunks", 2), None);
+    assert_eq!(edit_distance_with_substrings("abc", "abcd", 1), Some(1));
+    assert_eq!(edit_distance_with_substrings("a", "abcd", 1), None);
+    assert_eq!(edit_distance_with_substrings("edf", "eq", 1), None);
+    assert_eq!(edit_distance_with_substrings("abc", "xyz", 3), Some(3));
+    assert_eq!(edit_distance_with_substrings("abcdef", "abcdef", 2), Some(0));
 }
 
 #[test]
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
@@ -47,7 +47,7 @@ pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext};
 use rustc_data_structures::stable_hasher::HashingControls;
 pub mod def_id;
 use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE};
-pub mod lev_distance;
+pub mod edit_distance;
 mod span_encoding;
 pub use span_encoding::{Span, DUMMY_SP};