Skip to content

Commit dd25c4f

Browse files
Address review comments
1 parent 66b91bd commit dd25c4f

2 files changed

Lines changed: 43 additions & 25 deletions

File tree

library/alloc/src/str.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,8 @@ impl str {
510510
/// Since some characters can expand into multiple characters when case folding,
511511
/// this function returns a [`String`] instead of modifying the parameter in-place.
512512
///
513-
/// This function does not perform any normalization (e.g. NFC).
513+
/// This function does not perform any [normalization] (e.g. NFC),
514+
/// so semantically and visually identical strings may compare unequal.
514515
///
515516
/// Like [`char::to_casefold()`] this method does not handle language-specific
516517
/// casing, like Turkish and Azeri I/ı/İ/i. See that method's documentation
@@ -551,7 +552,7 @@ impl str {
551552
/// assert_eq!(s0.to_casefold(), "tschüss");
552553
/// ```
553554
///
554-
/// No NFC normalization is performed:
555+
/// No NFC [normalization] is performed:
555556
///
556557
/// ```rust
557558
/// #![feature(casefold)]
@@ -569,6 +570,8 @@ impl str {
569570
/// assert_eq!(comp.to_casefold(), "\u{E1}");
570571
/// assert_eq!(decomp.to_casefold(), "a\u{0301}");
571572
/// ```
573+
///
574+
/// [normalization]: https://www.unicode.org/faq/normalization
572575
#[cfg(not(no_global_oom_handling))]
573576
#[rustc_allow_incoherent_impl]
574577
#[must_use = "this returns the case-folded string as a new String, \

library/core/src/char/methods.rs

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,16 +1075,17 @@ impl char {
10751075
}
10761076

10771077
/// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
1078-
/// is used to implement context-dependent casing for the Greek letter sigma (uppercase Σ),
1078+
/// is used to implement context-dependent casing for the Greek letter sigma (uppercase 'Σ'),
10791079
/// which has two lowercase forms.
10801080
///
10811081
/// `Case_Ignorable` is [described][D136] in Chapter 3 (Conformance) of the Unicode Core Specification,
1082-
/// and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`];
1083-
/// see those resources for more information.
1082+
/// and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
1083+
/// See those resources, as well as [`to_lowercase()`]'s documentation, for more information.
10841084
///
10851085
/// [D136]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
10861086
/// [ucd]: https://www.unicode.org/reports/tr44/
10871087
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1088+
/// [`to_lowercase()`]: Self::to_lowercase()
10881089
#[must_use]
10891090
#[inline]
10901091
#[unstable(feature = "case_ignorable", issue = "154848")]
@@ -1154,8 +1155,6 @@ impl char {
11541155
/// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
11551156
/// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
11561157
///
1157-
/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1158-
///
11591158
/// This operation performs an unconditional mapping without tailoring. That is, the conversion
11601159
/// is independent of context and language. See [below](#notes-on-context-and-locale)
11611160
/// for more information.
@@ -1210,28 +1209,39 @@ impl char {
12101209
///
12111210
/// ## Greek sigma
12121211
///
1213-
/// In Greek, the letter simga (uppercase Σ) has two lowercase forms:
1214-
/// ς which is used only at the end of a word, and σ which is used everywhere else.
1215-
/// `to_lowercase()` always uses the second form:
1212+
/// In Greek, the letter sigma (uppercase 'Σ') has two lowercase forms:
1213+
/// 'σ' which is used in most situations, and 'ς' which appears only
1214+
/// at the end of a word. [`char::to_lowercase()`] always uses the first form:
12161215
///
12171216
/// ```
12181217
/// assert_eq!('Σ'.to_lowercase().to_string(), "σ");
12191218
/// ```
12201219
///
1220+
/// `str::to_lowercase()` (only available with the `alloc` crate)
1221+
/// *does* properly handle this contextual mapping,
1222+
/// so prefer using that method if you can. Alternatively, you can use
1223+
/// [`is_cased()`] and [`is_case_ignorable()`] to implement it yourself.
1224+
/// See `Final_Sigma` in [Table 3.17] of the Unicode Standard,
1225+
/// along with [`SpecialCasing.txt`], for more details.
1226+
///
1227+
/// [`is_cased()`]: Self::is_cased()
1228+
/// [`is_case_ignorable()`]: Self::is_case_ignorable()
1229+
/// [Table 3.17]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G54277
1230+
///
12211231
/// ## Turkish and Azeri I/ı/İ/i
12221232
///
12231233
/// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
12241234
///
12251235
/// * 'Dotless': I / ı, sometimes written ï
12261236
/// * 'Dotted': İ / i
12271237
///
1228-
/// Note that the uppercase undotted 'I' is the same as the Latin. Therefore:
1238+
/// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
12291239
///
12301240
/// ```
12311241
/// let lower_i = 'I'.to_lowercase().to_string();
12321242
/// ```
12331243
///
1234-
/// The value of `lower_i` here relies on the language of the text: if we're
1244+
/// The correct lowercase of `'I'` depends on the language of the text: if we're
12351245
/// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
12361246
/// be `"ı"`. `to_lowercase()` does not take this into account, and so:
12371247
///
@@ -1242,6 +1252,8 @@ impl char {
12421252
/// ```
12431253
///
12441254
/// holds across languages.
1255+
///
1256+
/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
12451257
#[must_use = "this returns the lowercased character as a new iterator, \
12461258
without modifying the original"]
12471259
#[stable(feature = "rust1", since = "1.0.0")]
@@ -1392,22 +1404,22 @@ impl char {
13921404
/// As stated above, this method is locale-insensitive.
13931405
/// If you need locale support, consider using an external crate,
13941406
/// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1395-
/// which is developed by Unicode. A description of a common
1396-
/// locale-dependent casing issue follows:
1407+
/// which is developed by Unicode. A description of one common
1408+
/// locale-dependent casing issue follows (there are others):
13971409
///
13981410
/// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
13991411
///
14001412
/// * 'Dotless': I / ı, sometimes written ï
14011413
/// * 'Dotted': İ / i
14021414
///
1403-
/// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1415+
/// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
14041416
///
14051417
/// ```
14061418
/// #![feature(titlecase)]
14071419
/// let upper_i = 'i'.to_titlecase().to_string();
14081420
/// ```
14091421
///
1410-
/// The value of `upper_i` here relies on the language of the text: if we're
1422+
/// The correct titlecase of `'i'` depends on the language of the text: if we're
14111423
/// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
14121424
/// be `"İ"`. `to_titlecase()` does not take this into account, and so:
14131425
///
@@ -1504,21 +1516,21 @@ impl char {
15041516
/// As stated above, this method is locale-insensitive.
15051517
/// If you need locale support, consider using an external crate,
15061518
/// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1507-
/// which is developed by Unicode. A description of a common
1508-
/// locale-dependent casing issue follows:
1519+
/// which is developed by Unicode. A description of one common
1520+
/// locale-dependent casing issue follows (there are others):
15091521
///
15101522
/// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
15111523
///
15121524
/// * 'Dotless': I / ı, sometimes written ï
15131525
/// * 'Dotted': İ / i
15141526
///
1515-
/// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1527+
/// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
15161528
///
15171529
/// ```
15181530
/// let upper_i = 'i'.to_uppercase().to_string();
15191531
/// ```
15201532
///
1521-
/// The value of `upper_i` here relies on the language of the text: if we're
1533+
/// The correct uppercase of `'i'` depends on the language of the text: if we're
15221534
/// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
15231535
/// be `"İ"`. `to_uppercase()` does not take this into account, and so:
15241536
///
@@ -1542,20 +1554,23 @@ impl char {
15421554
/// Returns an iterator that yields the case folding of this `char` as one or more
15431555
/// `char`s.
15441556
///
1545-
/// Case folding is meant to be used when performing case-insensitive string comparisons,
1546-
/// but case-folded strings should not generally be exposed directly to users. For most,
1557+
/// Case folding is meant to be used when performing case-insensitive string comparisons.
1558+
/// Case-folded strings should not usually be exposed directly to users. For most,
15471559
/// but not all, characters, the casefold mapping is identical to the lowercase one.
15481560
///
15491561
/// This iterator yields the `char`(s) in the common or full case folding for this `char`,
15501562
/// as given by the [Unicode Character Database][ucd] [`CaseFolding.txt`].
1563+
/// The maximum number of `char`s in a case folding is 3.
15511564
///
15521565
/// [ucd]: https://www.unicode.org/reports/tr44/
15531566
/// [`CaseFolding.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
15541567
///
15551568
/// This operation performs an unconditional mapping without tailoring. That is, the conversion
15561569
/// is independent of context and language.
15571570
///
1558-
/// It also does not perform any normalization (e.g. NFC).
1571+
/// It also does not perform any [normalization] (e.g. NFC).
1572+
///
1573+
/// [normalization]: https://www.unicode.org/faq/normalization
15591574
///
15601575
/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case folding in
15611576
/// general and Chapter 3 (Conformance) discusses the default algorithm for case folding.
@@ -1591,14 +1606,14 @@ impl char {
15911606
/// * 'Dotless': I / ı, sometimes written ï
15921607
/// * 'Dotted': İ / i
15931608
///
1594-
/// Note that the uppercase undotted 'I' is the same as the Latin. Therefore:
1609+
/// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
15951610
///
15961611
/// ```
15971612
/// #![feature(casefold)]
15981613
/// let casefold_i = 'I'.to_casefold().to_string();
15991614
/// ```
16001615
///
1601-
/// The value of `casefold_i` here relies on the language of the text: if we're
1616+
/// The correct case folding of `'I'` depends on the language of the text: if we're
16021617
/// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
16031618
/// be `"ı"`. `to_casefold()` does not take this into account, and so:
16041619
///

0 commit comments

Comments
 (0)