@@ -1075,16 +1075,17 @@ impl char {
10751075 }
10761076
10771077 /// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
1078- /// is used to implement context-dependent casing for the Greek letter sigma (uppercase Σ ),
1078+ /// is used to implement context-dependent casing for the Greek letter sigma (uppercase 'Σ'),
10791079 /// which has two lowercase forms.
10801080 ///
10811081 /// `Case_Ignorable` is [described][D136] in Chapter 3 (Conformance) of the Unicode Core Specification,
1082- /// and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`];
1083- /// see those resources for more information.
1082+ /// and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
1083+ /// See those resources, as well as [`to_lowercase()`]'s documentation, for more information.
10841084 ///
10851085 /// [D136]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
10861086 /// [ucd]: https://www.unicode.org/reports/tr44/
10871087 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1088+ /// [`to_lowercase()`]: Self::to_lowercase()
10881089 #[ must_use]
10891090 #[ inline]
10901091 #[ unstable( feature = "case_ignorable" , issue = "154848" ) ]
@@ -1154,8 +1155,6 @@ impl char {
11541155 /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
11551156 /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
11561157 ///
1157- /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1158- ///
11591158 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
11601159 /// is independent of context and language. See [below](#notes-on-context-and-locale)
11611160 /// for more information.
@@ -1210,28 +1209,39 @@ impl char {
12101209 ///
12111210 /// ## Greek sigma
12121211 ///
1213- /// In Greek, the letter simga (uppercase Σ ) has two lowercase forms:
1214- /// ς which is used only at the end of a word , and σ which is used everywhere else.
1215- /// ` to_lowercase()` always uses the second form:
1212+ /// In Greek, the letter sigma (uppercase 'Σ') has two lowercase forms:
1213+ /// 'σ' which is used in most situations, and 'ς' which appears only
1214+ /// at the end of a word. [`char::to_lowercase()`] always uses the first form:
12161215 ///
12171216 /// ```
12181217 /// assert_eq!('Σ'.to_lowercase().to_string(), "σ");
12191218 /// ```
12201219 ///
1220+ /// `str::to_lowercase()` (only available with the `alloc` crate)
1221+ /// *does* properly handle this contextual mapping,
1222+ /// so prefer using that method if you can. Alternatively, you can use
1223+ /// [`is_cased()`] and [`is_case_ignorable()`] to implement it yourself.
1224+ /// See `Final_Sigma` in [Table 3.17] of the Unicode Standard,
1225+ /// along with [`SpecialCasing.txt`], for more details.
1226+ ///
1227+ /// [`is_cased()`]: Self::is_cased()
1228+ /// [`is_case_ignorable()`]: Self::is_case_ignorable()
1229+ /// [Table 3.17]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G54277
1230+ ///
12211231 /// ## Turkish and Azeri I/ı/İ/i
12221232 ///
12231233 /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
12241234 ///
12251235 /// * 'Dotless': I / ı, sometimes written ï
12261236 /// * 'Dotted': İ / i
12271237 ///
1228- /// Note that the uppercase undotted 'I' is the same as the Latin. Therefore:
1238+ /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
12291239 ///
12301240 /// ```
12311241 /// let lower_i = 'I'.to_lowercase().to_string();
12321242 /// ```
12331243 ///
1234- /// The value of `lower_i` here relies on the language of the text: if we're
1244+ /// `'I'`'s correct lowercase relies on the language of the text: if we're
12351245 /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
12361246 /// be `"ı"`. `to_lowercase()` does not take this into account, and so:
12371247 ///
@@ -1242,6 +1252,8 @@ impl char {
12421252 /// ```
12431253 ///
12441254 /// holds across languages.
1255+ ///
1256+ /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
12451257 #[ must_use = "this returns the lowercased character as a new iterator, \
12461258 without modifying the original"]
12471259 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
@@ -1392,22 +1404,22 @@ impl char {
13921404 /// As stated above, this method is locale-insensitive.
13931405 /// If you need locale support, consider using an external crate,
13941406 /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1395- /// which is developed by Unicode. A description of a common
1396- /// locale-dependent casing issue follows:
1407+ /// which is developed by Unicode. A description of one common
1408+ /// locale-dependent casing issue follows (there are others):
13971409 ///
13981410 /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
13991411 ///
14001412 /// * 'Dotless': I / ı, sometimes written ï
14011413 /// * 'Dotted': İ / i
14021414 ///
1403- /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1415+ /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
14041416 ///
14051417 /// ```
14061418 /// #![feature(titlecase)]
14071419 /// let upper_i = 'i'.to_titlecase().to_string();
14081420 /// ```
14091421 ///
1410- /// The value of `upper_i` here relies on the language of the text: if we're
1422+ /// `'i'`'s correct titlecase relies on the language of the text: if we're
14111423 /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
14121424 /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
14131425 ///
@@ -1504,21 +1516,21 @@ impl char {
15041516 /// As stated above, this method is locale-insensitive.
15051517 /// If you need locale support, consider using an external crate,
15061518 /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1507- /// which is developed by Unicode. A description of a common
1508- /// locale-dependent casing issue follows:
1519+ /// which is developed by Unicode. A description of one common
1520+ /// locale-dependent casing issue follows (there are others):
15091521 ///
15101522 /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
15111523 ///
15121524 /// * 'Dotless': I / ı, sometimes written ï
15131525 /// * 'Dotted': İ / i
15141526 ///
1515- /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1527+ /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
15161528 ///
15171529 /// ```
15181530 /// let upper_i = 'i'.to_uppercase().to_string();
15191531 /// ```
15201532 ///
1521- /// The value of `upper_i` here relies on the language of the text: if we're
1533+ /// `'i'`'s correct uppercase relies on the language of the text: if we're
15221534 /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
15231535 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
15241536 ///
@@ -1542,20 +1554,23 @@ impl char {
15421554 /// Returns an iterator that yields the case folding of this `char` as one or more
15431555 /// `char`s.
15441556 ///
1545- /// Case folding is meant to be used when performing case-insensitive string comparisons,
1546- /// but case -folded strings should not generally be exposed directly to users. For most,
1557+ /// Case folding is meant to be used when performing case-insensitive string comparisons.
1558+ /// Case-folded strings should not usually be exposed directly to users. For most,
15471559 /// but not all, characters, the casefold mapping is identical to the lowercase one.
15481560 ///
15491561 /// This iterator yields the `char`(s) in the common or full case folding for this `char`,
15501562 /// as given by the [Unicode Character Database][ucd] [`CaseFolding.txt`].
1563+ /// The maximum number of `char`s in a case folding is 3.
15511564 ///
15521565 /// [ucd]: https://www.unicode.org/reports/tr44/
15531566 /// [`CaseFolding.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
15541567 ///
15551568 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
15561569 /// is independent of context and language.
15571570 ///
1558- /// It also does not perform any normalization (e.g. NFC).
1571+ /// It also does not perform any [normalization] (e.g. NFC).
1572+ ///
1573+ /// [normalization]: https://www.unicode.org/faq/normalization
15591574 ///
15601575 /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case folding in
15611576 /// general and Chapter 3 (Conformance) discusses the default algorithm for case folding.
@@ -1591,14 +1606,14 @@ impl char {
15911606 /// * 'Dotless': I / ı, sometimes written ï
15921607 /// * 'Dotted': İ / i
15931608 ///
1594- /// Note that the uppercase undotted 'I' is the same as the Latin. Therefore:
1609+ /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
15951610 ///
15961611 /// ```
15971612 /// #![feature(casefold)]
15981613 /// let casefold_i = 'I'.to_casefold().to_string();
15991614 /// ```
16001615 ///
1601- /// The value of `casefold_i` here relies on the language of the text: if we're
1616+ /// `'I'`'s correct case folding relies on the language of the text: if we're
16021617 /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
16031618 /// be `"ı"`. `to_casefold()` does not take this into account, and so:
16041619 ///
0 commit comments