Skip to content

Commit d8c2956

Browse files
author
BO41
committed
Improve docs on some char boolean methods
1 parent 0358617 commit d8c2956

File tree

1 file changed

+109
-63
lines changed

1 file changed

+109
-63
lines changed

src/libcore/char/methods.rs

+109-63
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,9 @@ impl char {
116116

117117
// the code is split up here to improve execution speed for cases where
118118
// the `radix` is constant and 10 or smaller
119-
let val = if radix <= 10 {
119+
let val = if radix <= 10 {
120120
match self {
121-
'0' ..= '9' => self as u32 - '0' as u32,
121+
'0'..='9' => self as u32 - '0' as u32,
122122
_ => return None,
123123
}
124124
} else {
@@ -130,8 +130,11 @@ impl char {
130130
}
131131
};
132132

133-
if val < radix { Some(val) }
134-
else { None }
133+
if val < radix {
134+
Some(val)
135+
} else {
136+
None
137+
}
135138
}
136139

137140
/// Returns an iterator that yields the hexadecimal Unicode escape of a
@@ -303,8 +306,8 @@ impl char {
303306
'\r' => EscapeDefaultState::Backslash('r'),
304307
'\n' => EscapeDefaultState::Backslash('n'),
305308
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
306-
'\x20' ..= '\x7e' => EscapeDefaultState::Char(self),
307-
_ => EscapeDefaultState::Unicode(self.escape_unicode())
309+
'\x20'..='\x7e' => EscapeDefaultState::Char(self),
310+
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
308311
};
309312
EscapeDefault { state: init_state }
310313
}
@@ -436,30 +439,31 @@ impl char {
436439
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
437440
let code = self as u32;
438441
unsafe {
439-
let len =
440-
if code < MAX_ONE_B && !dst.is_empty() {
442+
let len = if code < MAX_ONE_B && !dst.is_empty() {
441443
*dst.get_unchecked_mut(0) = code as u8;
442444
1
443445
} else if code < MAX_TWO_B && dst.len() >= 2 {
444446
*dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
445447
*dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
446448
2
447-
} else if code < MAX_THREE_B && dst.len() >= 3 {
449+
} else if code < MAX_THREE_B && dst.len() >= 3 {
448450
*dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
449-
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
451+
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
450452
*dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
451453
3
452454
} else if dst.len() >= 4 {
453455
*dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
454456
*dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
455-
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
457+
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
456458
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
457459
4
458460
} else {
459-
panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
461+
panic!(
462+
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
460463
from_u32_unchecked(code).len_utf8(),
461464
code,
462-
dst.len())
465+
dst.len(),
466+
)
463467
};
464468
from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
465469
}
@@ -515,15 +519,24 @@ impl char {
515519
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
516520
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
517521
} else {
518-
panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
522+
panic!(
523+
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
519524
from_u32_unchecked(code).len_utf16(),
520525
code,
521-
dst.len())
526+
dst.len(),
527+
)
522528
}
523529
}
524530
}
525531

526-
/// Returns `true` if this `char` is an alphabetic code point, and false if not.
532+
/// Returns `true` if this `char` has the `Alphabetic` property.
533+
///
534+
/// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
535+
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
536+
///
537+
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
538+
/// [ucd]: https://www.unicode.org/reports/tr44/
539+
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
527540
///
528541
/// # Examples
529542
///
@@ -547,10 +560,14 @@ impl char {
547560
}
548561
}
549562

550-
/// Returns `true` if this `char` is lowercase.
563+
/// Returns `true` if this `char` has the `Lowercase` property.
551564
///
552-
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core
553-
/// Property `Lowercase`.
565+
/// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
566+
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
567+
///
568+
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
569+
/// [ucd]: https://www.unicode.org/reports/tr44/
570+
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
554571
///
555572
/// # Examples
556573
///
@@ -575,10 +592,14 @@ impl char {
575592
}
576593
}
577594

578-
/// Returns `true` if this `char` is uppercase.
595+
/// Returns `true` if this `char` has the `Uppercase` property.
596+
///
597+
/// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
598+
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
579599
///
580-
/// 'Uppercase' is defined according to the terms of the Unicode Derived Core
581-
/// Property `Uppercase`.
600+
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
601+
/// [ucd]: https://www.unicode.org/reports/tr44/
602+
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
582603
///
583604
/// # Examples
584605
///
@@ -603,10 +624,12 @@ impl char {
603624
}
604625
}
605626

606-
/// Returns `true` if this `char` is whitespace.
627+
/// Returns `true` if this `char` has the `White_Space` property.
607628
///
608-
/// 'Whitespace' is defined according to the terms of the Unicode Derived Core
609-
/// Property `White_Space`.
629+
/// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
630+
///
631+
/// [ucd]: https://www.unicode.org/reports/tr44/
632+
/// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
610633
///
611634
/// # Examples
612635
///
@@ -630,10 +653,10 @@ impl char {
630653
}
631654
}
632655

633-
/// Returns `true` if this `char` is alphanumeric.
656+
/// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
634657
///
635-
/// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
636-
/// `Nd`, `Nl`, `No` and the Derived Core Property `Alphabetic`.
658+
/// [`is_alphabetic()`]: #method.is_alphabetic
659+
/// [`is_numeric()`]: #method.is_numeric
637660
///
638661
/// # Examples
639662
///
@@ -655,10 +678,15 @@ impl char {
655678
self.is_alphabetic() || self.is_numeric()
656679
}
657680

658-
/// Returns `true` if this `char` is a control code point.
681+
/// Returns `true` if this `char` has the general category for control codes.
682+
///
683+
/// Control codes (code points with the general category of `Cc`) are described in Chapter 4
684+
/// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
685+
/// Database][ucd] [`UnicodeData.txt`].
659686
///
660-
/// 'Control code point' is defined in terms of the Unicode General
661-
/// Category `Cc`.
687+
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
688+
/// [ucd]: https://www.unicode.org/reports/tr44/
689+
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
662690
///
663691
/// # Examples
664692
///
@@ -675,19 +703,29 @@ impl char {
675703
general_category::Cc(self)
676704
}
677705

678-
/// Returns `true` if this `char` is an extended grapheme character.
706+
/// Returns `true` if this `char` has the `Grapheme_Extend` property.
679707
///
680-
/// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering
681-
/// Category `Grapheme_Extend`.
708+
/// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
709+
/// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
710+
/// [`DerivedCoreProperties.txt`].
711+
///
712+
/// [uax29]: https://www.unicode.org/reports/tr29/
713+
/// [ucd]: https://www.unicode.org/reports/tr44/
714+
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
682715
#[inline]
683716
pub(crate) fn is_grapheme_extended(self) -> bool {
684717
derived_property::Grapheme_Extend(self)
685718
}
686719

687-
/// Returns `true` if this `char` is numeric.
720+
/// Returns `true` if this `char` has one of the general categories for numbers.
721+
///
722+
/// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
723+
/// characters, and `No` for other numeric characters) are specified in the [Unicode Character
724+
/// Database][ucd] [`UnicodeData.txt`].
688725
///
689-
/// 'Numeric'-ness is defined in terms of the Unicode General Categories
690-
/// `Nd`, `Nl`, `No`.
726+
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
727+
/// [ucd]: https://www.unicode.org/reports/tr44/
728+
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
691729
///
692730
/// # Examples
693731
///
@@ -713,25 +751,29 @@ impl char {
713751
}
714752
}
715753

716-
/// Returns an iterator that yields the lowercase equivalent of a `char`
717-
/// as one or more `char`s.
754+
/// Returns an iterator that yields the lowercase mapping of this `char` as one or more
755+
/// `char`s.
718756
///
719-
/// If a character does not have a lowercase equivalent, the same character
720-
/// will be returned back by the iterator.
757+
/// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
721758
///
722-
/// This performs complex unconditional mappings with no tailoring: it maps
723-
/// one Unicode character to its lowercase equivalent according to the
724-
/// [Unicode database] and the additional complex mappings
725-
/// [`SpecialCasing.txt`]. Conditional mappings (based on context or
726-
/// language) are not considered here.
759+
/// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
760+
/// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
727761
///
728-
/// For a full reference, see [here][reference].
762+
/// [ucd]: https://www.unicode.org/reports/tr44/
763+
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
729764
///
730-
/// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
765+
/// If this `char` requires special considerations (e.g. multiple `char`s), the iterator yields
766+
/// the `char`(s) given by [`SpecialCasing.txt`].
731767
///
732-
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
768+
/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
733769
///
734-
/// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
770+
/// This operation performs an unconditional mapping without tailoring. That is, the conversion
771+
/// is independent of context and language.
772+
///
773+
/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
774+
/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
775+
///
776+
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
735777
///
736778
/// # Examples
737779
///
@@ -774,25 +816,29 @@ impl char {
774816
ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
775817
}
776818

777-
/// Returns an iterator that yields the uppercase equivalent of a `char`
778-
/// as one or more `char`s.
819+
/// Returns an iterator that yields the uppercase mapping of this `char` as one or more
820+
/// `char`s.
821+
///
822+
/// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
823+
///
824+
/// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
825+
/// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
779826
///
780-
/// If a character does not have an uppercase equivalent, the same character
781-
/// will be returned back by the iterator.
827+
/// [ucd]: https://www.unicode.org/reports/tr44/
828+
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
782829
///
783-
/// This performs complex unconditional mappings with no tailoring: it maps
784-
/// one Unicode character to its uppercase equivalent according to the
785-
/// [Unicode database] and the additional complex mappings
786-
/// [`SpecialCasing.txt`]. Conditional mappings (based on context or
787-
/// language) are not considered here.
830+
/// If this `char` requires special considerations (e.g. multiple `char`s), the iterator yields
831+
/// the `char`(s) given by [`SpecialCasing.txt`].
788832
///
789-
/// For a full reference, see [here][reference].
833+
/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
790834
///
791-
/// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
835+
/// This operation performs an unconditional mapping without tailoring. That is, the conversion
836+
/// is independent of context and language.
792837
///
793-
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
838+
/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
839+
/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
794840
///
795-
/// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
841+
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
796842
///
797843
/// # Examples
798844
///

0 commit comments

Comments
 (0)