Skip to content

Commit 0b098ce

Browse files
author
snsmac
committed
Syntax: Make \p{Sc} work
'sc' is the abbreviation for the 'Currency_Symbol' general category, but it is also the abbreviation for the 'Script' property. Fixes rust-lang#835. Related: rust-lang#719. b1489c8
1 parent 159a63c commit 0b098ce

File tree

2 files changed

+36
-33
lines changed

2 files changed

+36
-33
lines changed

regex-syntax/src/unicode.rs

+35-33
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,11 @@ impl fmt::Display for UnicodeWordError {
8080
/// This returns an error if the Unicode case folding tables are not available.
8181
pub fn simple_fold(
8282
c: char,
83-
) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>> {
83+
) -> FoldResult<result::Result<impl Iterator<Item=char>, Option<char>>> {
8484
#[cfg(not(feature = "unicode-case"))]
8585
fn imp(
8686
_: char,
87-
) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
87+
) -> FoldResult<result::Result<impl Iterator<Item=char>, Option<char>>>
8888
{
8989
use std::option::IntoIter;
9090
Err::<result::Result<IntoIter<char>, _>, _>(CaseFoldError(()))
@@ -93,7 +93,7 @@ pub fn simple_fold(
9393
#[cfg(feature = "unicode-case")]
9494
fn imp(
9595
c: char,
96-
) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
96+
) -> FoldResult<result::Result<impl Iterator<Item=char>, Option<char>>>
9797
{
9898
use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
9999

@@ -220,7 +220,7 @@ impl<'a> ClassQuery<'a> {
220220
let canon_val =
221221
match canonical_value(vals, &property_value) {
222222
None => {
223-
return Err(Error::PropertyValueNotFound)
223+
return Err(Error::PropertyValueNotFound);
224224
}
225225
Some(canon_val) => canon_val,
226226
};
@@ -243,7 +243,9 @@ impl<'a> ClassQuery<'a> {
243243
// a general category. (Currently, we don't even support the
244244
// 'Case_Folding' property. But if we do in the future, users will be
245245
// required to spell it out.)
246-
if norm != "cf" {
246+
// Likewise, 'sc' abbreviates both the 'Currency_Symbol' general category
247+
// and the 'Script' property, so it is excluded from this lookup as well.
248+
if norm != "cf" && norm != "sc" {
247249
if let Some(canon) = canonical_prop(&norm)? {
248250
return Ok(CanonicalClassQuery::Binary(canon));
249251
}
@@ -462,24 +464,24 @@ fn canonical_script(normalized_value: &str) -> Result<Option<&'static str>> {
462464
/// If the property names data is not available, then an error is returned.
463465
fn canonical_prop(normalized_name: &str) -> Result<Option<&'static str>> {
464466
#[cfg(not(any(
465-
feature = "unicode-age",
466-
feature = "unicode-bool",
467-
feature = "unicode-gencat",
468-
feature = "unicode-perl",
469-
feature = "unicode-script",
470-
feature = "unicode-segment",
467+
feature = "unicode-age",
468+
feature = "unicode-bool",
469+
feature = "unicode-gencat",
470+
feature = "unicode-perl",
471+
feature = "unicode-script",
472+
feature = "unicode-segment",
471473
)))]
472474
fn imp(_: &str) -> Result<Option<&'static str>> {
473475
Err(Error::PropertyNotFound)
474476
}
475477

476478
#[cfg(any(
477-
feature = "unicode-age",
478-
feature = "unicode-bool",
479-
feature = "unicode-gencat",
480-
feature = "unicode-perl",
481-
feature = "unicode-script",
482-
feature = "unicode-segment",
479+
feature = "unicode-age",
480+
feature = "unicode-bool",
481+
feature = "unicode-gencat",
482+
feature = "unicode-perl",
483+
feature = "unicode-script",
484+
feature = "unicode-segment",
483485
))]
484486
fn imp(name: &str) -> Result<Option<&'static str>> {
485487
use crate::unicode_tables::property_names::PROPERTY_NAMES;
@@ -519,24 +521,24 @@ fn property_values(
519521
canonical_property_name: &'static str,
520522
) -> Result<Option<PropertyValues>> {
521523
#[cfg(not(any(
522-
feature = "unicode-age",
523-
feature = "unicode-bool",
524-
feature = "unicode-gencat",
525-
feature = "unicode-perl",
526-
feature = "unicode-script",
527-
feature = "unicode-segment",
524+
feature = "unicode-age",
525+
feature = "unicode-bool",
526+
feature = "unicode-gencat",
527+
feature = "unicode-perl",
528+
feature = "unicode-script",
529+
feature = "unicode-segment",
528530
)))]
529531
fn imp(_: &'static str) -> Result<Option<PropertyValues>> {
530532
Err(Error::PropertyValueNotFound)
531533
}
532534

533535
#[cfg(any(
534-
feature = "unicode-age",
535-
feature = "unicode-bool",
536-
feature = "unicode-gencat",
537-
feature = "unicode-perl",
538-
feature = "unicode-script",
539-
feature = "unicode-segment",
536+
feature = "unicode-age",
537+
feature = "unicode-bool",
538+
feature = "unicode-gencat",
539+
feature = "unicode-perl",
540+
feature = "unicode-script",
541+
feature = "unicode-segment",
540542
))]
541543
fn imp(name: &'static str) -> Result<Option<PropertyValues>> {
542544
use crate::unicode_tables::property_values::PROPERTY_VALUES;
@@ -569,15 +571,15 @@ fn property_set(
569571
///
570572
/// If the given age value isn't valid or if the data isn't available, then an
571573
/// error is returned instead.
572-
fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
574+
fn ages(canonical_age: &str) -> Result<impl Iterator<Item=Range>> {
573575
#[cfg(not(feature = "unicode-age"))]
574-
fn imp(_: &str) -> Result<impl Iterator<Item = Range>> {
576+
fn imp(_: &str) -> Result<impl Iterator<Item=Range>> {
575577
use std::option::IntoIter;
576578
Err::<IntoIter<Range>, _>(Error::PropertyNotFound)
577579
}
578580

579581
#[cfg(feature = "unicode-age")]
580-
fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
582+
fn imp(canonical_age: &str) -> Result<impl Iterator<Item=Range>> {
581583
use crate::unicode_tables::age;
582584

583585
const AGES: &[(&str, Range)] = &[
@@ -878,7 +880,7 @@ mod tests {
878880
};
879881

880882
#[cfg(feature = "unicode-case")]
881-
fn simple_fold_ok(c: char) -> impl Iterator<Item = char> {
883+
fn simple_fold_ok(c: char) -> impl Iterator<Item=char> {
882884
simple_fold(c).unwrap().unwrap()
883885
}
884886

tests/unicode.rs

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
7777
// See: https://github.com/rust-lang/regex/issues/719
7878
mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4)));
7979
mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4)));
80+
mat!(uni_class_gencat_format_abbrev3, r"\p{Sc}", "$", Some((0, 1)));
8081
mat!(
8182
uni_class_gencat_initial_punctuation,
8283
r"\p{Initial_Punctuation}",

0 commit comments

Comments
 (0)