From 52423fcadb56e92364107ce183c2715a181a08af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Mon, 30 Jan 2023 07:50:36 -0800 Subject: [PATCH 1/2] ucptest: regenerate testoutput Last sync with 1a5fcd (Remove unused variables in ucptest.c and update test data for added properties, 2022-04-25), and showing significant differences. --- maint/ucptestdata/testoutput1 | 16 +- maint/ucptestdata/testoutput2 | 318 +++++++++++++++++++++++++--------- 2 files changed, 246 insertions(+), 88 deletions(-) diff --git a/maint/ucptestdata/testoutput1 b/maint/ucptestdata/testoutput1 index 469be7dc7..c4d461855 100644 --- a/maint/ucptestdata/testoutput1 +++ b/maint/ucptestdata/testoutput1 @@ -300,18 +300,18 @@ U+FFED ON Symbol: Other symbol, common, Other, [alphabetic, caseignorable, exte U+FFEE ON Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart] U+FFEF L Control: Unassigned, unknown, Other findprop fff8 fff9 fffa fffb fffc fffd fffe ffff -U+FFF8 BN Control: Unassigned, unknown, Control, [dash, defaultignorablecodepoint, deprecated, extendedpictographic, joincontrol, lowercase, patternwhitespace, quotationmark, sentenceterminal, softdotted, xidcontinue, xidstart] -U+FFF9 ON Control: Format, common, Control, [changeswhenuppercased, deprecated, emojimodifier, emojipresentation, extender, sentenceterminal, xidcontinue, xidstart] -U+FFFA ON Control: Format, common, Control, [changeswhenuppercased, deprecated, emojimodifier, emojipresentation, extender, sentenceterminal, xidcontinue, xidstart] -U+FFFB ON Control: Format, common, Control, [changeswhenuppercased, deprecated, emojimodifier, emojipresentation, extender, sentenceterminal, xidcontinue, xidstart] +U+FFF8 BN Control: Unassigned, unknown, Control, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, patternsyntax] +U+FFF9 ON Control: Format, common, Control, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+FFFA ON Control: Format, common, Control, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+FFFB ON Control: Format, common, Control, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] U+FFFC ON Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart] U+FFFD ON Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart] -U+FFFE BN Control: Unassigned, unknown, Other, [changeswhenuppercased, deprecated, emojicomponent, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+FFFF BN Control: Unassigned, unknown, Other, [changeswhenuppercased, deprecated, emojicomponent, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] +U+FFFE BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] findprop 10000 10001 e01ef f0000 100000 U+10000 L Letter: Other letter, linearb, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue] U+10001 L Letter: Other letter, linearb, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue] -U+E01EF NSM Mark: Non-spacing mark, inherited, Extend, [] +U+E01EF NSM Mark: Non-spacing mark, inherited, Extend, [ascii, alphabetic, cased, emojicomponent] U+F0000 L Control: Private use, unknown, Other U+100000 L Control: Private use, unknown, Other @@ -391,7 +391,7 @@ findprop 32ff U+32FF L Symbol: Other symbol, common, Other, [han], [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart] findprop 1f16d -U+1F16D ON Symbol: Other symbol, common, Extended Pictographic, [ascii, sentenceterminal, unifiedideograph, whitespace, xidcontinue] +U+1F16D ON Symbol: Other symbol, common, Extended Pictographic, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, regionalindicator, xidcontinue, xidstart] findprop U+10e93 U+10eaa U+10E93 R Letter: Other letter, yezidi, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue] diff --git a/maint/ucptestdata/testoutput2 b/maint/ucptestdata/testoutput2 index 088e80202..532d67433 100644 --- a/maint/ucptestdata/testoutput2 +++ b/maint/ucptestdata/testoutput2 @@ -1,52 +1,52 @@ find script Han -U+2E80..U+2E99 ON Symbol: Other symbol, han, Other, [ascii, sentenceterminal, unifiedideograph, whitespace, xidstart] -U+2E9B..U+2EF3 ON Symbol: Other symbol, han, Other, [ascii, sentenceterminal, unifiedideograph, whitespace, xidstart] -U+2F00..U+2FD5 ON Symbol: Other symbol, han, Other, [ascii, sentenceterminal, unifiedideograph, whitespace, xidstart] +U+2E80..U+2E99 ON Symbol: Other symbol, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, regionalindicator, xidcontinue, xidstart] +U+2E9B..U+2EF3 ON Symbol: Other symbol, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, regionalindicator, xidcontinue, xidstart] +U+2F00..U+2FD5 ON Symbol: Other symbol, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, regionalindicator, xidcontinue, xidstart] U+3005 L Letter: Modifier letter, han, Other, [emoji, emojimodifierbase, emojipresentation, extendedpictographic, graphemebase, patternsyntax] - U+3007 L Number: Letter number, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] -U+3021..U+3029 L Number: Letter number, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] -U+3038..U+303A L Number: Letter number, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] + U+3007 L Number: Letter number, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] +U+3021..U+3029 L Number: Letter number, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] +U+3038..U+303A L Number: Letter number, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] U+303B L Letter: Modifier letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart] -U+3400..U+4DBF L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+4E00..U+9FFF L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+F900..U+FA0D L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] -U+FA0E..U+FA0F L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] - U+FA10 L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] - U+FA11 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] - U+FA12 L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] -U+FA13..U+FA14 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+FA15..U+FA1E L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] - U+FA1F L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] - U+FA20 L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] - U+FA21 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] - U+FA22 L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] -U+FA23..U+FA24 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+FA25..U+FA26 L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] -U+FA27..U+FA29 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+FA2A..U+FA6D L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] -U+FA70..U+FAD9 L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] +U+3400..U+4DBF L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+4E00..U+9FFF L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+F900..U+FA0D L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] +U+FA0E..U+FA0F L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA10 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] + U+FA11 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA12 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] +U+FA13..U+FA14 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA15..U+FA1E L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] + U+FA1F L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA20 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] + U+FA21 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA22 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] +U+FA23..U+FA24 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA25..U+FA26 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] +U+FA27..U+FA29 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA2A..U+FA6D L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] +U+FA70..U+FAD9 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] U+16FE2 ON Punctuation: Other punctuation, han, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart] U+16FE3 L Letter: Modifier letter, han, Other, [emoji, emojimodifierbase, emojipresentation, extendedpictographic, graphemebase, patternsyntax] U+16FF0..U+16FF1 L Mark: Spacing mark, han, SpacingMark, [caseignorable, graphemeextend, idcontinue, ideographic, xidcontinue] -U+20000..U+2A6DF L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+2A700..U+2B738 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+2B740..U+2B81D L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+2B820..U+2CEA1 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+2CEB0..U+2EBE0 L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+2F800..U+2FA1D L Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart] -U+30000..U+3134A L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] +U+20000..U+2A6DF L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2A700..U+2B738 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2B740..U+2B81D L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2B820..U+2CEA1 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2CEB0..U+2EBE0 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2F800..U+2FA1D L Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, quotationmark, regionalindicator, xidcontinue, xidstart] +U+30000..U+3134A L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] find type Pe script Common scriptx Hangul -U+3009 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase] -U+300B ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase] +U+3009 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [softdotted, terminalpunctuation, unifiedideograph, whitespace, xidcontinue] +U+300B ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [softdotted, terminalpunctuation, unifiedideograph, whitespace, xidcontinue] U+300D ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [graphemebase, sentenceterminal, terminalpunctuation] U+300F ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [graphemebase, sentenceterminal, terminalpunctuation] -U+3011 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase] -U+3015 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase] -U+3017 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase] -U+3019 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase] -U+301B ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase] -U+301E..U+301F ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [softdotted, terminalpunctuation, unifiedideograph, xidcontinue, xidstart] - U+FF63 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [changeswhencasemapped, changeswhenlowercased, changeswhentitlecased, emojimodifier, emojimodifierbase] +U+3011 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [softdotted, terminalpunctuation, unifiedideograph, whitespace, xidcontinue] +U+3015 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [softdotted, terminalpunctuation, unifiedideograph, whitespace, xidcontinue] +U+3017 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [softdotted, terminalpunctuation, unifiedideograph, whitespace, xidcontinue] +U+3019 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [softdotted, terminalpunctuation, unifiedideograph, whitespace, xidcontinue] +U+301B ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [softdotted, terminalpunctuation, unifiedideograph, whitespace, xidcontinue] +U+301E..U+301F ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [dash, defaultignorablecodepoint, emojimodifier, emojipresentation, joincontrol, lowercase, patternwhitespace, quotationmark, regionalindicator, softdotted, xidcontinue, xidstart] + U+FF63 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidicontrol, bidimirrored, caseignorable, sentenceterminal, unifiedideograph, xidcontinue, xidstart] find type Sk U+005E ON Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart] U+0060 ON Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, changeswhentitlecased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart] @@ -70,7 +70,7 @@ U+1FCD..U+1FCF ON Symbol: Modifier symbol, greek, Other, [alphabetic, cased, gr U+1FDD..U+1FDF ON Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] U+1FED..U+1FEF ON Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] U+1FFD..U+1FFE ON Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] -U+309B..U+309C ON Symbol: Modifier symbol, common, Other, [hiragana, katakana], [alphabetic, bidimirrored, caseignorable, cased, changeswhencasefolded, changeswhenlowercased, changeswhentitlecased, changeswhenuppercased, dash, defaultignorablecodepoint, deprecated, diacritic, emoji, emojicomponent, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, extender, graphemebase, graphemeextend, graphemelink, hexdigit, idsbinaryoperator, idstrinaryoperator, idcontinue, idstart, ideographic, sentenceterminal, unifiedideograph, whitespace, xidcontinue] +U+309B..U+309C ON Symbol: Modifier symbol, common, Other, [hiragana, katakana], [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] U+A700..U+A707 ON Symbol: Modifier symbol, common, Other, [latin, han], [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] U+A708..U+A716 ON Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] U+A720..U+A721 ON Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] @@ -78,28 +78,28 @@ U+A789..U+A78A L Symbol: Modifier symbol, common, Other, [alphabetic, cased, g U+AB5B L Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] U+AB6A..U+AB6B ON Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] U+FBB2..U+FBC2 AL Symbol: Modifier symbol, arabic, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, math, softdotted, xidcontinue, xidstart] - U+FF3E ON Symbol: Modifier symbol, common, Other, [asciihexdigit, bidicontrol, bidimirrored, cased, changeswhencasefolded, sentenceterminal, unifiedideograph, whitespace, xidstart] + U+FF3E ON Symbol: Modifier symbol, common, Other, [changeswhenuppercased, deprecated, emojicomponent, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] U+FF40 ON Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] U+FFE3 ON Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart] -U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, common, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternsyntax, radical, sentenceterminal, terminalpunctuation] +U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, common, Extend, [ascii, asciihexdigit, alphabetic, bidicontrol, bidimirrored, caseignorable, changeswhenlowercased, changeswhenuppercased, defaultignorablecodepoint, sentenceterminal, unifiedideograph, xidcontinue, xidstart] find type Pd U+002D ES Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart] U+058A ON Punctuation: Dash punctuation, armenian, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax] U+05BE R Punctuation: Dash punctuation, hebrew, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax] U+1400 ON Punctuation: Dash punctuation, canadianaboriginal, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax] U+1806 ON Punctuation: Dash punctuation, mongolian, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax] -U+2010..U+2015 ON Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart] - U+2E17 ON Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart] - U+2E1A ON Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart] -U+2E3A..U+2E3B ON Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart] - U+2E40 ON Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart] - U+2E5D ON Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart] - U+301C ON Punctuation: Dash punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart] - U+3030 ON Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+2010..U+2015 ON Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhenlowercased, dash, emojimodifier, emojimodifierbase] + U+2E17 ON Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhenlowercased, dash, emojimodifier, emojimodifierbase] + U+2E1A ON Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhenlowercased, dash, emojimodifier, emojimodifierbase] +U+2E3A..U+2E3B ON Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhenlowercased, dash, emojimodifier, emojimodifierbase] + U+2E40 ON Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhenlowercased, dash, emojimodifier, emojimodifierbase] + U+2E5D ON Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhenlowercased, dash, emojimodifier, emojimodifierbase] + U+301C ON Punctuation: Dash punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [ascii, alphabetic, cased, changeswhenlowercased, dash, emojimodifier, emojimodifierbase] + U+3030 ON Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [sentenceterminal, unifiedideograph, whitespace, xidcontinue] U+30A0 ON Punctuation: Dash punctuation, common, Other, [hiragana, katakana], [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax] U+FE31..U+FE32 ON Punctuation: Dash punctuation, common, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax] U+FE58 ON Punctuation: Dash punctuation, common, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax] - U+FE63 ES Punctuation: Dash punctuation, common, Other, [caseignorable, sentenceterminal, unifiedideograph, xidcontinue] + U+FE63 ES Punctuation: Dash punctuation, common, Other, [changeswhenuppercased, deprecated, emojimodifier, extendedpictographic, quotationmark, sentenceterminal, xidcontinue, xidstart] U+FF0D ES Punctuation: Dash punctuation, common, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax] U+10EAD R Punctuation: Dash punctuation, yezidi, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax] find gbreak LVT @@ -220,12 +220,12 @@ U+060C CS Punctuation: Other punctuation, common, Other, [arabic, syriac, thaan U+202F CS Separator: Space separator, common, Other, [latin, mongolian], [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase] U+2044 CS Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue] U+FE50 CS Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation] -U+FE52 CS Punctuation: Other punctuation, common, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] -U+FE55 CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] +U+FE52 CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] +U+FE55 CS Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, caseignorable, changeswhencasefolded, sentenceterminal, unifiedideograph, xidcontinue, xidstart] U+FF0C CS Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation] -U+FF0E CS Punctuation: Other punctuation, common, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart] +U+FF0E CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] U+FF0F CS Punctuation: Other punctuation, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart] -U+FF1A CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] +U+FF1A CS Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, caseignorable, changeswhencasefolded, sentenceterminal, unifiedideograph, xidcontinue, xidstart] find bidi CS type Sm U+2044 CS Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue] find bidi B @@ -263,36 +263,194 @@ U+00AD BN Control: Format, common, Control, [caseignorable, prependedconcatenat U+180E BN Control: Format, mongolian, Control, [caseignorable, prependedconcatenationmark] U+200B BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] U+2060 BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] -U+2118 ON Symbol: Mathematical symbol, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] -U+3030 ON Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] -U+AAC0 L Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] -U+AAC2 L Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] -U+FE0F NSM Mark: Non-spacing mark, inherited, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] -U+FE55 CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] -U+FEFF BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] -U+FF1A CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] -U+FF21..U+FF26 L Letter: Upper case letter, latin, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] -U+10D22..U+10D23 AL Letter: Other letter, hanifirohingya, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] - U+1135D L Letter: Other letter, grantha, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+210A L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+210E..U+210F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+2113 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+212F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+2134 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+213C..U+213D L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+2146..U+2147 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+302E..U+302F L Mark: Spacing mark, hangul, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+3400..U+4DBF L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+4E00..U+9FFF L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA0E..U+FA0F L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA11 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA13..U+FA14 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA1F L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA21 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA23..U+FA24 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA27..U+FA29 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FDD0..U+FDEF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] + U+FE52 CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] + U+FEFF BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] + U+FF0E CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] +U+FF10..U+FF19 EN Number: Decimal number, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] +U+FFF9..U+FFFB ON Control: Format, common, Control, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+FFFE..U+FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+13430..U+13438 L Control: Format, egyptianhieroglyphs, Control, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] U+1BCA0..U+1BCA3 BN Control: Format, common, Control, [duployan], [caseignorable, prependedconcatenationmark] +U+1D16E..U+1D172 L Mark: Spacing mark, common, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] U+1D173..U+1D17A BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] -U+1F1E6..U+1F1FF L Symbol: Other symbol, common, Regional Indicator, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D41A..U+1D421 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D424..U+1D433 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D44E..U+1D454 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D458..U+1D467 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D482..U+1D489 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D48C..U+1D49B L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4B6..U+1D4B9 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+1D4BB L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+1D4BD L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4C0..U+1D4C3 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4C5..U+1D4CF L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4EA..U+1D4F1 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4F4..U+1D503 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D51E..U+1D525 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D528..U+1D537 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D552..U+1D559 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D55C..U+1D56B L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D586..U+1D58D L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D590..U+1D59F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D5BA..U+1D5C1 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D5C4..U+1D5D3 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D5EE..U+1D5F5 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D5F8..U+1D607 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D622..U+1D629 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D62C..U+1D63B L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D656..U+1D65D L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D660..U+1D66F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D68A..U+1D691 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D694..U+1D6A5 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D6C2..U+1D6DA L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D6DC..U+1D6E1 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D6FC..U+1D714 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D716..U+1D71B L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D736..U+1D74E L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D750..U+1D755 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D770..U+1D788 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D78A..U+1D78F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D7AA..U+1D7C2 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D7C4..U+1D7C9 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+1D7CB L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1F170..U+1F171 L Symbol: Other symbol, common, Extended Pictographic, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1F17E..U+1F17F L Symbol: Other symbol, common, Extended Pictographic, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1FFFE..U+1FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+20000..U+2A6DF L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2A700..U+2B738 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2B740..U+2B81D L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2B820..U+2CEA1 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2CEB0..U+2EBE0 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2FFFE..U+2FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+30000..U+3134A L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+3FFFE..U+3FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+4FFFE..U+4FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+5FFFE..U+5FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+6FFFE..U+6FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+7FFFE..U+7FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+8FFFE..U+8FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+9FFFE..U+9FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+AFFFE..U+AFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+BFFFE..U+BFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+CFFFE..U+CFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+DFFFE..U+DFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+EFFFE..U+EFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+FFFFE..U+FFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] + U+10FFFE BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] + U+10FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] find bool pcm U+00AD BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] U+180E BN Control: Format, mongolian, Control, [caseignorable, prependedconcatenationmark] U+200B BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] U+2060 BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] -U+2118 ON Symbol: Mathematical symbol, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] -U+3030 ON Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] -U+AAC0 L Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] -U+AAC2 L Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] -U+FE0F NSM Mark: Non-spacing mark, inherited, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] -U+FE55 CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] -U+FEFF BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] -U+FF1A CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] -U+FF21..U+FF26 L Letter: Upper case letter, latin, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] -U+10D22..U+10D23 AL Letter: Other letter, hanifirohingya, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] - U+1135D L Letter: Other letter, grantha, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+210A L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+210E..U+210F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+2113 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+212F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+2134 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+213C..U+213D L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+2146..U+2147 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+302E..U+302F L Mark: Spacing mark, hangul, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+3400..U+4DBF L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+4E00..U+9FFF L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA0E..U+FA0F L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA11 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA13..U+FA14 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA1F L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] + U+FA21 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA23..U+FA24 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FA27..U+FA29 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+FDD0..U+FDEF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] + U+FE52 CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] + U+FEFF BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] + U+FF0E CS Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] +U+FF10..U+FF19 EN Number: Decimal number, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, noncharactercodepoint, patternwhitespace, prependedconcatenationmark] +U+FFF9..U+FFFB ON Control: Format, common, Control, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+FFFE..U+FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+13430..U+13438 L Control: Format, egyptianhieroglyphs, Control, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] U+1BCA0..U+1BCA3 BN Control: Format, common, Control, [duployan], [caseignorable, prependedconcatenationmark] +U+1D16E..U+1D172 L Mark: Spacing mark, common, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] U+1D173..U+1D17A BN Control: Format, common, Control, [caseignorable, prependedconcatenationmark] -U+1F1E6..U+1F1FF L Symbol: Other symbol, common, Regional Indicator, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D41A..U+1D421 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D424..U+1D433 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D44E..U+1D454 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D458..U+1D467 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D482..U+1D489 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D48C..U+1D49B L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4B6..U+1D4B9 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+1D4BB L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+1D4BD L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4C0..U+1D4C3 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4C5..U+1D4CF L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4EA..U+1D4F1 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D4F4..U+1D503 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D51E..U+1D525 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D528..U+1D537 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D552..U+1D559 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D55C..U+1D56B L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D586..U+1D58D L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D590..U+1D59F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D5BA..U+1D5C1 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D5C4..U+1D5D3 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D5EE..U+1D5F5 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D5F8..U+1D607 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D622..U+1D629 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D62C..U+1D63B L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D656..U+1D65D L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D660..U+1D66F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D68A..U+1D691 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D694..U+1D6A5 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D6C2..U+1D6DA L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D6DC..U+1D6E1 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D6FC..U+1D714 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D716..U+1D71B L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D736..U+1D74E L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D750..U+1D755 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D770..U+1D788 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D78A..U+1D78F L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D7AA..U+1D7C2 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1D7C4..U+1D7C9 L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] + U+1D7CB L Letter: Lower case letter, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1F170..U+1F171 L Symbol: Other symbol, common, Extended Pictographic, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1F17E..U+1F17F L Symbol: Other symbol, common, Extended Pictographic, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart] +U+1FFFE..U+1FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+20000..U+2A6DF L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2A700..U+2B738 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2B740..U+2B81D L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2B820..U+2CEA1 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2CEB0..U+2EBE0 L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+2FFFE..U+2FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+30000..U+3134A L Letter: Other letter, han, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark] +U+3FFFE..U+3FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+4FFFE..U+4FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+5FFFE..U+5FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+6FFFE..U+6FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+7FFFE..U+7FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+8FFFE..U+8FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+9FFFE..U+9FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+AFFFE..U+AFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+BFFFE..U+BFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+CFFFE..U+CFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+DFFFE..U+DFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+EFFFE..U+EFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] +U+FFFFE..U+FFFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] + U+10FFFE BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] + U+10FFFF BN Control: Unassigned, unknown, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark] From fd102057adda89a676b6ace6c11d87c0cde29c1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Mon, 30 Jan 2023 05:05:28 -0800 Subject: [PATCH 2/2] fix `findprop +` with UTF-8 characters and duplicated other case --- maint/ucptest.c | 110 ++++++---------------------------- maint/ucptestdata/testinput1 | 1 + maint/ucptestdata/testoutput1 | 4 ++ 3 files changed, 23 insertions(+), 92 deletions(-) diff --git a/maint/ucptest.c b/maint/ucptest.c index 34ff2c57f..93822494a 100644 --- a/maint/ucptest.c +++ b/maint/ucptest.c @@ -2,12 +2,13 @@ * A program for testing the Unicode property table * ***************************************************/ -/* Copyright (c) University of Cambridge 2008-2022 */ +/* Copyright (c) University of Cambridge 2008-2023 */ /* Compile thus: - gcc -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8 -o ucptest \ - ucptest.c ../src/pcre2_ucd.c ../src/pcre2_tables.c + gcc -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8 \ + -fvisibility=hidden -o ucptest ucptest.c \ + ../src/pcre2_ord2utf.c ../src/pcre2_ucd.c ../src/pcre2_tables.c Add -lreadline or -ledit if PCRE2 was configured with readline or libedit support in pcre2test. @@ -87,7 +88,7 @@ type, gbreak or bidi. The defined values for that property are listed. */ #endif #ifndef SUPPORT_UNICODE -#define SUPPORT_UNICODE +#error "Unicode support not enabled" #endif #include @@ -125,7 +126,6 @@ type, gbreak or bidi. The defined values for that property are listed. */ #define CSS (char **) #define US (unsigned char *) #define CUS (const unsigned char *) -#define USS (unsigned char **) /* -------------------------------------------------------------------*/ @@ -208,81 +208,6 @@ static const unsigned char *bd_names[] = { US"WS", US"White space" }; -static const unsigned int utf8_table1[] = { - 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff}; - -static const int utf8_table2[] = { - 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; - -/* Macro to pick up the remaining bytes of a UTF-8 character, advancing -the pointer. */ - -#define GETUTF8INC(c, eptr) \ - { \ - if ((c & 0x20u) == 0) \ - c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \ - else if ((c & 0x10u) == 0) \ - { \ - c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \ - eptr += 2; \ - } \ - else if ((c & 0x08u) == 0) \ - { \ - c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \ - ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ - eptr += 3; \ - } \ - else if ((c & 0x04u) == 0) \ - { \ - c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \ - ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \ - (eptr[3] & 0x3fu); \ - eptr += 4; \ - } \ - else \ - { \ - c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \ - ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \ - ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \ - eptr += 5; \ - } \ - } - - - -/************************************************* -* Convert character value to UTF-8 * -*************************************************/ - -/* This function takes an unsigned long integer value in the range 0 - -0x7fffffff and encodes it as a UTF-8 character in 1 to 6 bytes. - -Arguments: - cvalue the character value - buffer pointer to buffer for result - at least 6 bytes long - -Returns: number of bytes placed in the buffer - 0 if input code point is too big -*/ - -static size_t -ord2utf8(unsigned int cvalue, unsigned char *buffer) -{ -size_t i, j; -for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) - if (cvalue <= utf8_table1[i]) break; -if (i >= sizeof(utf8_table1)/sizeof(int)) return 0; -buffer += i; -for (j = i; j > 0; j--) - { - *buffer-- = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } -*buffer = utf8_table2[i] | cvalue; -return i + 1; -} - - /************************************************* * Test for interaction * @@ -357,7 +282,7 @@ return yield; static void print_prop(unsigned int c, BOOL is_just_one) { -int type = UCD_CATEGORY(c); +unsigned int type = UCD_CATEGORY(c); int fulltype = UCD_CHARTYPE(c); int script = UCD_SCRIPT(c); int scriptx = UCD_SCRIPTX(c); @@ -473,7 +398,7 @@ printf("U+%04X %s %s: %s, %s, %s", c, bidiclass, typename, fulltypename, if (is_just_one && (othercase != c || caseset != 0)) { - printf(", U+%04X", othercase); + if (othercase != c) printf(", U+%04X", othercase); if (caseset != 0) { const uint32_t *p = PRIV(ucd_caseless_sets) + caseset - 1; @@ -517,8 +442,8 @@ if (bprops != 0) if (show_character && is_just_one) { unsigned char buffer[8]; - size_t len = ord2utf8(c, buffer); - printf(", >%.*s<", (int)len, buffer); + int len = (int)PRIV(ord2utf_8)(c, buffer); + printf(", >%.*s<", len, buffer); } printf("\n"); @@ -557,7 +482,6 @@ const char *pad = " "; while (*s != 0) { unsigned int offset = 0; - BOOL scriptx_not = FALSE; for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s; *t = 0; @@ -573,6 +497,7 @@ while (*s != 0) if (strcmp(CS name, "script") == 0 || strcmp(CS name, "scriptx") == 0) { + BOOL scriptx_not = FALSE; for (t = value; *t != 0; t++) *t = tolower(*t); if (value[0] == '!') @@ -656,7 +581,7 @@ while (*s != 0) for (i = 0; i < sizeof(type_names)/sizeof(char *); i += 2) { - if (strcmp(CS (value + offset), CS type_names[i]) == 0) + if (strcmp(CS (value + offset), CCS type_names[i]) == 0) { type = i/2; break; @@ -687,7 +612,7 @@ while (*s != 0) for (i = 0; i < sizeof(gb_names)/sizeof(char *); i += 2) { - if (strcmp(CS (value + offset), CS gb_names[i]) == 0) + if (strcmp(CS (value + offset), CCS gb_names[i]) == 0) { gbreak = i/2; break; @@ -719,7 +644,7 @@ while (*s != 0) } for (i = 0; i < sizeof(bd_names)/sizeof(char *); i += 2) { - if (strcasecmp(CS (value + offset), CS bd_names[i]) == 0) + if (strcasecmp(CS (value + offset), CCS bd_names[i]) == 0) { bidiclass = i/2; break; @@ -903,13 +828,14 @@ if (strcmp(CS name, "findprop") == 0) if (c > 0x7fu) { GETCHARINC(c, t); + endptr = t; } - endptr = t+1; + else endptr = t+1; } else { - if (strncmp(CS t, "U+", 2) == 0) t += 2; - c = strtoul(CS t, CSS(&endptr), 16); + if (memcmp(t, "U+", 2) == 0) t += 2; + c = (uint32_t)strtoul(CS t, CSS(&endptr), 16); } if (*endptr != 0 && !isspace(*endptr)) @@ -1018,7 +944,7 @@ if (argc > first_arg) char *arg = argv[first_arg]; unsigned char *s = buffer; - if (*arg != '+' && strncmp(arg, "U+", 2) != 0 && !isdigit(*arg)) + if (*arg != '+' && memcmp(arg, "U+", 2) != 0 && !isdigit(*arg)) { while (*arg != 0) { diff --git a/maint/ucptestdata/testinput1 b/maint/ucptestdata/testinput1 index f7aaa9a93..6e3ee30ec 100644 --- a/maint/ucptestdata/testinput1 +++ b/maint/ucptestdata/testinput1 @@ -46,5 +46,6 @@ findprop 32ff findprop 1f16d findprop U+10e93 U+10eaa +findprop +á +é U+212A findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067 diff --git a/maint/ucptestdata/testoutput1 b/maint/ucptestdata/testoutput1 index c4d461855..7096190be 100644 --- a/maint/ucptestdata/testoutput1 +++ b/maint/ucptestdata/testoutput1 @@ -396,6 +396,10 @@ U+1F16D ON Symbol: Other symbol, common, Extended Pictographic, [changeswhenupp findprop U+10e93 U+10eaa U+10E93 R Letter: Other letter, yezidi, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue] U+10EAA R Control: Unassigned, unknown, Other +findprop +á +é U+212A +U+00E1 L Letter: Lower case letter, latin, Other, U+00C1, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue] +U+00E9 L Letter: Lower case letter, latin, Other, U+00C9, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue] +U+212A L Letter: Upper case letter, latin, Other, U+004B, U+006B, [alphabetic, graphemeextend, idcontinue, xidcontinue] findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067 U+0602 AN Control: Format, arabic, Prepend, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, lowercase]