Skip to content

Update to Unicode 14.0.0 #29

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/pcre2pattern.3
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,7 @@ Common,
Coptic,
Cuneiform,
Cypriot,
Cypro_Minoan,
Cyrillic,
Deseret,
Devanagari,
Expand Down Expand Up @@ -915,6 +916,7 @@ Old_Persian,
Old_Sogdian,
Old_South_Arabian,
Old_Turkic,
Old_Uyghur,
Oriya,
Osage,
Osmanya,
Expand Down Expand Up @@ -946,16 +948,19 @@ Tai_Tham,
Tai_Viet,
Takri,
Tamil,
Tangsa,
Tangut,
Telugu,
Thaana,
Thai,
Tibetan,
Tifinagh,
Tirhuta,
Toto,
Ugaritic,
Unknown,
Vai,
Vithkuqi,
Wancho,
Warang_Citi,
Yezidi,
Expand Down
5 changes: 5 additions & 0 deletions doc/pcre2syntax.3
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ Common,
Coptic,
Cuneiform,
Cypriot,
Cypro_Minoan,
Cyrillic,
Deseret,
Devanagari,
Expand Down Expand Up @@ -281,6 +282,7 @@ Old_Persian,
Old_Sogdian,
Old_South_Arabian,
Old_Turkic,
Old_Uyghur,
Oriya,
Osage,
Osmanya,
Expand Down Expand Up @@ -312,15 +314,18 @@ Tai_Tham,
Tai_Viet,
Takri,
Tamil,
Tangsa,
Tangut,
Telugu,
Thaana,
Thai,
Tibetan,
Tifinagh,
Tirhuta,
Toto,
Ugaritic,
Vai,
Vithkuqi,
Wancho,
Warang_Citi,
Yezidi,
Expand Down
5 changes: 4 additions & 1 deletion maint/GenerateUtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
# Added 'Unknown' script, 01-October-2018.
# Added script names for Unicode 12.1.0, 27-July-2019.
# Added script names for Unicode 13.0.0, 10-March-2020.
# Added Script names for Unicode 14.0.0, PCRE2-10.39

script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
Expand Down Expand Up @@ -66,7 +67,9 @@
# New for Unicode 12.0.0
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho',
# New for Unicode 13.0.0
'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi'
'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi',
# New for Unicode 14.0.0
'Cypro_Minoan', 'Old_Uyghur', 'Tangsa', 'Toto', 'Vithkuqi'
]

category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
Expand Down
11 changes: 8 additions & 3 deletions maint/MultiStage2.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
# 03-October-2018: Added new field for Script Extensions
# 27-July-2019: Updated for Unicode 12.1.0
# 10-March-2020: Updated for Unicode 13.0.0
# PCRE2-10.39: Updated for Unicode 14.0.0
# ----------------------------------------------------------------------------
#
#
Expand Down Expand Up @@ -432,7 +433,9 @@ def print_records(records, record_size):
# New for Unicode 12.0.0
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho',
# New for Unicode 13.0.0
'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi'
'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi',
# New for Unicode 14.0.0
'Cypro_Minoan', 'Old_Uyghur', 'Tangsa', 'Toto', 'Vithkuqi'
]

script_abbrevs = [
Expand Down Expand Up @@ -469,8 +472,10 @@ def print_records(records, record_size):
#New for Unicode 12.0.0
'Elym', 'Nand', 'Hmnp', 'Wcho',
#New for Unicode 13.0.0
'Chrs', 'Diak', 'Kits', 'Yezi'
]
'Chrs', 'Diak', 'Kits', 'Yezi',
#New for Unicode 14.0.0
'Cpmn', 'Ougr', 'Tngs', 'Toto', 'Vith'
]

category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
Expand Down
46 changes: 43 additions & 3 deletions maint/Unicode.tables/CaseFolding.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CaseFolding-13.0.0.txt
# Date: 2019-09-08, 23:30:59 GMT
# © 2019 Unicode®, Inc.
# CaseFolding-14.0.0.txt
# Date: 2021-03-08, 19:35:41 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
Expand Down Expand Up @@ -1050,6 +1050,7 @@
2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C2F; C; 2C5F; # GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI
2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
Expand Down Expand Up @@ -1230,12 +1231,16 @@ A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE
A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A
A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I
A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U
A7C0; C; A7C1; # LATIN CAPITAL LETTER OLD POLISH O
A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
AB70; C; 13A0; # CHEROKEE SMALL LETTER A
AB71; C; 13A1; # CHEROKEE SMALL LETTER E
Expand Down Expand Up @@ -1431,6 +1436,41 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA
104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA
104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA
10570; C; 10597; # VITHKUQI CAPITAL LETTER A
10571; C; 10598; # VITHKUQI CAPITAL LETTER BBE
10572; C; 10599; # VITHKUQI CAPITAL LETTER BE
10573; C; 1059A; # VITHKUQI CAPITAL LETTER CE
10574; C; 1059B; # VITHKUQI CAPITAL LETTER CHE
10575; C; 1059C; # VITHKUQI CAPITAL LETTER DE
10576; C; 1059D; # VITHKUQI CAPITAL LETTER DHE
10577; C; 1059E; # VITHKUQI CAPITAL LETTER EI
10578; C; 1059F; # VITHKUQI CAPITAL LETTER E
10579; C; 105A0; # VITHKUQI CAPITAL LETTER FE
1057A; C; 105A1; # VITHKUQI CAPITAL LETTER GA
1057C; C; 105A3; # VITHKUQI CAPITAL LETTER HA
1057D; C; 105A4; # VITHKUQI CAPITAL LETTER HHA
1057E; C; 105A5; # VITHKUQI CAPITAL LETTER I
1057F; C; 105A6; # VITHKUQI CAPITAL LETTER IJE
10580; C; 105A7; # VITHKUQI CAPITAL LETTER JE
10581; C; 105A8; # VITHKUQI CAPITAL LETTER KA
10582; C; 105A9; # VITHKUQI CAPITAL LETTER LA
10583; C; 105AA; # VITHKUQI CAPITAL LETTER LLA
10584; C; 105AB; # VITHKUQI CAPITAL LETTER ME
10585; C; 105AC; # VITHKUQI CAPITAL LETTER NE
10586; C; 105AD; # VITHKUQI CAPITAL LETTER NJE
10587; C; 105AE; # VITHKUQI CAPITAL LETTER O
10588; C; 105AF; # VITHKUQI CAPITAL LETTER PE
10589; C; 105B0; # VITHKUQI CAPITAL LETTER QA
1058A; C; 105B1; # VITHKUQI CAPITAL LETTER RE
1058C; C; 105B3; # VITHKUQI CAPITAL LETTER SE
1058D; C; 105B4; # VITHKUQI CAPITAL LETTER SHE
1058E; C; 105B5; # VITHKUQI CAPITAL LETTER TE
1058F; C; 105B6; # VITHKUQI CAPITAL LETTER THE
10590; C; 105B7; # VITHKUQI CAPITAL LETTER U
10591; C; 105B8; # VITHKUQI CAPITAL LETTER VE
10592; C; 105B9; # VITHKUQI CAPITAL LETTER XE
10594; C; 105BB; # VITHKUQI CAPITAL LETTER Y
10595; C; 105BC; # VITHKUQI CAPITAL LETTER ZE
10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
Expand Down
Loading