diff --git a/src/main/java/com/networknt/schema/JsonMetaSchema.java b/src/main/java/com/networknt/schema/JsonMetaSchema.java index 8c0444029..b5ed0c738 100644 --- a/src/main/java/com/networknt/schema/JsonMetaSchema.java +++ b/src/main/java/com/networknt/schema/JsonMetaSchema.java @@ -19,6 +19,8 @@ import com.fasterxml.jackson.databind.JsonNode; import com.networknt.schema.format.DateFormat; import com.networknt.schema.format.EmailFormat; +import com.networknt.schema.format.IdnEmailFormat; +import com.networknt.schema.format.IdnHostnameFormat; import com.networknt.schema.format.IriFormat; import com.networknt.schema.format.IriReferenceFormat; import com.networknt.schema.format.PatternFormat; @@ -59,6 +61,8 @@ static PatternFormat pattern(String name, String regex) { COMMON_BUILTIN_FORMATS.add(pattern("uuid", "^\\p{XDigit}{8}-\\p{XDigit}{4}-\\p{XDigit}{4}-\\p{XDigit}{4}-\\p{XDigit}{12}$", "must be a valid RFC 4122 UUID")); COMMON_BUILTIN_FORMATS.add(new DateFormat()); COMMON_BUILTIN_FORMATS.add(new EmailFormat()); + COMMON_BUILTIN_FORMATS.add(new IdnEmailFormat()); + COMMON_BUILTIN_FORMATS.add(new IdnHostnameFormat()); COMMON_BUILTIN_FORMATS.add(new IriFormat()); COMMON_BUILTIN_FORMATS.add(new IriReferenceFormat()); COMMON_BUILTIN_FORMATS.add(new RegexFormat()); diff --git a/src/main/java/com/networknt/schema/format/EmailFormat.java b/src/main/java/com/networknt/schema/format/EmailFormat.java index 009b3d115..d58ee368b 100644 --- a/src/main/java/com/networknt/schema/format/EmailFormat.java +++ b/src/main/java/com/networknt/schema/format/EmailFormat.java @@ -24,25 +24,11 @@ public class EmailFormat extends AbstractFormat { public EmailFormat() { super("email", "must be a valid RFC 5321 Mailbox"); - this.emailValidator = new SpecialEmailValidator(true, true); + this.emailValidator = new IPv6AwareEmailValidator(true, true); } @Override public boolean matches(String value) { return this.emailValidator.isValid(value); } - - static class SpecialEmailValidator extends 
EmailValidator {
-        private static final long serialVersionUID = 1L;
-
-        public SpecialEmailValidator(boolean b, boolean c) {
-            super(b, c);
-        }
-
-        @Override
-        protected boolean isValidDomain(String domain) {
-            return super.isValidDomain(domain.startsWith("[IPv6:") ? domain.replace("IPv6:", "") : domain);
-        }
-
-    }
 }
diff --git a/src/main/java/com/networknt/schema/format/IPv6AwareEmailValidator.java b/src/main/java/com/networknt/schema/format/IPv6AwareEmailValidator.java
new file mode 100644
index 000000000..c9e7fc401
--- /dev/null
+++ b/src/main/java/com/networknt/schema/format/IPv6AwareEmailValidator.java
@@ -0,0 +1,50 @@
+package com.networknt.schema.format;
+
+import com.networknt.org.apache.commons.validator.routines.DomainValidator;
+import com.networknt.org.apache.commons.validator.routines.EmailValidator;
+
+/**
+ * This is an extension of the Apache Commons Validator that correctly
+ * handles email addresses containing an IPv6 literal as the domain.
+ * <p>
+ * Apache's {@link EmailValidator} delegates validation of the domain to
+ * its {@link DomainValidator}, which is not aware that it is validating
+ * an email address, which has a peculiar way of representing an IPv6
+ * literal.
+ */
+class IPv6AwareEmailValidator extends EmailValidator {
+    private static final long serialVersionUID = 1L;
+
+    /** Tag that introduces an IPv6 address literal in the domain part, as in {@code [IPv6:::1]}. */
+    private static final String IPV6_LITERAL_PREFIX = "[IPv6:";
+
+    /**
+     * Creates a new IPv6AwareEmailValidator.
+     *
+     * @param allowLocal Should local addresses be considered valid?
+     * @param allowTld Should TLDs be allowed?
+     */
+    public IPv6AwareEmailValidator(final boolean allowLocal, final boolean allowTld) {
+        super(allowLocal, allowTld);
+    }
+
+    /**
+     * Validates the domain part of an address, accepting the {@code [IPv6:...]} literal form.
+     * <p>
+     * A domain that starts with {@code [IPv6:} has that tag reduced to {@code [} before it is
+     * handed to the superclass; every other domain is passed through unchanged.
+     *
+     * @param domain the domain part of the address being validated
+     * @return the superclass verdict for the (possibly adjusted) domain
+     */
+    @Override
+    protected boolean isValidDomain(String domain) {
+        if (domain.startsWith(IPV6_LITERAL_PREFIX)) {
+            // Strip only the leading tag. String.replace would also delete any later "IPv6:"
+            // and so let a malformed literal such as "[IPv6:IPv6:::1]" through as "[::1]".
+            return super.isValidDomain("[" + domain.substring(IPV6_LITERAL_PREFIX.length()));
+        }
+        return super.isValidDomain(domain);
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/com/networknt/schema/format/IdnEmailFormat.java b/src/main/java/com/networknt/schema/format/IdnEmailFormat.java
new file mode 100644
index 000000000..36daf7927
--- /dev/null
+++ b/src/main/java/com/networknt/schema/format/IdnEmailFormat.java
@@ -0,0 +1,19 @@
+package com.networknt.schema.format;
+
+import com.networknt.org.apache.commons.validator.routines.EmailValidator;
+
+public class IdnEmailFormat extends AbstractFormat {
+
+    private final EmailValidator emailValidator;
+
+    public IdnEmailFormat() {
+        super("idn-email", "must be a valid RFC 6531 Mailbox");
+        this.emailValidator = new IPv6AwareEmailValidator(true, true);
+    }
+
+    @Override
+    public boolean matches(String value) {
+        return this.emailValidator.isValid(value);
+    }
+
+}
diff --git a/src/main/java/com/networknt/schema/format/IdnHostnameFormat.java b/src/main/java/com/networknt/schema/format/IdnHostnameFormat.java
new file mode 100644
index 000000000..723d9e46c
--- /dev/null
+++ b/src/main/java/com/networknt/schema/format/IdnHostnameFormat.java
@@ -0,0 +1,16 @@
+package com.networknt.schema.format;
+
+import com.networknt.schema.utils.RFC5892;
+
+public 
class IdnHostnameFormat extends AbstractFormat {
+
+    public IdnHostnameFormat() {
+        super("idn-hostname", "must be a valid RFC 5890 internationalized hostname");
+    }
+
+    @Override
+    public boolean matches(String value) {
+        if (null == value || value.isEmpty()) return true;
+        return RFC5892.isValid(value);
+    }
+}
diff --git a/src/main/java/com/networknt/schema/utils/RFC5892.java b/src/main/java/com/networknt/schema/utils/RFC5892.java
new file mode 100644
index 000000000..bd71193b3
--- /dev/null
+++ b/src/main/java/com/networknt/schema/utils/RFC5892.java
@@ -0,0 +1,396 @@
+package com.networknt.schema.utils;
+
+import java.net.IDN;
+import java.text.Normalizer;
+import java.text.ParseException;
+import java.util.BitSet;
+import java.util.function.BiPredicate;
+
+import static com.networknt.schema.utils.UnicodeDatabase.*;
+import static java.lang.Character.*;
+
+/**
+ * Encapsulates the rules determining whether a label conforms to the RFC 5892 specification.
+ * <p>
+ * In the context of RFC 5892, a label is a subcomponent of a DNS entry. For example,
+ * schema.networknt.com has three sub-components or labels: com, networknt and schema.
+ * <p>
+ * Each component (or label) must satisfy the constraints identified in RFC 5892.
+ */
+public class RFC5892 {
+
+    private static final String ACE_PREFIX = "xn--";
+    private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length();
+
+    // Code points that are given a dedicated contextual rule below.
+    private static final int GREEK_LOWER_NUMERAL_SIGN = 0x0375;
+    private static final int HEBREW_GERESH = 0x05F3;
+    private static final int HEBREW_GERSHAYIM = 0x05F4;
+    private static final int KATAKANA_MIDDLE_DOT = 0x30FB;
+    private static final int MIDDLE_DOT = 0x00B7;
+    // NOTE(review): U+094D is DEVANAGARI SIGN VIRAMA only. RFC 5892 A.1/A.2 test for the Virama
+    // combining class, which the viramas of other scripts also carry - confirm this is intended.
+    private static final int VIRAMA = 0x94D;
+    private static final int ZERO_WIDTH_JOINER = 0x200D;
+    private static final int ZERO_WIDTH_NON_JOINER = 0x200C;
+
+    // Derived-property sets, filled on first use by loadDerivedProperties().
+    private static final BitSet CONTEXTJ = new BitSet(0x110000);
+    private static final BitSet CONTEXTO = new BitSet(0x110000);
+    private static final BitSet DISALLOWED = new BitSet(0x110000);
+    private static final BitSet UNASSIGNED = new BitSet(0x110000);
+
+    // Every rule receives the decoded label and the index of the character under test.
+    private static final BiPredicate<String, Integer> RULE_ARABIC_INDIC_DIGITS_RULE = RFC5892::testArabicIndicDigit;
+    private static final BiPredicate<String, Integer> RULE_EXTENDED_ARABIC_INDIC_DIGITS_RULE = RFC5892::testExtendedArabicIndicDigit;
+    private static final BiPredicate<String, Integer> RULE_GREEK_LOWER_NUMERAL_SIGN = RFC5892::testGreekLowerNumeralSign;
+    private static final BiPredicate<String, Integer> RULE_HEBREW_GERESH_GERSHAYIM = RFC5892::testHebrewPuncuation;
+    private static final BiPredicate<String, Integer> RULE_KATAKANA_MIDDLE_DOT = RFC5892::testKatakanaMiddleDot;
+    private static final BiPredicate<String, Integer> RULE_MIDDLE_DOT = RFC5892::testeMiddleDotRule;
+    private static final BiPredicate<String, Integer> RULE_ZERO_WIDTH_JOINER = RFC5892::testZeroWidthJoiner;
+    private static final BiPredicate<String, Integer> RULE_ZERO_WIDTH_NON_JOINER = RFC5892::testZeroWidthNonJoiner;
+
+    private static final BiPredicate<String, Integer> ALLOWED_CHARACTER = RFC5892::testAllowedCharacter;
+
+    private static final BiPredicate<String, Integer> LTR = RFC5892::testLTR;
+    private static final BiPredicate<String, Integer> RTL = RFC5892::testRTL;
+
+    // The character-level checks shared by every label, regardless of direction.
+    private static final BiPredicate<String, Integer> IDNA_RULES =
+        ALLOWED_CHARACTER
+            .and(RULE_ARABIC_INDIC_DIGITS_RULE)
+            .and(RULE_EXTENDED_ARABIC_INDIC_DIGITS_RULE)
+            .and(RULE_GREEK_LOWER_NUMERAL_SIGN)
+            .and(RULE_HEBREW_GERESH_GERSHAYIM)
+            .and(RULE_KATAKANA_MIDDLE_DOT)
+            .and(RULE_MIDDLE_DOT)
+            .and(RULE_ZERO_WIDTH_JOINER)
+            .and(RULE_ZERO_WIDTH_NON_JOINER)
+        ;
+
+    private static boolean isContextJ(int codepoint) {
+        if (CONTEXTJ.isEmpty()) loadDerivedProperties();
+        return CONTEXTJ.get(codepoint);
+    }
+
+    private static boolean isContextO(int codepoint) {
+        if (CONTEXTO.isEmpty()) loadDerivedProperties();
+        return CONTEXTO.get(codepoint);
+    }
+
+    private static boolean isDisallowed(int codepoint) {
+        if (DISALLOWED.isEmpty()) loadDerivedProperties();
+        return DISALLOWED.get(codepoint);
+    }
+
+    private static boolean isUnassigned(int codepoint) {
+        if (UNASSIGNED.isEmpty()) loadDerivedProperties();
+        return UNASSIGNED.get(codepoint);
+    }
+
+    private static boolean testAllowedCharacter(String s, int i) {
+        int c = s.codePointAt(i);
+        return !isDisallowed(c) && !isUnassigned(c) // RFC 5891 4.2.2. Rejection of Characters That Are Not Permitted
+            && !isContextJ(c) && !isContextO(c);    // RFC 5891 4.2.3.3. Contextual Rules
+    }
+
+    /**
+     * Determines whether every label of a host name passes the label checks implemented here.
+     *
+     * @param value the host name to test; its labels are separated by '.'
+     * @return {@code true} if every label is acceptable, {@code false} as soon as one is not
+     */
+    public static boolean isValid(String value) {
+        // RFC 5892 calls each segment in a host name a label. They are separated by '.'.
+        // String.split drops trailing empty strings, so a trailing '.' never yields an empty
+        // label here; an empty label can only come from a leading or a doubled '.'.
+        String[] labels = value.split("\\.");
+        for (String label : labels) {
+            if (label.isEmpty()) return false;
+
+            String unicode = label;
+            if (isACE(label)) {
+                // IDN returns the original value when it encounters an issue converting to Unicode
+                unicode = IDN.toUnicode(label, IDN.USE_STD3_ASCII_RULES);
+                if (unicode.equalsIgnoreCase(label)) return false;
+            }
+
+            int len = unicode.length();
+            BiPredicate<String, Integer> rules;
+
+            // RFC 5891 5.4. Validation and Character List Testing
+            if (!Normalizer.isNormalized(unicode, Normalizer.Form.NFC)) return false;
+
+            // RFC 5891 4.2.3.1. Hyphen Restrictions
+            if ('-' == unicode.charAt(0) || '-' == unicode.codePointBefore(len)) return false;
+            if (4 <= len && '-' == unicode.codePointAt(2) && '-' == unicode.codePointAt(3)) return false;
+
+            // RFC 5891 4.2.3.2. Leading Combining Marks
+            if (isCombiningMark(unicode.codePointAt(0))) return false;
+
+            // RFC 5893 2. The Bidi Rule
+            // The first-character condition only concerns labels that contain right-to-left
+            // characters. Applying it to every label rejected ordinary labels such as "1host".
+            if (containsRightToLeft(unicode)) {
+                switch (getDirectionality(unicode.codePointAt(0))) {
+                    case DIRECTIONALITY_RIGHT_TO_LEFT:
+                    case DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
+                        rules = IDNA_RULES.and(RTL);
+                        break;
+                    default: return false;
+                }
+            } else {
+                rules = IDNA_RULES.and(LTR);
+            }
+
+            // Step by code point so the low half of a surrogate pair is never tested on its own.
+            for (int i = 0; i < len; i += Character.charCount(unicode.codePointAt(i))) {
+                if (!rules.test(unicode, i)) return false;
+            }
+
+            try {
+                String ace = IDN.toASCII(unicode, IDN.USE_STD3_ASCII_RULES);
+                if (63 < ace.length()) return false; // RFC 5891 4.2.4. Registration Validation Requirements
+            } catch (IllegalArgumentException e) {
+                Throwable t = e.getCause();
+                String m = t instanceof ParseException ? t.getMessage() : null;
+                // Ignore the BiDi complaint only. Java does not have the latest spec.
+                // Any other failure makes the label invalid; a tolerated one must not end the
+                // loop, otherwise the remaining labels would go unchecked.
+                if (null == m || !m.startsWith("The input does not conform to the rules for BiDi code points")) {
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+
+    /** Reports whether the label starts with the ACE prefix {@code xn--}, ignoring case. */
+    private static boolean isACE(String value) {
+        return ACE_PREFIX_LENGTH <= value.length() &&
+            ACE_PREFIX.equalsIgnoreCase(value.substring(0, ACE_PREFIX_LENGTH));
+    }
+
+    /** Reports whether the code point is a non-spacing, enclosing or spacing combining mark. */
+    private static boolean isCombiningMark(int codepoint) {
+        switch (getType(codepoint)) {
+            case NON_SPACING_MARK:
+            case ENCLOSING_MARK:
+            case COMBINING_SPACING_MARK:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /** Reports whether the label holds at least one character of Bidi type R, AL or AN. */
+    private static boolean containsRightToLeft(String s) {
+        return s.codePoints().anyMatch(c -> {
+            switch (getDirectionality(c)) {
+                case DIRECTIONALITY_RIGHT_TO_LEFT:
+                case DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
+                case DIRECTIONALITY_ARABIC_NUMBER:
+                    return true;
+                default:
+                    return false;
+            }
+        });
+    }
+
+    /* RFC 5893 1.4 Terminology
+     * L - Left to right - most letters in LTR scripts
+     * R - Right to left - most letters in non-Arabic RTL scripts
+     * AL - Arabic letters - most letters in the Arabic script
+     * EN - European Number (0-9, and Extended Arabic-Indic numbers)
+     * ES - European Number Separator (+ and -)
+     * ET - European Number Terminator (currency symbols, the hash sign, the percent sign and so on)
+     * AN - Arabic Number; this encompasses the Arabic-Indic numbers, but not the Extended Arabic-Indic numbers
+     * CS - Common Number Separator (. , / : et al)
+     * NSM - Nonspacing Mark - most combining accents
+     * BN - Boundary Neutral - control characters (ZWNJ, ZWJ, and others)
+     * B - Paragraph Separator
+     * S - Segment Separator
+     * WS - Whitespace, including the SPACE character
+     * ON - Other Neutrals, including @, &, parentheses, MIDDLE DOT
+     * LRE, LRO, RLE, RLO, PDF - these are "directional control characters" and are not used in IDNA labels.
+     */
+
+    // RFC 5891 4.2.3.4. Labels Containing Characters Written Right to Left
+    private static boolean testLTR(String s, int i) {
+        int c = s.codePointAt(i);
+        switch (getDirectionality(c)) {
+            case DIRECTIONALITY_LEFT_TO_RIGHT:
+            case DIRECTIONALITY_EUROPEAN_NUMBER:
+            case DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR:
+            case DIRECTIONALITY_COMMON_NUMBER_SEPARATOR:
+            case DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR:
+            case DIRECTIONALITY_OTHER_NEUTRALS:
+            case DIRECTIONALITY_BOUNDARY_NEUTRAL:
+            case DIRECTIONALITY_NONSPACING_MARK:
+                return true;
+            default: return false;
+        }
+    }
+
+    // RFC 5891 4.2.3.4. Labels Containing Characters Written Right to Left
+    private static boolean testRTL(String s, int i) {
+        int c = s.codePointAt(i);
+        switch (getDirectionality(c)) {
+            case DIRECTIONALITY_RIGHT_TO_LEFT:
+            case DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
+            case DIRECTIONALITY_ARABIC_NUMBER:
+            case DIRECTIONALITY_EUROPEAN_NUMBER:
+            case DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR:
+            case DIRECTIONALITY_COMMON_NUMBER_SEPARATOR:
+            case DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR:
+            case DIRECTIONALITY_OTHER_NEUTRALS:
+            case DIRECTIONALITY_BOUNDARY_NEUTRAL:
+            case DIRECTIONALITY_NONSPACING_MARK:
+                return true;
+            default: return false;
+        }
+    }
+
+    /**
+     * Determines whether the GREEK LOWER NUMERAL SIGN (KERAIA) conforms to the RFC 5892 specification.
+     *
+     * @param s Must be a simple Unicode string; i.e., not ACE encoded
+     * @param i the location of the KERAIA within the source label
+     * @return {@code true} if the KERAIA rule is valid at the given location
+     *         or the character at the given position is not the KERAIA character.
+     */
+    private static boolean testGreekLowerNumeralSign(String s, int i) {
+        int c = s.codePointAt(i);
+        if (GREEK_LOWER_NUMERAL_SIGN == c) {
+            // There must be a Greek character after this symbol
+            if (s.length() == 1 + i) return false;
+            int following = s.codePointAt(i + 1);
+            if (!isGreek(following)) return false;
+        }
+        return true;
+    }
+
+    /**
+     * Determines whether the HEBREW PUNCTUATION (GERESH or GERSHAYIM) conforms to the RFC 5892 specification.
+     *
+     * @param s Must be a simple Unicode string; i.e., not ACE encoded
+     * @param i the location of the character within the source label
+     * @return {@code true} if the rule is valid at the given location
+     *         or the character at the given position is not a GERESH or GERSHAYIM character.
+     */
+    private static boolean testHebrewPuncuation(String s, int i) {
+        int c = s.codePointAt(i);
+        if (HEBREW_GERESH == c || HEBREW_GERSHAYIM == c) {
+            // There must be a Hebrew character before this symbol
+            if (0 == i) return false;
+            int preceding = s.codePointAt(i - 1);
+            if (!isHebrew(preceding)) return false;
+        }
+        return true;
+    }
+
+    /**
+     * Determines whether the KATAKANA MIDDLE DOT conforms to the RFC 5892 specification.
+     * <p>
+     * RFC 5892 A.7 accepts the dot when any character of the label belongs to the Hiragana,
+     * Katakana or Han script; the position of that character relative to the dot is irrelevant.
+     *
+     * @param s Must be a simple Unicode string; i.e., not ACE encoded
+     * @param i the location of the character within the source label
+     * @return {@code true} if the rule is valid at the given location
+     *         or the character at the given position is not a KATAKANA MIDDLE DOT character.
+     */
+    private static boolean testKatakanaMiddleDot(String s, int i) {
+        if (KATAKANA_MIDDLE_DOT != s.codePointAt(i)) return true;
+        // The dot itself has script Common, so it can never satisfy the rule on its own.
+        return s.codePoints().anyMatch(RFC5892::isHiraganaKatakanaOrHan);
+    }
+
+    /** Reports whether the code point's Unicode script is Hiragana, Katakana or Han. */
+    private static boolean isHiraganaKatakanaOrHan(int codepoint) {
+        switch (Character.UnicodeScript.of(codepoint)) {
+            case HIRAGANA:
+            case KATAKANA:
+            case HAN:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /**
+     * Determines whether the MIDDLE DOT conforms to the RFC 5892 specification.
+     *
+     * @param s Must be a simple Unicode string; i.e., not ACE encoded
+     * @param i the location of the MIDDLE DOT within the source label
+     * @return {@code true} if the MIDDLE DOT rule is valid at the given location
+     *         or the character at the given position is not the MIDDLE DOT character.
+     */
+    private static boolean testeMiddleDotRule(String s, int i) {
+        // Anything other than the MIDDLE DOT is outside this rule.
+        if (MIDDLE_DOT != s.codePointAt(i)) return true;
+
+        // The dot needs a neighbour on each side before they can be compared.
+        boolean hasBothNeighbours = 0 != i && s.length() != 1 + i;
+        // There must be a 'l' character before and after this symbol
+        return hasBothNeighbours
+            && 'l' == s.codePointAt(i - 1)
+            && 'l' == s.codePointAt(i + 1);
+    }
+
+    /**
+     * Determines whether the ZERO WIDTH JOINER conforms to the RFC 5892 specification.
+     *
+     * @param s Must be a simple Unicode string; i.e., not ACE encoded
+     * @param i the location of the character within the source label
+     * @return {@code true} if the rule is valid at the given location
+     *         or the character at the given position is not a ZERO WIDTH JOINER character.
+     */
+    private static boolean testZeroWidthJoiner(String s, int i) {
+        // Anything other than the ZERO WIDTH JOINER is outside this rule.
+        if (ZERO_WIDTH_JOINER != s.codePointAt(i)) return true;
+
+        // There must be a virama character before this symbol.
+        return 0 < i && VIRAMA == s.codePointAt(i - 1);
+    }
+
+    /**
+     * Determines whether the ZERO WIDTH NON-JOINER conforms to the RFC 5892 specification.
+     *
+     * @param s Must be a simple Unicode string; i.e., not ACE encoded
+     * @param i the location of the character within the source label
+     * @return {@code true} if the rule is valid at the given location
+     *         or the character at the given position is not a ZERO WIDTH NON-JOINER character.
+     */
+    private static boolean testZeroWidthNonJoiner(String s, int i) {
+        int c = s.codePointAt(i);
+        if (ZERO_WIDTH_NON_JOINER == c) {
+            // There must be a virama character before this symbol or
+            // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then True;
+
+            // At the start of the label nothing precedes the ZWNJ, so neither alternative can hold.
+            if (0 == i) return false;
+            int preceding = s.codePointBefore(i);
+            if (VIRAMA == preceding) return true;
+
+            // Left side of the pattern: walk back over transparent characters ...
+            int j = i;
+            while (0 < j && isJoinTypeTransparent(s.codePointBefore(j))) --j;
+            if (0 == j) return false;
+
+            // ... and require a left- or dual-joining character in front of them.
+            preceding = s.codePointBefore(j);
+            if (!isJoinTypeLeft(preceding) && !isJoinTypeDual(preceding)) return false;
+
+            // Right side of the pattern: something must follow the ZWNJ.
+            j = i + 1;
+            int len = s.length();
+            if (len == j) return false;
+
+            // Walk forward over transparent characters ...
+            while (j < len && isJoinTypeTransparent(s.codePointAt(j))) ++j;
+            if (len == j) return false;
+
+            // ... and require a right- or dual-joining character after them.
+            int following = s.codePointAt(j);
+            if (!isJoinTypeRight(following) && !isJoinTypeDual(following)) return false;
+        }
+        return true;
+    }
+
+    /**
+     * Checks that an Arabic-Indic digit does not share a label with Extended Arabic-Indic digits.
+     *
+     * @param s the label under test
+     * @param i the location of the character within the label
+     * @return {@code false} if the character at {@code i} is an Arabic-Indic digit and the label
+     *         contains an Extended Arabic-Indic digit anywhere; {@code true} otherwise
+     */
+    private static boolean testArabicIndicDigit(String s, int i) {
+        int c = s.codePointAt(i);
+        if (isArabicIndicDigit(c)) {
+            return !s.codePoints().anyMatch(UnicodeDatabase::isExtendedArabicIndicDigit);
+        }
+        return true;
+    }
+
+    /**
+     * Checks that an Extended Arabic-Indic digit does not share a label with Arabic-Indic digits.
+     *
+     * @param s the label under test
+     * @param i the location of the character within the label
+     * @return {@code false} if the character at {@code i} is an Extended Arabic-Indic digit and the
+     *         label contains an Arabic-Indic digit anywhere; {@code true} otherwise
+     */
+    private static boolean testExtendedArabicIndicDigit(String s, int i) {
+        int c = s.codePointAt(i);
+        if (isExtendedArabicIndicDigit(c)) {
+            return !s.codePoints().anyMatch(UnicodeDatabase::isArabicIndicDigit);
+        }
+        return true;
+    }
+
+    /**
+     * Fills the CONTEXTJ, CONTEXTO, DISALLOWED and UNASSIGNED sets from the bundled
+     * {@code /ucd/RFC5892-appendix-B.txt} table; does nothing once DISALLOWED has entries.
+     */
+    // NOTE(review): callers test isEmpty() on the sets without holding this method's lock, so a
+    // second thread could read a set while it is still being filled - confirm whether that matters.
+    private static synchronized void loadDerivedProperties() {
+        if (DISALLOWED.isEmpty()) {
+            // Any other property value in the table maps to null here.
+            UCDLoader.loadMapping("/ucd/RFC5892-appendix-B.txt", v -> {
+                switch (v) {
+                    case "CONTEXTJ": return CONTEXTJ;
+                    case "CONTEXTO": return CONTEXTO;
+                    case "DISALLOWED": return DISALLOWED;
+                    case "UNASSIGNED": return UNASSIGNED;
+                    default: return null;
+                }
+            });
+
+            // We have IDNA rules for these. 
+            CONTEXTJ.clear(ZERO_WIDTH_JOINER);
+            CONTEXTJ.clear(ZERO_WIDTH_NON_JOINER);
+            CONTEXTO.clear(0x660, 0x066A); // ARABIC-INDIC DIGITS
+            CONTEXTO.clear(0x6F0, 0x06FA); // EXTENDED ARABIC-INDIC DIGITS
+            CONTEXTO.clear(GREEK_LOWER_NUMERAL_SIGN);
+            CONTEXTO.clear(HEBREW_GERESH);
+            CONTEXTO.clear(HEBREW_GERSHAYIM);
+            CONTEXTO.clear(KATAKANA_MIDDLE_DOT);
+            CONTEXTO.clear(MIDDLE_DOT);
+        }
+    }
+
+}
diff --git a/src/main/java/com/networknt/schema/utils/UCDLoader.java b/src/main/java/com/networknt/schema/utils/UCDLoader.java
new file mode 100644
index 000000000..46b577e09
--- /dev/null
+++ b/src/main/java/com/networknt/schema/utils/UCDLoader.java
@@ -0,0 +1,67 @@
+package com.networknt.schema.utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.nio.charset.StandardCharsets;
+import java.util.BitSet;
+import java.util.function.Function;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.networknt.schema.format.IdnHostnameFormat;
+
+/**
+ * Loads Unicode Character Database style property tables from the classpath into {@link BitSet}s.
+ */
+public class UCDLoader {
+    private static final Logger logger = LoggerFactory.getLogger(UCDLoader.class);
+
+    /**
+     * Reads a semicolon-delimited property table and sets the listed code points in the selected sets.
+     * <p>
+     * Each data line has the form {@code XXXX ; VALUE # comment} or {@code XXXX..YYYY ; VALUE # comment},
+     * with hexadecimal code points. Blank lines and lines starting with {@code #} are skipped.
+     * Failures are logged rather than thrown; the sets may then be left partially populated.
+     *
+     * @param filename classpath location of the table, resolved against {@link IdnHostnameFormat}
+     * @param selector maps a line's property value to the set to update, or {@code null} to ignore the line
+     */
+    static void loadMapping(String filename, Function<String, BitSet> selector) {
+        try (InputStream is = IdnHostnameFormat.class.getResourceAsStream(filename)) {
+            if (null == is) {
+                // getResourceAsStream reports a missing resource with null, not with an exception.
+                logger.error("unable to load Unicode data from file '{}': resource not found", filename);
+                return;
+            }
+            // Pin the charset instead of relying on the platform default.
+            try (LineNumberReader rd = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
+                String line;
+                while (null != (line = rd.readLine())) {
+                    if (line.trim().isEmpty() || '#' == line.charAt(0)) continue;
+
+                    String[] s = line.split("\\s*[;#]\\s*", 3);
+                    if (s.length < 2) {
+                        throw new IllegalStateException("Missing property value on line " + rd.getLineNumber());
+                    }
+
+                    BitSet bs = selector.apply(s[1]);
+                    if (null == bs) continue;
+
+                    String[] n = s[0].trim().split("\\.\\.");
+                    switch (n.length) {
+                        case 2: bs.set(Integer.parseUnsignedInt(n[0], 16), 1 + Integer.parseUnsignedInt(n[1], 16)); break;
+                        case 1: bs.set(Integer.parseUnsignedInt(n[0], 16)); break;
+                        default: throw new IllegalStateException("Unable to parse integer range on line " + rd.getLineNumber());
+                    }
+                }
+            }
+        } catch (IllegalArgumentException | IllegalStateException | IOException e) {
+            // NumberFormatException (an IllegalArgumentException) covers malformed code points.
+            logger.error("unable to load Unicode data from file '{}'", filename, e);
+        }
+    }
+
+}
diff --git a/src/main/java/com/networknt/schema/utils/UnicodeDatabase.java b/src/main/java/com/networknt/schema/utils/UnicodeDatabase.java
new file mode 100644
index 000000000..70812ccfb
--- /dev/null
+++ b/src/main/java/com/networknt/schema/utils/UnicodeDatabase.java
@@ -0,0 +1,104 @@
+package com.networknt.schema.utils;
+
+import java.util.BitSet;
+
+public class UnicodeDatabase {
+    private static final BitSet ARABIC_INDIC_DIGITS = new BitSet(0x11000);
+    private static final BitSet EXTENDED_ARABIC_INDIC_DIGITS = new BitSet(0x11000);
+    private static final BitSet GREEK_CHARACTERS = new BitSet(0x2000);
+    private static final BitSet HEBREW_CHARACTERS = new BitSet(0x0600);
+    private static final BitSet KATAKANA_CHARACTERS = new BitSet(0x33000);
+
+    private static final BitSet JOIN_TYPE_CAUSING = new BitSet(0x110000);
+    private static final BitSet JOIN_TYPE_DUAL = new BitSet(0x110000);
+    private static final BitSet JOIN_TYPE_LEFT = new BitSet(0x110000);
+    private static final BitSet JOIN_TYPE_RIGHT = new BitSet(0x110000);
+    private static final BitSet JOIN_TYPE_TRANSPARENT = new BitSet(0x110000);
+
+    static {
+        // TODO: Should we initialize this lazily?
+        ARABIC_INDIC_DIGITS.set(0x0660, 0x066A);
+        EXTENDED_ARABIC_INDIC_DIGITS.set(0x06F0, 0x6FA);
+        GREEK_CHARACTERS.set(0x0370, 0x0400);
+        GREEK_CHARACTERS.set(0x1F00, 0x2000);
+        HEBREW_CHARACTERS.set(0x0590, 0x0600);
+        KATAKANA_CHARACTERS.set(0x2E80, 0x2F00); // The CJK Radicals Supplement code block
+        KATAKANA_CHARACTERS.set(0x2F00, 0x2FE0); // The Kangxi Radicals code block
+        KATAKANA_CHARACTERS.set(0x3000, 0x3040); // The CJK Symbols and Punctuation code block
+        KATAKANA_CHARACTERS.set(0x3040, 0x30A0); // The Hiragana code block.
+        KATAKANA_CHARACTERS.set(0x30A0, 0x3100); // The Katakana code block. 
+        KATAKANA_CHARACTERS.set(0x3400, 0x4DC0); // The CJK Unified Ideographs Extension A code block
+        KATAKANA_CHARACTERS.set(0x4E00, 0xA000); // The CJK Unified Ideographs code block
+        KATAKANA_CHARACTERS.set(0xF900, 0xFB00); // The CJK Compatibility Ideographs code block
+        KATAKANA_CHARACTERS.set(0x16FE0, 0x17000); // The Ideographic Symbols and Punctuation code block
+        KATAKANA_CHARACTERS.set(0x20000, 0x2A6E0); // The CJK Unified Ideographs Extension B code block
+        KATAKANA_CHARACTERS.set(0x2A700, 0x2B740); // The CJK Unified Ideographs Extension C code block
+        KATAKANA_CHARACTERS.set(0x2B740, 0x2B820); // The CJK Unified Ideographs Extension D code block
+        KATAKANA_CHARACTERS.set(0x2B820, 0x2CEB0); // The CJK Unified Ideographs Extension E code block
+        KATAKANA_CHARACTERS.set(0x2CEB0, 0x2EBF0); // The CJK Unified Ideographs Extension F code block
+        KATAKANA_CHARACTERS.set(0x2F800, 0x2FA20); // The CJK Compatibility Ideographs Supplement code block
+        KATAKANA_CHARACTERS.set(0x30000, 0x31350); // The CJK Unified Ideographs Extension G code block
+        KATAKANA_CHARACTERS.set(0x31350, 0x323B0); // The CJK Unified Ideographs Extension H code block
+    }
+
+    /** Reports whether the code point is in the ARABIC_INDIC_DIGITS set. */
+    public static boolean isArabicIndicDigit(int codepoint) {
+        return ARABIC_INDIC_DIGITS.get(codepoint);
+    }
+
+    /** Reports whether the code point is in the EXTENDED_ARABIC_INDIC_DIGITS set. */
+    public static boolean isExtendedArabicIndicDigit(int codepoint) {
+        return EXTENDED_ARABIC_INDIC_DIGITS.get(codepoint);
+    }
+
+    /** Reports whether the code point is in the GREEK_CHARACTERS set. */
+    public static boolean isGreek(int codepoint) {
+        return GREEK_CHARACTERS.get(codepoint);
+    }
+
+    /** Reports whether the code point is in the HEBREW_CHARACTERS set. */
+    public static boolean isHebrew(int codepoint) {
+        return HEBREW_CHARACTERS.get(codepoint);
+    }
+
+    /**
+     * Reports whether the code point is in the KATAKANA_CHARACTERS set, which the static
+     * initializer fills with Hiragana, Katakana and several CJK blocks, not Katakana alone.
+     */
+    public static boolean isKatakana(int codepoint) {
+        return KATAKANA_CHARACTERS.get(codepoint);
+    }
+
+    // The joining-type lookups below load their table on first use.
+    // NOTE(review): each isEmpty() test runs outside the lock held by loadJoiningTypes(), so a
+    // second thread could read a set while it is still being filled - confirm whether that matters.
+
+    /** Reports whether the code point is listed with joining type {@code C} in the loaded table. */
+    public static boolean isJoinTypeCausing(int codepoint) {
+        if (JOIN_TYPE_CAUSING.isEmpty()) loadJoiningTypes();
+        return JOIN_TYPE_CAUSING.get(codepoint);
+    }
+
+    /** Reports whether the code point is listed with joining type {@code D} in the loaded table. */
+    public static boolean isJoinTypeDual(int codepoint) {
+        if (JOIN_TYPE_DUAL.isEmpty()) loadJoiningTypes();
+        return JOIN_TYPE_DUAL.get(codepoint);
+    }
+
+    /** Reports whether the code point is listed with joining type {@code L} in the loaded table. */
+    public static boolean isJoinTypeLeft(int codepoint) {
+        if (JOIN_TYPE_LEFT.isEmpty()) loadJoiningTypes();
+        return JOIN_TYPE_LEFT.get(codepoint);
+    }
+
+    /** Reports whether the code point is listed with joining type {@code R} in the loaded table. */
+    public static boolean isJoinTypeRight(int codepoint) {
+        if (JOIN_TYPE_RIGHT.isEmpty()) loadJoiningTypes();
+        return JOIN_TYPE_RIGHT.get(codepoint);
+    }
+
+    /** Reports whether the code point is listed with joining type {@code T} in the loaded table. */
+    public static boolean isJoinTypeTransparent(int codepoint) {
+        if (JOIN_TYPE_TRANSPARENT.isEmpty()) loadJoiningTypes();
+        return JOIN_TYPE_TRANSPARENT.get(codepoint);
+    }
+
+    /**
+     * Fills the five joining-type sets from {@code /ucd/extracted/DerivedJoiningType.txt}.
+     * Only JOIN_TYPE_DUAL is consulted to decide whether the table still needs loading.
+     */
+    private static synchronized void loadJoiningTypes() {
+        if (JOIN_TYPE_DUAL.isEmpty()) {
+            // Any joining type other than C, D, L, R or T maps to null here.
+            UCDLoader.loadMapping("/ucd/extracted/DerivedJoiningType.txt", v -> {
+                switch (v) {
+                    case "C": return JOIN_TYPE_CAUSING;
+                    case "D": return JOIN_TYPE_DUAL;
+                    case "L": return JOIN_TYPE_LEFT;
+                    case "R": return JOIN_TYPE_RIGHT;
+                    case "T": return JOIN_TYPE_TRANSPARENT;
+                    default: return null;
+                }
+            });
+        }
+    }
+
+}
diff --git a/src/main/resources/ucd/RFC5892-appendix-B.txt b/src/main/resources/ucd/RFC5892-appendix-B.txt
new file mode 100644
index 000000000..2ac7f8a3e
--- /dev/null
+++ b/src/main/resources/ucd/RFC5892-appendix-B.txt
@@ -0,0 +1,2321 @@
+0000..002C ; DISALLOWED # ..COMMA
+002D ; PVALID # HYPHEN-MINUS
+002E..002F ; DISALLOWED # FULL STOP..SOLIDUS
+0030..0039 ; PVALID # DIGIT ZERO..DIGIT NINE
+003A..0060 ; DISALLOWED # COLON..GRAVE ACCENT
+0061..007A ; PVALID # LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+007B..00B6 ; DISALLOWED # LEFT CURLY BRACKET..PILCROW SIGN
+00B7 ; CONTEXTO # MIDDLE DOT
+00B8..00DE ; DISALLOWED # CEDILLA..LATIN CAPITAL LETTER THORN
+00DF..00F6 ; PVALID # LATIN SMALL LETTER SHARP S..LATIN SMALL LETT
+00F7 ; DISALLOWED # DIVISION SIGN
+00F8..00FF ; PVALID # LATIN SMALL LETTER O WITH STROKE..LATIN SMAL
+0100 ; DISALLOWED # LATIN CAPITAL LETTER A WITH MACRON
+0101 ; PVALID # LATIN SMALL LETTER A WITH MACRON
+0102 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE
+0103 ; PVALID # LATIN SMALL LETTER A WITH BREVE
+0104 ; DISALLOWED # 
LATIN CAPITAL LETTER A WITH OGONEK +0105 ; PVALID # LATIN SMALL LETTER A WITH OGONEK +0106 ; DISALLOWED # LATIN CAPITAL LETTER C WITH ACUTE +0107 ; PVALID # LATIN SMALL LETTER C WITH ACUTE +0108 ; DISALLOWED # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +0109 ; PVALID # LATIN SMALL LETTER C WITH CIRCUMFLEX +010A ; DISALLOWED # LATIN CAPITAL LETTER C WITH DOT ABOVE +010B ; PVALID # LATIN SMALL LETTER C WITH DOT ABOVE +010C ; DISALLOWED # LATIN CAPITAL LETTER C WITH CARON +010D ; PVALID # LATIN SMALL LETTER C WITH CARON +010E ; DISALLOWED # LATIN CAPITAL LETTER D WITH CARON +010F ; PVALID # LATIN SMALL LETTER D WITH CARON +0110 ; DISALLOWED # LATIN CAPITAL LETTER D WITH STROKE +0111 ; PVALID # LATIN SMALL LETTER D WITH STROKE +0112 ; DISALLOWED # LATIN CAPITAL LETTER E WITH MACRON +0113 ; PVALID # LATIN SMALL LETTER E WITH MACRON +0114 ; DISALLOWED # LATIN CAPITAL LETTER E WITH BREVE +0115 ; PVALID # LATIN SMALL LETTER E WITH BREVE +0116 ; DISALLOWED # LATIN CAPITAL LETTER E WITH DOT ABOVE +0117 ; PVALID # LATIN SMALL LETTER E WITH DOT ABOVE +0118 ; DISALLOWED # LATIN CAPITAL LETTER E WITH OGONEK +0119 ; PVALID # LATIN SMALL LETTER E WITH OGONEK +011A ; DISALLOWED # LATIN CAPITAL LETTER E WITH CARON +011B ; PVALID # LATIN SMALL LETTER E WITH CARON +011C ; DISALLOWED # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011D ; PVALID # LATIN SMALL LETTER G WITH CIRCUMFLEX +011E ; DISALLOWED # LATIN CAPITAL LETTER G WITH BREVE +011F ; PVALID # LATIN SMALL LETTER G WITH BREVE +0120 ; DISALLOWED # LATIN CAPITAL LETTER G WITH DOT ABOVE +0121 ; PVALID # LATIN SMALL LETTER G WITH DOT ABOVE +0122 ; DISALLOWED # LATIN CAPITAL LETTER G WITH CEDILLA +0123 ; PVALID # LATIN SMALL LETTER G WITH CEDILLA +0124 ; DISALLOWED # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0125 ; PVALID # LATIN SMALL LETTER H WITH CIRCUMFLEX +0126 ; DISALLOWED # LATIN CAPITAL LETTER H WITH STROKE +0127 ; PVALID # LATIN SMALL LETTER H WITH STROKE +0128 ; DISALLOWED # LATIN CAPITAL LETTER I WITH TILDE +0129 ; PVALID # LATIN 
SMALL LETTER I WITH TILDE +012A ; DISALLOWED # LATIN CAPITAL LETTER I WITH MACRON +012B ; PVALID # LATIN SMALL LETTER I WITH MACRON +012C ; DISALLOWED # LATIN CAPITAL LETTER I WITH BREVE +012D ; PVALID # LATIN SMALL LETTER I WITH BREVE +012E ; DISALLOWED # LATIN CAPITAL LETTER I WITH OGONEK +012F ; PVALID # LATIN SMALL LETTER I WITH OGONEK +0130 ; DISALLOWED # LATIN CAPITAL LETTER I WITH DOT ABOVE +0131 ; PVALID # LATIN SMALL LETTER DOTLESS I +0132..0134 ; DISALLOWED # LATIN CAPITAL LIGATURE IJ..LATIN CAPITAL LET +0135 ; PVALID # LATIN SMALL LETTER J WITH CIRCUMFLEX +0136 ; DISALLOWED # LATIN CAPITAL LETTER K WITH CEDILLA +0137..0138 ; PVALID # LATIN SMALL LETTER K WITH CEDILLA..LATIN SMA +0139 ; DISALLOWED # LATIN CAPITAL LETTER L WITH ACUTE +013A ; PVALID # LATIN SMALL LETTER L WITH ACUTE +013B ; DISALLOWED # LATIN CAPITAL LETTER L WITH CEDILLA +013C ; PVALID # LATIN SMALL LETTER L WITH CEDILLA +013D ; DISALLOWED # LATIN CAPITAL LETTER L WITH CARON +013E ; PVALID # LATIN SMALL LETTER L WITH CARON +013F..0141 ; DISALLOWED # LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATI +0142 ; PVALID # LATIN SMALL LETTER L WITH STROKE +0143 ; DISALLOWED # LATIN CAPITAL LETTER N WITH ACUTE +0144 ; PVALID # LATIN SMALL LETTER N WITH ACUTE +0145 ; DISALLOWED # LATIN CAPITAL LETTER N WITH CEDILLA +0146 ; PVALID # LATIN SMALL LETTER N WITH CEDILLA +0147 ; DISALLOWED # LATIN CAPITAL LETTER N WITH CARON +0148 ; PVALID # LATIN SMALL LETTER N WITH CARON +0149..014A ; DISALLOWED # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE. 
+014B ; PVALID # LATIN SMALL LETTER ENG +014C ; DISALLOWED # LATIN CAPITAL LETTER O WITH MACRON +014D ; PVALID # LATIN SMALL LETTER O WITH MACRON +014E ; DISALLOWED # LATIN CAPITAL LETTER O WITH BREVE +014F ; PVALID # LATIN SMALL LETTER O WITH BREVE +0150 ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0151 ; PVALID # LATIN SMALL LETTER O WITH DOUBLE ACUTE +0152 ; DISALLOWED # LATIN CAPITAL LIGATURE OE +0153 ; PVALID # LATIN SMALL LIGATURE OE +0154 ; DISALLOWED # LATIN CAPITAL LETTER R WITH ACUTE +0155 ; PVALID # LATIN SMALL LETTER R WITH ACUTE +0156 ; DISALLOWED # LATIN CAPITAL LETTER R WITH CEDILLA +0157 ; PVALID # LATIN SMALL LETTER R WITH CEDILLA +0158 ; DISALLOWED # LATIN CAPITAL LETTER R WITH CARON +0159 ; PVALID # LATIN SMALL LETTER R WITH CARON +015A ; DISALLOWED # LATIN CAPITAL LETTER S WITH ACUTE +015B ; PVALID # LATIN SMALL LETTER S WITH ACUTE +015C ; DISALLOWED # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015D ; PVALID # LATIN SMALL LETTER S WITH CIRCUMFLEX +015E ; DISALLOWED # LATIN CAPITAL LETTER S WITH CEDILLA +015F ; PVALID # LATIN SMALL LETTER S WITH CEDILLA +0160 ; DISALLOWED # LATIN CAPITAL LETTER S WITH CARON +0161 ; PVALID # LATIN SMALL LETTER S WITH CARON +0162 ; DISALLOWED # LATIN CAPITAL LETTER T WITH CEDILLA +0163 ; PVALID # LATIN SMALL LETTER T WITH CEDILLA +0164 ; DISALLOWED # LATIN CAPITAL LETTER T WITH CARON +0165 ; PVALID # LATIN SMALL LETTER T WITH CARON +0166 ; DISALLOWED # LATIN CAPITAL LETTER T WITH STROKE +0167 ; PVALID # LATIN SMALL LETTER T WITH STROKE +0168 ; DISALLOWED # LATIN CAPITAL LETTER U WITH TILDE +0169 ; PVALID # LATIN SMALL LETTER U WITH TILDE +016A ; DISALLOWED # LATIN CAPITAL LETTER U WITH MACRON +016B ; PVALID # LATIN SMALL LETTER U WITH MACRON +016C ; DISALLOWED # LATIN CAPITAL LETTER U WITH BREVE +016D ; PVALID # LATIN SMALL LETTER U WITH BREVE +016E ; DISALLOWED # LATIN CAPITAL LETTER U WITH RING ABOVE +016F ; PVALID # LATIN SMALL LETTER U WITH RING ABOVE +0170 ; DISALLOWED # LATIN CAPITAL LETTER U WITH 
DOUBLE ACUTE +0171 ; PVALID # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0172 ; DISALLOWED # LATIN CAPITAL LETTER U WITH OGONEK +0173 ; PVALID # LATIN SMALL LETTER U WITH OGONEK +0174 ; DISALLOWED # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0175 ; PVALID # LATIN SMALL LETTER W WITH CIRCUMFLEX +0176 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0177 ; PVALID # LATIN SMALL LETTER Y WITH CIRCUMFLEX +0178..0179 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN +017A ; PVALID # LATIN SMALL LETTER Z WITH ACUTE +017B ; DISALLOWED # LATIN CAPITAL LETTER Z WITH DOT ABOVE +017C ; PVALID # LATIN SMALL LETTER Z WITH DOT ABOVE +017D ; DISALLOWED # LATIN CAPITAL LETTER Z WITH CARON +017E ; PVALID # LATIN SMALL LETTER Z WITH CARON +017F ; DISALLOWED # LATIN SMALL LETTER LONG S +0180 ; PVALID # LATIN SMALL LETTER B WITH STROKE +0181..0182 ; DISALLOWED # LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPI +0183 ; PVALID # LATIN SMALL LETTER B WITH TOPBAR +0184 ; DISALLOWED # LATIN CAPITAL LETTER TONE SIX +0185 ; PVALID # LATIN SMALL LETTER TONE SIX +0186..0187 ; DISALLOWED # LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL L +0188 ; PVALID # LATIN SMALL LETTER C WITH HOOK +0189..018B ; DISALLOWED # LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITA +018C..018D ; PVALID # LATIN SMALL LETTER D WITH TOPBAR..LATIN SMAL +018E..0191 ; DISALLOWED # LATIN CAPITAL LETTER REVERSED E..LATIN CAPIT +0192 ; PVALID # LATIN SMALL LETTER F WITH HOOK +0193..0194 ; DISALLOWED # LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPI +0195 ; PVALID # LATIN SMALL LETTER HV +0196..0198 ; DISALLOWED # LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LET +0199..019B ; PVALID # LATIN SMALL LETTER K WITH HOOK..LATIN SMALL +019C..019D ; DISALLOWED # LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL +019E ; PVALID # LATIN SMALL LETTER N WITH LONG RIGHT LEG +019F..01A0 ; DISALLOWED # LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LA +01A1 ; PVALID # LATIN SMALL LETTER O WITH HORN +01A2 ; DISALLOWED # LATIN CAPITAL LETTER OI +01A3 ; PVALID # 
LATIN SMALL LETTER OI +01A4 ; DISALLOWED # LATIN CAPITAL LETTER P WITH HOOK +01A5 ; PVALID # LATIN SMALL LETTER P WITH HOOK +01A6..01A7 ; DISALLOWED # LATIN LETTER YR..LATIN CAPITAL LETTER TONE T +01A8 ; PVALID # LATIN SMALL LETTER TONE TWO +01A9 ; DISALLOWED # LATIN CAPITAL LETTER ESH +01AA..01AB ; PVALID # LATIN LETTER REVERSED ESH LOOP..LATIN SMALL +01AC ; DISALLOWED # LATIN CAPITAL LETTER T WITH HOOK +01AD ; PVALID # LATIN SMALL LETTER T WITH HOOK +01AE..01AF ; DISALLOWED # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK.. +01B0 ; PVALID # LATIN SMALL LETTER U WITH HORN +01B1..01B3 ; DISALLOWED # LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL +01B4 ; PVALID # LATIN SMALL LETTER Y WITH HOOK +01B5 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH STROKE +01B6 ; PVALID # LATIN SMALL LETTER Z WITH STROKE +01B7..01B8 ; DISALLOWED # LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETT +01B9..01BB ; PVALID # LATIN SMALL LETTER EZH REVERSED..LATIN LETTE +01BC ; DISALLOWED # LATIN CAPITAL LETTER TONE FIVE +01BD..01C3 ; PVALID # LATIN SMALL LETTER TONE FIVE..LATIN LETTER R +01C4..01CD ; DISALLOWED # LATIN CAPITAL LETTER DZ WITH CARON..LATIN CA +01CE ; PVALID # LATIN SMALL LETTER A WITH CARON +01CF ; DISALLOWED # LATIN CAPITAL LETTER I WITH CARON +01D0 ; PVALID # LATIN SMALL LETTER I WITH CARON +01D1 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CARON +01D2 ; PVALID # LATIN SMALL LETTER O WITH CARON +01D3 ; DISALLOWED # LATIN CAPITAL LETTER U WITH CARON +01D4 ; PVALID # LATIN SMALL LETTER U WITH CARON +01D5 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS AND MA +01D6 ; PVALID # LATIN SMALL LETTER U WITH DIAERESIS AND MACR +01D7 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS AND AC +01D8 ; PVALID # LATIN SMALL LETTER U WITH DIAERESIS AND ACUT +01D9 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS AND CA +01DA ; PVALID # LATIN SMALL LETTER U WITH DIAERESIS AND CARO +01DB ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS AND GR +01DC..01DD ; PVALID # LATIN SMALL LETTER U WITH 
DIAERESIS AND GRAV +01DE ; DISALLOWED # LATIN CAPITAL LETTER A WITH DIAERESIS AND MA +01DF ; PVALID # LATIN SMALL LETTER A WITH DIAERESIS AND MACR +01E0 ; DISALLOWED # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MA +01E1 ; PVALID # LATIN SMALL LETTER A WITH DOT ABOVE AND MACR +01E2 ; DISALLOWED # LATIN CAPITAL LETTER AE WITH MACRON +01E3 ; PVALID # LATIN SMALL LETTER AE WITH MACRON +01E4 ; DISALLOWED # LATIN CAPITAL LETTER G WITH STROKE +01E5 ; PVALID # LATIN SMALL LETTER G WITH STROKE +01E6 ; DISALLOWED # LATIN CAPITAL LETTER G WITH CARON +01E7 ; PVALID # LATIN SMALL LETTER G WITH CARON +01E8 ; DISALLOWED # LATIN CAPITAL LETTER K WITH CARON +01E9 ; PVALID # LATIN SMALL LETTER K WITH CARON +01EA ; DISALLOWED # LATIN CAPITAL LETTER O WITH OGONEK +01EB ; PVALID # LATIN SMALL LETTER O WITH OGONEK +01EC ; DISALLOWED # LATIN CAPITAL LETTER O WITH OGONEK AND MACRO +01ED ; PVALID # LATIN SMALL LETTER O WITH OGONEK AND MACRON +01EE ; DISALLOWED # LATIN CAPITAL LETTER EZH WITH CARON +01EF..01F0 ; PVALID # LATIN SMALL LETTER EZH WITH CARON..LATIN SMA +01F1..01F4 ; DISALLOWED # LATIN CAPITAL LETTER DZ..LATIN CAPITAL LETTE +01F5 ; PVALID # LATIN SMALL LETTER G WITH ACUTE +01F6..01F8 ; DISALLOWED # LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LE +01F9 ; PVALID # LATIN SMALL LETTER N WITH GRAVE +01FA ; DISALLOWED # LATIN CAPITAL LETTER A WITH RING ABOVE AND A +01FB ; PVALID # LATIN SMALL LETTER A WITH RING ABOVE AND ACU +01FC ; DISALLOWED # LATIN CAPITAL LETTER AE WITH ACUTE +01FD ; PVALID # LATIN SMALL LETTER AE WITH ACUTE +01FE ; DISALLOWED # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +01FF ; PVALID # LATIN SMALL LETTER O WITH STROKE AND ACUTE +0200 ; DISALLOWED # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0201 ; PVALID # LATIN SMALL LETTER A WITH DOUBLE GRAVE +0202 ; DISALLOWED # LATIN CAPITAL LETTER A WITH INVERTED BREVE +0203 ; PVALID # LATIN SMALL LETTER A WITH INVERTED BREVE +0204 ; DISALLOWED # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0205 ; PVALID # LATIN SMALL LETTER E 
WITH DOUBLE GRAVE +0206 ; DISALLOWED # LATIN CAPITAL LETTER E WITH INVERTED BREVE +0207 ; PVALID # LATIN SMALL LETTER E WITH INVERTED BREVE +0208 ; DISALLOWED # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +0209 ; PVALID # LATIN SMALL LETTER I WITH DOUBLE GRAVE +020A ; DISALLOWED # LATIN CAPITAL LETTER I WITH INVERTED BREVE +020B ; PVALID # LATIN SMALL LETTER I WITH INVERTED BREVE +020C ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020D ; PVALID # LATIN SMALL LETTER O WITH DOUBLE GRAVE +020E ; DISALLOWED # LATIN CAPITAL LETTER O WITH INVERTED BREVE +020F ; PVALID # LATIN SMALL LETTER O WITH INVERTED BREVE +0210 ; DISALLOWED # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0211 ; PVALID # LATIN SMALL LETTER R WITH DOUBLE GRAVE +0212 ; DISALLOWED # LATIN CAPITAL LETTER R WITH INVERTED BREVE +0213 ; PVALID # LATIN SMALL LETTER R WITH INVERTED BREVE +0214 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0215 ; PVALID # LATIN SMALL LETTER U WITH DOUBLE GRAVE +0216 ; DISALLOWED # LATIN CAPITAL LETTER U WITH INVERTED BREVE +0217 ; PVALID # LATIN SMALL LETTER U WITH INVERTED BREVE +0218 ; DISALLOWED # LATIN CAPITAL LETTER S WITH COMMA BELOW +0219 ; PVALID # LATIN SMALL LETTER S WITH COMMA BELOW +021A ; DISALLOWED # LATIN CAPITAL LETTER T WITH COMMA BELOW +021B ; PVALID # LATIN SMALL LETTER T WITH COMMA BELOW +021C ; DISALLOWED # LATIN CAPITAL LETTER YOGH +021D ; PVALID # LATIN SMALL LETTER YOGH +021E ; DISALLOWED # LATIN CAPITAL LETTER H WITH CARON +021F ; PVALID # LATIN SMALL LETTER H WITH CARON +0220 ; DISALLOWED # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0221 ; PVALID # LATIN SMALL LETTER D WITH CURL +0222 ; DISALLOWED # LATIN CAPITAL LETTER OU +0223 ; PVALID # LATIN SMALL LETTER OU +0224 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH HOOK +0225 ; PVALID # LATIN SMALL LETTER Z WITH HOOK +0226 ; DISALLOWED # LATIN CAPITAL LETTER A WITH DOT ABOVE +0227 ; PVALID # LATIN SMALL LETTER A WITH DOT ABOVE +0228 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CEDILLA +0229 ; 
PVALID # LATIN SMALL LETTER E WITH CEDILLA +022A ; DISALLOWED # LATIN CAPITAL LETTER O WITH DIAERESIS AND MA +022B ; PVALID # LATIN SMALL LETTER O WITH DIAERESIS AND MACR +022C ; DISALLOWED # LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022D ; PVALID # LATIN SMALL LETTER O WITH TILDE AND MACRON +022E ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOT ABOVE +022F ; PVALID # LATIN SMALL LETTER O WITH DOT ABOVE +0230 ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MA +0231 ; PVALID # LATIN SMALL LETTER O WITH DOT ABOVE AND MACR +0232 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH MACRON +0233..0239 ; PVALID # LATIN SMALL LETTER Y WITH MACRON..LATIN SMAL +023A..023B ; DISALLOWED # LATIN CAPITAL LETTER A WITH STROKE..LATIN CA +023C ; PVALID # LATIN SMALL LETTER C WITH STROKE +023D..023E ; DISALLOWED # LATIN CAPITAL LETTER L WITH BAR..LATIN CAPIT +023F..0240 ; PVALID # LATIN SMALL LETTER S WITH SWASH TAIL..LATIN +0241 ; DISALLOWED # LATIN CAPITAL LETTER GLOTTAL STOP +0242 ; PVALID # LATIN SMALL LETTER GLOTTAL STOP +0243..0246 ; DISALLOWED # LATIN CAPITAL LETTER B WITH STROKE..LATIN CA +0247 ; PVALID # LATIN SMALL LETTER E WITH STROKE +0248 ; DISALLOWED # LATIN CAPITAL LETTER J WITH STROKE +0249 ; PVALID # LATIN SMALL LETTER J WITH STROKE +024A ; DISALLOWED # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024B ; PVALID # LATIN SMALL LETTER Q WITH HOOK TAIL +024C ; DISALLOWED # LATIN CAPITAL LETTER R WITH STROKE +024D ; PVALID # LATIN SMALL LETTER R WITH STROKE +024E ; DISALLOWED # LATIN CAPITAL LETTER Y WITH STROKE +024F..02AF ; PVALID # LATIN SMALL LETTER Y WITH STROKE..LATIN SMAL +02B0..02B8 ; DISALLOWED # MODIFIER LETTER SMALL H..MODIFIER LETTER SMA +02B9..02C1 ; PVALID # MODIFIER LETTER PRIME..MODIFIER LETTER REVER +02C2..02C5 ; DISALLOWED # MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LET +02C6..02D1 ; PVALID # MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER +02D2..02EB ; DISALLOWED # MODIFIER LETTER CENTRED RIGHT HALF RING..MOD +02EC ; PVALID # MODIFIER LETTER VOICING +02ED 
; DISALLOWED # MODIFIER LETTER UNASPIRATED +02EE ; PVALID # MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; DISALLOWED # MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER +0300..033F ; PVALID # COMBINING GRAVE ACCENT..COMBINING DOUBLE OVE +0340..0341 ; DISALLOWED # COMBINING GRAVE TONE MARK..COMBINING ACUTE T +0342 ; PVALID # COMBINING GREEK PERISPOMENI +0343..0345 ; DISALLOWED # COMBINING GREEK KORONIS..COMBINING GREEK YPO +0346..034E ; PVALID # COMBINING BRIDGE ABOVE..COMBINING UPWARDS AR +034F ; DISALLOWED # COMBINING GRAPHEME JOINER +0350..036F ; PVALID # COMBINING RIGHT ARROWHEAD ABOVE..COMBINING L +0370 ; DISALLOWED # GREEK CAPITAL LETTER HETA +0371 ; PVALID # GREEK SMALL LETTER HETA +0372 ; DISALLOWED # GREEK CAPITAL LETTER ARCHAIC SAMPI +0373 ; PVALID # GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; DISALLOWED # GREEK NUMERAL SIGN +0375 ; CONTEXTO # GREEK LOWER NUMERAL SIGN +0376 ; DISALLOWED # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +0377 ; PVALID # GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +0378..0379 ; UNASSIGNED # .. +037A ; DISALLOWED # GREEK YPOGEGRAMMENI +037B..037D ; PVALID # GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GR +037E ; DISALLOWED # GREEK QUESTION MARK +037F..0383 ; UNASSIGNED # .. 
+0384..038A ; DISALLOWED # GREEK TONOS..GREEK CAPITAL LETTER IOTA WITH +038B ; UNASSIGNED # +038C ; DISALLOWED # GREEK CAPITAL LETTER OMICRON WITH TONOS +038D ; UNASSIGNED # +038E..038F ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH TONOS..GRE +0390 ; PVALID # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND T +0391..03A1 ; DISALLOWED # GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LE +03A2 ; UNASSIGNED # +03A3..03AB ; DISALLOWED # GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LE +03AC..03CE ; PVALID # GREEK SMALL LETTER ALPHA WITH TONOS..GREEK S +03CF..03D6 ; DISALLOWED # GREEK CAPITAL KAI SYMBOL..GREEK PI SYMBOL +03D7 ; PVALID # GREEK KAI SYMBOL +03D8 ; DISALLOWED # GREEK LETTER ARCHAIC KOPPA +03D9 ; PVALID # GREEK SMALL LETTER ARCHAIC KOPPA +03DA ; DISALLOWED # GREEK LETTER STIGMA +03DB ; PVALID # GREEK SMALL LETTER STIGMA +03DC ; DISALLOWED # GREEK LETTER DIGAMMA +03DD ; PVALID # GREEK SMALL LETTER DIGAMMA +03DE ; DISALLOWED # GREEK LETTER KOPPA +03DF ; PVALID # GREEK SMALL LETTER KOPPA +03E0 ; DISALLOWED # GREEK LETTER SAMPI +03E1 ; PVALID # GREEK SMALL LETTER SAMPI +03E2 ; DISALLOWED # COPTIC CAPITAL LETTER SHEI +03E3 ; PVALID # COPTIC SMALL LETTER SHEI +03E4 ; DISALLOWED # COPTIC CAPITAL LETTER FEI +03E5 ; PVALID # COPTIC SMALL LETTER FEI +03E6 ; DISALLOWED # COPTIC CAPITAL LETTER KHEI +03E7 ; PVALID # COPTIC SMALL LETTER KHEI +03E8 ; DISALLOWED # COPTIC CAPITAL LETTER HORI +03E9 ; PVALID # COPTIC SMALL LETTER HORI +03EA ; DISALLOWED # COPTIC CAPITAL LETTER GANGIA +03EB ; PVALID # COPTIC SMALL LETTER GANGIA +03EC ; DISALLOWED # COPTIC CAPITAL LETTER SHIMA +03ED ; PVALID # COPTIC SMALL LETTER SHIMA +03EE ; DISALLOWED # COPTIC CAPITAL LETTER DEI +03EF ; PVALID # COPTIC SMALL LETTER DEI +03F0..03F2 ; DISALLOWED # GREEK KAPPA SYMBOL..GREEK LUNATE SIGMA SYMBO +03F3 ; PVALID # GREEK LETTER YOT +03F4..03F7 ; DISALLOWED # GREEK CAPITAL THETA SYMBOL..GREEK CAPITAL LE +03F8 ; PVALID # GREEK SMALL LETTER SHO +03F9..03FA ; DISALLOWED # GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAP 
+03FB..03FC ; PVALID # GREEK SMALL LETTER SAN..GREEK RHO WITH STROK +03FD..042F ; DISALLOWED # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL.. +0430..045F ; PVALID # CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETT +0460 ; DISALLOWED # CYRILLIC CAPITAL LETTER OMEGA +0461 ; PVALID # CYRILLIC SMALL LETTER OMEGA +0462 ; DISALLOWED # CYRILLIC CAPITAL LETTER YAT +0463 ; PVALID # CYRILLIC SMALL LETTER YAT +0464 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED E +0465 ; PVALID # CYRILLIC SMALL LETTER IOTIFIED E +0466 ; DISALLOWED # CYRILLIC CAPITAL LETTER LITTLE YUS +0467 ; PVALID # CYRILLIC SMALL LETTER LITTLE YUS +0468 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +0469 ; PVALID # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +046A ; DISALLOWED # CYRILLIC CAPITAL LETTER BIG YUS +046B ; PVALID # CYRILLIC SMALL LETTER BIG YUS +046C ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046D ; PVALID # CYRILLIC SMALL LETTER IOTIFIED BIG YUS +046E ; DISALLOWED # CYRILLIC CAPITAL LETTER KSI +046F ; PVALID # CYRILLIC SMALL LETTER KSI +0470 ; DISALLOWED # CYRILLIC CAPITAL LETTER PSI +0471 ; PVALID # CYRILLIC SMALL LETTER PSI +0472 ; DISALLOWED # CYRILLIC CAPITAL LETTER FITA +0473 ; PVALID # CYRILLIC SMALL LETTER FITA +0474 ; DISALLOWED # CYRILLIC CAPITAL LETTER IZHITSA +0475 ; PVALID # CYRILLIC SMALL LETTER IZHITSA +0476 ; DISALLOWED # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE +0477 ; PVALID # CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GR +0478 ; DISALLOWED # CYRILLIC CAPITAL LETTER UK +0479 ; PVALID # CYRILLIC SMALL LETTER UK +047A ; DISALLOWED # CYRILLIC CAPITAL LETTER ROUND OMEGA +047B ; PVALID # CYRILLIC SMALL LETTER ROUND OMEGA +047C ; DISALLOWED # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047D ; PVALID # CYRILLIC SMALL LETTER OMEGA WITH TITLO +047E ; DISALLOWED # CYRILLIC CAPITAL LETTER OT +047F ; PVALID # CYRILLIC SMALL LETTER OT +0480 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOPPA +0481 ; PVALID # CYRILLIC SMALL LETTER KOPPA +0482 ; DISALLOWED # CYRILLIC THOUSANDS 
SIGN +0483..0487 ; PVALID # COMBINING CYRILLIC TITLO..COMBINING CYRILLIC +0488..048A ; DISALLOWED # COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..C +048B ; PVALID # CYRILLIC SMALL LETTER SHORT I WITH TAIL +048C ; DISALLOWED # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048D ; PVALID # CYRILLIC SMALL LETTER SEMISOFT SIGN +048E ; DISALLOWED # CYRILLIC CAPITAL LETTER ER WITH TICK +048F ; PVALID # CYRILLIC SMALL LETTER ER WITH TICK +0490 ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0491 ; PVALID # CYRILLIC SMALL LETTER GHE WITH UPTURN +0492 ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH STROKE +0493 ; PVALID # CYRILLIC SMALL LETTER GHE WITH STROKE +0494 ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0495 ; PVALID # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +0496 ; DISALLOWED # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0497 ; PVALID # CYRILLIC SMALL LETTER ZHE WITH DESCENDER +0498 ; DISALLOWED # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +0499 ; PVALID # CYRILLIC SMALL LETTER ZE WITH DESCENDER +049A ; DISALLOWED # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049B ; PVALID # CYRILLIC SMALL LETTER KA WITH DESCENDER +049C ; DISALLOWED # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STR +049D ; PVALID # CYRILLIC SMALL LETTER KA WITH VERTICAL STROK +049E ; DISALLOWED # CYRILLIC CAPITAL LETTER KA WITH STROKE +049F ; PVALID # CYRILLIC SMALL LETTER KA WITH STROKE +04A0 ; DISALLOWED # CYRILLIC CAPITAL LETTER BASHKIR KA +04A1 ; PVALID # CYRILLIC SMALL LETTER BASHKIR KA +04A2 ; DISALLOWED # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A3 ; PVALID # CYRILLIC SMALL LETTER EN WITH DESCENDER +04A4 ; DISALLOWED # CYRILLIC CAPITAL LIGATURE EN GHE +04A5 ; PVALID # CYRILLIC SMALL LIGATURE EN GHE +04A6 ; DISALLOWED # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A7 ; PVALID # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +04A8 ; DISALLOWED # CYRILLIC CAPITAL LETTER ABKHASIAN HA +04A9 ; PVALID # CYRILLIC SMALL LETTER ABKHASIAN HA +04AA ; DISALLOWED # CYRILLIC CAPITAL LETTER ES 
WITH DESCENDER +04AB ; PVALID # CYRILLIC SMALL LETTER ES WITH DESCENDER +04AC ; DISALLOWED # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AD ; PVALID # CYRILLIC SMALL LETTER TE WITH DESCENDER +04AE ; DISALLOWED # CYRILLIC CAPITAL LETTER STRAIGHT U +04AF ; PVALID # CYRILLIC SMALL LETTER STRAIGHT U +04B0 ; DISALLOWED # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STRO +04B1 ; PVALID # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +04B2 ; DISALLOWED # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B3 ; PVALID # CYRILLIC SMALL LETTER HA WITH DESCENDER +04B4 ; DISALLOWED # CYRILLIC CAPITAL LIGATURE TE TSE +04B5 ; PVALID # CYRILLIC SMALL LIGATURE TE TSE +04B6 ; DISALLOWED # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B7 ; PVALID # CYRILLIC SMALL LETTER CHE WITH DESCENDER +04B8 ; DISALLOWED # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL ST +04B9 ; PVALID # CYRILLIC SMALL LETTER CHE WITH VERTICAL STRO +04BA ; DISALLOWED # CYRILLIC CAPITAL LETTER SHHA +04BB ; PVALID # CYRILLIC SMALL LETTER SHHA +04BC ; DISALLOWED # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BD ; PVALID # CYRILLIC SMALL LETTER ABKHASIAN CHE +04BE ; DISALLOWED # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH D +04BF ; PVALID # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DES +04C0..04C1 ; DISALLOWED # CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL L +04C2 ; PVALID # CYRILLIC SMALL LETTER ZHE WITH BREVE +04C3 ; DISALLOWED # CYRILLIC CAPITAL LETTER KA WITH HOOK +04C4 ; PVALID # CYRILLIC SMALL LETTER KA WITH HOOK +04C5 ; DISALLOWED # CYRILLIC CAPITAL LETTER EL WITH TAIL +04C6 ; PVALID # CYRILLIC SMALL LETTER EL WITH TAIL +04C7 ; DISALLOWED # CYRILLIC CAPITAL LETTER EN WITH HOOK +04C8 ; PVALID # CYRILLIC SMALL LETTER EN WITH HOOK +04C9 ; DISALLOWED # CYRILLIC CAPITAL LETTER EN WITH TAIL +04CA ; PVALID # CYRILLIC SMALL LETTER EN WITH TAIL +04CB ; DISALLOWED # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CC ; PVALID # CYRILLIC SMALL LETTER KHAKASSIAN CHE +04CD ; DISALLOWED # CYRILLIC CAPITAL LETTER EM WITH TAIL +04CE..04CF ; PVALID # 
CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC +04D0 ; DISALLOWED # CYRILLIC CAPITAL LETTER A WITH BREVE +04D1 ; PVALID # CYRILLIC SMALL LETTER A WITH BREVE +04D2 ; DISALLOWED # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D3 ; PVALID # CYRILLIC SMALL LETTER A WITH DIAERESIS +04D4 ; DISALLOWED # CYRILLIC CAPITAL LIGATURE A IE +04D5 ; PVALID # CYRILLIC SMALL LIGATURE A IE +04D6 ; DISALLOWED # CYRILLIC CAPITAL LETTER IE WITH BREVE +04D7 ; PVALID # CYRILLIC SMALL LETTER IE WITH BREVE +04D8 ; DISALLOWED # CYRILLIC CAPITAL LETTER SCHWA +04D9 ; PVALID # CYRILLIC SMALL LETTER SCHWA +04DA ; DISALLOWED # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DB ; PVALID # CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DC ; DISALLOWED # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DD ; PVALID # CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DE ; DISALLOWED # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04DF ; PVALID # CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04E0 ; DISALLOWED # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E1 ; PVALID # CYRILLIC SMALL LETTER ABKHASIAN DZE +04E2 ; DISALLOWED # CYRILLIC CAPITAL LETTER I WITH MACRON +04E3 ; PVALID # CYRILLIC SMALL LETTER I WITH MACRON +04E4 ; DISALLOWED # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E5 ; PVALID # CYRILLIC SMALL LETTER I WITH DIAERESIS +04E6 ; DISALLOWED # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E7 ; PVALID # CYRILLIC SMALL LETTER O WITH DIAERESIS +04E8 ; DISALLOWED # CYRILLIC CAPITAL LETTER BARRED O +04E9 ; PVALID # CYRILLIC SMALL LETTER BARRED O +04EA ; DISALLOWED # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERE +04EB ; PVALID # CYRILLIC SMALL LETTER BARRED O WITH DIAERESI +04EC ; DISALLOWED # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04ED ; PVALID # CYRILLIC SMALL LETTER E WITH DIAERESIS +04EE ; DISALLOWED # CYRILLIC CAPITAL LETTER U WITH MACRON +04EF ; PVALID # CYRILLIC SMALL LETTER U WITH MACRON +04F0 ; DISALLOWED # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F1 ; PVALID # CYRILLIC SMALL LETTER U WITH DIAERESIS +04F2 ; 
DISALLOWED # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F3 ; PVALID # CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F4 ; DISALLOWED # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F5 ; PVALID # CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F6 ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F7 ; PVALID # CYRILLIC SMALL LETTER GHE WITH DESCENDER +04F8 ; DISALLOWED # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04F9 ; PVALID # CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04FA ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND +04FB ; PVALID # CYRILLIC SMALL LETTER GHE WITH STROKE AND HO +04FC ; DISALLOWED # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FD ; PVALID # CYRILLIC SMALL LETTER HA WITH HOOK +04FE ; DISALLOWED # CYRILLIC CAPITAL LETTER HA WITH STROKE +04FF ; PVALID # CYRILLIC SMALL LETTER HA WITH STROKE +0500 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI DE +0501 ; PVALID # CYRILLIC SMALL LETTER KOMI DE +0502 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI DJE +0503 ; PVALID # CYRILLIC SMALL LETTER KOMI DJE +0504 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI ZJE +0505 ; PVALID # CYRILLIC SMALL LETTER KOMI ZJE +0506 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI DZJE +0507 ; PVALID # CYRILLIC SMALL LETTER KOMI DZJE +0508 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI LJE +0509 ; PVALID # CYRILLIC SMALL LETTER KOMI LJE +050A ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI NJE +050B ; PVALID # CYRILLIC SMALL LETTER KOMI NJE +050C ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI SJE +050D ; PVALID # CYRILLIC SMALL LETTER KOMI SJE +050E ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI TJE +050F ; PVALID # CYRILLIC SMALL LETTER KOMI TJE +0510 ; DISALLOWED # CYRILLIC CAPITAL LETTER REVERSED ZE +0511 ; PVALID # CYRILLIC SMALL LETTER REVERSED ZE +0512 ; DISALLOWED # CYRILLIC CAPITAL LETTER EL WITH HOOK +0513 ; PVALID # CYRILLIC SMALL LETTER EL WITH HOOK +0514 ; DISALLOWED # CYRILLIC CAPITAL LETTER LHA +0515 ; PVALID # CYRILLIC SMALL LETTER LHA +0516 ; DISALLOWED # CYRILLIC CAPITAL 
LETTER RHA +0517 ; PVALID # CYRILLIC SMALL LETTER RHA +0518 ; DISALLOWED # CYRILLIC CAPITAL LETTER YAE +0519 ; PVALID # CYRILLIC SMALL LETTER YAE +051A ; DISALLOWED # CYRILLIC CAPITAL LETTER QA +051B ; PVALID # CYRILLIC SMALL LETTER QA +051C ; DISALLOWED # CYRILLIC CAPITAL LETTER WE +051D ; PVALID # CYRILLIC SMALL LETTER WE +051E ; DISALLOWED # CYRILLIC CAPITAL LETTER ALEUT KA +051F ; PVALID # CYRILLIC SMALL LETTER ALEUT KA +0520 ; DISALLOWED # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0521 ; PVALID # CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK +0522 ; DISALLOWED # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0523 ; PVALID # CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK +0524 ; DISALLOWED # CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0525 ; PVALID # CYRILLIC SMALL LETTER PE WITH DESCENDER +0526..0530 ; UNASSIGNED # .. +0531..0556 ; DISALLOWED # ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITA +0557..0558 ; UNASSIGNED # .. +0559 ; PVALID # ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F ; DISALLOWED # ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION M +0560 ; UNASSIGNED # +0561..0586 ; PVALID # ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LE +0587 ; DISALLOWED # ARMENIAN SMALL LIGATURE ECH YIWN +0588 ; UNASSIGNED # +0589..058A ; DISALLOWED # ARMENIAN FULL STOP..ARMENIAN HYPHEN +058B..0590 ; UNASSIGNED # .. +0591..05BD ; PVALID # HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BE ; DISALLOWED # HEBREW PUNCTUATION MAQAF +05BF ; PVALID # HEBREW POINT RAFE +05C0 ; DISALLOWED # HEBREW PUNCTUATION PASEQ +05C1..05C2 ; PVALID # HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C3 ; DISALLOWED # HEBREW PUNCTUATION SOF PASUQ +05C4..05C5 ; PVALID # HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C6 ; DISALLOWED # HEBREW PUNCTUATION NUN HAFUKHA +05C7 ; PVALID # HEBREW POINT QAMATS QATAN +05C8..05CF ; UNASSIGNED # .. +05D0..05EA ; PVALID # HEBREW LETTER ALEF..HEBREW LETTER TAV +05EB..05EF ; UNASSIGNED # .. 
+05F0..05F2 ; PVALID # HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW L +05F3..05F4 ; CONTEXTO # HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATIO +05F5..05FF ; UNASSIGNED # .. +0600..0603 ; DISALLOWED # ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0604..0605 ; UNASSIGNED # .. +0606..060F ; DISALLOWED # ARABIC-INDIC CUBE ROOT..ARABIC SIGN MISRA +0610..061A ; PVALID # ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..AR +061B ; DISALLOWED # ARABIC SEMICOLON +061C..061D ; UNASSIGNED # .. +061E..061F ; DISALLOWED # ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC Q +0620 ; UNASSIGNED # +0621..063F ; PVALID # ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH +0640 ; DISALLOWED # ARABIC TATWEEL +0641..065E ; PVALID # ARABIC LETTER FEH..ARABIC FATHA WITH TWO DOT +065F ; UNASSIGNED # +0660..0669 ; CONTEXTO # ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT +066A..066D ; DISALLOWED # ARABIC PERCENT SIGN..ARABIC FIVE POINTED STA +066E..0674 ; PVALID # ARABIC LETTER DOTLESS BEH..ARABIC LETTER HIG +0675..0678 ; DISALLOWED # ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER +0679..06D3 ; PVALID # ARABIC LETTER TTEH..ARABIC LETTER YEH BARREE +06D4 ; DISALLOWED # ARABIC FULL STOP +06D5..06DC ; PVALID # ARABIC LETTER AE..ARABIC SMALL HIGH SEEN +06DD..06DE ; DISALLOWED # ARABIC END OF AYAH..ARABIC START OF RUB EL H +06DF..06E8 ; PVALID # ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL +06E9 ; DISALLOWED # ARABIC PLACE OF SAJDAH +06EA..06EF ; PVALID # ARABIC EMPTY CENTRE LOW STOP..ARABIC LETTER +06F0..06F9 ; CONTEXTO # EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED A +06FA..06FF ; PVALID # ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC L +0700..070D ; DISALLOWED # SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN AST +070E ; UNASSIGNED # +070F ; DISALLOWED # SYRIAC ABBREVIATION MARK +0710..074A ; PVALID # SYRIAC LETTER ALAPH..SYRIAC BARREKH +074B..074C ; UNASSIGNED # .. +074D..07B1 ; PVALID # SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER N +07B2..07BF ; UNASSIGNED # .. 
+07C0..07F5 ; PVALID # NKO DIGIT ZERO..NKO LOW TONE APOSTROPHE +07F6..07FA ; DISALLOWED # NKO SYMBOL OO DENNEN..NKO LAJANYALAN +07FB..07FF ; UNASSIGNED # .. +0800..082D ; PVALID # SAMARITAN LETTER ALAF..SAMARITAN MARK NEQUDA +082E..082F ; UNASSIGNED # .. +0830..083E ; DISALLOWED # SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUN +083F..08FF ; UNASSIGNED # .. +0900..0939 ; PVALID # DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANA +093A..093B ; UNASSIGNED # .. +093C..094E ; PVALID # DEVANAGARI SIGN NUKTA..DEVANAGARI VOWEL SIGN +094F ; UNASSIGNED # +0950..0955 ; PVALID # DEVANAGARI OM..DEVANAGARI VOWEL SIGN CANDRA +0956..0957 ; UNASSIGNED # .. +0958..095F ; DISALLOWED # DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA +0960..0963 ; PVALID # DEVANAGARI LETTER VOCALIC RR..DEVANAGARI VOW +0964..0965 ; DISALLOWED # DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0966..096F ; PVALID # DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; DISALLOWED # DEVANAGARI ABBREVIATION SIGN +0971..0972 ; PVALID # DEVANAGARI SIGN HIGH SPACING DOT..DEVANAGARI +0973..0978 ; UNASSIGNED # .. +0979..097F ; PVALID # DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA +0980 ; UNASSIGNED # +0981..0983 ; PVALID # BENGALI SIGN CANDRABINDU..BENGALI SIGN VISAR +0984 ; UNASSIGNED # +0985..098C ; PVALID # BENGALI LETTER A..BENGALI LETTER VOCALIC L +098D..098E ; UNASSIGNED # .. +098F..0990 ; PVALID # BENGALI LETTER E..BENGALI LETTER AI +0991..0992 ; UNASSIGNED # .. +0993..09A8 ; PVALID # BENGALI LETTER O..BENGALI LETTER NA +09A9 ; UNASSIGNED # +09AA..09B0 ; PVALID # BENGALI LETTER PA..BENGALI LETTER RA +09B1 ; UNASSIGNED # +09B2 ; PVALID # BENGALI LETTER LA +09B3..09B5 ; UNASSIGNED # .. +09B6..09B9 ; PVALID # BENGALI LETTER SHA..BENGALI LETTER HA +09BA..09BB ; UNASSIGNED # .. +09BC..09C4 ; PVALID # BENGALI SIGN NUKTA..BENGALI VOWEL SIGN VOCAL +09C5..09C6 ; UNASSIGNED # .. +09C7..09C8 ; PVALID # BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09C9..09CA ; UNASSIGNED # .. 
+09CB..09CE ; PVALID # BENGALI VOWEL SIGN O..BENGALI LETTER KHANDA +09CF..09D6 ; UNASSIGNED # .. +09D7 ; PVALID # BENGALI AU LENGTH MARK +09D8..09DB ; UNASSIGNED # .. +09DC..09DD ; DISALLOWED # BENGALI LETTER RRA..BENGALI LETTER RHA +09DE ; UNASSIGNED # +09DF ; DISALLOWED # BENGALI LETTER YYA +09E0..09E3 ; PVALID # BENGALI LETTER VOCALIC RR..BENGALI VOWEL SIG +09E4..09E5 ; UNASSIGNED # .. +09E6..09F1 ; PVALID # BENGALI DIGIT ZERO..BENGALI LETTER RA WITH L +09F2..09FB ; DISALLOWED # BENGALI RUPEE MARK..BENGALI GANDA MARK +09FC..0A00 ; UNASSIGNED # .. +0A01..0A03 ; PVALID # GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN VISA +0A04 ; UNASSIGNED # +0A05..0A0A ; PVALID # GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0B..0A0E ; UNASSIGNED # .. +0A0F..0A10 ; PVALID # GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A11..0A12 ; UNASSIGNED # .. +0A13..0A28 ; PVALID # GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A29 ; UNASSIGNED # +0A2A..0A30 ; PVALID # GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A31 ; UNASSIGNED # +0A32 ; PVALID # GURMUKHI LETTER LA +0A33 ; DISALLOWED # GURMUKHI LETTER LLA +0A34 ; UNASSIGNED # +0A35 ; PVALID # GURMUKHI LETTER VA +0A36 ; DISALLOWED # GURMUKHI LETTER SHA +0A37 ; UNASSIGNED # +0A38..0A39 ; PVALID # GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3A..0A3B ; UNASSIGNED # .. +0A3C ; PVALID # GURMUKHI SIGN NUKTA +0A3D ; UNASSIGNED # +0A3E..0A42 ; PVALID # GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN +0A43..0A46 ; UNASSIGNED # .. +0A47..0A48 ; PVALID # GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN +0A49..0A4A ; UNASSIGNED # .. +0A4B..0A4D ; PVALID # GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A4E..0A50 ; UNASSIGNED # .. +0A51 ; PVALID # GURMUKHI SIGN UDAAT +0A52..0A58 ; UNASSIGNED # .. +0A59..0A5B ; DISALLOWED # GURMUKHI LETTER KHHA..GURMUKHI LETTER ZA +0A5C ; PVALID # GURMUKHI LETTER RRA +0A5D ; UNASSIGNED # +0A5E ; DISALLOWED # GURMUKHI LETTER FA +0A5F..0A65 ; UNASSIGNED # .. +0A66..0A75 ; PVALID # GURMUKHI DIGIT ZERO..GURMUKHI SIGN YAKASH +0A76..0A80 ; UNASSIGNED # .. 
+0A81..0A83 ; PVALID # GUJARATI SIGN CANDRABINDU..GUJARATI SIGN VIS +0A84 ; UNASSIGNED # +0A85..0A8D ; PVALID # GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8E ; UNASSIGNED # +0A8F..0A91 ; PVALID # GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A92 ; UNASSIGNED # +0A93..0AA8 ; PVALID # GUJARATI LETTER O..GUJARATI LETTER NA +0AA9 ; UNASSIGNED # +0AAA..0AB0 ; PVALID # GUJARATI LETTER PA..GUJARATI LETTER RA +0AB1 ; UNASSIGNED # +0AB2..0AB3 ; PVALID # GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB4 ; UNASSIGNED # +0AB5..0AB9 ; PVALID # GUJARATI LETTER VA..GUJARATI LETTER HA +0ABA..0ABB ; UNASSIGNED # .. +0ABC..0AC5 ; PVALID # GUJARATI SIGN NUKTA..GUJARATI VOWEL SIGN CAN +0AC6 ; UNASSIGNED # +0AC7..0AC9 ; PVALID # GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN C +0ACA ; UNASSIGNED # +0ACB..0ACD ; PVALID # GUJARATI VOWEL SIGN O..GUJARATI SIGN VIRAMA +0ACE..0ACF ; UNASSIGNED # .. +0AD0 ; PVALID # GUJARATI OM +0AD1..0ADF ; UNASSIGNED # .. +0AE0..0AE3 ; PVALID # GUJARATI LETTER VOCALIC RR..GUJARATI VOWEL S +0AE4..0AE5 ; UNASSIGNED # .. +0AE6..0AEF ; PVALID # GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; UNASSIGNED # +0AF1 ; DISALLOWED # GUJARATI RUPEE SIGN +0AF2..0B00 ; UNASSIGNED # .. +0B01..0B03 ; PVALID # ORIYA SIGN CANDRABINDU..ORIYA SIGN VISARGA +0B04 ; UNASSIGNED # +0B05..0B0C ; PVALID # ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0D..0B0E ; UNASSIGNED # .. +0B0F..0B10 ; PVALID # ORIYA LETTER E..ORIYA LETTER AI +0B11..0B12 ; UNASSIGNED # .. +0B13..0B28 ; PVALID # ORIYA LETTER O..ORIYA LETTER NA +0B29 ; UNASSIGNED # +0B2A..0B30 ; PVALID # ORIYA LETTER PA..ORIYA LETTER RA +0B31 ; UNASSIGNED # +0B32..0B33 ; PVALID # ORIYA LETTER LA..ORIYA LETTER LLA +0B34 ; UNASSIGNED # +0B35..0B39 ; PVALID # ORIYA LETTER VA..ORIYA LETTER HA +0B3A..0B3B ; UNASSIGNED # .. +0B3C..0B44 ; PVALID # ORIYA SIGN NUKTA..ORIYA VOWEL SIGN VOCALIC R +0B45..0B46 ; UNASSIGNED # .. +0B47..0B48 ; PVALID # ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B49..0B4A ; UNASSIGNED # .. 
+0B4B..0B4D ; PVALID # ORIYA VOWEL SIGN O..ORIYA SIGN VIRAMA +0B4E..0B55 ; UNASSIGNED # .. +0B56..0B57 ; PVALID # ORIYA AI LENGTH MARK..ORIYA AU LENGTH MARK +0B58..0B5B ; UNASSIGNED # .. +0B5C..0B5D ; DISALLOWED # ORIYA LETTER RRA..ORIYA LETTER RHA +0B5E ; UNASSIGNED # +0B5F..0B63 ; PVALID # ORIYA LETTER YYA..ORIYA VOWEL SIGN VOCALIC L +0B64..0B65 ; UNASSIGNED # .. +0B66..0B6F ; PVALID # ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; DISALLOWED # ORIYA ISSHAR +0B71 ; PVALID # ORIYA LETTER WA +0B72..0B81 ; UNASSIGNED # .. +0B82..0B83 ; PVALID # TAMIL SIGN ANUSVARA..TAMIL SIGN VISARGA +0B84 ; UNASSIGNED # +0B85..0B8A ; PVALID # TAMIL LETTER A..TAMIL LETTER UU +0B8B..0B8D ; UNASSIGNED # .. +0B8E..0B90 ; PVALID # TAMIL LETTER E..TAMIL LETTER AI +0B91 ; UNASSIGNED # +0B92..0B95 ; PVALID # TAMIL LETTER O..TAMIL LETTER KA +0B96..0B98 ; UNASSIGNED # .. +0B99..0B9A ; PVALID # TAMIL LETTER NGA..TAMIL LETTER CA +0B9B ; UNASSIGNED # +0B9C ; PVALID # TAMIL LETTER JA +0B9D ; UNASSIGNED # +0B9E..0B9F ; PVALID # TAMIL LETTER NYA..TAMIL LETTER TTA +0BA0..0BA2 ; UNASSIGNED # .. +0BA3..0BA4 ; PVALID # TAMIL LETTER NNA..TAMIL LETTER TA +0BA5..0BA7 ; UNASSIGNED # .. +0BA8..0BAA ; PVALID # TAMIL LETTER NA..TAMIL LETTER PA +0BAB..0BAD ; UNASSIGNED # .. +0BAE..0BB9 ; PVALID # TAMIL LETTER MA..TAMIL LETTER HA +0BBA..0BBD ; UNASSIGNED # .. +0BBE..0BC2 ; PVALID # TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN UU +0BC3..0BC5 ; UNASSIGNED # .. +0BC6..0BC8 ; PVALID # TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BC9 ; UNASSIGNED # +0BCA..0BCD ; PVALID # TAMIL VOWEL SIGN O..TAMIL SIGN VIRAMA +0BCE..0BCF ; UNASSIGNED # .. +0BD0 ; PVALID # TAMIL OM +0BD1..0BD6 ; UNASSIGNED # .. +0BD7 ; PVALID # TAMIL AU LENGTH MARK +0BD8..0BE5 ; UNASSIGNED # .. +0BE6..0BEF ; PVALID # TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BFA ; DISALLOWED # TAMIL NUMBER TEN..TAMIL NUMBER SIGN +0BFB..0C00 ; UNASSIGNED # .. 
+0C01..0C03 ; PVALID # TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; UNASSIGNED # +0C05..0C0C ; PVALID # TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0D ; UNASSIGNED # +0C0E..0C10 ; PVALID # TELUGU LETTER E..TELUGU LETTER AI +0C11 ; UNASSIGNED # +0C12..0C28 ; PVALID # TELUGU LETTER O..TELUGU LETTER NA +0C29 ; UNASSIGNED # +0C2A..0C33 ; PVALID # TELUGU LETTER PA..TELUGU LETTER LLA +0C34 ; UNASSIGNED # +0C35..0C39 ; PVALID # TELUGU LETTER VA..TELUGU LETTER HA +0C3A..0C3C ; UNASSIGNED # .. +0C3D..0C44 ; PVALID # TELUGU SIGN AVAGRAHA..TELUGU VOWEL SIGN VOCA +0C45 ; UNASSIGNED # +0C46..0C48 ; PVALID # TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C49 ; UNASSIGNED # +0C4A..0C4D ; PVALID # TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C4E..0C54 ; UNASSIGNED # .. +0C55..0C56 ; PVALID # TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C57 ; UNASSIGNED # +0C58..0C59 ; PVALID # TELUGU LETTER TSA..TELUGU LETTER DZA +0C5A..0C5F ; UNASSIGNED # .. +0C60..0C63 ; PVALID # TELUGU LETTER VOCALIC RR..TELUGU VOWEL SIGN +0C64..0C65 ; UNASSIGNED # .. +0C66..0C6F ; PVALID # TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C70..0C77 ; UNASSIGNED # .. +0C78..0C7F ; DISALLOWED # TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF +0C80..0C81 ; UNASSIGNED # .. +0C82..0C83 ; PVALID # KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C84 ; UNASSIGNED # +0C85..0C8C ; PVALID # KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8D ; UNASSIGNED # +0C8E..0C90 ; PVALID # KANNADA LETTER E..KANNADA LETTER AI +0C91 ; UNASSIGNED # +0C92..0CA8 ; PVALID # KANNADA LETTER O..KANNADA LETTER NA +0CA9 ; UNASSIGNED # +0CAA..0CB3 ; PVALID # KANNADA LETTER PA..KANNADA LETTER LLA +0CB4 ; UNASSIGNED # +0CB5..0CB9 ; PVALID # KANNADA LETTER VA..KANNADA LETTER HA +0CBA..0CBB ; UNASSIGNED # .. 
+0CBC..0CC4 ; PVALID # KANNADA SIGN NUKTA..KANNADA VOWEL SIGN VOCAL +0CC5 ; UNASSIGNED # +0CC6..0CC8 ; PVALID # KANNADA VOWEL SIGN E..KANNADA VOWEL SIGN AI +0CC9 ; UNASSIGNED # +0CCA..0CCD ; PVALID # KANNADA VOWEL SIGN O..KANNADA SIGN VIRAMA +0CCE..0CD4 ; UNASSIGNED # .. +0CD5..0CD6 ; PVALID # KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CD7..0CDD ; UNASSIGNED # .. +0CDE ; PVALID # KANNADA LETTER FA +0CDF ; UNASSIGNED # +0CE0..0CE3 ; PVALID # KANNADA LETTER VOCALIC RR..KANNADA VOWEL SIG +0CE4..0CE5 ; UNASSIGNED # .. +0CE6..0CEF ; PVALID # KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF0 ; UNASSIGNED # +0CF1..0CF2 ; DISALLOWED # KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADH +0CF3..0D01 ; UNASSIGNED # .. +0D02..0D03 ; PVALID # MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISA +0D04 ; UNASSIGNED # +0D05..0D0C ; PVALID # MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC +0D0D ; UNASSIGNED # +0D0E..0D10 ; PVALID # MALAYALAM LETTER E..MALAYALAM LETTER AI +0D11 ; UNASSIGNED # +0D12..0D28 ; PVALID # MALAYALAM LETTER O..MALAYALAM LETTER NA +0D29 ; UNASSIGNED # +0D2A..0D39 ; PVALID # MALAYALAM LETTER PA..MALAYALAM LETTER HA +0D3A..0D3C ; UNASSIGNED # .. +0D3D..0D44 ; PVALID # MALAYALAM SIGN AVAGRAHA..MALAYALAM VOWEL SIG +0D45 ; UNASSIGNED # +0D46..0D48 ; PVALID # MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN +0D49 ; UNASSIGNED # +0D4A..0D4D ; PVALID # MALAYALAM VOWEL SIGN O..MALAYALAM SIGN VIRAM +0D4E..0D56 ; UNASSIGNED # .. +0D57 ; PVALID # MALAYALAM AU LENGTH MARK +0D58..0D5F ; UNASSIGNED # .. +0D60..0D63 ; PVALID # MALAYALAM LETTER VOCALIC RR..MALAYALAM VOWEL +0D64..0D65 ; UNASSIGNED # .. +0D66..0D6F ; PVALID # MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D75 ; DISALLOWED # MALAYALAM NUMBER TEN..MALAYALAM FRACTION THR +0D76..0D78 ; UNASSIGNED # .. +0D79 ; DISALLOWED # MALAYALAM DATE MARK +0D7A..0D7F ; PVALID # MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER +0D80..0D81 ; UNASSIGNED # .. 
+0D82..0D83 ; PVALID # SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARG +0D84 ; UNASSIGNED # +0D85..0D96 ; PVALID # SINHALA LETTER AYANNA..SINHALA LETTER AUYANN +0D97..0D99 ; UNASSIGNED # .. +0D9A..0DB1 ; PVALID # SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA L +0DB2 ; UNASSIGNED # +0DB3..0DBB ; PVALID # SINHALA LETTER SANYAKA DAYANNA..SINHALA LETT +0DBC ; UNASSIGNED # +0DBD ; PVALID # SINHALA LETTER DANTAJA LAYANNA +0DBE..0DBF ; UNASSIGNED # .. +0DC0..0DC6 ; PVALID # SINHALA LETTER VAYANNA..SINHALA LETTER FAYAN +0DC7..0DC9 ; UNASSIGNED # .. +0DCA ; PVALID # SINHALA SIGN AL-LAKUNA +0DCB..0DCE ; UNASSIGNED # .. +0DCF..0DD4 ; PVALID # SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL +0DD5 ; UNASSIGNED # +0DD6 ; PVALID # SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD7 ; UNASSIGNED # +0DD8..0DDF ; PVALID # SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOW +0DE0..0DF1 ; UNASSIGNED # .. +0DF2..0DF3 ; PVALID # SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHAL +0DF4 ; DISALLOWED # SINHALA PUNCTUATION KUNDDALIYA +0DF5..0E00 ; UNASSIGNED # .. +0E01..0E32 ; PVALID # THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E33 ; DISALLOWED # THAI CHARACTER SARA AM +0E34..0E3A ; PVALID # THAI CHARACTER SARA I..THAI CHARACTER PHINTH +0E3B..0E3E ; UNASSIGNED # .. +0E3F ; DISALLOWED # THAI CURRENCY SYMBOL BAHT +0E40..0E4E ; PVALID # THAI CHARACTER SARA E..THAI CHARACTER YAMAKK +0E4F ; DISALLOWED # THAI CHARACTER FONGMAN +0E50..0E59 ; PVALID # THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; DISALLOWED # THAI CHARACTER ANGKHANKHU..THAI CHARACTER KH +0E5C..0E80 ; UNASSIGNED # .. +0E81..0E82 ; PVALID # LAO LETTER KO..LAO LETTER KHO SUNG +0E83 ; UNASSIGNED # +0E84 ; PVALID # LAO LETTER KHO TAM +0E85..0E86 ; UNASSIGNED # .. +0E87..0E88 ; PVALID # LAO LETTER NGO..LAO LETTER CO +0E89 ; UNASSIGNED # +0E8A ; PVALID # LAO LETTER SO TAM +0E8B..0E8C ; UNASSIGNED # .. +0E8D ; PVALID # LAO LETTER NYO +0E8E..0E93 ; UNASSIGNED # .. 
+0E94..0E97 ; PVALID # LAO LETTER DO..LAO LETTER THO TAM +0E98 ; UNASSIGNED # +0E99..0E9F ; PVALID # LAO LETTER NO..LAO LETTER FO SUNG +0EA0 ; UNASSIGNED # +0EA1..0EA3 ; PVALID # LAO LETTER MO..LAO LETTER LO LING +0EA4 ; UNASSIGNED # +0EA5 ; PVALID # LAO LETTER LO LOOT +0EA6 ; UNASSIGNED # +0EA7 ; PVALID # LAO LETTER WO +0EA8..0EA9 ; UNASSIGNED # .. +0EAA..0EAB ; PVALID # LAO LETTER SO SUNG..LAO LETTER HO SUNG +0EAC ; UNASSIGNED # +0EAD..0EB2 ; PVALID # LAO LETTER O..LAO VOWEL SIGN AA +0EB3 ; DISALLOWED # LAO VOWEL SIGN AM +0EB4..0EB9 ; PVALID # LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBA ; UNASSIGNED # +0EBB..0EBD ; PVALID # LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN N +0EBE..0EBF ; UNASSIGNED # .. +0EC0..0EC4 ; PVALID # LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC5 ; UNASSIGNED # +0EC6 ; PVALID # LAO KO LA +0EC7 ; UNASSIGNED # +0EC8..0ECD ; PVALID # LAO TONE MAI EK..LAO NIGGAHITA +0ECE..0ECF ; UNASSIGNED # .. +0ED0..0ED9 ; PVALID # LAO DIGIT ZERO..LAO DIGIT NINE +0EDA..0EDB ; UNASSIGNED # .. +0EDC..0EDD ; DISALLOWED # LAO HO NO..LAO HO MO +0EDE..0EFF ; UNASSIGNED # .. 
+0F00 ; PVALID # TIBETAN SYLLABLE OM +0F01..0F0A ; DISALLOWED # TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBET +0F0B ; PVALID # TIBETAN MARK INTERSYLLABIC TSHEG +0F0C..0F17 ; DISALLOWED # TIBETAN MARK DELIMITER TSHEG BSTAR..TIBETAN +0F18..0F19 ; PVALID # TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN +0F1A..0F1F ; DISALLOWED # TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RD +0F20..0F29 ; PVALID # TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F34 ; DISALLOWED # TIBETAN DIGIT HALF ONE..TIBETAN MARK BSDUS R +0F35 ; PVALID # TIBETAN MARK NGAS BZUNG NYI ZLA +0F36 ; DISALLOWED # TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F37 ; PVALID # TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F38 ; DISALLOWED # TIBETAN MARK CHE MGO +0F39 ; PVALID # TIBETAN MARK TSA -PHRU +0F3A..0F3D ; DISALLOWED # TIBETAN MARK GUG RTAGS GYON..TIBETAN MARK AN +0F3E..0F42 ; PVALID # TIBETAN SIGN YAR TSHES..TIBETAN LETTER GA +0F43 ; DISALLOWED # TIBETAN LETTER GHA +0F44..0F47 ; PVALID # TIBETAN LETTER NGA..TIBETAN LETTER JA +0F48 ; UNASSIGNED # +0F49..0F4C ; PVALID # TIBETAN LETTER NYA..TIBETAN LETTER DDA +0F4D ; DISALLOWED # TIBETAN LETTER DDHA +0F4E..0F51 ; PVALID # TIBETAN LETTER NNA..TIBETAN LETTER DA +0F52 ; DISALLOWED # TIBETAN LETTER DHA +0F53..0F56 ; PVALID # TIBETAN LETTER NA..TIBETAN LETTER BA +0F57 ; DISALLOWED # TIBETAN LETTER BHA +0F58..0F5B ; PVALID # TIBETAN LETTER MA..TIBETAN LETTER DZA +0F5C ; DISALLOWED # TIBETAN LETTER DZHA +0F5D..0F68 ; PVALID # TIBETAN LETTER WA..TIBETAN LETTER A +0F69 ; DISALLOWED # TIBETAN LETTER KSSA +0F6A..0F6C ; PVALID # TIBETAN LETTER FIXED-FORM RA..TIBETAN LETTER +0F6D..0F70 ; UNASSIGNED # .. 
+0F71..0F72 ; PVALID # TIBETAN VOWEL SIGN AA..TIBETAN VOWEL SIGN I +0F73 ; DISALLOWED # TIBETAN VOWEL SIGN II +0F74 ; PVALID # TIBETAN VOWEL SIGN U +0F75..0F79 ; DISALLOWED # TIBETAN VOWEL SIGN UU..TIBETAN VOWEL SIGN VO +0F7A..0F80 ; PVALID # TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN REV +0F81 ; DISALLOWED # TIBETAN VOWEL SIGN REVERSED II +0F82..0F84 ; PVALID # TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HA +0F85 ; DISALLOWED # TIBETAN MARK PALUTA +0F86..0F8B ; PVALID # TIBETAN SIGN LCI RTAGS..TIBETAN SIGN GRU MED +0F8C..0F8F ; UNASSIGNED # .. +0F90..0F92 ; PVALID # TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOIN +0F93 ; DISALLOWED # TIBETAN SUBJOINED LETTER GHA +0F94..0F97 ; PVALID # TIBETAN SUBJOINED LETTER NGA..TIBETAN SUBJOI +0F98 ; UNASSIGNED # +0F99..0F9C ; PVALID # TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOI +0F9D ; DISALLOWED # TIBETAN SUBJOINED LETTER DDHA +0F9E..0FA1 ; PVALID # TIBETAN SUBJOINED LETTER NNA..TIBETAN SUBJOI +0FA2 ; DISALLOWED # TIBETAN SUBJOINED LETTER DHA +0FA3..0FA6 ; PVALID # TIBETAN SUBJOINED LETTER NA..TIBETAN SUBJOIN +0FA7 ; DISALLOWED # TIBETAN SUBJOINED LETTER BHA +0FA8..0FAB ; PVALID # TIBETAN SUBJOINED LETTER MA..TIBETAN SUBJOIN +0FAC ; DISALLOWED # TIBETAN SUBJOINED LETTER DZHA +0FAD..0FB8 ; PVALID # TIBETAN SUBJOINED LETTER WA..TIBETAN SUBJOIN +0FB9 ; DISALLOWED # TIBETAN SUBJOINED LETTER KSSA +0FBA..0FBC ; PVALID # TIBETAN SUBJOINED LETTER FIXED-FORM WA..TIBE +0FBD ; UNASSIGNED # +0FBE..0FC5 ; DISALLOWED # TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC6 ; PVALID # TIBETAN SYMBOL PADMA GDAN +0FC7..0FCC ; DISALLOWED # TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SY +0FCD ; UNASSIGNED # +0FCE..0FD8 ; DISALLOWED # TIBETAN SIGN RDEL NAG RDEL DKAR..LEFT-FACING +0FD9..0FFF ; UNASSIGNED # .. 
+1000..1049 ; PVALID # MYANMAR LETTER KA..MYANMAR DIGIT NINE +10000..1000B; PVALID # LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE +1000C ; UNASSIGNED # +1000D..10026; PVALID # LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE +10027 ; UNASSIGNED # +10028..1003A; PVALID # LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE +1003B ; UNASSIGNED # +1003C..1003D; PVALID # LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE +1003E ; UNASSIGNED # +1003F..1004D; PVALID # LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE +1004E..1004F; UNASSIGNED # .. +10050..1005D; PVALID # LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +1005E..1007F; UNASSIGNED # .. +10080..100FA; PVALID # LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRA +100FB..100FF; UNASSIGNED # .. +10100..10102; DISALLOWED # AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MAR +10103..10106; UNASSIGNED # .. +10107..10133; DISALLOWED # AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOU +10134..10136; UNASSIGNED # .. +10137..1018A; DISALLOWED # AEGEAN WEIGHT BASE UNIT..GREEK ZERO SIGN +1018B..1018F; UNASSIGNED # .. +10190..1019B; DISALLOWED # ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN +1019C..101CF; UNASSIGNED # .. +101D0..101FC; DISALLOWED # PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC +101FD ; PVALID # PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +101FE..1027F; UNASSIGNED # .. +10280..1029C; PVALID # LYCIAN LETTER A..LYCIAN LETTER X +1029D..1029F; UNASSIGNED # .. +102A0..102D0; PVALID # CARIAN LETTER A..CARIAN LETTER UUU3 +102D1..102FF; UNASSIGNED # .. +10300..1031E; PVALID # OLD ITALIC LETTER A..OLD ITALIC LETTER UU +1031F ; UNASSIGNED # +10320..10323; DISALLOWED # OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL F +10324..1032F; UNASSIGNED # .. +10330..10340; PVALID # GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +10341 ; DISALLOWED # GOTHIC LETTER NINETY +10342..10349; PVALID # GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; DISALLOWED # GOTHIC LETTER NINE HUNDRED +1034B..1037F; UNASSIGNED # .. 
+10380..1039D; PVALID # UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039E ; UNASSIGNED # +1039F ; DISALLOWED # UGARITIC WORD DIVIDER +103A0..103C3; PVALID # OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C4..103C7; UNASSIGNED # .. +103C8..103CF; PVALID # OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIG +103D0..103D5; DISALLOWED # OLD PERSIAN WORD DIVIDER..OLD PERSIAN NUMBER +103D6..103FF; UNASSIGNED # .. +10400..10427; DISALLOWED # DESERET CAPITAL LETTER LONG I..DESERET CAPIT +10428..1049D; PVALID # DESERET SMALL LETTER LONG I..OSMANYA LETTER +1049E..1049F; UNASSIGNED # .. +104A..104F ; DISALLOWED # MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL +104A0..104A9; PVALID # OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104AA..107FF; UNASSIGNED # .. +1050..109D ; PVALID # MYANMAR LETTER SHA..MYANMAR VOWEL SIGN AITON +10800..10805; PVALID # CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10806..10807; UNASSIGNED # .. +10808 ; PVALID # CYPRIOT SYLLABLE JO +10809 ; UNASSIGNED # +1080A..10835; PVALID # CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10836 ; UNASSIGNED # +10837..10838; PVALID # CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +10839..1083B; UNASSIGNED # .. +1083C ; PVALID # CYPRIOT SYLLABLE ZA +1083D..1083E; UNASSIGNED # .. +1083F..10855; PVALID # CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER +10856 ; UNASSIGNED # +10857..1085F; DISALLOWED # IMPERIAL ARAMAIC SECTION SIGN..IMPERIAL ARAM +10860..108FF; UNASSIGNED # .. +10900..10915; PVALID # PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B; DISALLOWED # PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THR +1091C..1091E; UNASSIGNED # .. +1091F ; DISALLOWED # PHOENICIAN WORD SEPARATOR +10920..10939; PVALID # LYDIAN LETTER A..LYDIAN LETTER C +1093A..1093E; UNASSIGNED # .. +1093F ; DISALLOWED # LYDIAN TRIANGULAR MARK +10940..109FF; UNASSIGNED # .. 
+109E..10C5 ; DISALLOWED # MYANMAR SYMBOL SHAN ONE..GEORGIAN CAPITAL LE +10A00..10A03; PVALID # KHAROSHTHI LETTER A..KHAROSHTHI VOWEL SIGN V +10A04 ; UNASSIGNED # +10A05..10A06; PVALID # KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SI +10A07..10A0B; UNASSIGNED # .. +10A0C..10A13; PVALID # KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI LET +10A14 ; UNASSIGNED # +10A15..10A17; PVALID # KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A18 ; UNASSIGNED # +10A19..10A33; PVALID # KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTT +10A34..10A37; UNASSIGNED # .. +10A38..10A3A; PVALID # KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN D +10A3B..10A3E; UNASSIGNED # .. +10A3F ; PVALID # KHAROSHTHI VIRAMA +10A40..10A47; DISALLOWED # KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE +10A48..10A4F; UNASSIGNED # .. +10A50..10A58; DISALLOWED # KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCT +10A59..10A5F; UNASSIGNED # .. +10A60..10A7C; PVALID # OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABI +10A7D..10A7F; DISALLOWED # OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARAB +10A80..10AFF; UNASSIGNED # .. +10B00..10B35; PVALID # AVESTAN LETTER A..AVESTAN LETTER HE +10B36..10B38; UNASSIGNED # .. +10B39..10B3F; DISALLOWED # AVESTAN ABBREVIATION MARK..LARGE ONE RING OV +10B40..10B55; PVALID # INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIP +10B56..10B57; UNASSIGNED # .. +10B58..10B5F; DISALLOWED # INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTI +10B60..10B72; PVALID # INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPT +10B73..10B77; UNASSIGNED # .. +10B78..10B7F; DISALLOWED # INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIO +10B80..10BFF; UNASSIGNED # .. +10C00..10C48; PVALID # OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTE +10C49..10E5F; UNASSIGNED # .. +10C6..10CF ; UNASSIGNED # .. +10D0..10FA ; PVALID # GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10E60..10E7E; DISALLOWED # RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS +10E7F..1107F; UNASSIGNED # .. +10FB..10FC ; DISALLOWED # GEORGIAN PARAGRAPH SEPARATOR..MODIFIER LETTE +10FD..10FF ; UNASSIGNED # .. 
+1100..11FF ; DISALLOWED # HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSA +11080..110BA; PVALID # KAITHI SIGN CANDRABINDU..KAITHI SIGN NUKTA +110BB..110C1; DISALLOWED # KAITHI ABBREVIATION SIGN..KAITHI DOUBLE DAND +110C2..11FFF; UNASSIGNED # .. +1200..1248 ; PVALID # ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA +12000..1236E; PVALID # CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM +1236F..123FF; UNASSIGNED # .. +12400..12462; DISALLOWED # CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NU +12463..1246F; UNASSIGNED # .. +12470..12473; DISALLOWED # CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD +12474..12FFF; UNASSIGNED # .. +1249 ; UNASSIGNED # +124A..124D ; PVALID # ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +124E..124F ; UNASSIGNED # .. +1250..1256 ; PVALID # ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1257 ; UNASSIGNED # +1258 ; PVALID # ETHIOPIC SYLLABLE QHWA +1259 ; UNASSIGNED # +125A..125D ; PVALID # ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QH +125E..125F ; UNASSIGNED # .. +1260..1288 ; PVALID # ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +1289 ; UNASSIGNED # +128A..128D ; PVALID # ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +128E..128F ; UNASSIGNED # .. +1290..12B0 ; PVALID # ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B1 ; UNASSIGNED # +12B2..12B5 ; PVALID # ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B6..12B7 ; UNASSIGNED # .. +12B8..12BE ; PVALID # ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12BF ; UNASSIGNED # +12C0 ; PVALID # ETHIOPIC SYLLABLE KXWA +12C1 ; UNASSIGNED # +12C2..12C5 ; PVALID # ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KX +12C6..12C7 ; UNASSIGNED # .. +12C8..12D6 ; PVALID # ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHAR +12D7 ; UNASSIGNED # +12D8..1310 ; PVALID # ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +13000..1342E; PVALID # EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYP +1311 ; UNASSIGNED # +1312..1315 ; PVALID # ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1316..1317 ; UNASSIGNED # .. 
+1318..135A ; PVALID # ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1342F..1CFFF; UNASSIGNED # .. +135B..135E ; UNASSIGNED # .. +135F ; PVALID # ETHIOPIC COMBINING GEMINATION MARK +1360..137C ; DISALLOWED # ETHIOPIC SECTION MARK..ETHIOPIC NUMBER TEN T +137D..137F ; UNASSIGNED # .. +1380..138F ; PVALID # ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SY +1390..1399 ; DISALLOWED # ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MA +139A..139F ; UNASSIGNED # .. +13A0..13F4 ; PVALID # CHEROKEE LETTER A..CHEROKEE LETTER YV +13F5..13FF ; UNASSIGNED # .. +1400 ; DISALLOWED # CANADIAN SYLLABICS HYPHEN +1401..166C ; PVALID # CANADIAN SYLLABICS E..CANADIAN SYLLABICS CAR +166D..166E ; DISALLOWED # CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLAB +166F..167F ; PVALID # CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS B +1680 ; DISALLOWED # OGHAM SPACE MARK +1681..169A ; PVALID # OGHAM LETTER BEITH..OGHAM LETTER PEITH +169B..169C ; DISALLOWED # OGHAM FEATHER MARK..OGHAM REVERSED FEATHER M +169D..169F ; UNASSIGNED # .. +16A0..16EA ; PVALID # RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EB..16F0 ; DISALLOWED # RUNIC SINGLE PUNCTUATION..RUNIC BELGTHOR SYM +16F1..16FF ; UNASSIGNED # .. +1700..170C ; PVALID # TAGALOG LETTER A..TAGALOG LETTER YA +170D ; UNASSIGNED # +170E..1714 ; PVALID # TAGALOG LETTER LA..TAGALOG SIGN VIRAMA +1715..171F ; UNASSIGNED # .. +1720..1734 ; PVALID # HANUNOO LETTER A..HANUNOO SIGN PAMUDPOD +1735..1736 ; DISALLOWED # PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DO +1737..173F ; UNASSIGNED # .. +1740..1753 ; PVALID # BUHID LETTER A..BUHID VOWEL SIGN U +1754..175F ; UNASSIGNED # .. +1760..176C ; PVALID # TAGBANWA LETTER A..TAGBANWA LETTER YA +176D ; UNASSIGNED # +176E..1770 ; PVALID # TAGBANWA LETTER LA..TAGBANWA LETTER SA +1771 ; UNASSIGNED # +1772..1773 ; PVALID # TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +1774..177F ; UNASSIGNED # .. 
+1780..17B3 ; PVALID # KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; DISALLOWED # KHMER VOWEL INHERENT AQ..KHMER VOWEL INHEREN +17B6..17D3 ; PVALID # KHMER VOWEL SIGN AA..KHMER SIGN BATHAMASAT +17D4..17D6 ; DISALLOWED # KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; PVALID # KHMER SIGN LEK TOO +17D8..17DB ; DISALLOWED # KHMER SIGN BEYYAL..KHMER CURRENCY SYMBOL RIE +17DC..17DD ; PVALID # KHMER SIGN AVAKRAHASANYA..KHMER SIGN ATTHACA +17DE..17DF ; UNASSIGNED # .. +17E0..17E9 ; PVALID # KHMER DIGIT ZERO..KHMER DIGIT NINE +17EA..17EF ; UNASSIGNED # .. +17F0..17F9 ; DISALLOWED # KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK +17FA..17FF ; UNASSIGNED # .. +1800..180E ; DISALLOWED # MONGOLIAN BIRGA..MONGOLIAN VOWEL SEPARATOR +180F ; UNASSIGNED # +1810..1819 ; PVALID # MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +181A..181F ; UNASSIGNED # .. +1820..1877 ; PVALID # MONGOLIAN LETTER A..MONGOLIAN LETTER MANCHU +1878..187F ; UNASSIGNED # .. +1880..18AA ; PVALID # MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONG +18AB..18AF ; UNASSIGNED # .. +18B0..18F5 ; PVALID # CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CA +18F6..18FF ; UNASSIGNED # .. +1900..191C ; PVALID # LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA +191D..191F ; UNASSIGNED # .. +1920..192B ; PVALID # LIMBU VOWEL SIGN A..LIMBU SUBJOINED LETTER W +192C..192F ; UNASSIGNED # .. +1930..193B ; PVALID # LIMBU SMALL LETTER KA..LIMBU SIGN SA-I +193C..193F ; UNASSIGNED # .. +1940 ; DISALLOWED # LIMBU SIGN LOO +1941..1943 ; UNASSIGNED # .. +1944..1945 ; DISALLOWED # LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1946..196D ; PVALID # LIMBU DIGIT ZERO..TAI LE LETTER AI +196E..196F ; UNASSIGNED # .. +1970..1974 ; PVALID # TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1975..197F ; UNASSIGNED # .. +1980..19AB ; PVALID # NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETT +19AC..19AF ; UNASSIGNED # .. +19B0..19C9 ; PVALID # NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW +19CA..19CF ; UNASSIGNED # .. 
+19D0..19DA ; PVALID # NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIG +19DB..19DD ; UNASSIGNED # .. +19DE..19FF ; DISALLOWED # NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM +1A00..1A1B ; PVALID # BUGINESE LETTER KA..BUGINESE VOWEL SIGN AE +1A1C..1A1D ; UNASSIGNED # .. +1A1E..1A1F ; DISALLOWED # BUGINESE PALLAWA..BUGINESE END OF SECTION +1A20..1A5E ; PVALID # TAI THAM LETTER HIGH KA..TAI THAM CONSONANT +1A5F ; UNASSIGNED # +1A60..1A7C ; PVALID # TAI THAM SIGN SAKOT..TAI THAM SIGN KHUEN-LUE +1A7D..1A7E ; UNASSIGNED # .. +1A7F..1A89 ; PVALID # TAI THAM COMBINING CRYPTOGRAMMIC DOT..TAI TH +1A8A..1A8F ; UNASSIGNED # .. +1A90..1A99 ; PVALID # TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGI +1A9A..1A9F ; UNASSIGNED # .. +1AA0..1AA6 ; DISALLOWED # TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED +1AA7 ; PVALID # TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; DISALLOWED # TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1AAE..1AFF ; UNASSIGNED # .. +1B00..1B4B ; PVALID # BALINESE SIGN ULU RICEM..BALINESE LETTER ASY +1B4C..1B4F ; UNASSIGNED # .. +1B50..1B59 ; PVALID # BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B6A ; DISALLOWED # BALINESE PANTI..BALINESE MUSICAL SYMBOL DANG +1B6B..1B73 ; PVALID # BALINESE MUSICAL SYMBOL COMBINING TEGEH..BAL +1B74..1B7C ; DISALLOWED # BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG. +1B7D..1B7F ; UNASSIGNED # .. +1B80..1BAA ; PVALID # SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PAMA +1BAB..1BAD ; UNASSIGNED # .. +1BAE..1BB9 ; PVALID # SUNDANESE LETTER KHA..SUNDANESE DIGIT NINE +1BBA..1BFF ; UNASSIGNED # .. +1C00..1C37 ; PVALID # LEPCHA LETTER KA..LEPCHA SIGN NUKTA +1C38..1C3A ; UNASSIGNED # .. +1C3B..1C3F ; DISALLOWED # LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATIO +1C40..1C49 ; PVALID # LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4A..1C4C ; UNASSIGNED # .. +1C4D..1C7D ; PVALID # LEPCHA LETTER TTA..OL CHIKI AHAD +1C7E..1C7F ; DISALLOWED # OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTU +1C80..1CCF ; UNASSIGNED # .. 
+1CD0..1CD2 ; PVALID # VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD3 ; DISALLOWED # VEDIC SIGN NIHSHVASA +1CD4..1CF2 ; PVALID # VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC +1CF3..1CFF ; UNASSIGNED # .. +1D00..1D2B ; PVALID # LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTE +1D000..1D0F5; DISALLOWED # BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MU +1D0F6..1D0FF; UNASSIGNED # .. +1D100..1D126; DISALLOWED # MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBO +1D127..1D128; UNASSIGNED # .. +1D129..1D1DD; DISALLOWED # MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICA +1D1DE..1D1FF; UNASSIGNED # .. +1D200..1D245; DISALLOWED # GREEK VOCAL NOTATION SYMBOL-1..GREEK MUSICAL +1D246..1D2FF; UNASSIGNED # .. +1D2C..1D2E ; DISALLOWED # MODIFIER LETTER CAPITAL A..MODIFIER LETTER C +1D2F ; PVALID # MODIFIER LETTER CAPITAL BARRED B +1D30..1D3A ; DISALLOWED # MODIFIER LETTER CAPITAL D..MODIFIER LETTER C +1D300..1D356; DISALLOWED # MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D357..1D35F; UNASSIGNED # .. +1D360..1D371; DISALLOWED # COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TE +1D372..1D3FF; UNASSIGNED # .. +1D3B ; PVALID # MODIFIER LETTER CAPITAL REVERSED N +1D3C..1D4D ; DISALLOWED # MODIFIER LETTER CAPITAL O..MODIFIER LETTER S +1D400..1D454; DISALLOWED # MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL IT +1D455 ; UNASSIGNED # +1D456..1D49C; DISALLOWED # MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SC +1D49D ; UNASSIGNED # +1D49E..1D49F; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL +1D4A0..1D4A1; UNASSIGNED # .. +1D4A2 ; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL G +1D4A3..1D4A4; UNASSIGNED # .. +1D4A5..1D4A6; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL +1D4A7..1D4A8; UNASSIGNED # .. 
+1D4A9..1D4AC; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL +1D4AD ; UNASSIGNED # +1D4AE..1D4B9; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL +1D4BA ; UNASSIGNED # +1D4BB ; DISALLOWED # MATHEMATICAL SCRIPT SMALL F +1D4BC ; UNASSIGNED # +1D4BD..1D4C3; DISALLOWED # MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SC +1D4C4 ; UNASSIGNED # +1D4C5..1D505; DISALLOWED # MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FR +1D4E ; PVALID # MODIFIER LETTER SMALL TURNED I +1D4F..1D6A ; DISALLOWED # MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMA +1D506 ; UNASSIGNED # +1D507..1D50A; DISALLOWED # MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL +1D50B..1D50C; UNASSIGNED # .. +1D50D..1D514; DISALLOWED # MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL +1D515 ; UNASSIGNED # +1D516..1D51C; DISALLOWED # MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL +1D51D ; UNASSIGNED # +1D51E..1D539; DISALLOWED # MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL D +1D53A ; UNASSIGNED # +1D53B..1D53E; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEM +1D53F ; UNASSIGNED # +1D540..1D544; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEM +1D545 ; UNASSIGNED # +1D546 ; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D547..1D549; UNASSIGNED # .. +1D54A..1D550; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEM +1D551 ; UNASSIGNED # +1D552..1D6A5; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMAT +1D6A6..1D6A7; UNASSIGNED # .. +1D6A8..1D7CB; DISALLOWED # MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICA +1D6B..1D77 ; PVALID # LATIN SMALL LETTER UE..LATIN SMALL LETTER TU +1D78 ; DISALLOWED # MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; PVALID # LATIN SMALL LETTER INSULAR G..LATIN SMALL LE +1D7CC..1D7CD; UNASSIGNED # .. +1D7CE..1D7FF; DISALLOWED # MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL M +1D800..1EFFF; UNASSIGNED # .. 
+1D9B..1DBF ; DISALLOWED # MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER +1DC0..1DE6 ; PVALID # COMBINING DOTTED GRAVE ACCENT..COMBINING LAT +1DE7..1DFC ; UNASSIGNED # .. +1DFD..1DFF ; PVALID # COMBINING ALMOST EQUAL TO BELOW..COMBINING R +1E00 ; DISALLOWED # LATIN CAPITAL LETTER A WITH RING BELOW +1E01 ; PVALID # LATIN SMALL LETTER A WITH RING BELOW +1E02 ; DISALLOWED # LATIN CAPITAL LETTER B WITH DOT ABOVE +1E03 ; PVALID # LATIN SMALL LETTER B WITH DOT ABOVE +1E04 ; DISALLOWED # LATIN CAPITAL LETTER B WITH DOT BELOW +1E05 ; PVALID # LATIN SMALL LETTER B WITH DOT BELOW +1E06 ; DISALLOWED # LATIN CAPITAL LETTER B WITH LINE BELOW +1E07 ; PVALID # LATIN SMALL LETTER B WITH LINE BELOW +1E08 ; DISALLOWED # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUT +1E09 ; PVALID # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE +1E0A ; DISALLOWED # LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0B ; PVALID # LATIN SMALL LETTER D WITH DOT ABOVE +1E0C ; DISALLOWED # LATIN CAPITAL LETTER D WITH DOT BELOW +1E0D ; PVALID # LATIN SMALL LETTER D WITH DOT BELOW +1E0E ; DISALLOWED # LATIN CAPITAL LETTER D WITH LINE BELOW +1E0F ; PVALID # LATIN SMALL LETTER D WITH LINE BELOW +1E10 ; DISALLOWED # LATIN CAPITAL LETTER D WITH CEDILLA +1E11 ; PVALID # LATIN SMALL LETTER D WITH CEDILLA +1E12 ; DISALLOWED # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E13 ; PVALID # LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW +1E14 ; DISALLOWED # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E15 ; PVALID # LATIN SMALL LETTER E WITH MACRON AND GRAVE +1E16 ; DISALLOWED # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E17 ; PVALID # LATIN SMALL LETTER E WITH MACRON AND ACUTE +1E18 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E19 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW +1E1A ; DISALLOWED # LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1B ; PVALID # LATIN SMALL LETTER E WITH TILDE BELOW +1E1C ; DISALLOWED # LATIN CAPITAL LETTER E WITH CEDILLA AND BREV +1E1D ; PVALID # LATIN SMALL LETTER E WITH CEDILLA 
AND BREVE +1E1E ; DISALLOWED # LATIN CAPITAL LETTER F WITH DOT ABOVE +1E1F ; PVALID # LATIN SMALL LETTER F WITH DOT ABOVE +1E20 ; DISALLOWED # LATIN CAPITAL LETTER G WITH MACRON +1E21 ; PVALID # LATIN SMALL LETTER G WITH MACRON +1E22 ; DISALLOWED # LATIN CAPITAL LETTER H WITH DOT ABOVE +1E23 ; PVALID # LATIN SMALL LETTER H WITH DOT ABOVE +1E24 ; DISALLOWED # LATIN CAPITAL LETTER H WITH DOT BELOW +1E25 ; PVALID # LATIN SMALL LETTER H WITH DOT BELOW +1E26 ; DISALLOWED # LATIN CAPITAL LETTER H WITH DIAERESIS +1E27 ; PVALID # LATIN SMALL LETTER H WITH DIAERESIS +1E28 ; DISALLOWED # LATIN CAPITAL LETTER H WITH CEDILLA +1E29 ; PVALID # LATIN SMALL LETTER H WITH CEDILLA +1E2A ; DISALLOWED # LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2B ; PVALID # LATIN SMALL LETTER H WITH BREVE BELOW +1E2C ; DISALLOWED # LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2D ; PVALID # LATIN SMALL LETTER I WITH TILDE BELOW +1E2E ; DISALLOWED # LATIN CAPITAL LETTER I WITH DIAERESIS AND AC +1E2F ; PVALID # LATIN SMALL LETTER I WITH DIAERESIS AND ACUT +1E30 ; DISALLOWED # LATIN CAPITAL LETTER K WITH ACUTE +1E31 ; PVALID # LATIN SMALL LETTER K WITH ACUTE +1E32 ; DISALLOWED # LATIN CAPITAL LETTER K WITH DOT BELOW +1E33 ; PVALID # LATIN SMALL LETTER K WITH DOT BELOW +1E34 ; DISALLOWED # LATIN CAPITAL LETTER K WITH LINE BELOW +1E35 ; PVALID # LATIN SMALL LETTER K WITH LINE BELOW +1E36 ; DISALLOWED # LATIN CAPITAL LETTER L WITH DOT BELOW +1E37 ; PVALID # LATIN SMALL LETTER L WITH DOT BELOW +1E38 ; DISALLOWED # LATIN CAPITAL LETTER L WITH DOT BELOW AND MA +1E39 ; PVALID # LATIN SMALL LETTER L WITH DOT BELOW AND MACR +1E3A ; DISALLOWED # LATIN CAPITAL LETTER L WITH LINE BELOW +1E3B ; PVALID # LATIN SMALL LETTER L WITH LINE BELOW +1E3C ; DISALLOWED # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3D ; PVALID # LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW +1E3E ; DISALLOWED # LATIN CAPITAL LETTER M WITH ACUTE +1E3F ; PVALID # LATIN SMALL LETTER M WITH ACUTE +1E40 ; DISALLOWED # LATIN CAPITAL LETTER M WITH DOT 
ABOVE +1E41 ; PVALID # LATIN SMALL LETTER M WITH DOT ABOVE +1E42 ; DISALLOWED # LATIN CAPITAL LETTER M WITH DOT BELOW +1E43 ; PVALID # LATIN SMALL LETTER M WITH DOT BELOW +1E44 ; DISALLOWED # LATIN CAPITAL LETTER N WITH DOT ABOVE +1E45 ; PVALID # LATIN SMALL LETTER N WITH DOT ABOVE +1E46 ; DISALLOWED # LATIN CAPITAL LETTER N WITH DOT BELOW +1E47 ; PVALID # LATIN SMALL LETTER N WITH DOT BELOW +1E48 ; DISALLOWED # LATIN CAPITAL LETTER N WITH LINE BELOW +1E49 ; PVALID # LATIN SMALL LETTER N WITH LINE BELOW +1E4A ; DISALLOWED # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4B ; PVALID # LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW +1E4C ; DISALLOWED # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4D ; PVALID # LATIN SMALL LETTER O WITH TILDE AND ACUTE +1E4E ; DISALLOWED # LATIN CAPITAL LETTER O WITH TILDE AND DIAERE +1E4F ; PVALID # LATIN SMALL LETTER O WITH TILDE AND DIAERESI +1E50 ; DISALLOWED # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E51 ; PVALID # LATIN SMALL LETTER O WITH MACRON AND GRAVE +1E52 ; DISALLOWED # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E53 ; PVALID # LATIN SMALL LETTER O WITH MACRON AND ACUTE +1E54 ; DISALLOWED # LATIN CAPITAL LETTER P WITH ACUTE +1E55 ; PVALID # LATIN SMALL LETTER P WITH ACUTE +1E56 ; DISALLOWED # LATIN CAPITAL LETTER P WITH DOT ABOVE +1E57 ; PVALID # LATIN SMALL LETTER P WITH DOT ABOVE +1E58 ; DISALLOWED # LATIN CAPITAL LETTER R WITH DOT ABOVE +1E59 ; PVALID # LATIN SMALL LETTER R WITH DOT ABOVE +1E5A ; DISALLOWED # LATIN CAPITAL LETTER R WITH DOT BELOW +1E5B ; PVALID # LATIN SMALL LETTER R WITH DOT BELOW +1E5C ; DISALLOWED # LATIN CAPITAL LETTER R WITH DOT BELOW AND MA +1E5D ; PVALID # LATIN SMALL LETTER R WITH DOT BELOW AND MACR +1E5E ; DISALLOWED # LATIN CAPITAL LETTER R WITH LINE BELOW +1E5F ; PVALID # LATIN SMALL LETTER R WITH LINE BELOW +1E60 ; DISALLOWED # LATIN CAPITAL LETTER S WITH DOT ABOVE +1E61 ; PVALID # LATIN SMALL LETTER S WITH DOT ABOVE +1E62 ; DISALLOWED # LATIN CAPITAL LETTER S WITH DOT BELOW +1E63 ; 
PVALID # LATIN SMALL LETTER S WITH DOT BELOW +1E64 ; DISALLOWED # LATIN CAPITAL LETTER S WITH ACUTE AND DOT AB +1E65 ; PVALID # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOV +1E66 ; DISALLOWED # LATIN CAPITAL LETTER S WITH CARON AND DOT AB +1E67 ; PVALID # LATIN SMALL LETTER S WITH CARON AND DOT ABOV +1E68 ; DISALLOWED # LATIN CAPITAL LETTER S WITH DOT BELOW AND DO +1E69 ; PVALID # LATIN SMALL LETTER S WITH DOT BELOW AND DOT +1E6A ; DISALLOWED # LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6B ; PVALID # LATIN SMALL LETTER T WITH DOT ABOVE +1E6C ; DISALLOWED # LATIN CAPITAL LETTER T WITH DOT BELOW +1E6D ; PVALID # LATIN SMALL LETTER T WITH DOT BELOW +1E6E ; DISALLOWED # LATIN CAPITAL LETTER T WITH LINE BELOW +1E6F ; PVALID # LATIN SMALL LETTER T WITH LINE BELOW +1E70 ; DISALLOWED # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E71 ; PVALID # LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW +1E72 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E73 ; PVALID # LATIN SMALL LETTER U WITH DIAERESIS BELOW +1E74 ; DISALLOWED # LATIN CAPITAL LETTER U WITH TILDE BELOW +1E75 ; PVALID # LATIN SMALL LETTER U WITH TILDE BELOW +1E76 ; DISALLOWED # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E77 ; PVALID # LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW +1E78 ; DISALLOWED # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E79 ; PVALID # LATIN SMALL LETTER U WITH TILDE AND ACUTE +1E7A ; DISALLOWED # LATIN CAPITAL LETTER U WITH MACRON AND DIAER +1E7B ; PVALID # LATIN SMALL LETTER U WITH MACRON AND DIAERES +1E7C ; DISALLOWED # LATIN CAPITAL LETTER V WITH TILDE +1E7D ; PVALID # LATIN SMALL LETTER V WITH TILDE +1E7E ; DISALLOWED # LATIN CAPITAL LETTER V WITH DOT BELOW +1E7F ; PVALID # LATIN SMALL LETTER V WITH DOT BELOW +1E80 ; DISALLOWED # LATIN CAPITAL LETTER W WITH GRAVE +1E81 ; PVALID # LATIN SMALL LETTER W WITH GRAVE +1E82 ; DISALLOWED # LATIN CAPITAL LETTER W WITH ACUTE +1E83 ; PVALID # LATIN SMALL LETTER W WITH ACUTE +1E84 ; DISALLOWED # LATIN CAPITAL LETTER W WITH DIAERESIS +1E85 
; PVALID # LATIN SMALL LETTER W WITH DIAERESIS +1E86 ; DISALLOWED # LATIN CAPITAL LETTER W WITH DOT ABOVE +1E87 ; PVALID # LATIN SMALL LETTER W WITH DOT ABOVE +1E88 ; DISALLOWED # LATIN CAPITAL LETTER W WITH DOT BELOW +1E89 ; PVALID # LATIN SMALL LETTER W WITH DOT BELOW +1E8A ; DISALLOWED # LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8B ; PVALID # LATIN SMALL LETTER X WITH DOT ABOVE +1E8C ; DISALLOWED # LATIN CAPITAL LETTER X WITH DIAERESIS +1E8D ; PVALID # LATIN SMALL LETTER X WITH DIAERESIS +1E8E ; DISALLOWED # LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E8F ; PVALID # LATIN SMALL LETTER Y WITH DOT ABOVE +1E90 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E91 ; PVALID # LATIN SMALL LETTER Z WITH CIRCUMFLEX +1E92 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH DOT BELOW +1E93 ; PVALID # LATIN SMALL LETTER Z WITH DOT BELOW +1E94 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH LINE BELOW +1E95..1E99 ; PVALID # LATIN SMALL LETTER Z WITH LINE BELOW..LATIN +1E9A..1E9B ; DISALLOWED # LATIN SMALL LETTER A WITH RIGHT HALF RING..L +1E9C..1E9D ; PVALID # LATIN SMALL LETTER LONG S WITH DIAGONAL STRO +1E9E ; DISALLOWED # LATIN CAPITAL LETTER SHARP S +1E9F ; PVALID # LATIN SMALL LETTER DELTA +1EA0 ; DISALLOWED # LATIN CAPITAL LETTER A WITH DOT BELOW +1EA1 ; PVALID # LATIN SMALL LETTER A WITH DOT BELOW +1EA2 ; DISALLOWED # LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA3 ; PVALID # LATIN SMALL LETTER A WITH HOOK ABOVE +1EA4 ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND A +1EA5 ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACU +1EA6 ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND G +1EA7 ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRA +1EA8 ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND H +1EA9 ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOO +1EAA ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND T +1EAB ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TIL +1EAC ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND D 
+1EAD ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT +1EAE ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EAF ; PVALID # LATIN SMALL LETTER A WITH BREVE AND ACUTE +1EB0 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB1 ; PVALID # LATIN SMALL LETTER A WITH BREVE AND GRAVE +1EB2 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND HOOK A +1EB3 ; PVALID # LATIN SMALL LETTER A WITH BREVE AND HOOK ABO +1EB4 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB5 ; PVALID # LATIN SMALL LETTER A WITH BREVE AND TILDE +1EB6 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND DOT BE +1EB7 ; PVALID # LATIN SMALL LETTER A WITH BREVE AND DOT BELO +1EB8 ; DISALLOWED # LATIN CAPITAL LETTER E WITH DOT BELOW +1EB9 ; PVALID # LATIN SMALL LETTER E WITH DOT BELOW +1EBA ; DISALLOWED # LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBB ; PVALID # LATIN SMALL LETTER E WITH HOOK ABOVE +1EBC ; DISALLOWED # LATIN CAPITAL LETTER E WITH TILDE +1EBD ; PVALID # LATIN SMALL LETTER E WITH TILDE +1EBE ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND A +1EBF ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACU +1EC0 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND G +1EC1 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRA +1EC2 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND H +1EC3 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOO +1EC4 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND T +1EC5 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TIL +1EC6 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND D +1EC7 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT +1EC8 ; DISALLOWED # LATIN CAPITAL LETTER I WITH HOOK ABOVE +1EC9 ; PVALID # LATIN SMALL LETTER I WITH HOOK ABOVE +1ECA ; DISALLOWED # LATIN CAPITAL LETTER I WITH DOT BELOW +1ECB ; PVALID # LATIN SMALL LETTER I WITH DOT BELOW +1ECC ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOT BELOW +1ECD ; PVALID # LATIN SMALL LETTER O WITH DOT BELOW 
+1ECE ; DISALLOWED # LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ECF ; PVALID # LATIN SMALL LETTER O WITH HOOK ABOVE +1ED0 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND A +1ED1 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACU +1ED2 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND G +1ED3 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRA +1ED4 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND H +1ED5 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOO +1ED6 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND T +1ED7 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TIL +1ED8 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND D +1ED9 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT +1EDA ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDB ; PVALID # LATIN SMALL LETTER O WITH HORN AND ACUTE +1EDC ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDD ; PVALID # LATIN SMALL LETTER O WITH HORN AND GRAVE +1EDE ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND HOOK AB +1EDF ; PVALID # LATIN SMALL LETTER O WITH HORN AND HOOK ABOV +1EE0 ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE1 ; PVALID # LATIN SMALL LETTER O WITH HORN AND TILDE +1EE2 ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND DOT BEL +1EE3 ; PVALID # LATIN SMALL LETTER O WITH HORN AND DOT BELOW +1EE4 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DOT BELOW +1EE5 ; PVALID # LATIN SMALL LETTER U WITH DOT BELOW +1EE6 ; DISALLOWED # LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE7 ; PVALID # LATIN SMALL LETTER U WITH HOOK ABOVE +1EE8 ; DISALLOWED # LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EE9 ; PVALID # LATIN SMALL LETTER U WITH HORN AND ACUTE +1EEA ; DISALLOWED # LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEB ; PVALID # LATIN SMALL LETTER U WITH HORN AND GRAVE +1EEC ; DISALLOWED # LATIN CAPITAL LETTER U WITH HORN AND HOOK AB +1EED ; PVALID # LATIN SMALL LETTER U WITH HORN AND HOOK ABOV +1EEE ; DISALLOWED # 
LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EEF ; PVALID # LATIN SMALL LETTER U WITH HORN AND TILDE +1EF0 ; DISALLOWED # LATIN CAPITAL LETTER U WITH HORN AND DOT BEL +1EF1 ; PVALID # LATIN SMALL LETTER U WITH HORN AND DOT BELOW +1EF2 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH GRAVE +1EF3 ; PVALID # LATIN SMALL LETTER Y WITH GRAVE +1EF4 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF5 ; PVALID # LATIN SMALL LETTER Y WITH DOT BELOW +1EF6 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF7 ; PVALID # LATIN SMALL LETTER Y WITH HOOK ABOVE +1EF8 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH TILDE +1EF9 ; PVALID # LATIN SMALL LETTER Y WITH TILDE +1EFA ; DISALLOWED # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFB ; PVALID # LATIN SMALL LETTER MIDDLE-WELSH LL +1EFC ; DISALLOWED # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFD ; PVALID # LATIN SMALL LETTER MIDDLE-WELSH V +1EFE ; DISALLOWED # LATIN CAPITAL LETTER Y WITH LOOP +1EFF..1F07 ; PVALID # LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL +1F000..1F02B; DISALLOWED # MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F02C..1F02F; UNASSIGNED # .. +1F030..1F093; DISALLOWED # DOMINO TILE HORIZONTAL BACK..DOMINO TILE VER +1F08..1F0F ; DISALLOWED # GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK +1F094..1F0FF; UNASSIGNED # .. +1F10..1F15 ; PVALID # GREEK SMALL LETTER EPSILON WITH PSILI..GREEK +1F100..1F10A; DISALLOWED # DIGIT ZERO FULL STOP..DIGIT NINE COMMA +1F10B..1F10F; UNASSIGNED # .. +1F110..1F12E; DISALLOWED # PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLE +1F12F..1F130; UNASSIGNED # .. +1F131 ; DISALLOWED # SQUARED LATIN CAPITAL LETTER B +1F132..1F13C; UNASSIGNED # .. +1F13D ; DISALLOWED # SQUARED LATIN CAPITAL LETTER N +1F13E ; UNASSIGNED # +1F13F ; DISALLOWED # SQUARED LATIN CAPITAL LETTER P +1F140..1F141; UNASSIGNED # .. +1F142 ; DISALLOWED # SQUARED LATIN CAPITAL LETTER S +1F143..1F145; UNASSIGNED # .. +1F146 ; DISALLOWED # SQUARED LATIN CAPITAL LETTER W +1F147..1F149; UNASSIGNED # .. 
+1F14A..1F14E; DISALLOWED # SQUARED HV..SQUARED PPV +1F14F..1F156; UNASSIGNED # .. +1F157 ; DISALLOWED # NEGATIVE CIRCLED LATIN CAPITAL LETTER H +1F158..1F15E; UNASSIGNED # .. +1F15F ; DISALLOWED # NEGATIVE CIRCLED LATIN CAPITAL LETTER P +1F16..1F17 ; UNASSIGNED # .. +1F160..1F178; UNASSIGNED # .. +1F179 ; DISALLOWED # NEGATIVE SQUARED LATIN CAPITAL LETTER J +1F17A ; UNASSIGNED # +1F17B..1F17C; DISALLOWED # NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEG +1F17D..1F17E; UNASSIGNED # .. +1F17F ; DISALLOWED # NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F18..1F1D ; DISALLOWED # GREEK CAPITAL LETTER EPSILON WITH PSILI..GRE +1F180..1F189; UNASSIGNED # .. +1F18A..1F18D; DISALLOWED # CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTE +1F18E..1F18F; UNASSIGNED # .. +1F190 ; DISALLOWED # SQUARE DJ +1F191..1F1FF; UNASSIGNED # .. +1F1E..1F1F ; UNASSIGNED # .. +1F20..1F27 ; PVALID # GREEK SMALL LETTER ETA WITH PSILI..GREEK SMA +1F200 ; DISALLOWED # SQUARE HIRAGANA HOKA +1F201..1F20F; UNASSIGNED # .. +1F210..1F231; DISALLOWED # SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED +1F232..1F23F; UNASSIGNED # .. +1F240..1F248; DISALLOWED # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRA +1F249..1FFFD; UNASSIGNED # .. +1F28..1F2F ; DISALLOWED # GREEK CAPITAL LETTER ETA WITH PSILI..GREEK C +1F30..1F37 ; PVALID # GREEK SMALL LETTER IOTA WITH PSILI..GREEK SM +1F38..1F3F ; DISALLOWED # GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK +1F40..1F45 ; PVALID # GREEK SMALL LETTER OMICRON WITH PSILI..GREEK +1F46..1F47 ; UNASSIGNED # .. +1F48..1F4D ; DISALLOWED # GREEK CAPITAL LETTER OMICRON WITH PSILI..GRE +1F4E..1F4F ; UNASSIGNED # .. 
+1F50..1F57 ; PVALID # GREEK SMALL LETTER UPSILON WITH PSILI..GREEK +1F58 ; UNASSIGNED # +1F59 ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5A ; UNASSIGNED # +1F5B ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH DASIA AND +1F5C ; UNASSIGNED # +1F5D ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH DASIA AND +1F5E ; UNASSIGNED # +1F5F ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH DASIA AND +1F60..1F67 ; PVALID # GREEK SMALL LETTER OMEGA WITH PSILI..GREEK S +1F68..1F6F ; DISALLOWED # GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK +1F70 ; PVALID # GREEK SMALL LETTER ALPHA WITH VARIA +1F71 ; DISALLOWED # GREEK SMALL LETTER ALPHA WITH OXIA +1F72 ; PVALID # GREEK SMALL LETTER EPSILON WITH VARIA +1F73 ; DISALLOWED # GREEK SMALL LETTER EPSILON WITH OXIA +1F74 ; PVALID # GREEK SMALL LETTER ETA WITH VARIA +1F75 ; DISALLOWED # GREEK SMALL LETTER ETA WITH OXIA +1F76 ; PVALID # GREEK SMALL LETTER IOTA WITH VARIA +1F77 ; DISALLOWED # GREEK SMALL LETTER IOTA WITH OXIA +1F78 ; PVALID # GREEK SMALL LETTER OMICRON WITH VARIA +1F79 ; DISALLOWED # GREEK SMALL LETTER OMICRON WITH OXIA +1F7A ; PVALID # GREEK SMALL LETTER UPSILON WITH VARIA +1F7B ; DISALLOWED # GREEK SMALL LETTER UPSILON WITH OXIA +1F7C ; PVALID # GREEK SMALL LETTER OMEGA WITH VARIA +1F7D ; DISALLOWED # GREEK SMALL LETTER OMEGA WITH OXIA +1F7E..1F7F ; UNASSIGNED # .. 
+1F80..1FAF ; DISALLOWED # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOG +1FB0..1FB1 ; PVALID # GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK +1FB2..1FB4 ; DISALLOWED # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOG +1FB5 ; UNASSIGNED # +1FB6 ; PVALID # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7..1FC4 ; DISALLOWED # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AN +1FC5 ; UNASSIGNED # +1FC6 ; PVALID # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7..1FCF ; DISALLOWED # GREEK SMALL LETTER ETA WITH PERISPOMENI AND +1FD0..1FD2 ; PVALID # GREEK SMALL LETTER IOTA WITH VRACHY..GREEK S +1FD3 ; DISALLOWED # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND O +1FD4..1FD5 ; UNASSIGNED # .. +1FD6..1FD7 ; PVALID # GREEK SMALL LETTER IOTA WITH PERISPOMENI..GR +1FD8..1FDB ; DISALLOWED # GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK +1FDC ; UNASSIGNED # +1FDD..1FDF ; DISALLOWED # GREEK DASIA AND VARIA..GREEK DASIA AND PERIS +1FE0..1FE2 ; PVALID # GREEK SMALL LETTER UPSILON WITH VRACHY..GREE +1FE3 ; DISALLOWED # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AN +1FE4..1FE7 ; PVALID # GREEK SMALL LETTER RHO WITH PSILI..GREEK SMA +1FE8..1FEF ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH VRACHY..GR +1FF0..1FF1 ; UNASSIGNED # .. +1FF2..1FF4 ; DISALLOWED # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOG +1FF5 ; UNASSIGNED # +1FF6 ; PVALID # GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7..1FFE ; DISALLOWED # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AN +1FFF ; UNASSIGNED # +1FFFE..1FFFF; DISALLOWED # .. +2000..200B ; DISALLOWED # EN QUAD..ZERO WIDTH SPACE +20000..2A6D6; PVALID # .... +206A..2071 ; DISALLOWED # INHIBIT SYMMETRIC SWAPPING..SUPERSCRIPT LATI +2072..2073 ; UNASSIGNED # .. +2074..208E ; DISALLOWED # SUPERSCRIPT FOUR..SUBSCRIPT RIGHT PARENTHESI +208F ; UNASSIGNED # +2090..2094 ; DISALLOWED # LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCR +2095..209F ; UNASSIGNED # .. +20A0..20B8 ; DISALLOWED # EURO-CURRENCY SIGN..TENGE SIGN +20B9..20CF ; UNASSIGNED # .. 
+20D0..20F0 ; DISALLOWED # COMBINING LEFT HARPOON ABOVE..COMBINING ASTE +20F1..20FF ; UNASSIGNED # .. +2100..214D ; DISALLOWED # ACCOUNT OF..AKTIESELSKAB +214E ; PVALID # TURNED SMALL F +214F..2183 ; DISALLOWED # SYMBOL FOR SAMARITAN SOURCE..ROMAN NUMERAL R +2184 ; PVALID # LATIN SMALL LETTER REVERSED C +2185..2189 ; DISALLOWED # ROMAN NUMERAL SIX LATE FORM..VULGAR FRACTION +218A..218F ; UNASSIGNED # .. +2190..23E8 ; DISALLOWED # LEFTWARDS ARROW..DECIMAL EXPONENT SYMBOL +23E9..23FF ; UNASSIGNED # .. +2400..2426 ; DISALLOWED # SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM +2427..243F ; UNASSIGNED # .. +2440..244A ; DISALLOWED # OCR HOOK..OCR DOUBLE BACKSLASH +244B..245F ; UNASSIGNED # .. +2460..26CD ; DISALLOWED # CIRCLED DIGIT ONE..DISABLED CAR +26CE ; UNASSIGNED # +26CF..26E1 ; DISALLOWED # PICK..RESTRICTED LEFT ENTRY-2 +26E2 ; UNASSIGNED # +26E3 ; DISALLOWED # HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE +26E4..26E7 ; UNASSIGNED # .. +26E8..26FF ; DISALLOWED # BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZ +2700 ; UNASSIGNED # +2701..2704 ; DISALLOWED # UPPER BLADE SCISSORS..WHITE SCISSORS +2705 ; UNASSIGNED # +2706..2709 ; DISALLOWED # TELEPHONE LOCATION SIGN..ENVELOPE +270A..270B ; UNASSIGNED # .. +270C..2727 ; DISALLOWED # VICTORY HAND..WHITE FOUR POINTED STAR +2728 ; UNASSIGNED # +2729..274B ; DISALLOWED # STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEAR +274C ; UNASSIGNED # +274D ; DISALLOWED # SHADOWED WHITE CIRCLE +274E ; UNASSIGNED # +274F..2752 ; DISALLOWED # LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPE +2753..2755 ; UNASSIGNED # .. +2756..275E ; DISALLOWED # BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE CO +275F..2760 ; UNASSIGNED # .. +2761..2794 ; DISALLOWED # CURVED STEM PARAGRAPH SIGN ORNAMENT..HEAVY W +2795..2797 ; UNASSIGNED # .. 
+2798..27AF ; DISALLOWED # HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT- +27B0 ; UNASSIGNED # +27B1..27BE ; DISALLOWED # NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARD +27BF ; UNASSIGNED # +27C0..27CA ; DISALLOWED # THREE DIMENSIONAL ANGLE..VERTICAL BAR WITH H +27CB ; UNASSIGNED # +27CC ; DISALLOWED # LONG DIVISION +27CD..27CF ; UNASSIGNED # .. +27D0..2B4C ; DISALLOWED # WHITE DIAMOND WITH CENTRED DOT..RIGHTWARDS A +2A6D7..2A6FF; UNASSIGNED # .. +2A700..2B734; PVALID # .... +2B50..2B59 ; DISALLOWED # WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE +2B5A..2BFF ; UNASSIGNED # .. +2B735..2F7FF; UNASSIGNED # .. +2C00..2C2E ; DISALLOWED # GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CA +2C2F ; UNASSIGNED # +2C30..2C5E ; PVALID # GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMAL +2C5F ; UNASSIGNED # +2C60 ; DISALLOWED # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C61 ; PVALID # LATIN SMALL LETTER L WITH DOUBLE BAR +2C62..2C64 ; DISALLOWED # LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LA +2C65..2C66 ; PVALID # LATIN SMALL LETTER A WITH STROKE..LATIN SMAL +2C67 ; DISALLOWED # LATIN CAPITAL LETTER H WITH DESCENDER +2C68 ; PVALID # LATIN SMALL LETTER H WITH DESCENDER +2C69 ; DISALLOWED # LATIN CAPITAL LETTER K WITH DESCENDER +2C6A ; PVALID # LATIN SMALL LETTER K WITH DESCENDER +2C6B ; DISALLOWED # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6C ; PVALID # LATIN SMALL LETTER Z WITH DESCENDER +2C6D..2C70 ; DISALLOWED # LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LE +2C71 ; PVALID # LATIN SMALL LETTER V WITH RIGHT HOOK +2C72 ; DISALLOWED # LATIN CAPITAL LETTER W WITH HOOK +2C73..2C74 ; PVALID # LATIN SMALL LETTER W WITH HOOK..LATIN SMALL +2C75 ; DISALLOWED # LATIN CAPITAL LETTER HALF H +2C76..2C7B ; PVALID # LATIN SMALL LETTER HALF H..LATIN LETTER SMAL +2C7C..2C80 ; DISALLOWED # LATIN SUBSCRIPT SMALL LETTER J..COPTIC CAPIT +2C81 ; PVALID # COPTIC SMALL LETTER ALFA +2C82 ; DISALLOWED # COPTIC CAPITAL LETTER VIDA +2C83 ; PVALID # COPTIC SMALL LETTER VIDA +2C84 ; DISALLOWED # COPTIC CAPITAL LETTER GAMMA +2C85 
; PVALID # COPTIC SMALL LETTER GAMMA +2C86 ; DISALLOWED # COPTIC CAPITAL LETTER DALDA +2C87 ; PVALID # COPTIC SMALL LETTER DALDA +2C88 ; DISALLOWED # COPTIC CAPITAL LETTER EIE +2C89 ; PVALID # COPTIC SMALL LETTER EIE +2C8A ; DISALLOWED # COPTIC CAPITAL LETTER SOU +2C8B ; PVALID # COPTIC SMALL LETTER SOU +2C8C ; DISALLOWED # COPTIC CAPITAL LETTER ZATA +2C8D ; PVALID # COPTIC SMALL LETTER ZATA +2C8E ; DISALLOWED # COPTIC CAPITAL LETTER HATE +2C8F ; PVALID # COPTIC SMALL LETTER HATE +2C90 ; DISALLOWED # COPTIC CAPITAL LETTER THETHE +2C91 ; PVALID # COPTIC SMALL LETTER THETHE +2C92 ; DISALLOWED # COPTIC CAPITAL LETTER IAUDA +2C93 ; PVALID # COPTIC SMALL LETTER IAUDA +2C94 ; DISALLOWED # COPTIC CAPITAL LETTER KAPA +2C95 ; PVALID # COPTIC SMALL LETTER KAPA +2C96 ; DISALLOWED # COPTIC CAPITAL LETTER LAULA +2C97 ; PVALID # COPTIC SMALL LETTER LAULA +2C98 ; DISALLOWED # COPTIC CAPITAL LETTER MI +2C99 ; PVALID # COPTIC SMALL LETTER MI +2C9A ; DISALLOWED # COPTIC CAPITAL LETTER NI +2C9B ; PVALID # COPTIC SMALL LETTER NI +2C9C ; DISALLOWED # COPTIC CAPITAL LETTER KSI +2C9D ; PVALID # COPTIC SMALL LETTER KSI +2C9E ; DISALLOWED # COPTIC CAPITAL LETTER O +2C9F ; PVALID # COPTIC SMALL LETTER O +2CA0 ; DISALLOWED # COPTIC CAPITAL LETTER PI +2CA1 ; PVALID # COPTIC SMALL LETTER PI +2CA2 ; DISALLOWED # COPTIC CAPITAL LETTER RO +2CA3 ; PVALID # COPTIC SMALL LETTER RO +2CA4 ; DISALLOWED # COPTIC CAPITAL LETTER SIMA +2CA5 ; PVALID # COPTIC SMALL LETTER SIMA +2CA6 ; DISALLOWED # COPTIC CAPITAL LETTER TAU +2CA7 ; PVALID # COPTIC SMALL LETTER TAU +2CA8 ; DISALLOWED # COPTIC CAPITAL LETTER UA +2CA9 ; PVALID # COPTIC SMALL LETTER UA +2CAA ; DISALLOWED # COPTIC CAPITAL LETTER FI +2CAB ; PVALID # COPTIC SMALL LETTER FI +2CAC ; DISALLOWED # COPTIC CAPITAL LETTER KHI +2CAD ; PVALID # COPTIC SMALL LETTER KHI +2CAE ; DISALLOWED # COPTIC CAPITAL LETTER PSI +2CAF ; PVALID # COPTIC SMALL LETTER PSI +2CB0 ; DISALLOWED # COPTIC CAPITAL LETTER OOU +2CB1 ; PVALID # COPTIC SMALL LETTER OOU +2CB2 ; 
DISALLOWED # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB3 ; PVALID # COPTIC SMALL LETTER DIALECT-P ALEF +2CB4 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB5 ; PVALID # COPTIC SMALL LETTER OLD COPTIC AIN +2CB6 ; DISALLOWED # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB7 ; PVALID # COPTIC SMALL LETTER CRYPTOGRAMMIC EIE +2CB8 ; DISALLOWED # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CB9 ; PVALID # COPTIC SMALL LETTER DIALECT-P KAPA +2CBA ; DISALLOWED # COPTIC CAPITAL LETTER DIALECT-P NI +2CBB ; PVALID # COPTIC SMALL LETTER DIALECT-P NI +2CBC ; DISALLOWED # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBD ; PVALID # COPTIC SMALL LETTER CRYPTOGRAMMIC NI +2CBE ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CBF ; PVALID # COPTIC SMALL LETTER OLD COPTIC OOU +2CC0 ; DISALLOWED # COPTIC CAPITAL LETTER SAMPI +2CC1 ; PVALID # COPTIC SMALL LETTER SAMPI +2CC2 ; DISALLOWED # COPTIC CAPITAL LETTER CROSSED SHEI +2CC3 ; PVALID # COPTIC SMALL LETTER CROSSED SHEI +2CC4 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC5 ; PVALID # COPTIC SMALL LETTER OLD COPTIC SHEI +2CC6 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC7 ; PVALID # COPTIC SMALL LETTER OLD COPTIC ESH +2CC8 ; DISALLOWED # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CC9 ; PVALID # COPTIC SMALL LETTER AKHMIMIC KHEI +2CCA ; DISALLOWED # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCB ; PVALID # COPTIC SMALL LETTER DIALECT-P HORI +2CCC ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCD ; PVALID # COPTIC SMALL LETTER OLD COPTIC HORI +2CCE ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC HA +2CCF ; PVALID # COPTIC SMALL LETTER OLD COPTIC HA +2CD0 ; DISALLOWED # COPTIC CAPITAL LETTER L-SHAPED HA +2CD1 ; PVALID # COPTIC SMALL LETTER L-SHAPED HA +2CD2 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD3 ; PVALID # COPTIC SMALL LETTER OLD COPTIC HEI +2CD4 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD5 ; PVALID # COPTIC SMALL LETTER OLD COPTIC HAT +2CD6 ; DISALLOWED # COPTIC CAPITAL LETTER OLD 
COPTIC GANGIA +2CD7 ; PVALID # COPTIC SMALL LETTER OLD COPTIC GANGIA +2CD8 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CD9 ; PVALID # COPTIC SMALL LETTER OLD COPTIC DJA +2CDA ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDB ; PVALID # COPTIC SMALL LETTER OLD COPTIC SHIMA +2CDC ; DISALLOWED # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDD ; PVALID # COPTIC SMALL LETTER OLD NUBIAN SHIMA +2CDE ; DISALLOWED # COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CDF ; PVALID # COPTIC SMALL LETTER OLD NUBIAN NGI +2CE0 ; DISALLOWED # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE1 ; PVALID # COPTIC SMALL LETTER OLD NUBIAN NYI +2CE2 ; DISALLOWED # COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CE3..2CE4 ; PVALID # COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC S +2CE5..2CEB ; DISALLOWED # COPTIC SYMBOL MI RO..COPTIC CAPITAL LETTER C +2CEC ; PVALID # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI +2CED ; DISALLOWED # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CEE..2CF1 ; PVALID # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA..CO +2CF2..2CF8 ; UNASSIGNED # .. +2CF9..2CFF ; DISALLOWED # COPTIC OLD NUBIAN FULL STOP..COPTIC MORPHOLO +2D00..2D25 ; PVALID # GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LET +2D26..2D2F ; UNASSIGNED # .. +2D30..2D65 ; PVALID # TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D66..2D6E ; UNASSIGNED # .. +2D6F ; DISALLOWED # TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70..2D7F ; UNASSIGNED # .. +2D80..2D96 ; PVALID # ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGW +2D97..2D9F ; UNASSIGNED # .. 
+2DA0..2DA6 ; PVALID # ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA7 ; UNASSIGNED # +2DA8..2DAE ; PVALID # ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DAF ; UNASSIGNED # +2DB0..2DB6 ; PVALID # ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB7 ; UNASSIGNED # +2DB8..2DBE ; PVALID # ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CC +2DBF ; UNASSIGNED # +2DC0..2DC6 ; PVALID # ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC7 ; UNASSIGNED # +2DC8..2DCE ; PVALID # ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DCF ; UNASSIGNED # +2DD0..2DD6 ; PVALID # ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD7 ; UNASSIGNED # +2DD8..2DDE ; PVALID # ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DDF ; UNASSIGNED # +2DE0..2DFF ; PVALID # COMBINING CYRILLIC LETTER BE..COMBINING CYRI +2E00..2E2E ; DISALLOWED # RIGHT ANGLE SUBSTITUTION MARKER..REVERSED QU +2E2F ; PVALID # VERTICAL TILDE +2E30..2E31 ; DISALLOWED # RING POINT..WORD SEPARATOR MIDDLE DOT +2E32..2E7F ; UNASSIGNED # .. +2E80..2E99 ; DISALLOWED # CJK RADICAL REPEAT..CJK RADICAL RAP +2E9A ; UNASSIGNED # +2E9B..2EF3 ; DISALLOWED # CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED +2EF4..2EFF ; UNASSIGNED # .. +2F00..2FD5 ; DISALLOWED # KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2F800..2FA1D; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPA +2FA1E..2FFFD; UNASSIGNED # .. +2FD6..2FEF ; UNASSIGNED # .. +2FF0..2FFB ; DISALLOWED # IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RI +2FFC..2FFF ; UNASSIGNED # .. +2FFFE..2FFFF; DISALLOWED # .. +3000..3004 ; DISALLOWED # IDEOGRAPHIC SPACE..JAPANESE INDUSTRIAL STAND +30000..3FFFD; UNASSIGNED # .. 
+3005..3007 ; PVALID # IDEOGRAPHIC ITERATION MARK..IDEOGRAPHIC NUMB +3008..3029 ; DISALLOWED # LEFT ANGLE BRACKET..HANGZHOU NUMERAL NINE +302A..302D ; PVALID # IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENT +302E..303B ; DISALLOWED # HANGUL SINGLE DOT TONE MARK..VERTICAL IDEOGR +303C ; PVALID # MASU MARK +303D..303F ; DISALLOWED # PART ALTERNATION MARK..IDEOGRAPHIC HALF FILL +3040 ; UNASSIGNED # +3041..3096 ; PVALID # HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMA +3097..3098 ; UNASSIGNED # .. +3099..309A ; PVALID # COMBINING KATAKANA-HIRAGANA VOICED SOUND MAR +309B..309C ; DISALLOWED # KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKAN +309D..309E ; PVALID # HIRAGANA ITERATION MARK..HIRAGANA VOICED ITE +309F..30A0 ; DISALLOWED # HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOU +30A1..30FA ; PVALID # KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB ; CONTEXTO # KATAKANA MIDDLE DOT +30FC..30FE ; PVALID # KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATA +30FF ; DISALLOWED # KATAKANA DIGRAPH KOTO +3100..3104 ; UNASSIGNED # .. +3105..312D ; PVALID # BOPOMOFO LETTER B..BOPOMOFO LETTER IH +312E..3130 ; UNASSIGNED # .. +3131..318E ; DISALLOWED # HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +318F ; UNASSIGNED # +3190..319F ; DISALLOWED # IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRA +31A0..31B7 ; PVALID # BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H +31B8..31BF ; UNASSIGNED # .. +31C0..31E3 ; DISALLOWED # CJK STROKE T..CJK STROKE Q +31E4..31EF ; UNASSIGNED # .. +31F0..31FF ; PVALID # KATAKANA LETTER SMALL KU..KATAKANA LETTER SM +3200..321E ; DISALLOWED # PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED K +321F ; UNASSIGNED # +3220..32FE ; DISALLOWED # PARENTHESIZED IDEOGRAPH ONE..CIRCLED KATAKAN +32FF ; UNASSIGNED # +3300..33FF ; DISALLOWED # SQUARE APAATO..SQUARE GAL +3400..4DB5 ; PVALID # .... +40000..4FFFD; UNASSIGNED # .. +4DB6..4DBF ; UNASSIGNED # .. +4DC0..4DFF ; DISALLOWED # HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM F +4E00..9FCB ; PVALID # .. +4FFFE..4FFFF; DISALLOWED # .. 
+50000..5FFFD; UNASSIGNED # .. +5FFFE..5FFFF; DISALLOWED # .. +60000..6FFFD; UNASSIGNED # .. +6FFFE..6FFFF; DISALLOWED # .. +70000..7FFFD; UNASSIGNED # .. +7FFFE..7FFFF; DISALLOWED # .. +80000..8FFFD; UNASSIGNED # .. +8FFFE..8FFFF; DISALLOWED # .. +90000..9FFFD; UNASSIGNED # .. +9FCC..9FFF ; UNASSIGNED # .. +9FFFE..9FFFF; DISALLOWED # .. +A000..A48C ; PVALID # YI SYLLABLE IT..YI SYLLABLE YYR +A0000..AFFFD; UNASSIGNED # .. +A48D..A48F ; UNASSIGNED # .. +A490..A4C6 ; DISALLOWED # YI RADICAL QOT..YI RADICAL KE +A4C7..A4CF ; UNASSIGNED # .. +A4D0..A4FD ; PVALID # LISU LETTER BA..LISU LETTER TONE MYA JEU +A4FE..A4FF ; DISALLOWED # LISU PUNCTUATION COMMA..LISU PUNCTUATION FUL +A500..A60C ; PVALID # VAI SYLLABLE EE..VAI SYLLABLE LENGTHENER +A60D..A60F ; DISALLOWED # VAI COMMA..VAI QUESTION MARK +A610..A62B ; PVALID # VAI SYLLABLE NDOLE FA..VAI SYLLABLE NDOLE DO +A62C..A63F ; UNASSIGNED # .. +A640 ; DISALLOWED # CYRILLIC CAPITAL LETTER ZEMLYA +A641 ; PVALID # CYRILLIC SMALL LETTER ZEMLYA +A642 ; DISALLOWED # CYRILLIC CAPITAL LETTER DZELO +A643 ; PVALID # CYRILLIC SMALL LETTER DZELO +A644 ; DISALLOWED # CYRILLIC CAPITAL LETTER REVERSED DZE +A645 ; PVALID # CYRILLIC SMALL LETTER REVERSED DZE +A646 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTA +A647 ; PVALID # CYRILLIC SMALL LETTER IOTA +A648 ; DISALLOWED # CYRILLIC CAPITAL LETTER DJERV +A649 ; PVALID # CYRILLIC SMALL LETTER DJERV +A64A ; DISALLOWED # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64B ; PVALID # CYRILLIC SMALL LETTER MONOGRAPH UK +A64C ; DISALLOWED # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64D ; PVALID # CYRILLIC SMALL LETTER BROAD OMEGA +A64E ; DISALLOWED # CYRILLIC CAPITAL LETTER NEUTRAL YER +A64F ; PVALID # CYRILLIC SMALL LETTER NEUTRAL YER +A650 ; DISALLOWED # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A651 ; PVALID # CYRILLIC SMALL LETTER YERU WITH BACK YER +A652 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A653 ; PVALID # CYRILLIC SMALL LETTER IOTIFIED YAT +A654 ; DISALLOWED # CYRILLIC CAPITAL LETTER 
REVERSED YU +A655 ; PVALID # CYRILLIC SMALL LETTER REVERSED YU +A656 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED A +A657 ; PVALID # CYRILLIC SMALL LETTER IOTIFIED A +A658 ; DISALLOWED # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A659 ; PVALID # CYRILLIC SMALL LETTER CLOSED LITTLE YUS +A65A ; DISALLOWED # CYRILLIC CAPITAL LETTER BLENDED YUS +A65B ; PVALID # CYRILLIC SMALL LETTER BLENDED YUS +A65C ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITT +A65D ; PVALID # CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE +A65E ; DISALLOWED # CYRILLIC CAPITAL LETTER YN +A65F ; PVALID # CYRILLIC SMALL LETTER YN +A660..A661 ; UNASSIGNED # .. +A662 ; DISALLOWED # CYRILLIC CAPITAL LETTER SOFT DE +A663 ; PVALID # CYRILLIC SMALL LETTER SOFT DE +A664 ; DISALLOWED # CYRILLIC CAPITAL LETTER SOFT EL +A665 ; PVALID # CYRILLIC SMALL LETTER SOFT EL +A666 ; DISALLOWED # CYRILLIC CAPITAL LETTER SOFT EM +A667 ; PVALID # CYRILLIC SMALL LETTER SOFT EM +A668 ; DISALLOWED # CYRILLIC CAPITAL LETTER MONOCULAR O +A669 ; PVALID # CYRILLIC SMALL LETTER MONOCULAR O +A66A ; DISALLOWED # CYRILLIC CAPITAL LETTER BINOCULAR O +A66B ; PVALID # CYRILLIC SMALL LETTER BINOCULAR O +A66C ; DISALLOWED # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A66D..A66F ; PVALID # CYRILLIC SMALL LETTER DOUBLE MONOCULAR O..CO +A670..A673 ; DISALLOWED # COMBINING CYRILLIC TEN MILLIONS SIGN..SLAVON +A674..A67B ; UNASSIGNED # .. 
+A67C..A67D ; PVALID # COMBINING CYRILLIC KAVYKA..COMBINING CYRILLI +A67E ; DISALLOWED # CYRILLIC KAVYKA +A67F ; PVALID # CYRILLIC PAYEROK +A680 ; DISALLOWED # CYRILLIC CAPITAL LETTER DWE +A681 ; PVALID # CYRILLIC SMALL LETTER DWE +A682 ; DISALLOWED # CYRILLIC CAPITAL LETTER DZWE +A683 ; PVALID # CYRILLIC SMALL LETTER DZWE +A684 ; DISALLOWED # CYRILLIC CAPITAL LETTER ZHWE +A685 ; PVALID # CYRILLIC SMALL LETTER ZHWE +A686 ; DISALLOWED # CYRILLIC CAPITAL LETTER CCHE +A687 ; PVALID # CYRILLIC SMALL LETTER CCHE +A688 ; DISALLOWED # CYRILLIC CAPITAL LETTER DZZE +A689 ; PVALID # CYRILLIC SMALL LETTER DZZE +A68A ; DISALLOWED # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68B ; PVALID # CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK +A68C ; DISALLOWED # CYRILLIC CAPITAL LETTER TWE +A68D ; PVALID # CYRILLIC SMALL LETTER TWE +A68E ; DISALLOWED # CYRILLIC CAPITAL LETTER TSWE +A68F ; PVALID # CYRILLIC SMALL LETTER TSWE +A690 ; DISALLOWED # CYRILLIC CAPITAL LETTER TSSE +A691 ; PVALID # CYRILLIC SMALL LETTER TSSE +A692 ; DISALLOWED # CYRILLIC CAPITAL LETTER TCHE +A693 ; PVALID # CYRILLIC SMALL LETTER TCHE +A694 ; DISALLOWED # CYRILLIC CAPITAL LETTER HWE +A695 ; PVALID # CYRILLIC SMALL LETTER HWE +A696 ; DISALLOWED # CYRILLIC CAPITAL LETTER SHWE +A697 ; PVALID # CYRILLIC SMALL LETTER SHWE +A698..A69F ; UNASSIGNED # .. +A6A0..A6E5 ; PVALID # BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; DISALLOWED # BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; PVALID # BAMUM COMBINING MARK KOQNDON..BAMUM COMBININ +A6F2..A6F7 ; DISALLOWED # BAMUM NJAEMLI..BAMUM QUESTION MARK +A6F8..A6FF ; UNASSIGNED # .. 
+A700..A716 ; DISALLOWED # MODIFIER LETTER CHINESE TONE YIN PING..MODIF +A717..A71F ; PVALID # MODIFIER LETTER DOT VERTICAL BAR..MODIFIER L +A720..A722 ; DISALLOWED # MODIFIER LETTER STRESS AND HIGH TONE..LATIN +A723 ; PVALID # LATIN SMALL LETTER EGYPTOLOGICAL ALEF +A724 ; DISALLOWED # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A725 ; PVALID # LATIN SMALL LETTER EGYPTOLOGICAL AIN +A726 ; DISALLOWED # LATIN CAPITAL LETTER HENG +A727 ; PVALID # LATIN SMALL LETTER HENG +A728 ; DISALLOWED # LATIN CAPITAL LETTER TZ +A729 ; PVALID # LATIN SMALL LETTER TZ +A72A ; DISALLOWED # LATIN CAPITAL LETTER TRESILLO +A72B ; PVALID # LATIN SMALL LETTER TRESILLO +A72C ; DISALLOWED # LATIN CAPITAL LETTER CUATRILLO +A72D ; PVALID # LATIN SMALL LETTER CUATRILLO +A72E ; DISALLOWED # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A72F..A731 ; PVALID # LATIN SMALL LETTER CUATRILLO WITH COMMA..LAT +A732 ; DISALLOWED # LATIN CAPITAL LETTER AA +A733 ; PVALID # LATIN SMALL LETTER AA +A734 ; DISALLOWED # LATIN CAPITAL LETTER AO +A735 ; PVALID # LATIN SMALL LETTER AO +A736 ; DISALLOWED # LATIN CAPITAL LETTER AU +A737 ; PVALID # LATIN SMALL LETTER AU +A738 ; DISALLOWED # LATIN CAPITAL LETTER AV +A739 ; PVALID # LATIN SMALL LETTER AV +A73A ; DISALLOWED # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73B ; PVALID # LATIN SMALL LETTER AV WITH HORIZONTAL BAR +A73C ; DISALLOWED # LATIN CAPITAL LETTER AY +A73D ; PVALID # LATIN SMALL LETTER AY +A73E ; DISALLOWED # LATIN CAPITAL LETTER REVERSED C WITH DOT +A73F ; PVALID # LATIN SMALL LETTER REVERSED C WITH DOT +A740 ; DISALLOWED # LATIN CAPITAL LETTER K WITH STROKE +A741 ; PVALID # LATIN SMALL LETTER K WITH STROKE +A742 ; DISALLOWED # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A743 ; PVALID # LATIN SMALL LETTER K WITH DIAGONAL STROKE +A744 ; DISALLOWED # LATIN CAPITAL LETTER K WITH STROKE AND DIAGO +A745 ; PVALID # LATIN SMALL LETTER K WITH STROKE AND DIAGONA +A746 ; DISALLOWED # LATIN CAPITAL LETTER BROKEN L +A747 ; PVALID # LATIN SMALL LETTER BROKEN L +A748 
; DISALLOWED # LATIN CAPITAL LETTER L WITH HIGH STROKE +A749 ; PVALID # LATIN SMALL LETTER L WITH HIGH STROKE +A74A ; DISALLOWED # LATIN CAPITAL LETTER O WITH LONG STROKE OVER +A74B ; PVALID # LATIN SMALL LETTER O WITH LONG STROKE OVERLA +A74C ; DISALLOWED # LATIN CAPITAL LETTER O WITH LOOP +A74D ; PVALID # LATIN SMALL LETTER O WITH LOOP +A74E ; DISALLOWED # LATIN CAPITAL LETTER OO +A74F ; PVALID # LATIN SMALL LETTER OO +A750 ; DISALLOWED # LATIN CAPITAL LETTER P WITH STROKE THROUGH D +A751 ; PVALID # LATIN SMALL LETTER P WITH STROKE THROUGH DES +A752 ; DISALLOWED # LATIN CAPITAL LETTER P WITH FLOURISH +A753 ; PVALID # LATIN SMALL LETTER P WITH FLOURISH +A754 ; DISALLOWED # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A755 ; PVALID # LATIN SMALL LETTER P WITH SQUIRREL TAIL +A756 ; DISALLOWED # LATIN CAPITAL LETTER Q WITH STROKE THROUGH D +A757 ; PVALID # LATIN SMALL LETTER Q WITH STROKE THROUGH DES +A758 ; DISALLOWED # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A759 ; PVALID # LATIN SMALL LETTER Q WITH DIAGONAL STROKE +A75A ; DISALLOWED # LATIN CAPITAL LETTER R ROTUNDA +A75B ; PVALID # LATIN SMALL LETTER R ROTUNDA +A75C ; DISALLOWED # LATIN CAPITAL LETTER RUM ROTUNDA +A75D ; PVALID # LATIN SMALL LETTER RUM ROTUNDA +A75E ; DISALLOWED # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A75F ; PVALID # LATIN SMALL LETTER V WITH DIAGONAL STROKE +A760 ; DISALLOWED # LATIN CAPITAL LETTER VY +A761 ; PVALID # LATIN SMALL LETTER VY +A762 ; DISALLOWED # LATIN CAPITAL LETTER VISIGOTHIC Z +A763 ; PVALID # LATIN SMALL LETTER VISIGOTHIC Z +A764 ; DISALLOWED # LATIN CAPITAL LETTER THORN WITH STROKE +A765 ; PVALID # LATIN SMALL LETTER THORN WITH STROKE +A766 ; DISALLOWED # LATIN CAPITAL LETTER THORN WITH STROKE THROU +A767 ; PVALID # LATIN SMALL LETTER THORN WITH STROKE THROUGH +A768 ; DISALLOWED # LATIN CAPITAL LETTER VEND +A769 ; PVALID # LATIN SMALL LETTER VEND +A76A ; DISALLOWED # LATIN CAPITAL LETTER ET +A76B ; PVALID # LATIN SMALL LETTER ET +A76C ; DISALLOWED # LATIN CAPITAL 
LETTER IS +A76D ; PVALID # LATIN SMALL LETTER IS +A76E ; DISALLOWED # LATIN CAPITAL LETTER CON +A76F ; PVALID # LATIN SMALL LETTER CON +A770 ; DISALLOWED # MODIFIER LETTER US +A771..A778 ; PVALID # LATIN SMALL LETTER DUM..LATIN SMALL LETTER U +A779 ; DISALLOWED # LATIN CAPITAL LETTER INSULAR D +A77A ; PVALID # LATIN SMALL LETTER INSULAR D +A77B ; DISALLOWED # LATIN CAPITAL LETTER INSULAR F +A77C ; PVALID # LATIN SMALL LETTER INSULAR F +A77D..A77E ; DISALLOWED # LATIN CAPITAL LETTER INSULAR G..LATIN CAPITA +A77F ; PVALID # LATIN SMALL LETTER TURNED INSULAR G +A780 ; DISALLOWED # LATIN CAPITAL LETTER TURNED L +A781 ; PVALID # LATIN SMALL LETTER TURNED L +A782 ; DISALLOWED # LATIN CAPITAL LETTER INSULAR R +A783 ; PVALID # LATIN SMALL LETTER INSULAR R +A784 ; DISALLOWED # LATIN CAPITAL LETTER INSULAR S +A785 ; PVALID # LATIN SMALL LETTER INSULAR S +A786 ; DISALLOWED # LATIN CAPITAL LETTER INSULAR T +A787..A788 ; PVALID # LATIN SMALL LETTER INSULAR T..MODIFIER LETTE +A789..A78B ; DISALLOWED # MODIFIER LETTER COLON..LATIN CAPITAL LETTER +A78C ; PVALID # LATIN SMALL LETTER SALTILLO +A78D..A7FA ; UNASSIGNED # .. +A7FB..A827 ; PVALID # LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI N +A828..A82B ; DISALLOWED # SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POE +A82C..A82F ; UNASSIGNED # .. +A830..A839 ; DISALLOWED # NORTH INDIC FRACTION ONE QUARTER..NORTH INDI +A83A..A83F ; UNASSIGNED # .. +A840..A873 ; PVALID # PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABI +A874..A877 ; DISALLOWED # PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOU +A878..A87F ; UNASSIGNED # .. +A880..A8C4 ; PVALID # SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VI +A8C5..A8CD ; UNASSIGNED # .. +A8CE..A8CF ; DISALLOWED # SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; PVALID # SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8DA..A8DF ; UNASSIGNED # .. 
+A8E0..A8F7 ; PVALID # COMBINING DEVANAGARI DIGIT ZERO..DEVANAGARI +A8F8..A8FA ; DISALLOWED # DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; PVALID # DEVANAGARI HEADSTROKE +A8FC..A8FF ; UNASSIGNED # .. +A900..A92D ; PVALID # KAYAH LI DIGIT ZERO..KAYAH LI TONE CALYA PLO +A92E..A92F ; DISALLOWED # KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A930..A953 ; PVALID # REJANG LETTER KA..REJANG VIRAMA +A954..A95E ; UNASSIGNED # .. +A95F..A97C ; DISALLOWED # REJANG SECTION MARK..HANGUL CHOSEONG SSANGYE +A97D..A97F ; UNASSIGNED # .. +A980..A9C0 ; PVALID # JAVANESE SIGN PANYANGGA..JAVANESE PANGKON +A9C1..A9CD ; DISALLOWED # JAVANESE LEFT RERENGGAN..JAVANESE TURNED PAD +A9CE ; UNASSIGNED # +A9CF..A9D9 ; PVALID # JAVANESE PANGRANGKEP..JAVANESE DIGIT NINE +A9DA..A9DD ; UNASSIGNED # .. +A9DE..A9DF ; DISALLOWED # JAVANESE PADA TIRTA TUMETES..JAVANESE PADA I +A9E0..A9FF ; UNASSIGNED # .. +AA00..AA36 ; PVALID # CHAM LETTER A..CHAM CONSONANT SIGN WA +AA37..AA3F ; UNASSIGNED # .. +AA40..AA4D ; PVALID # CHAM LETTER FINAL K..CHAM CONSONANT SIGN FIN +AA4E..AA4F ; UNASSIGNED # .. +AA50..AA59 ; PVALID # CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5A..AA5B ; UNASSIGNED # .. +AA5C..AA5F ; DISALLOWED # CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TR +AA60..AA76 ; PVALID # MYANMAR LETTER KHAMTI GA..MYANMAR LOGOGRAM K +AA77..AA79 ; DISALLOWED # MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SY +AA7A..AA7B ; PVALID # MYANMAR LETTER AITON RA..MYANMAR SIGN PAO KA +AA7C..AA7F ; UNASSIGNED # .. +AA80..AAC2 ; PVALID # TAI VIET LETTER LOW KO..TAI VIET TONE MAI SO +AAC3..AADA ; UNASSIGNED # .. +AADB..AADD ; PVALID # TAI VIET SYMBOL KON..TAI VIET SYMBOL SAM +AADE..AADF ; DISALLOWED # TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI +AAE0..ABBF ; UNASSIGNED # .. +ABC0..ABEA ; PVALID # MEETEI MAYEK LETTER KOK..MEETEI MAYEK VOWEL +ABEB ; DISALLOWED # MEETEI MAYEK CHEIKHEI +ABEC..ABED ; PVALID # MEETEI MAYEK LUM IYEK..MEETEI MAYEK APUN IYE +ABEE..ABEF ; UNASSIGNED # .. 
+ABF0..ABF9 ; PVALID # MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT +ABFA..ABFF ; UNASSIGNED # .. +AC00..D7A3 ; PVALID # .. +AFFFE..AFFFF; DISALLOWED # .. +B0000..BFFFD; UNASSIGNED # .. +BFFFE..BFFFF; DISALLOWED # .. +C0000..CFFFD; UNASSIGNED # .. +CFFFE..CFFFF; DISALLOWED # .. +D0000..DFFFD; UNASSIGNED # .. +D7A4..D7AF ; UNASSIGNED # .. +D7B0..D7C6 ; DISALLOWED # HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARA +D7C7..D7CA ; UNASSIGNED # .. +D7CB..D7FB ; DISALLOWED # HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEO +D7FC..D7FF ; UNASSIGNED # .. +D800..FA0D ; DISALLOWED # ..CJK COMPAT +DFFFE..DFFFF; DISALLOWED # .. +E0000 ; UNASSIGNED # +E0001 ; DISALLOWED # LANGUAGE TAG +E0002..E001F; UNASSIGNED # .. +E0020..E007F; DISALLOWED # TAG SPACE..CANCEL TAG +E0080..E00FF; UNASSIGNED # .. +E0100..E01EF; DISALLOWED # VARIATION SELECTOR-17..VARIATION SELECTOR-25 +E01F0..EFFFD; UNASSIGNED # .. +EFFFE..10FFFF; DISALLOWED # .. +FA0E..FA0F ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPAT +FA10 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA10 +FA11 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA11 +FA12 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA12 +FA13..FA14 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPAT +FA15..FA1E ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPAT +FA1F ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA1F +FA20 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA20 +FA21 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA21 +FA22 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA22 +FA23..FA24 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPAT +FA25..FA26 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPAT +FA27..FA29 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPAT +FA2A..FA2D ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPAT +FA2E..FA2F ; UNASSIGNED # .. +FA30..FA6D ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPAT +FA6E..FA6F ; UNASSIGNED # .. 
+FA70..FAD9 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPAT +FADA..FAFF ; UNASSIGNED # .. +FB00..FB06 ; DISALLOWED # LATIN SMALL LIGATURE FF..LATIN SMALL LIGATUR +FB07..FB12 ; UNASSIGNED # .. +FB13..FB17 ; DISALLOWED # ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SM +FB18..FB1C ; UNASSIGNED # .. +FB1D ; DISALLOWED # HEBREW LETTER YOD WITH HIRIQ +FB1E ; PVALID # HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB36 ; DISALLOWED # HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBRE +FB37 ; UNASSIGNED # +FB38..FB3C ; DISALLOWED # HEBREW LETTER TET WITH DAGESH..HEBREW LETTER +FB3D ; UNASSIGNED # +FB3E ; DISALLOWED # HEBREW LETTER MEM WITH DAGESH +FB3F ; UNASSIGNED # +FB40..FB41 ; DISALLOWED # HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER +FB42 ; UNASSIGNED # +FB43..FB44 ; DISALLOWED # HEBREW LETTER FINAL PE WITH DAGESH..HEBREW L +FB45 ; UNASSIGNED # +FB46..FBB1 ; DISALLOWED # HEBREW LETTER TSADI WITH DAGESH..ARABIC LETT +FBB2..FBD2 ; UNASSIGNED # .. +FBD3..FD3F ; DISALLOWED # ARABIC LETTER NG ISOLATED FORM..ORNATE RIGHT +FD40..FD4F ; UNASSIGNED # .. +FD50..FD8F ; DISALLOWED # ARABIC LIGATURE TEH WITH JEEM WITH MEEM INIT +FD90..FD91 ; UNASSIGNED # .. +FD92..FDC7 ; DISALLOWED # ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INI +FDC8..FDCF ; UNASSIGNED # .. +FDD0..FDFD ; DISALLOWED # ..ARABIC LIGATURE BISMILLAH AR +FDFE..FDFF ; UNASSIGNED # .. +FE00..FE19 ; DISALLOWED # VARIATION SELECTOR-1..PRESENTATION FORM FOR +FE1A..FE1F ; UNASSIGNED # .. +FE20..FE26 ; PVALID # COMBINING LIGATURE LEFT HALF..COMBINING CONJ +FE27..FE2F ; UNASSIGNED # .. +FE30..FE52 ; DISALLOWED # PRESENTATION FORM FOR VERTICAL TWO DOT LEADE +FE53 ; UNASSIGNED # +FE54..FE66 ; DISALLOWED # SMALL SEMICOLON..SMALL EQUALS SIGN +FE67 ; UNASSIGNED # +FE68..FE6B ; DISALLOWED # SMALL REVERSE SOLIDUS..SMALL COMMERCIAL AT +FE6C..FE6F ; UNASSIGNED # .. 
+FE70..FE72 ; DISALLOWED # ARABIC FATHATAN ISOLATED FORM..ARABIC DAMMAT +FE73 ; PVALID # ARABIC TAIL FRAGMENT +FE74 ; DISALLOWED # ARABIC KASRATAN ISOLATED FORM +FE75 ; UNASSIGNED # +FE76..FEFC ; DISALLOWED # ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE +FEFD..FEFE ; UNASSIGNED # .. +FEFF ; DISALLOWED # ZERO WIDTH NO-BREAK SPACE +FF00 ; UNASSIGNED # +FF01..FFBE ; DISALLOWED # FULLWIDTH EXCLAMATION MARK..HALFWIDTH HANGUL +FFBF..FFC1 ; UNASSIGNED # .. +FFC2..FFC7 ; DISALLOWED # HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL +FFC8..FFC9 ; UNASSIGNED # .. +FFCA..FFCF ; DISALLOWED # HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGU +FFD0..FFD1 ; UNASSIGNED # .. +FFD2..FFD7 ; DISALLOWED # HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL +FFD8..FFD9 ; UNASSIGNED # .. +FFDA..FFDC ; DISALLOWED # HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL +FFDD..FFDF ; UNASSIGNED # .. +FFE0..FFE6 ; DISALLOWED # FULLWIDTH CENT SIGN..FULLWIDTH WON SIGN +FFE7 ; UNASSIGNED # +FFE8..FFEE ; DISALLOWED # HALFWIDTH FORMS LIGHT VERTICAL..HALFWIDTH WH +FFEF..FFF8 ; UNASSIGNED # .. +FFF9..FFFF ; DISALLOWED # INTERLINEAR ANNOTATION ANCHOR..