[Ada] AI12-0004 Normalization and allowed characters

gcc/ada/

	* scng.adb (Scan): Detect wide characters not in NFKC.
	* libgnat/a-chahan.adb, libgnat/a-chahan.ads,
	libgnat/a-wichha.adb, libgnat/a-wichha.ads,
	libgnat/a-wichun.adb, libgnat/a-wichun.ads,
	libgnat/a-zchhan.adb, libgnat/a-zchhan.ads,
	libgnat/a-zchuni.adb, libgnat/a-zchuni.ads (Is_NFKC): New.
	* libgnat/s-utf_32.ads, libgnat/s-utf_32.adb (Is_UTF_32_NFKC):
	New.
This commit is contained in:
Arnaud Charlet 2020-06-10 08:26:06 -04:00 committed by Pierre-Marie de Rodat
parent 504dc34720
commit 8e640a5f32
13 changed files with 490 additions and 6 deletions

View File

@ -399,6 +399,17 @@ package body Ada.Characters.Handling is
return False;
end Is_Mark;
-------------
-- Is_NFKC --
-------------
function Is_NFKC (Item : Character) return Boolean is
   Code : constant Natural := Character'Pos (Item);
   --  Position number of Item in the Latin-1 (ISO 8859-1) character set
begin
   --  The alternatives below enumerate the Latin-1 positions for which the
   --  function answers False; every other Character yields True.

   case Code is
      when 16#A0#              --  160
         | 16#A8#              --  168
         | 16#AA#              --  170
         | 16#AF#              --  175
         | 16#B2# .. 16#B5#    --  178 .. 181
         | 16#B8# .. 16#BA#    --  184 .. 186
         | 16#BC# .. 16#BE#    --  188 .. 190
      =>
         return False;

      when others =>
         return True;
   end case;
end Is_NFKC;
---------------------
-- Is_Other_Format --
---------------------

View File

@ -58,6 +58,7 @@ package Ada.Characters.Handling is
function Is_Other_Format (Item : Character) return Boolean;
function Is_Punctuation_Connector (Item : Character) return Boolean;
function Is_Space (Item : Character) return Boolean;
function Is_NFKC (Item : Character) return Boolean;
--  Returns True if the Character designated by Item could be present in a
--  string normalized to Normalization Form KC, otherwise returns False.
---------------------------------------------------
-- Conversion Functions for Character and String --

View File

@ -124,6 +124,13 @@ package body Ada.Wide_Characters.Handling is
function Is_Mark (Item : Wide_Character) return Boolean
renames Ada.Wide_Characters.Unicode.Is_Mark;
-------------
-- Is_NFKC --
-------------
--  Renaming-as-body: the operation is provided directly by the routine of
--  the same name in Ada.Wide_Characters.Unicode.
function Is_NFKC (Item : Wide_Character) return Boolean
renames Ada.Wide_Characters.Unicode.Is_NFKC;
---------------------
-- Is_Other_Format --
---------------------

View File

@ -101,6 +101,12 @@ package Ada.Wide_Characters.Handling is
-- Returns True if the Wide_Character designated by Item is categorized as
-- separator_space, otherwise returns False.
function Is_NFKC (Item : Wide_Character) return Boolean;
pragma Inline (Is_NFKC);
-- Returns True if the Wide_Character designated by Item could be present
-- in a string normalized to Normalization Form KC (as defined by Clause
-- 21 of ISO/IEC 10646:2017), otherwise returns False.
function Is_Graphic (Item : Wide_Character) return Boolean;
pragma Inline (Is_Graphic);
-- Returns True if the Wide_Character designated by Item is categorized as

View File

@ -116,6 +116,15 @@ package body Ada.Wide_Characters.Unicode is
return G.Is_UTF_32_Non_Graphic (G.Category (C));
end Is_Non_Graphic;
-------------
-- Is_NFKC --
-------------
function Is_NFKC (U : Wide_Character) return Boolean is
   Code_Point : constant G.UTF_32 := Wide_Character'Pos (U);
   --  Position number of U, in the form taken by the G query below
begin
   --  The answer comes entirely from the UTF-32 level routine

   return G.Is_UTF_32_NFKC (Code_Point);
end Is_NFKC;
--------------
-- Is_Other --
--------------

View File

@ -131,7 +131,7 @@ package Ada.Wide_Characters.Unicode is
pragma Inline (Is_Other);
-- Returns true iff U is an other format character, which means that it
-- can be used to extend an identifier, but is ignored for the purposes of
-- matching of identiers, or if C is one of the corresponding categories,
-- matching of identifiers, or if C is one of the corresponding categories,
-- which are the following:
-- Other, Format (Cf)
@ -150,6 +150,12 @@ package Ada.Wide_Characters.Unicode is
-- of the corresponding categories, which are the following:
-- Separator, Space (Zs)
function Is_NFKC (U : Wide_Character) return Boolean;
pragma Inline (Is_NFKC);
-- Returns True if the Wide_Character designated by U could be present
-- in a string normalized to Normalization Form KC (as defined by Clause
-- 21 of ISO/IEC 10646:2017), otherwise returns False.
function Is_Non_Graphic (U : Wide_Character) return Boolean;
function Is_Non_Graphic (C : Category) return Boolean;
pragma Inline (Is_Non_Graphic);

View File

@ -108,6 +108,13 @@ package body Ada.Wide_Wide_Characters.Handling is
function Is_Mark (Item : Wide_Wide_Character) return Boolean
renames Ada.Wide_Wide_Characters.Unicode.Is_Mark;
-------------
-- Is_NFKC --
-------------
--  Renaming-as-body: the operation is provided directly by the routine of
--  the same name in Ada.Wide_Wide_Characters.Unicode.
function Is_NFKC (Item : Wide_Wide_Character) return Boolean
renames Ada.Wide_Wide_Characters.Unicode.Is_NFKC;
---------------------
-- Is_Other_Format --
---------------------

View File

@ -98,6 +98,12 @@ package Ada.Wide_Wide_Characters.Handling is
-- Returns True if the Wide_Wide_Character designated by Item is
-- categorized as separator_space, otherwise returns false.
function Is_NFKC (Item : Wide_Wide_Character) return Boolean;
pragma Inline (Is_NFKC);
-- Returns True if the Wide_Wide_Character designated by Item could be
-- present in a string normalized to Normalization Form KC (as defined by
-- Clause 21 of ISO/IEC 10646:2017), otherwise returns False.
function Is_Graphic (Item : Wide_Wide_Character) return Boolean;
pragma Inline (Is_Graphic);
-- Returns True if the Wide_Wide_Character designated by Item is

View File

@ -107,6 +107,15 @@ package body Ada.Wide_Wide_Characters.Unicode is
return G.Is_UTF_32_Non_Graphic (G.Category (C));
end Is_Non_Graphic;
-------------
-- Is_NFKC --
-------------
function Is_NFKC (U : Wide_Wide_Character) return Boolean is
   Code_Point : constant G.UTF_32 := Wide_Wide_Character'Pos (U);
   --  Position number of U, in the form taken by the G query below
begin
   --  The answer comes entirely from the UTF-32 level routine

   return G.Is_UTF_32_NFKC (Code_Point);
end Is_NFKC;
--------------
-- Is_Other --
--------------

View File

@ -147,6 +147,12 @@ package Ada.Wide_Wide_Characters.Unicode is
-- of the corresponding categories, which are the following:
-- Separator, Space (Zs)
function Is_NFKC (U : Wide_Wide_Character) return Boolean;
pragma Inline (Is_NFKC);
-- Returns True if the Wide_Wide_Character designated by U could be present
-- in a string normalized to Normalization Form KC (as defined by Clause
-- 21 of ISO/IEC 10646:2017), otherwise returns False.
function Is_Non_Graphic (U : Wide_Wide_Character) return Boolean;
function Is_Non_Graphic (C : Category) return Boolean;
pragma Inline (Is_Non_Graphic);

View File

@ -49,7 +49,7 @@ package body System.UTF_32 is
----------------------
-- Note these tables are derived from those given in AI-285. For details
-- see //www.ada-auth.org/cgi-bin/cvsweb.cgi/AIs/AI-00285.TXT?rev=1.22.
-- see www.ada-auth.org/cgi-bin/cvsweb.cgi/AIs/AI-00285.TXT?rev=1.22.
type UTF_32_Range is record
Lo : UTF_32;
@ -6071,9 +6071,6 @@ package body System.UTF_32 is
40, -- DESERET CAPITAL LETTER LONG I .. DESERET CAPITAL LETTER EW
32); -- TAG LATIN CAPITAL LETTER A .. TAG LATIN CAPITAL LETTER Z
pragma Warnings (On);
-- Temporary until pragma Warnings at start can be activated ???
-- The following is a list of the 10646 names for CAPITAL LETTER entries
-- that have no matching SMALL LETTER entry and are thus not folded
@ -6117,6 +6114,403 @@ package body System.UTF_32 is
-- GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
-- GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
-- The following array includes all characters in the Unicode table whose
-- NFKC_Quick_Check property has the value No, taken from
-- www.unicode.org/Public/UCD/latest/ucd/DerivedNormalizationProps.txt
UTF_32_NFKC_QC_No : constant UTF_32_Ranges := (
(16#00A0#, 16#00A0#), -- NO-BREAK SPACE
(16#00A8#, 16#00A8#), -- DIAERESIS
(16#00AA#, 16#00AA#), -- FEMININE ORDINAL INDICATOR
(16#00AF#, 16#00AF#), -- MACRON
(16#00B2#, 16#00B3#), -- SUPERSCRIPT TWO..SUPERSCRIPT THREE
(16#00B4#, 16#00B4#), -- ACUTE ACCENT
(16#00B5#, 16#00B5#), -- MICRO SIGN
(16#00B8#, 16#00B8#), -- CEDILLA
(16#00B9#, 16#00B9#), -- SUPERSCRIPT ONE
(16#00BA#, 16#00BA#), -- MASCULINE ORDINAL INDICATOR
(16#00BC#, 16#00BE#), -- VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
(16#0132#, 16#0133#), -- LATIN CAPITAL LIGATURE IJ..LATIN SMALL LIGATURE IJ
(16#013F#, 16#0140#), -- LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH MIDDLE DOT
(16#0149#, 16#0149#), -- LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
(16#017F#, 16#017F#), -- LATIN SMALL LETTER LONG S
(16#01C4#, 16#01CC#), -- LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER NJ
(16#01F1#, 16#01F3#), -- LATIN CAPITAL LETTER DZ..LATIN SMALL LETTER DZ
(16#02B0#, 16#02B8#), -- MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
(16#02D8#, 16#02DD#), -- BREVE..DOUBLE ACUTE ACCENT
(16#02E0#, 16#02E4#), -- MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
(16#0340#, 16#0341#), -- COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
(16#0343#, 16#0344#), -- COMBINING GREEK KORONIS..COMBINING GREEK DIALYTIKA TONOS
(16#0374#, 16#0374#), -- GREEK NUMERAL SIGN
(16#037A#, 16#037A#), -- GREEK YPOGEGRAMMENI
(16#037E#, 16#037E#), -- GREEK QUESTION MARK
(16#0384#, 16#0385#), -- GREEK TONOS..GREEK DIALYTIKA TONOS
(16#0387#, 16#0387#), -- GREEK ANO TELEIA
(16#03D0#, 16#03D6#), -- GREEK BETA SYMBOL..GREEK PI SYMBOL
(16#03F0#, 16#03F2#), -- GREEK KAPPA SYMBOL..GREEK LUNATE SIGMA SYMBOL
(16#03F4#, 16#03F5#), -- GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL
(16#03F9#, 16#03F9#), -- GREEK CAPITAL LUNATE SIGMA SYMBOL
(16#0587#, 16#0587#), -- ARMENIAN SMALL LIGATURE ECH YIWN
(16#0675#, 16#0678#), -- ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER HIGH HAMZA YEH
(16#0958#, 16#095F#), -- DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA
(16#09DC#, 16#09DD#), -- BENGALI LETTER RRA..BENGALI LETTER RHA
(16#09DF#, 16#09DF#), -- BENGALI LETTER YYA
(16#0A33#, 16#0A33#), -- GURMUKHI LETTER LLA
(16#0A36#, 16#0A36#), -- GURMUKHI LETTER SHA
(16#0A59#, 16#0A5B#), -- GURMUKHI LETTER KHHA..GURMUKHI LETTER ZA
(16#0A5E#, 16#0A5E#), -- GURMUKHI LETTER FA
(16#0B5C#, 16#0B5D#), -- ORIYA LETTER RRA..ORIYA LETTER RHA
(16#0E33#, 16#0E33#), -- THAI CHARACTER SARA AM
(16#0EB3#, 16#0EB3#), -- LAO VOWEL SIGN AM
(16#0EDC#, 16#0EDD#), -- LAO HO NO..LAO HO MO
(16#0F0C#, 16#0F0C#), -- TIBETAN MARK DELIMITER TSHEG BSTAR
(16#0F43#, 16#0F43#), -- TIBETAN LETTER GHA
(16#0F4D#, 16#0F4D#), -- TIBETAN LETTER DDHA
(16#0F52#, 16#0F52#), -- TIBETAN LETTER DHA
(16#0F57#, 16#0F57#), -- TIBETAN LETTER BHA
(16#0F5C#, 16#0F5C#), -- TIBETAN LETTER DZHA
(16#0F69#, 16#0F69#), -- TIBETAN LETTER KSSA
(16#0F73#, 16#0F73#), -- TIBETAN VOWEL SIGN II
(16#0F75#, 16#0F79#), -- TIBETAN VOWEL SIGN UU..TIBETAN VOWEL SIGN VOCALIC LL
(16#0F81#, 16#0F81#), -- TIBETAN VOWEL SIGN REVERSED II
(16#0F93#, 16#0F93#), -- TIBETAN SUBJOINED LETTER GHA
(16#0F9D#, 16#0F9D#), -- TIBETAN SUBJOINED LETTER DDHA
(16#0FA2#, 16#0FA2#), -- TIBETAN SUBJOINED LETTER DHA
(16#0FA7#, 16#0FA7#), -- TIBETAN SUBJOINED LETTER BHA
(16#0FAC#, 16#0FAC#), -- TIBETAN SUBJOINED LETTER DZHA
(16#0FB9#, 16#0FB9#), -- TIBETAN SUBJOINED LETTER KSSA
(16#10FC#, 16#10FC#), -- MODIFIER LETTER GEORGIAN NAR
(16#1D2C#, 16#1D2E#), -- MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B
(16#1D30#, 16#1D3A#), -- MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N
(16#1D3C#, 16#1D4D#), -- MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G
(16#1D4F#, 16#1D6A#), -- MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI
(16#1D78#, 16#1D78#), -- MODIFIER LETTER CYRILLIC EN
(16#1D9B#, 16#1DBF#), -- MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
(16#1E9A#, 16#1E9B#), -- LATIN SMALL LETTER A WITH RIGHT HALF RING..LATIN SMALL LETTER LONG S WITH DOT ABOVE
(16#1F71#, 16#1F71#), -- GREEK SMALL LETTER ALPHA WITH OXIA
(16#1F73#, 16#1F73#), -- GREEK SMALL LETTER EPSILON WITH OXIA
(16#1F75#, 16#1F75#), -- GREEK SMALL LETTER ETA WITH OXIA
(16#1F77#, 16#1F77#), -- GREEK SMALL LETTER IOTA WITH OXIA
(16#1F79#, 16#1F79#), -- GREEK SMALL LETTER OMICRON WITH OXIA
(16#1F7B#, 16#1F7B#), -- GREEK SMALL LETTER UPSILON WITH OXIA
(16#1F7D#, 16#1F7D#), -- GREEK SMALL LETTER OMEGA WITH OXIA
(16#1FBB#, 16#1FBB#), -- GREEK CAPITAL LETTER ALPHA WITH OXIA
(16#1FBD#, 16#1FBD#), -- GREEK KORONIS
(16#1FBE#, 16#1FBE#), -- GREEK PROSGEGRAMMENI
(16#1FBF#, 16#1FC1#), -- GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
(16#1FC9#, 16#1FC9#), -- GREEK CAPITAL LETTER EPSILON WITH OXIA
(16#1FCB#, 16#1FCB#), -- GREEK CAPITAL LETTER ETA WITH OXIA
(16#1FCD#, 16#1FCF#), -- GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
(16#1FD3#, 16#1FD3#), -- GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
(16#1FDB#, 16#1FDB#), -- GREEK CAPITAL LETTER IOTA WITH OXIA
(16#1FDD#, 16#1FDF#), -- GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
(16#1FE3#, 16#1FE3#), -- GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
(16#1FEB#, 16#1FEB#), -- GREEK CAPITAL LETTER UPSILON WITH OXIA
(16#1FED#, 16#1FEF#), -- GREEK DIALYTIKA AND VARIA..GREEK VARIA
(16#1FF9#, 16#1FF9#), -- GREEK CAPITAL LETTER OMICRON WITH OXIA
(16#1FFB#, 16#1FFB#), -- GREEK CAPITAL LETTER OMEGA WITH OXIA
(16#1FFD#, 16#1FFE#), -- GREEK OXIA..GREEK DASIA
(16#2000#, 16#200A#), -- EN QUAD..HAIR SPACE
(16#2011#, 16#2011#), -- NON-BREAKING HYPHEN
(16#2017#, 16#2017#), -- DOUBLE LOW LINE
(16#2024#, 16#2026#), -- ONE DOT LEADER..HORIZONTAL ELLIPSIS
(16#202F#, 16#202F#), -- NARROW NO-BREAK SPACE
(16#2033#, 16#2034#), -- DOUBLE PRIME..TRIPLE PRIME
(16#2036#, 16#2037#), -- REVERSED DOUBLE PRIME..REVERSED TRIPLE PRIME
(16#203C#, 16#203C#), -- DOUBLE EXCLAMATION MARK
(16#203E#, 16#203E#), -- OVERLINE
(16#2047#, 16#2049#), -- DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
(16#2057#, 16#2057#), -- QUADRUPLE PRIME
(16#205F#, 16#205F#), -- MEDIUM MATHEMATICAL SPACE
(16#2070#, 16#2070#), -- SUPERSCRIPT ZERO
(16#2071#, 16#2071#), -- SUPERSCRIPT LATIN SMALL LETTER I
(16#2074#, 16#2079#), -- SUPERSCRIPT FOUR..SUPERSCRIPT NINE
(16#207A#, 16#207C#), -- SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
(16#207D#, 16#207D#), -- SUPERSCRIPT LEFT PARENTHESIS
(16#207E#, 16#207E#), -- SUPERSCRIPT RIGHT PARENTHESIS
(16#207F#, 16#207F#), -- SUPERSCRIPT LATIN SMALL LETTER N
(16#2080#, 16#2089#), -- SUBSCRIPT ZERO..SUBSCRIPT NINE
(16#208A#, 16#208C#), -- SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
(16#208D#, 16#208D#), -- SUBSCRIPT LEFT PARENTHESIS
(16#208E#, 16#208E#), -- SUBSCRIPT RIGHT PARENTHESIS
(16#2090#, 16#209C#), -- LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
(16#20A8#, 16#20A8#), -- RUPEE SIGN
(16#2100#, 16#2101#), -- ACCOUNT OF..ADDRESSED TO THE SUBJECT
(16#2102#, 16#2102#), -- DOUBLE-STRUCK CAPITAL C
(16#2103#, 16#2103#), -- DEGREE CELSIUS
(16#2105#, 16#2106#), -- CARE OF..CADA UNA
(16#2107#, 16#2107#), -- EULER CONSTANT
(16#2109#, 16#2109#), -- DEGREE FAHRENHEIT
(16#210A#, 16#2113#), -- SCRIPT SMALL G..SCRIPT SMALL L
(16#2115#, 16#2115#), -- DOUBLE-STRUCK CAPITAL N
(16#2116#, 16#2116#), -- NUMERO SIGN
(16#2119#, 16#211D#), -- DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
(16#2120#, 16#2122#), -- SERVICE MARK..TRADE MARK SIGN
(16#2124#, 16#2124#), -- DOUBLE-STRUCK CAPITAL Z
(16#2126#, 16#2126#), -- OHM SIGN
(16#2128#, 16#2128#), -- BLACK-LETTER CAPITAL Z
(16#212A#, 16#212D#), -- KELVIN SIGN..BLACK-LETTER CAPITAL C
(16#212F#, 16#2131#), -- SCRIPT SMALL E..SCRIPT CAPITAL F
(16#2133#, 16#2134#), -- SCRIPT CAPITAL M..SCRIPT SMALL O
(16#2135#, 16#2138#), -- ALEF SYMBOL..DALET SYMBOL
(16#2139#, 16#2139#), -- INFORMATION SOURCE
(16#213B#, 16#213B#), -- FACSIMILE SIGN
(16#213C#, 16#213F#), -- DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
(16#2140#, 16#2140#), -- DOUBLE-STRUCK N-ARY SUMMATION
(16#2145#, 16#2149#), -- DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
(16#2150#, 16#215F#), -- VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
(16#2160#, 16#217F#), -- ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
(16#2189#, 16#2189#), -- VULGAR FRACTION ZERO THIRDS
(16#222C#, 16#222D#), -- DOUBLE INTEGRAL..TRIPLE INTEGRAL
(16#222F#, 16#2230#), -- SURFACE INTEGRAL..VOLUME INTEGRAL
(16#2329#, 16#2329#), -- LEFT-POINTING ANGLE BRACKET
(16#232A#, 16#232A#), -- RIGHT-POINTING ANGLE BRACKET
(16#2460#, 16#249B#), -- CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
(16#249C#, 16#24E9#), -- PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
(16#24EA#, 16#24EA#), -- CIRCLED DIGIT ZERO
(16#2A0C#, 16#2A0C#), -- QUADRUPLE INTEGRAL OPERATOR
(16#2A74#, 16#2A76#), -- DOUBLE COLON EQUAL..THREE CONSECUTIVE EQUALS SIGNS
(16#2ADC#, 16#2ADC#), -- FORKING
(16#2C7C#, 16#2C7D#), -- LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V
(16#2D6F#, 16#2D6F#), -- TIFINAGH MODIFIER LETTER LABIALIZATION MARK
(16#2E9F#, 16#2E9F#), -- CJK RADICAL MOTHER
(16#2EF3#, 16#2EF3#), -- CJK RADICAL C-SIMPLIFIED TURTLE
(16#2F00#, 16#2FD5#), -- KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
(16#3000#, 16#3000#), -- IDEOGRAPHIC SPACE
(16#3036#, 16#3036#), -- CIRCLED POSTAL MARK
(16#3038#, 16#303A#), -- HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
(16#309B#, 16#309C#), -- KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
(16#309F#, 16#309F#), -- HIRAGANA DIGRAPH YORI
(16#30FF#, 16#30FF#), -- KATAKANA DIGRAPH KOTO
(16#3131#, 16#318E#), -- HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
(16#3192#, 16#3195#), -- IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
(16#3196#, 16#319F#), -- IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
(16#3200#, 16#321E#), -- PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
(16#3220#, 16#3229#), -- PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
(16#322A#, 16#3247#), -- PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
(16#3250#, 16#3250#), -- PARTNERSHIP SIGN
(16#3251#, 16#325F#), -- CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
(16#3260#, 16#327E#), -- CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
(16#3280#, 16#3289#), -- CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
(16#328A#, 16#32B0#), -- CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
(16#32B1#, 16#32BF#), -- CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
(16#32C0#, 16#33FF#), -- IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE GAL
(16#A69C#, 16#A69D#), -- MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
(16#A770#, 16#A770#), -- MODIFIER LETTER US
(16#A7F8#, 16#A7F9#), -- MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
(16#AB5C#, 16#AB5F#), -- MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
(16#AB69#, 16#AB69#), -- MODIFIER LETTER SMALL TURNED W
(16#F900#, 16#FA0D#), -- CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
(16#FA10#, 16#FA10#), -- CJK COMPATIBILITY IDEOGRAPH-FA10
(16#FA12#, 16#FA12#), -- CJK COMPATIBILITY IDEOGRAPH-FA12
(16#FA15#, 16#FA1E#), -- CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
(16#FA20#, 16#FA20#), -- CJK COMPATIBILITY IDEOGRAPH-FA20
(16#FA22#, 16#FA22#), -- CJK COMPATIBILITY IDEOGRAPH-FA22
(16#FA25#, 16#FA26#), -- CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
(16#FA2A#, 16#FA6D#), -- CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D
(16#FA70#, 16#FAD9#), -- CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
(16#FB00#, 16#FB06#), -- LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
(16#FB13#, 16#FB17#), -- ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
(16#FB1D#, 16#FB1D#), -- HEBREW LETTER YOD WITH HIRIQ
(16#FB1F#, 16#FB28#), -- HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
(16#FB29#, 16#FB29#), -- HEBREW LETTER ALTERNATIVE PLUS SIGN
(16#FB2A#, 16#FB36#), -- HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
(16#FB38#, 16#FB3C#), -- HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
(16#FB3E#, 16#FB3E#), -- HEBREW LETTER MEM WITH DAGESH
(16#FB40#, 16#FB41#), -- HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
(16#FB43#, 16#FB44#), -- HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
(16#FB46#, 16#FBB1#), -- HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
(16#FBD3#, 16#FD3D#), -- ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
(16#FD50#, 16#FD8F#), -- ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
(16#FD92#, 16#FDC7#), -- ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
(16#FDF0#, 16#FDFB#), -- ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
(16#FDFC#, 16#FDFC#), -- RIAL SIGN
(16#FE10#, 16#FE16#), -- PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
(16#FE17#, 16#FE17#), -- PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
(16#FE18#, 16#FE18#), -- PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
(16#FE19#, 16#FE19#), -- PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
(16#FE30#, 16#FE30#), -- PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
(16#FE31#, 16#FE32#), -- PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
(16#FE33#, 16#FE34#), -- PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
(16#FE35#, 16#FE35#), -- PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
(16#FE36#, 16#FE36#), -- PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
(16#FE37#, 16#FE37#), -- PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
(16#FE38#, 16#FE38#), -- PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
(16#FE39#, 16#FE39#), -- PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
(16#FE3A#, 16#FE3A#), -- PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
(16#FE3B#, 16#FE3B#), -- PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
(16#FE3C#, 16#FE3C#), -- PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
(16#FE3D#, 16#FE3D#), -- PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
(16#FE3E#, 16#FE3E#), -- PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
(16#FE3F#, 16#FE3F#), -- PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
(16#FE40#, 16#FE40#), -- PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
(16#FE41#, 16#FE41#), -- PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
(16#FE42#, 16#FE42#), -- PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
(16#FE43#, 16#FE43#), -- PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
(16#FE44#, 16#FE44#), -- PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
(16#FE47#, 16#FE47#), -- PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
(16#FE48#, 16#FE48#), -- PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
(16#FE49#, 16#FE4C#), -- DASHED OVERLINE..DOUBLE WAVY OVERLINE
(16#FE4D#, 16#FE4F#), -- DASHED LOW LINE..WAVY LOW LINE
(16#FE50#, 16#FE52#), -- SMALL COMMA..SMALL FULL STOP
(16#FE54#, 16#FE57#), -- SMALL SEMICOLON..SMALL EXCLAMATION MARK
(16#FE58#, 16#FE58#), -- SMALL EM DASH
(16#FE59#, 16#FE59#), -- SMALL LEFT PARENTHESIS
(16#FE5A#, 16#FE5A#), -- SMALL RIGHT PARENTHESIS
(16#FE5B#, 16#FE5B#), -- SMALL LEFT CURLY BRACKET
(16#FE5C#, 16#FE5C#), -- SMALL RIGHT CURLY BRACKET
(16#FE5D#, 16#FE5D#), -- SMALL LEFT TORTOISE SHELL BRACKET
(16#FE5E#, 16#FE5E#), -- SMALL RIGHT TORTOISE SHELL BRACKET
(16#FE5F#, 16#FE61#), -- SMALL NUMBER SIGN..SMALL ASTERISK
(16#FE62#, 16#FE62#), -- SMALL PLUS SIGN
(16#FE63#, 16#FE63#), -- SMALL HYPHEN-MINUS
(16#FE64#, 16#FE66#), -- SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
(16#FE68#, 16#FE68#), -- SMALL REVERSE SOLIDUS
(16#FE69#, 16#FE69#), -- SMALL DOLLAR SIGN
(16#FE6A#, 16#FE6B#), -- SMALL PERCENT SIGN..SMALL COMMERCIAL AT
(16#FE70#, 16#FE72#), -- ARABIC FATHATAN ISOLATED FORM..ARABIC DAMMATAN ISOLATED FORM
(16#FE74#, 16#FE74#), -- ARABIC KASRATAN ISOLATED FORM
(16#FE76#, 16#FEFC#), -- ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
(16#FF01#, 16#FF03#), -- FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
(16#FF04#, 16#FF04#), -- FULLWIDTH DOLLAR SIGN
(16#FF05#, 16#FF07#), -- FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
(16#FF08#, 16#FF08#), -- FULLWIDTH LEFT PARENTHESIS
(16#FF09#, 16#FF09#), -- FULLWIDTH RIGHT PARENTHESIS
(16#FF0A#, 16#FF0A#), -- FULLWIDTH ASTERISK
(16#FF0B#, 16#FF0B#), -- FULLWIDTH PLUS SIGN
(16#FF0C#, 16#FF0C#), -- FULLWIDTH COMMA
(16#FF0D#, 16#FF0D#), -- FULLWIDTH HYPHEN-MINUS
(16#FF0E#, 16#FF0F#), -- FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
(16#FF10#, 16#FF19#), -- FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
(16#FF1A#, 16#FF1B#), -- FULLWIDTH COLON..FULLWIDTH SEMICOLON
(16#FF1C#, 16#FF1E#), -- FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
(16#FF1F#, 16#FF20#), -- FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
(16#FF21#, 16#FF3A#), -- FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
(16#FF3B#, 16#FF3B#), -- FULLWIDTH LEFT SQUARE BRACKET
(16#FF3C#, 16#FF3C#), -- FULLWIDTH REVERSE SOLIDUS
(16#FF3D#, 16#FF3D#), -- FULLWIDTH RIGHT SQUARE BRACKET
(16#FF3E#, 16#FF3E#), -- FULLWIDTH CIRCUMFLEX ACCENT
(16#FF3F#, 16#FF3F#), -- FULLWIDTH LOW LINE
(16#FF40#, 16#FF40#), -- FULLWIDTH GRAVE ACCENT
(16#FF41#, 16#FF5A#), -- FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
(16#FF5B#, 16#FF5B#), -- FULLWIDTH LEFT CURLY BRACKET
(16#FF5C#, 16#FF5C#), -- FULLWIDTH VERTICAL LINE
(16#FF5D#, 16#FF5D#), -- FULLWIDTH RIGHT CURLY BRACKET
(16#FF5E#, 16#FF5E#), -- FULLWIDTH TILDE
(16#FF5F#, 16#FF5F#), -- FULLWIDTH LEFT WHITE PARENTHESIS
(16#FF60#, 16#FF60#), -- FULLWIDTH RIGHT WHITE PARENTHESIS
(16#FF61#, 16#FF61#), -- HALFWIDTH IDEOGRAPHIC FULL STOP
(16#FF62#, 16#FF62#), -- HALFWIDTH LEFT CORNER BRACKET
(16#FF63#, 16#FF63#), -- HALFWIDTH RIGHT CORNER BRACKET
(16#FF64#, 16#FF65#), -- HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
(16#FF66#, 16#FF6F#), -- HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
(16#FF70#, 16#FF70#), -- HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
(16#FF71#, 16#FF9D#), -- HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
(16#FF9E#, 16#FF9F#), -- HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
(16#FFA0#, 16#FFBE#), -- HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
(16#FFC2#, 16#FFC7#), -- HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
(16#FFCA#, 16#FFCF#), -- HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
(16#FFD2#, 16#FFD7#), -- HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
(16#FFDA#, 16#FFDC#), -- HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
(16#FFE0#, 16#FFE1#), -- FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
(16#FFE2#, 16#FFE2#), -- FULLWIDTH NOT SIGN
(16#FFE3#, 16#FFE3#), -- FULLWIDTH MACRON
(16#FFE4#, 16#FFE4#), -- FULLWIDTH BROKEN BAR
(16#FFE5#, 16#FFE6#), -- FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
(16#FFE8#, 16#FFE8#), -- HALFWIDTH FORMS LIGHT VERTICAL
(16#FFE9#, 16#FFEC#), -- HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
(16#FFED#, 16#FFEE#), -- HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
(16#1D15E#, 16#1D164#), -- MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
(16#1D1BB#, 16#1D1C0#), -- MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK
(16#1D400#, 16#1D454#), -- MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
(16#1D456#, 16#1D49C#), -- MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
(16#1D49E#, 16#1D49F#), -- MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
(16#1D4A2#, 16#1D4A2#), -- MATHEMATICAL SCRIPT CAPITAL G
(16#1D4A5#, 16#1D4A6#), -- MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
(16#1D4A9#, 16#1D4AC#), -- MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
(16#1D4AE#, 16#1D4B9#), -- MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
(16#1D4BB#, 16#1D4BB#), -- MATHEMATICAL SCRIPT SMALL F
(16#1D4BD#, 16#1D4C3#), -- MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
(16#1D4C5#, 16#1D505#), -- MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
(16#1D507#, 16#1D50A#), -- MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
(16#1D50D#, 16#1D514#), -- MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
(16#1D516#, 16#1D51C#), -- MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
(16#1D51E#, 16#1D539#), -- MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
(16#1D53B#, 16#1D53E#), -- MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
(16#1D540#, 16#1D544#), -- MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
(16#1D546#, 16#1D546#), -- MATHEMATICAL DOUBLE-STRUCK CAPITAL O
(16#1D54A#, 16#1D550#), -- MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
(16#1D552#, 16#1D6A5#), -- MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
(16#1D6A8#, 16#1D6C0#), -- MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
(16#1D6C1#, 16#1D6C1#), -- MATHEMATICAL BOLD NABLA
(16#1D6C2#, 16#1D6DA#), -- MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
(16#1D6DB#, 16#1D6DB#), -- MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
(16#1D6DC#, 16#1D6FA#), -- MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
(16#1D6FB#, 16#1D6FB#), -- MATHEMATICAL ITALIC NABLA
(16#1D6FC#, 16#1D714#), -- MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
(16#1D715#, 16#1D715#), -- MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
(16#1D716#, 16#1D734#), -- MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
(16#1D735#, 16#1D735#), -- MATHEMATICAL BOLD ITALIC NABLA
(16#1D736#, 16#1D74E#), -- MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
(16#1D74F#, 16#1D74F#), -- MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
(16#1D750#, 16#1D76E#), -- MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
(16#1D76F#, 16#1D76F#), -- MATHEMATICAL SANS-SERIF BOLD NABLA
(16#1D770#, 16#1D788#), -- MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
(16#1D789#, 16#1D789#), -- MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
(16#1D78A#, 16#1D7A8#), -- MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
(16#1D7A9#, 16#1D7A9#), -- MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
(16#1D7AA#, 16#1D7C2#), -- MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
(16#1D7C3#, 16#1D7C3#), -- MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
(16#1D7C4#, 16#1D7CB#), -- MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
(16#1D7CE#, 16#1D7FF#), -- MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
(16#1EE00#, 16#1EE03#), -- ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
(16#1EE05#, 16#1EE1F#), -- ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
(16#1EE21#, 16#1EE22#), -- ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM
(16#1EE24#, 16#1EE24#), -- ARABIC MATHEMATICAL INITIAL HEH
(16#1EE27#, 16#1EE27#), -- ARABIC MATHEMATICAL INITIAL HAH
(16#1EE29#, 16#1EE32#), -- ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF
(16#1EE34#, 16#1EE37#), -- ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH
(16#1EE39#, 16#1EE39#), -- ARABIC MATHEMATICAL INITIAL DAD
(16#1EE3B#, 16#1EE3B#), -- ARABIC MATHEMATICAL INITIAL GHAIN
(16#1EE42#, 16#1EE42#), -- ARABIC MATHEMATICAL TAILED JEEM
(16#1EE47#, 16#1EE47#), -- ARABIC MATHEMATICAL TAILED HAH
(16#1EE49#, 16#1EE49#), -- ARABIC MATHEMATICAL TAILED YEH
(16#1EE4B#, 16#1EE4B#), -- ARABIC MATHEMATICAL TAILED LAM
(16#1EE4D#, 16#1EE4F#), -- ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN
(16#1EE51#, 16#1EE52#), -- ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF
(16#1EE54#, 16#1EE54#), -- ARABIC MATHEMATICAL TAILED SHEEN
(16#1EE57#, 16#1EE57#), -- ARABIC MATHEMATICAL TAILED KHAH
(16#1EE59#, 16#1EE59#), -- ARABIC MATHEMATICAL TAILED DAD
(16#1EE5B#, 16#1EE5B#), -- ARABIC MATHEMATICAL TAILED GHAIN
(16#1EE5D#, 16#1EE5D#), -- ARABIC MATHEMATICAL TAILED DOTLESS NOON
(16#1EE5F#, 16#1EE5F#), -- ARABIC MATHEMATICAL TAILED DOTLESS QAF
(16#1EE61#, 16#1EE62#), -- ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM
(16#1EE64#, 16#1EE64#), -- ARABIC MATHEMATICAL STRETCHED HEH
(16#1EE67#, 16#1EE6A#), -- ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF
(16#1EE6C#, 16#1EE72#), -- ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF
(16#1EE74#, 16#1EE77#), -- ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH
(16#1EE79#, 16#1EE7C#), -- ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH
(16#1EE7E#, 16#1EE7E#), -- ARABIC MATHEMATICAL STRETCHED DOTLESS FEH
(16#1EE80#, 16#1EE89#), -- ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH
(16#1EE8B#, 16#1EE9B#), -- ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN
(16#1EEA1#, 16#1EEA3#), -- ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
(16#1EEA5#, 16#1EEA9#), -- ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
(16#1EEAB#, 16#1EEBB#), -- ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
(16#1F100#, 16#1F10A#), -- DIGIT ZERO FULL STOP..DIGIT NINE COMMA
(16#1F110#, 16#1F12E#), -- PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
(16#1F130#, 16#1F14F#), -- SQUARED LATIN CAPITAL LETTER A..SQUARED WC
(16#1F16A#, 16#1F16C#), -- RAISED MC SIGN..RAISED MR SIGN
(16#1F190#, 16#1F190#), -- SQUARE DJ
(16#1F200#, 16#1F202#), -- SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
(16#1F210#, 16#1F23B#), -- SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
(16#1F240#, 16#1F248#), -- TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
(16#1F250#, 16#1F251#), -- CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
(16#1FBF0#, 16#1FBF9#), -- SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
(16#2F800#, 16#2FA1D#)); -- CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
pragma Warnings (On);
-- Temporary until pragma Warnings at start can be activated ???
type Decomposition_Mapping is record
Item : UTF_32;
First_Char_Mapping : UTF_32;
@ -12001,6 +12395,15 @@ package body System.UTF_32 is
return Non_Graphic (C);
end Is_UTF_32_Non_Graphic;
--------------------
-- Is_UTF_32_NFKC --
--------------------
function Is_UTF_32_NFKC (U : UTF_32) return Boolean is
begin
   --  Code points below 16#A0# (160) precede the first entry of
   --  UTF_32_NFKC_QC_No, so they are accepted without searching the table.

   if U < 16#A0# then
      return True;
   end if;

   --  Otherwise U is accepted exactly when the search of the table yields
   --  0 (presumably the "no matching range" result of Range_Search).

   return Range_Search (U, UTF_32_NFKC_QC_No) = 0;
end Is_UTF_32_NFKC;
---------------------
-- Is_UTF_32_Other --
---------------------

View File

@ -189,6 +189,12 @@ package System.UTF_32 is
-- letters to upper case using this routine. A corresponding routine to
-- fold to lower case is also provided.
function Is_UTF_32_NFKC (U : UTF_32) return Boolean;
pragma Inline (Is_UTF_32_NFKC);
-- Return True if U could be present in a string normalized to
-- Normalization Form KC (as defined by Clause 21 of ISO/IEC 10646:2017),
-- otherwise return False.
function Is_UTF_32_Basic (U : UTF_32) return Boolean;
pragma Inline (Is_UTF_32_Basic);
-- Return True if U has no Decomposition Mapping in the code charts of

View File

@ -2485,10 +2485,17 @@ package body Scng is
("wide character not allowed in identifier", Wptr);
end if;
-- AI12-0004: An identifier shall only contain characters
-- that may be present in Normalization Form KC.
if not Is_UTF_32_NFKC (UTF_32 (Code)) then
Error_Msg
("invalid wide character in identifier", Wptr);
-- If OK letter, store it folding to upper case. Note
-- that we include the folded letter in the checksum.
if Is_UTF_32_Letter (Cat) then
elsif Is_UTF_32_Letter (Cat) then
Code :=
Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
Accumulate_Checksum (Code);