mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-21 01:12:26 +08:00
Put the correct Unicode version number 11.0.0 into the generated files
In some places the files still contained the old Unicode version 10.0.0. * localedata/charmaps/UTF-8: Use correct Unicode version 11.0.0 in comment. * localedata/locales/i18n_ctype: Use correct Unicode version in comments and headers. * localedata/unicode-gen/utf8_gen.py: Add option to specify Unicode version. * localedata/unicode-gen/Makefile: Use option to specify Unicode version for utf8_gen.py.
This commit is contained in:
parent
fd70af4552
commit
4beefeeb8e
@ -1,3 +1,12 @@
|
||||
2018-07-10 Mike FABIAN <mfabian@redhat.com>
|
||||
|
||||
* localedata/charmaps/UTF-8: Use correct Unicode version 11.0.0 in comment.
|
||||
* localedata/locales/i18n_ctype: Use correct Unicode version in comments
|
||||
and headers.
|
||||
* localedata/unicode-gen/utf8_gen.py: Add option to specify Unicode version.
|
||||
* localedata/unicode-gen/Makefile: Use option to specify Unicode version
|
||||
for utf8_gen.py.
|
||||
|
||||
2018-07-10 Florian Weimer <fweimer@redhat.com>
|
||||
|
||||
* io/Makefile (routines): Add statx.
|
||||
|
@ -47069,7 +47069,7 @@ CHARMAP
|
||||
<U0010FFC0>..<U0010FFFD> /xf4/x8f/xbf/x80 <Plane 16 Private Use>
|
||||
END CHARMAP
|
||||
|
||||
% Character width according to Unicode 10.0.0.
|
||||
% Character width according to Unicode 11.0.0.
|
||||
% - Default width is 1.
|
||||
% - Double-width characters have width 2; generated from
|
||||
% "grep '^[^;]*;[WF]' EastAsianWidth.txt"
|
||||
|
@ -13,10 +13,10 @@ comment_char %
|
||||
% information, but with different transliterations, can include it
|
||||
% directly.
|
||||
|
||||
% Generated automatically by gen_unicode_ctype.py for Unicode 10.0.0.
|
||||
% Generated automatically by gen_unicode_ctype.py for Unicode 11.0.0.
|
||||
|
||||
LC_IDENTIFICATION
|
||||
title "Unicode 10.0.0 FDCC-set"
|
||||
title "Unicode 11.0.0 FDCC-set"
|
||||
source "UnicodeData.txt, DerivedCoreProperties.txt"
|
||||
address ""
|
||||
contact ""
|
||||
@ -25,7 +25,7 @@ tel ""
|
||||
fax ""
|
||||
language ""
|
||||
territory "Earth"
|
||||
revision "10.0.0"
|
||||
revision "11.0.0"
|
||||
date "2018-06-20"
|
||||
category "i18n:2012";LC_CTYPE
|
||||
END LC_IDENTIFICATION
|
||||
|
@ -92,7 +92,9 @@ tr_TR: gen_unicode_ctype.py
|
||||
|
||||
UTF-8: UnicodeData.txt EastAsianWidth.txt
|
||||
UTF-8: utf8_gen.py
|
||||
$(PYTHON3) utf8_gen.py UnicodeData.txt EastAsianWidth.txt PropList.txt
|
||||
$(PYTHON3) utf8_gen.py -u UnicodeData.txt \
|
||||
-e EastAsianWidth.txt -p PropList.txt \
|
||||
--unicode_version $(UNICODE_VERSION)
|
||||
|
||||
UTF-8-report: UTF-8 ../charmaps/UTF-8
|
||||
UTF-8-report: utf8_compatibility.py
|
||||
|
@ -27,6 +27,7 @@ Usage: python3 utf8_gen.py UnicodeData.txt EastAsianWidth.txt
|
||||
It will output UTF-8 file
|
||||
'''
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import re
|
||||
import unicode_utils
|
||||
@ -197,9 +198,10 @@ def write_header_charmap(outfile):
|
||||
outfile.write("% alias ISO-10646/UTF-8\n")
|
||||
outfile.write("CHARMAP\n")
|
||||
|
||||
def write_header_width(outfile):
|
||||
def write_header_width(outfile, unicode_version):
|
||||
'''Writes the header on top of the WIDTH section to the output file'''
|
||||
outfile.write('% Character width according to Unicode 10.0.0.\n')
|
||||
outfile.write('% Character width according to Unicode '
|
||||
+ '{:s}.\n'.format(unicode_version))
|
||||
outfile.write('% - Default width is 1.\n')
|
||||
outfile.write('% - Double-width characters have width 2; generated from\n')
|
||||
outfile.write('% "grep \'^[^;]*;[WF]\' EastAsianWidth.txt"\n')
|
||||
@ -292,41 +294,71 @@ def process_width(outfile, ulines, elines, plines):
|
||||
width_dict[same_width_list[0]]))
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 3:
|
||||
print("USAGE: python3 utf8_gen.py UnicodeData.txt EastAsianWidth.txt PropList.txt")
|
||||
else:
|
||||
with open(sys.argv[1], mode='r') as UNIDATA_FILE:
|
||||
UNICODE_DATA_LINES = UNIDATA_FILE.readlines()
|
||||
with open(sys.argv[2], mode='r') as EAST_ASIAN_WIDTH_FILE:
|
||||
EAST_ASIAN_WIDTH_LINES = []
|
||||
for LINE in EAST_ASIAN_WIDTH_FILE:
|
||||
# If characters from EastAsianWidth.txt which are from
|
||||
# reserved ranges (i.e. not yet assigned code points)
|
||||
# are added to the WIDTH section of the UTF-8 file, then
|
||||
# “make check” produces “Unknown Character” errors for
|
||||
# these code points because such unassigned code points
|
||||
# are not in the CHARMAP section of the UTF-8 file.
|
||||
#
|
||||
# Therefore, we skip all reserved code points when reading
|
||||
# the EastAsianWidth.txt file.
|
||||
if re.match(r'.*<reserved-.+>\.\.<reserved-.+>.*', LINE):
|
||||
continue
|
||||
if re.match(r'^[^;]*;[WF]', LINE):
|
||||
EAST_ASIAN_WIDTH_LINES.append(LINE.strip())
|
||||
with open(sys.argv[3], mode='r') as PROP_LIST_FILE:
|
||||
PROP_LIST_LINES = []
|
||||
for LINE in PROP_LIST_FILE:
|
||||
if re.match(r'^[^;]*;[\s]*Prepended_Concatenation_Mark', LINE):
|
||||
PROP_LIST_LINES.append(LINE.strip())
|
||||
with open('UTF-8', mode='w') as OUTFILE:
|
||||
# Processing UnicodeData.txt and write CHARMAP to UTF-8 file
|
||||
write_header_charmap(OUTFILE)
|
||||
process_charmap(UNICODE_DATA_LINES, OUTFILE)
|
||||
OUTFILE.write("END CHARMAP\n\n")
|
||||
# Processing EastAsianWidth.txt and write WIDTH to UTF-8 file
|
||||
write_header_width(OUTFILE)
|
||||
process_width(OUTFILE,
|
||||
UNICODE_DATA_LINES,
|
||||
EAST_ASIAN_WIDTH_LINES,
|
||||
PROP_LIST_LINES)
|
||||
OUTFILE.write("END WIDTH\n")
|
||||
PARSER = argparse.ArgumentParser(
|
||||
description='''
|
||||
Generate a UTF-8 file from UnicodeData.txt, EastAsianWidth.txt, and PropList.txt.
|
||||
''')
|
||||
PARSER.add_argument(
|
||||
'-u', '--unicode_data_file',
|
||||
nargs='?',
|
||||
type=str,
|
||||
default='UnicodeData.txt',
|
||||
help=('The UnicodeData.txt file to read, '
|
||||
+ 'default: %(default)s'))
|
||||
PARSER.add_argument(
|
||||
'-e', '--east_asian_with_file',
|
||||
nargs='?',
|
||||
type=str,
|
||||
default='EastAsianWidth.txt',
|
||||
help=('The EastAsianWidth.txt file to read, '
|
||||
+ 'default: %(default)s'))
|
||||
PARSER.add_argument(
|
||||
'-p', '--prop_list_file',
|
||||
nargs='?',
|
||||
type=str,
|
||||
default='PropList.txt',
|
||||
help=('The PropList.txt file to read, '
|
||||
+ 'default: %(default)s'))
|
||||
PARSER.add_argument(
|
||||
'--unicode_version',
|
||||
nargs='?',
|
||||
required=True,
|
||||
type=str,
|
||||
help='The Unicode version of the input files used.')
|
||||
ARGS = PARSER.parse_args()
|
||||
|
||||
with open(ARGS.unicode_data_file, mode='r') as UNIDATA_FILE:
|
||||
UNICODE_DATA_LINES = UNIDATA_FILE.readlines()
|
||||
with open(ARGS.east_asian_with_file, mode='r') as EAST_ASIAN_WIDTH_FILE:
|
||||
EAST_ASIAN_WIDTH_LINES = []
|
||||
for LINE in EAST_ASIAN_WIDTH_FILE:
|
||||
# If characters from EastAsianWidth.txt which are from
|
||||
# reserved ranges (i.e. not yet assigned code points)
|
||||
# are added to the WIDTH section of the UTF-8 file, then
|
||||
# “make check” produces “Unknown Character” errors for
|
||||
# these code points because such unassigned code points
|
||||
# are not in the CHARMAP section of the UTF-8 file.
|
||||
#
|
||||
# Therefore, we skip all reserved code points when reading
|
||||
# the EastAsianWidth.txt file.
|
||||
if re.match(r'.*<reserved-.+>\.\.<reserved-.+>.*', LINE):
|
||||
continue
|
||||
if re.match(r'^[^;]*;[WF]', LINE):
|
||||
EAST_ASIAN_WIDTH_LINES.append(LINE.strip())
|
||||
with open(ARGS.prop_list_file, mode='r') as PROP_LIST_FILE:
|
||||
PROP_LIST_LINES = []
|
||||
for LINE in PROP_LIST_FILE:
|
||||
if re.match(r'^[^;]*;[\s]*Prepended_Concatenation_Mark', LINE):
|
||||
PROP_LIST_LINES.append(LINE.strip())
|
||||
with open('UTF-8', mode='w') as OUTFILE:
|
||||
# Processing UnicodeData.txt and write CHARMAP to UTF-8 file
|
||||
write_header_charmap(OUTFILE)
|
||||
process_charmap(UNICODE_DATA_LINES, OUTFILE)
|
||||
OUTFILE.write("END CHARMAP\n\n")
|
||||
# Processing EastAsianWidth.txt and write WIDTH to UTF-8 file
|
||||
write_header_width(OUTFILE, ARGS.unicode_version)
|
||||
process_width(OUTFILE,
|
||||
UNICODE_DATA_LINES,
|
||||
EAST_ASIAN_WIDTH_LINES,
|
||||
PROP_LIST_LINES)
|
||||
OUTFILE.write("END WIDTH\n")
|
||||
|
Loading…
Reference in New Issue
Block a user