mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-24 18:55:04 +08:00
Make all Unicode Perl scripts use strict, and rearrange logic for clarity.
The loops were a bit difficult to understand, due to breaking out of them early. Also fix things that perlcritic complained about.

Daniel Gustafsson
This commit is contained in:
parent
81c52728f8
commit
021d254d9a
@ -24,8 +24,8 @@
|
||||
# UCS-2 code in hex
|
||||
# # and Unicode name (not used in this script)
|
||||
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
# Load BIG5.TXT
|
||||
my $all = &read_source("BIG5.TXT");
|
||||
|
@ -13,24 +13,24 @@
|
||||
# where the "u" field is the Unicode code point in hex,
|
||||
# and the "b" field is the hex byte sequence for GB18030
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
# Read the input
|
||||
|
||||
$in_file = "gb-18030-2000.xml";
|
||||
my $in_file = "gb-18030-2000.xml";
|
||||
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
open(my $in, '<', $in_file) || die("cannot open $in_file");
|
||||
|
||||
my @mapping;
|
||||
|
||||
while (<FILE>)
|
||||
while (<$in>)
|
||||
{
|
||||
next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
|
||||
$u = $1;
|
||||
$c = $2;
|
||||
my ($u, $c) = ($1, $2);
|
||||
$c =~ s/ //g;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
my $ucs = hex($u);
|
||||
my $code = hex($c);
|
||||
|
||||
# The GB-18030 character set, which we use as the source, contains
|
||||
# a lot of extra characters on top of the GB2312 character set that
|
||||
@ -71,6 +71,6 @@ while (<FILE>)
|
||||
direction => 'both'
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
close($in);
|
||||
|
||||
print_tables("EUC_CN", \@mapping);
|
||||
|
@ -7,27 +7,27 @@
|
||||
# Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
|
||||
# "euc-jis-2004-std.txt" (http://x0213.org)
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
# first generate UTF-8 --> EUC_JIS_2004 table
|
||||
|
||||
$in_file = "euc-jis-2004-std.txt";
|
||||
my $in_file = "euc-jis-2004-std.txt";
|
||||
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
open(my $in, '<', $in_file) || die("cannot open $in_file");
|
||||
|
||||
my @all;
|
||||
|
||||
while ($line = <FILE>)
|
||||
while (my $line = <$in>)
|
||||
{
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs1 = hex($u1);
|
||||
$ucs2 = hex($u2);
|
||||
# combined characters
|
||||
my ($c, $u1, $u2) = ($1, $2, $3);
|
||||
my $rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
my $code = hex($c);
|
||||
my $ucs1 = hex($u1);
|
||||
my $ucs2 = hex($u2);
|
||||
|
||||
push @all, { direction => 'both',
|
||||
ucs => $ucs1,
|
||||
@ -38,22 +38,16 @@ while ($line = <FILE>)
|
||||
}
|
||||
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
$rest = "U+" . $u . $3;
|
||||
# non-combined characters
|
||||
my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
|
||||
my $ucs = hex($u);
|
||||
my $code = hex($c);
|
||||
|
||||
next if ($code < 0x80 && $ucs < 0x80);
|
||||
|
||||
push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
|
||||
}
|
||||
else
|
||||
{
|
||||
next;
|
||||
}
|
||||
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
|
||||
next if ($code < 0x80 && $ucs < 0x80);
|
||||
|
||||
push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
|
||||
}
|
||||
close(FILE);
|
||||
close($in);
|
||||
|
||||
print_tables("EUC_JIS_2004", \@all, 1);
|
||||
|
@ -12,7 +12,7 @@
|
||||
# organization's ftp site.
|
||||
|
||||
use strict;
|
||||
require "convutils.pm";
|
||||
require convutils;
|
||||
|
||||
# Load JIS0212.TXT
|
||||
my $jis0212 = &read_source("JIS0212.TXT");
|
||||
|
@ -16,7 +16,8 @@
|
||||
# UCS-2 code in hex
|
||||
# # and Unicode name (not used in this script)
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
# Load the source file.
|
||||
|
||||
|
@ -17,7 +17,8 @@
|
||||
# UCS-2 code in hex
|
||||
# # and Unicode name (not used in this script)
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
my $mapping = &read_source("CNS11643.TXT");
|
||||
|
||||
|
@ -13,24 +13,24 @@
|
||||
# where the "u" field is the Unicode code point in hex,
|
||||
# and the "b" field is the hex byte sequence for GB18030
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
# Read the input
|
||||
|
||||
$in_file = "gb-18030-2000.xml";
|
||||
my $in_file = "gb-18030-2000.xml";
|
||||
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
open(my $in, '<', $in_file) || die("cannot open $in_file");
|
||||
|
||||
my @mapping;
|
||||
|
||||
while (<FILE>)
|
||||
while (<$in>)
|
||||
{
|
||||
next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
|
||||
$u = $1;
|
||||
$c = $2;
|
||||
my ($u, $c) = ($1, $2);
|
||||
$c =~ s/ //g;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
my $ucs = hex($u);
|
||||
my $code = hex($c);
|
||||
if ($code >= 0x80 && $ucs >= 0x0080)
|
||||
{
|
||||
push @mapping, {
|
||||
@ -40,6 +40,6 @@ while (<FILE>)
|
||||
}
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
close($in);
|
||||
|
||||
print_tables("GB18030", \@mapping);
|
||||
|
@ -15,7 +15,8 @@
|
||||
# UCS-2 code in hex
|
||||
# # and Unicode name (not used in this script)
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
# Load the source file.
|
||||
|
||||
|
@ -7,27 +7,27 @@
|
||||
# Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
|
||||
# "sjis-0213-2004-std.txt" (http://x0213.org)
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
# first generate UTF-8 --> SHIFT_JIS_2004 table
|
||||
|
||||
$in_file = "sjis-0213-2004-std.txt";
|
||||
my $in_file = "sjis-0213-2004-std.txt";
|
||||
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
open(my $in, '<', $in_file) || die("cannot open $in_file");
|
||||
|
||||
my @mapping;
|
||||
|
||||
while ($line = <FILE>)
|
||||
while (my $line = <$in>)
|
||||
{
|
||||
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u1 = $2;
|
||||
$u2 = $3;
|
||||
$rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
$code = hex($c);
|
||||
$ucs1 = hex($u1);
|
||||
$ucs2 = hex($u2);
|
||||
# combined characters
|
||||
my ($c, $u1, $u2) = ($1, $2, $3);
|
||||
my $rest = "U+" . $u1 . "+" . $u2 . $4;
|
||||
my $code = hex($c);
|
||||
my $ucs1 = hex($u1);
|
||||
my $ucs2 = hex($u2);
|
||||
|
||||
push @mapping, {
|
||||
code => $code,
|
||||
@ -40,42 +40,37 @@ while ($line = <FILE>)
|
||||
}
|
||||
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
|
||||
{
|
||||
$c = $1;
|
||||
$u = $2;
|
||||
$rest = "U+" . $u . $3;
|
||||
}
|
||||
else
|
||||
{
|
||||
next;
|
||||
}
|
||||
# non-combined characters
|
||||
my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
|
||||
my $ucs = hex($u);
|
||||
my $code = hex($c);
|
||||
my $direction;
|
||||
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
if ($code < 0x80 && $ucs < 0x80)
|
||||
{
|
||||
next;
|
||||
}
|
||||
elsif ($code < 0x80)
|
||||
{
|
||||
$direction = 'from_unicode';
|
||||
}
|
||||
elsif ($ucs < 0x80)
|
||||
{
|
||||
$direction = 'to_unicode';
|
||||
}
|
||||
else
|
||||
{
|
||||
$direction = 'both';
|
||||
}
|
||||
|
||||
if ($code < 0x80 && $ucs < 0x80)
|
||||
{
|
||||
next;
|
||||
push @mapping, {
|
||||
code => $code,
|
||||
ucs => $ucs,
|
||||
comment => $rest,
|
||||
direction => $direction
|
||||
};
|
||||
}
|
||||
elsif ($code < 0x80)
|
||||
{
|
||||
$direction = 'from_unicode';
|
||||
}
|
||||
elsif ($ucs < 0x80)
|
||||
{
|
||||
$direction = 'to_unicode';
|
||||
}
|
||||
else
|
||||
{
|
||||
$direction = 'both';
|
||||
}
|
||||
|
||||
push @mapping, {
|
||||
code => $code,
|
||||
ucs => $ucs,
|
||||
comment => $rest,
|
||||
direction => $direction
|
||||
};
|
||||
}
|
||||
close(FILE);
|
||||
close($in);
|
||||
|
||||
print_tables("SHIFT_JIS_2004", \@mapping, 1);
|
||||
|
@ -11,7 +11,7 @@
|
||||
# ftp site.
|
||||
|
||||
use strict;
|
||||
require "convutils.pm";
|
||||
require convutils;
|
||||
|
||||
my $charset = read_source("CP932.TXT");
|
||||
|
||||
|
@ -13,24 +13,24 @@
|
||||
# where the "u" field is the Unicode code point in hex,
|
||||
# and the "b" field is the hex byte sequence for UHC
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
# Read the input
|
||||
|
||||
$in_file = "windows-949-2000.xml";
|
||||
my $in_file = "windows-949-2000.xml";
|
||||
|
||||
open(FILE, $in_file) || die("cannot open $in_file");
|
||||
open(my $in, '<', $in_file) || die("cannot open $in_file");
|
||||
|
||||
my @mapping;
|
||||
|
||||
while (<FILE>)
|
||||
while (<$in>)
|
||||
{
|
||||
next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
|
||||
$u = $1;
|
||||
$c = $2;
|
||||
my ($u, $c) = ($1, $2);
|
||||
$c =~ s/ //g;
|
||||
$ucs = hex($u);
|
||||
$code = hex($c);
|
||||
my $ucs = hex($u);
|
||||
my $code = hex($c);
|
||||
|
||||
next if ($code == 0x0080 || $code == 0x00FF);
|
||||
|
||||
@ -43,7 +43,7 @@ while (<FILE>)
|
||||
}
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
close($in);
|
||||
|
||||
# One extra character that's not in the source file.
|
||||
push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' };
|
||||
|
@ -15,9 +15,10 @@
|
||||
# UCS-2 code in hex
|
||||
# # and Unicode name (not used in this script)
|
||||
|
||||
require "convutils.pm";
|
||||
use strict;
|
||||
require convutils;
|
||||
|
||||
%filename = (
|
||||
my %filename = (
|
||||
'WIN866' => 'CP866.TXT',
|
||||
'WIN874' => 'CP874.TXT',
|
||||
'WIN1250' => 'CP1250.TXT',
|
||||
@ -46,9 +47,10 @@ require "convutils.pm";
|
||||
'KOI8U' => 'KOI8-U.TXT',
|
||||
'GBK' => 'CP936.TXT');
|
||||
|
||||
@charsets = keys(%filename);
|
||||
@charsets = @ARGV if scalar(@ARGV);
|
||||
foreach $charset (@charsets)
|
||||
# make maps for all encodings if not specified
|
||||
my @charsets = (scalar(@ARGV) > 0) ? @ARGV : keys(%filename);
|
||||
|
||||
foreach my $charset (@charsets)
|
||||
{
|
||||
my $mapping = &read_source($filename{$charset});
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user