Make all Unicode Perl scripts use strict, and rearrange logic for clarity.

The loops were a bit difficult to understand, due to breaking out of them early. Also fix things that perlcritic complained about.

Daniel Gustafsson
2025-01-24 18:55:04 +08:00 · 2016-11-30 18:06:34 +02:00 · 2016-11-30 18:06:34 +02:00 · 021d254d9a
commit 021d254d9a
parent 81c52728f8
12 changed files with 103 additions and 109 deletions
--- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
@@ -24,8 +24,8 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)

-
-require "convutils.pm";
+use strict;
+require convutils;

 # Load BIG5.TXT
 my $all = &read_source("BIG5.TXT");
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
@@ -13,24 +13,24 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for GB18030

-require "convutils.pm";
+use strict;
+require convutils;

 # Read the input

-$in_file = "gb-18030-2000.xml";
+my $in_file = "gb-18030-2000.xml";

-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");

 my @mapping;

-while (<FILE>)
+while (<$in>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-	$u = $1;
-	$c = $2;
+	my ($u, $c) = ($1, $2);
 	$c =~ s/ //g;
-	$ucs  = hex($u);
-	$code = hex($c);
+	my $ucs  = hex($u);
+	my $code = hex($c);

 	# The GB-18030 character set, which we use as the source, contains
 	# a lot of extra characters on top of the GB2312 character set that
@@ -71,6 +71,6 @@ while (<FILE>)
 		direction => 'both'
 	}
 }
-close(FILE);
+close($in);

 print_tables("EUC_CN", \@mapping);
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
@@ -7,27 +7,27 @@
 # Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
 # "euc-jis-2004-std.txt" (http://x0213.org)

-require "convutils.pm";
+use strict;
+require convutils;

 # first generate UTF-8 --> EUC_JIS_2004 table

-$in_file = "euc-jis-2004-std.txt";
+my $in_file = "euc-jis-2004-std.txt";

-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");

 my @all;

-while ($line = <FILE>)
+while (my $line = <$in>)
 {
 	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
 	{
-		$c              = $1;
-		$u1             = $2;
-		$u2             = $3;
-		$rest           = "U+" . $u1 . "+" . $u2 . $4;
-		$code           = hex($c);
-		$ucs1           = hex($u1);
-		$ucs2           = hex($u2);
+		# combined characters
+		my ($c, $u1, $u2) = ($1, $2, $3);
+		my $rest = "U+" . $u1 . "+" . $u2 . $4;
+		my $code = hex($c);
+		my $ucs1 = hex($u1);
+		my $ucs2 = hex($u2);

 		push @all, { direction => 'both',
 					 ucs => $ucs1,
@@ -38,22 +38,16 @@ while ($line = <FILE>)
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
 	{
-		$c    = $1;
-		$u    = $2;
-		$rest = "U+" . $u . $3;
+		# non-combined characters
+		my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
+		my $ucs  = hex($u);
+		my $code = hex($c);
+
+		next if ($code < 0x80 && $ucs < 0x80);
+
+		push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
 	}
-	else
-	{
-		next;
-	}
-
-	$ucs  = hex($u);
-	$code = hex($c);
-
-	next if ($code < 0x80 && $ucs < 0x80);
-
-	push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
 }
-close(FILE);
+close($in);

 print_tables("EUC_JIS_2004", \@all, 1);
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
@@ -12,7 +12,7 @@
 # organization's ftp site.

 use strict;
-require "convutils.pm";
+require convutils;

 # Load JIS0212.TXT
 my $jis0212 = &read_source("JIS0212.TXT");
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
@@ -16,7 +16,8 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)

-require "convutils.pm";
+use strict;
+require convutils;

 # Load the source file.

--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
@@ -17,7 +17,8 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)

-require "convutils.pm";
+use strict;
+require convutils;

 my $mapping = &read_source("CNS11643.TXT");

--- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
@@ -13,24 +13,24 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for GB18030

-require "convutils.pm";
+use strict;
+require convutils;

 # Read the input

-$in_file = "gb-18030-2000.xml";
+my $in_file = "gb-18030-2000.xml";

-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");

 my @mapping;

-while (<FILE>)
+while (<$in>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-	$u = $1;
-	$c = $2;
+	my ($u, $c) = ($1, $2);
 	$c =~ s/ //g;
-	$ucs  = hex($u);
-	$code = hex($c);
+	my $ucs  = hex($u);
+	my $code = hex($c);
 	if ($code >= 0x80 && $ucs >= 0x0080)
 	{
 		push @mapping, {
@@ -40,6 +40,6 @@ while (<FILE>)
 		}
 	}
 }
-close(FILE);
+close($in);

 print_tables("GB18030", \@mapping);
--- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
@@ -15,7 +15,8 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)

-require "convutils.pm";
+use strict;
+require convutils;

 # Load the source file.

--- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
@@ -7,27 +7,27 @@
 # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
 # "sjis-0213-2004-std.txt" (http://x0213.org)

-require "convutils.pm";
+use strict;
+require convutils;

 # first generate UTF-8 --> SHIFT_JIS_2004 table

-$in_file = "sjis-0213-2004-std.txt";
+my $in_file = "sjis-0213-2004-std.txt";

-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");

 my @mapping;

-while ($line = <FILE>)
+while (my $line = <$in>)
 {
 	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
 	{
-		$c              = $1;
-		$u1             = $2;
-		$u2             = $3;
-		$rest           = "U+" . $u1 . "+" . $u2 . $4;
-		$code           = hex($c);
-		$ucs1           = hex($u1);
-		$ucs2           = hex($u2);
+		# combined characters
+		my ($c, $u1, $u2) = ($1, $2, $3);
+		my $rest = "U+" . $u1 . "+" . $u2 . $4;
+		my $code = hex($c);
+		my $ucs1 = hex($u1);
+		my $ucs2 = hex($u2);

 		push @mapping, {
 			code => $code,
@@ -40,42 +40,37 @@ while ($line = <FILE>)
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
 	{
-		$c    = $1;
-		$u    = $2;
-		$rest = "U+" . $u . $3;
-	}
-	else
-	{
-		next;
-	}
+		# non-combined characters
+		my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
+		my $ucs  = hex($u);
+		my $code = hex($c);
+		my $direction;

-	$ucs  = hex($u);
-	$code = hex($c);
+		if ($code < 0x80 && $ucs < 0x80)
+		{
+			next;
+		}
+		elsif ($code < 0x80)
+		{
+			$direction = 'from_unicode';
+		}
+		elsif ($ucs < 0x80)
+		{
+			$direction = 'to_unicode';
+		}
+		else
+		{
+			$direction = 'both';
+		}

-	if ($code < 0x80 && $ucs < 0x80)
-	{
-		next;
+		push @mapping, {
+			code => $code,
+			ucs => $ucs,
+			comment => $rest,
+			direction => $direction
+		};
 	}
-	elsif ($code < 0x80)
-	{
-		$direction = 'from_unicode';
-	}
-	elsif ($ucs < 0x80)
-	{
-		$direction = 'to_unicode';
-	}
-	else
-	{
-		$direction = 'both';
-	}
-
-	push @mapping, {
-		code => $code,
-		ucs => $ucs,
-		comment => $rest,
-		direction => $direction
-	};
 }
-close(FILE);
+close($in);

 print_tables("SHIFT_JIS_2004", \@mapping, 1);
--- a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
@@ -11,7 +11,7 @@
 # ftp site.

 use strict;
-require "convutils.pm";
+require convutils;

 my $charset = read_source("CP932.TXT");

--- a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
@@ -13,24 +13,24 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for UHC

-require "convutils.pm";
+use strict;
+require convutils;

 # Read the input

-$in_file = "windows-949-2000.xml";
+my $in_file = "windows-949-2000.xml";

-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");

 my @mapping;

-while (<FILE>)
+while (<$in>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-	$u = $1;
-	$c = $2;
+	my ($u, $c) = ($1, $2);
 	$c =~ s/ //g;
-	$ucs  = hex($u);
-	$code = hex($c);
+	my $ucs  = hex($u);
+	my $code = hex($c);

 	next if ($code == 0x0080 || $code == 0x00FF);

@@ -43,7 +43,7 @@ while (<FILE>)
 		}
 	}
 }
-close(FILE);
+close($in);

 # One extra character that's not in the source file.
 push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' };
--- a/src/backend/utils/mb/Unicode/UCS_to_most.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl
@@ -15,9 +15,10 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)

-require "convutils.pm";
+use strict;
+require convutils;

-%filename = (
+my %filename = (
 	'WIN866'     => 'CP866.TXT',
 	'WIN874'     => 'CP874.TXT',
 	'WIN1250'    => 'CP1250.TXT',
@@ -46,9 +47,10 @@ require "convutils.pm";
 	'KOI8U'      => 'KOI8-U.TXT',
 	'GBK'        => 'CP936.TXT');

-@charsets = keys(%filename);
-@charsets = @ARGV if scalar(@ARGV);
-foreach $charset (@charsets)
+# make maps for all encodings if not specified
+my @charsets = (scalar(@ARGV) > 0) ? @ARGV : keys(%filename);
+
+foreach my $charset (@charsets)
 {
 	my $mapping = &read_source($filename{$charset});