From 5602265f770f400a03b4a4ca22b3cd530c294bc2 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 25 Apr 2018 16:01:47 -0400
Subject: [PATCH] Convert unused_oids and duplicate_oids to use Catalog.pm
 infrastructure.

unused_oids was previously a shell script, which of course didn't work at
all on Windows.  Also, commit 372728b0d introduced some other portability
problems, as complained of by Stas Kelvich.  We can improve matters by
converting it to Perl.

While we're at it, let's future-proof both this script and duplicate_oids
to use Catalog.pm rather than having a bunch of ad-hoc logic for parsing
catalog headers and .dat files.  These scripts are thereby a bit slower,
which doesn't seem like a problem for typical manual use.  It is a little
annoying for buildfarm purposes, but we should be able to fix that case
by having genbki.pl make the check instead of parsing the headers twice.
(That's not done in this commit, though.)

Stas Kelvich, adjusted a bit by me

Discussion: https://postgr.es/m/37D774E4-FE1F-437E-B3D2-593F314B7505@postgrespro.ru
---
 src/backend/catalog/Catalog.pm     | 51 ++++++++++++++++++++++++
 src/include/catalog/duplicate_oids | 23 ++++-------
 src/include/catalog/unused_oids    | 63 ++++++++++++++----------------
 3 files changed, 89 insertions(+), 48 deletions(-)

diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index 823e09aa56..0b057a8f5a 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -384,4 +384,55 @@ sub FindDefinedSymbolFromData
 	die "no definition found for $symbol\n";
 }
 
+# Collect every OID assigned by the given catalog headers, together with
+# those assigned in the .dat file that sits beside each header (if present).
+# Returns a reference to the array of OIDs; duplicates are not removed.
+sub FindAllOidsFromHeaders
+{
+	my @headers = @_;
+	my @found;
+
+	foreach my $hdr (@headers)
+	{
+		# Each input must be named *.h; its data file shares the same stem.
+		my ($stem) = $hdr =~ /(.+)\.h$/
+		  or die "Input files need to be header files.\n";
+		my $data_path = "$stem.dat";
+
+		my $cat = Catalog::ParseHeader($hdr);
+
+		# Bootstrap catalogs' pg_class and rowtype OIDs are skipped, since
+		# they are expected to show up in the initial data for pg_class and
+		# pg_type.  Every other catalog contributes whichever ones it has.
+		unless ($cat->{bootstrap})
+		{
+			foreach my $key (qw(relation_oid rowtype_oid))
+			{
+				push @found, $cat->{$key} if $cat->{$key};
+			}
+		}
+
+		# A catalog need not have a data file; rows without an oid are skipped.
+		if (-e $data_path)
+		{
+			my $rows = Catalog::ParseData($data_path, $cat->{columns}, 0);
+
+			foreach my $entry (@$rows)
+			{
+				next unless defined $entry->{oid};
+				push @found, $entry->{oid};
+			}
+		}
+
+		# Each toast declaration carries two OIDs (table and index).
+		push @found, $_->{toast_oid}, $_->{toast_index_oid}
+		  foreach @{ $cat->{toasting} };
+
+		# Each index declaration carries one.
+		push @found, $_->{index_oid} foreach @{ $cat->{indexing} };
+	}
+
+	return \@found;
+}
+
 1;
diff --git a/src/include/catalog/duplicate_oids b/src/include/catalog/duplicate_oids
index db9b370c2f..0d7aa15559 100755
--- a/src/include/catalog/duplicate_oids
+++ b/src/include/catalog/duplicate_oids
@@ -1,27 +1,20 @@
 #!/usr/bin/perl
 
+use lib '../../backend/catalog/';
+use Catalog;
+
 use strict;
 use warnings;
 
-BEGIN
-{
-	@ARGV = (glob("pg_*.h"), glob("pg_*.dat"), qw(indexing.h toasting.h));
-}
+my @input_files = (glob("pg_*.h"), qw(indexing.h toasting.h));
+
+my $oids = Catalog::FindAllOidsFromHeaders(@input_files);
 
 my %oidcounts;
 
-while (<>)
+foreach my $oid (@{$oids})
 {
-	next if /^CATALOG\(.*BKI_BOOTSTRAP/;
-	next
-	  unless /\boid *=> *'(\d+)'/
-	  || /^CATALOG\([^,]*, *(\d+).*BKI_ROWTYPE_OID\((\d+),/
-	  || /^CATALOG\([^,]*, *(\d+)/
-	  || /^DECLARE_INDEX\([^,]*, *(\d+)/
-	  || /^DECLARE_UNIQUE_INDEX\([^,]*, *(\d+)/
-	  || /^DECLARE_TOAST\([^,]*, *(\d+), *(\d+)/;
-	$oidcounts{$1}++;
-	$oidcounts{$2}++ if $2;
+	$oidcounts{$oid}++;
 }
 
 my $found = 0;
diff --git a/src/include/catalog/unused_oids b/src/include/catalog/unused_oids
index f71222d50d..a727225abc 100755
--- a/src/include/catalog/unused_oids
+++ b/src/include/catalog/unused_oids
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/perl
 #
 # unused_oids
 #
@@ -15,43 +15,40 @@
 #
 #	run this script in src/include/catalog.
 #
+use lib '../../backend/catalog/';
+use Catalog;
 
+use strict;
+use warnings;
 
-AWK="awk"
+my @input_files = (glob("pg_*.h"), qw(indexing.h toasting.h));
 
-# Get FirstBootstrapObjectId from access/transam.h
-FIRSTOBJECTID=`grep '#define[ 	]*FirstBootstrapObjectId' ../access/transam.h | $AWK '{ print $3 }'`
-export FIRSTOBJECTID
+my $oids = Catalog::FindAllOidsFromHeaders(@input_files);
 
-# this part (down to the uniq step) should match the duplicate_oids script
-# note: we exclude BKI_BOOTSTRAP relations since they are expected to have
-# matching data entries in pg_class.dat and pg_type.dat
+# Also push FirstBootstrapObjectId to serve as a terminator for the last gap.
+my $FirstBootstrapObjectId =
+  Catalog::FindDefinedSymbol('access/transam.h', [".."],
+	'FirstBootstrapObjectId');
+push @{$oids}, $FirstBootstrapObjectId;
 
-cat pg_*.h pg_*.dat toasting.h indexing.h |
-egrep -v -e '^CATALOG\(.*BKI_BOOTSTRAP' | \
-sed -n	-e 's/.*\boid *=> *'\''\([0-9][0-9]*\)'\''.*$/\1/p' \
-	-e 's/^CATALOG([^,]*, *\([0-9][0-9]*\).*BKI_ROWTYPE_OID(\([0-9][0-9]*\),.*$/\1,\2/p' \
-	-e 's/^CATALOG([^,]*, *\([0-9][0-9]*\).*$/\1/p' \
-	-e 's/^DECLARE_INDEX([^,]*, *\([0-9][0-9]*\).*$/\1/p' \
-	-e 's/^DECLARE_UNIQUE_INDEX([^,]*, *\([0-9][0-9]*\).*$/\1/p' \
-	-e 's/^DECLARE_TOAST([^,]*, *\([0-9][0-9]*\), *\([0-9][0-9]*\).*$/\1,\2/p' | \
-tr ',' '\n' | \
-sort -n | \
-uniq | \
-$AWK '
-BEGIN {
-	last = 0;
-}
-/^[0-9]/ {
-	if ($1 > last + 1) {
-		if ($1 > last + 2) {
-			print last + 1, "-", $1 - 1;
-		} else {
-			print last + 1;
+my $prev_oid = 0;
+foreach my $oid (sort { $a <=> $b } @{$oids})
+{
+	if ($oid > $prev_oid + 1)
+	{
+		if ($oid > $prev_oid + 2)
+		{
+			printf "%d - %d\n", $prev_oid + 1, $oid - 1;
+		}
+		else
+		{
+			printf "%d\n", $prev_oid + 1;
 		}
 	}
-	last = $1;
+	elsif ($oid == $prev_oid)
+	{
+		print "Duplicate oid detected: $oid\n";
+		exit 1;
+	}
+	$prev_oid = $oid;
 }
-END {
-	print last + 1, "-", ENVIRON["FIRSTOBJECTID"]-1;
-}'