copyright.pl: parse and use .reuse/dep5 for skips

Also scan the skipped files so that superfluous ignore entries can be detected; these are reported when running with -v.

Closes #9006
This commit is contained in:
Daniel Stenberg 2022-06-14 10:22:57 +02:00
parent 2b7f7d04f7
commit 4572489c59
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2

View File

@ -29,64 +29,27 @@
# Usage: copyright.pl [file]
#
# regexes of files to not scan
my @skiplist=(
'^tests\/data\/test(\d+)$', # test case data
my %skips;
# all uppercase file name, possibly with dot and dash. But do not exclude
# the man pages:
'(\/|^)[A-Z0-9_.-]+[^31]$',
'(\/|^)[A-Z0-9_-]+\.md$', # all uppercase file name with .md extension
'^tests/certs/.*', # generated certs
'^tests/stunnel.pem', # generated cert
'^tests/valgrind.supp', # valgrind suppressions
'^projects/Windows/.*.dsw$', # generated MSVC file
'^projects/Windows/.*.sln$', # generated MSVC file
'^projects/Windows/.*.tmpl$', # generated MSVC file
'^projects/Windows/.*.vcxproj.filters$', # generated MSVC file
'^m4/ax_compile_check_sizeof.m4$', # imported, leave be
'^.mailmap', # git control file
'\/readme',
"buildconf", # its nothing to copyright
# file names
my %skiplist = (
# REUSE-specific file
".reuse/dep5" => "<built-in>",
# docs/ files we're okay with without copyright
'INSTALL.cmake',
'TheArtOfHttpScripting',
'page-footer',
'curl_multi_socket_all.3',
'curl_strnequal.3',
'symbols-in-versions',
'options-in-versions',
# License texts
"LICENSES/BSD-3-Clause.txt" => "<built-in>",
"LICENSES/BSD-4-Clause-UC.txt" => "<built-in>",
"LICENSES/GPL-3.0-or-later.txt" => "<built-in>",
"LICENSES/ISC.txt" => "<built-in>",
"LICENSES/LicenseRef-OpenEvidence.txt" => "<built-in>",
"LICENSES/curl.txt" => "<built-in>",
"COPYING" => "<built-in>",
# macos-framework files
'^lib\/libcurl.plist.in',
'^lib\/libcurl.vers.in',
# vms files
'^packages\/vms\/build_vms.com',
'^packages\/vms\/curl_release_note_start.txt',
'^packages\/vms\/curlmsg.sdl',
'^packages\/vms\/macro32_exactcase.patch',
# XML junk
'^projects\/wolfssl_override.props',
# checksrc control files
'\.checksrc$',
# imported, leave be
'm4/ax_compile_check_sizeof.m4' => "<built-in>",
# an empty control file
"^zuul.d/playbooks/.zuul.ignore",
# markdown linkchecker config
"mlc_config.json",
# github template file
"^.github/ISSUE_TEMPLATE/bug_report.md",
# License texts and REUSE-specific files
".reuse/dep5",
"LICENSES/.*"
"zuul.d/playbooks/.zuul.ignore" => "<built-in>",
);
sub scanfile {
@ -122,13 +85,18 @@ sub scanfile {
}
sub checkfile {
my ($file) = @_;
my ($file, $skipped, $pattern) = @_;
my $fine = 0;
@copyright=();
$spdx = 0;
my $found = scanfile($file);
if($found < 1) {
if($skipped) {
# just move on
$skips{$pattern}++;
return 0;
}
if(!$found) {
print "$file:1: missing copyright range\n";
return 2;
@ -138,6 +106,11 @@ sub checkfile {
return 1;
}
if(!$spdx) {
if($skipped) {
# move on
$skips{$pattern}++;
return 0;
}
print "$file:1: missing SPDX-License-Identifier\n";
return 2;
}
@ -162,14 +135,56 @@ sub checkfile {
$copyright[0]{year} != $commityear) {
printf "$file:%d: copyright year out of date, should be $commityear, " .
"is $copyright[0]{year}\n",
$copyright[0]{line};
$copyright[0]{line} if(!$skipped || $verbose);
$skips{$pattern}++ if($skipped);
}
else {
$fine = 1;
}
if($skipped && $fine) {
print "$file:1: ignored superfluously by $pattern\n" if($verbose);
$superf{$pattern}++;
}
return $fine;
}
# Parse a REUSE dep5 file and register every file it covers in %skiplist.
#
# Each "Files:" line is expanded into file names by running `git ls-files`
# on its value. When a subsequent "License:" line is seen, every file
# collected so far (except names ending in ".gitignore") is stored in
# %skiplist with the value "dep5:<line number of that License: line>", and
# the collected list is reset. A file that already has a %skiplist entry is
# reported on STDERR before the entry is overwritten. All other lines,
# including "Copyright:", are ignored.
#
# Parameters:
#   $file - path of the dep5 file to read
#
# Dies if the file cannot be opened.
sub dep5 {
    my ($file) = @_;
    my @files;

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as a mode, and the handle does not leak into the package.
    open(my $fh, '<', $file) || die "can't open $file: $!";

    my $line = 0;
    # Read into a lexical so the caller's $_ is left alone.
    while(my $text = <$fh>) {
        $line++;
        if($text =~ /^Files: (.*)/i) {
            # NOTE(review): the pattern text is passed through the shell, so
            # it is word-split and glob-expanded there before git sees it.
            # Kept as-is because dep5 is a repository-controlled file and the
            # expansion is part of the current behavior.
            chomp(my @matched = `git ls-files $1`);
            push @files, @matched;
        }
        elsif($text =~ /^License: (.*)/i) {
            for my $f (@files) {
                # .gitignore files are never registered as skipped
                next if($f =~ /\.gitignore\z/);

                if($skiplist{$f}) {
                    print STDERR "$f already skipped at $skiplist{$f}\n";
                }
                $skiplist{$f} = "dep5:$line";
            }
            # the stanza is complete, start collecting afresh
            @files = ();
        }
    }
    close($fh);
}
dep5(".reuse/dep5");
my @all;
my $verbose;
if($ARGV[0] eq "-v") {
@ -182,22 +197,26 @@ if($ARGV[0]) {
else {
@all = `git ls-files`;
}
for my $f (@all) {
chomp $f;
my $skipped = 0;
for my $skip (@skiplist) {
#print "$f matches $skip ?\n";
if($f =~ /$skip/) {
$skiplisted++;
$skipped = 1;
#print "$f: SKIPPED ($skip)\n";
last;
}
my $miss;
my $wro;
my $pattern;
if($skiplist{$f}) {
$pattern = $skip;
$skiplisted++;
$skipped = 1;
}
my $r = checkfile($f, $skipped, $pattern);
$mis=1 if($r == 2);
$wro=1 if(!$r);
if(!$skipped) {
my $r = checkfile($f);
$missing++ if($r == 2);
$wrong++ if(!$r);
$missing += $mis;
$wrong += $wro;
}
}
@ -205,6 +224,16 @@ if($verbose) {
print STDERR "$missing files have no copyright\n" if($missing);
print STDERR "$wrong files have wrong copyright year\n" if ($wrong);
print STDERR "$skiplisted files are skipped\n" if ($skiplisted);
for my $s (@skiplist) {
if(!$skips{$s}) {
printf ("Never skipped pattern: %s\n", $s);
}
if($superf{$s}) {
printf ("%s was skipped superfluously %u times and legitimately %u times\n",
$s, $superf{$s}, $skips{$s});
}
}
}
exit 1 if($missing || $wrong);