From 4572489c5946b5a99d20d6b3692282b07fd4eccd Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Tue, 14 Jun 2022 10:22:57 +0200 Subject: [PATCH] copyright.pl: parse and use .reuse/dep5 for skips Also scan skipped files to be able to find superfluous ignores, shown with -v. Closes #9006 --- scripts/copyright.pl | 161 +++++++++++++++++++++++++------------------ 1 file changed, 95 insertions(+), 66 deletions(-) diff --git a/scripts/copyright.pl b/scripts/copyright.pl index f98558bc31..89f6b78749 100755 --- a/scripts/copyright.pl +++ b/scripts/copyright.pl @@ -29,64 +29,27 @@ # Usage: copyright.pl [file] # -# regexes of files to not scan -my @skiplist=( - '^tests\/data\/test(\d+)$', # test case data +my %skips; - # all uppercase file name, possibly with dot and dash. But do not exclude - # the man pages: - '(\/|^)[A-Z0-9_.-]+[^31]$', - '(\/|^)[A-Z0-9_-]+\.md$', # all uppercase file name with .md extension - '^tests/certs/.*', # generated certs - '^tests/stunnel.pem', # generated cert - '^tests/valgrind.supp', # valgrind suppressions - '^projects/Windows/.*.dsw$', # generated MSVC file - '^projects/Windows/.*.sln$', # generated MSVC file - '^projects/Windows/.*.tmpl$', # generated MSVC file - '^projects/Windows/.*.vcxproj.filters$', # generated MSVC file - '^m4/ax_compile_check_sizeof.m4$', # imported, leave be - '^.mailmap', # git control file - '\/readme', - "buildconf", # its nothing to copyright +# file names +my %skiplist = ( + # REUSE-specific file + ".reuse/dep5" => "", - # docs/ files we're okay with without copyright - 'INSTALL.cmake', - 'TheArtOfHttpScripting', - 'page-footer', - 'curl_multi_socket_all.3', - 'curl_strnequal.3', - 'symbols-in-versions', - 'options-in-versions', + # License texts + "LICENSES/BSD-3-Clause.txt" => "", + "LICENSES/BSD-4-Clause-UC.txt" => "", + "LICENSES/GPL-3.0-or-later.txt" => "", + "LICENSES/ISC.txt" => "", + "LICENSES/LicenseRef-OpenEvidence.txt" => "", + "LICENSES/curl.txt" => "", + "COPYING" => "", - # macos-framework files 
- '^lib\/libcurl.plist.in', - '^lib\/libcurl.vers.in', - - # vms files - '^packages\/vms\/build_vms.com', - '^packages\/vms\/curl_release_note_start.txt', - '^packages\/vms\/curlmsg.sdl', - '^packages\/vms\/macro32_exactcase.patch', - - # XML junk - '^projects\/wolfssl_override.props', - - # checksrc control files - '\.checksrc$', + # imported, leave be + 'm4/ax_compile_check_sizeof.m4' => "", # an empty control file - "^zuul.d/playbooks/.zuul.ignore", - - # markdown linkchecker config - "mlc_config.json", - - # github template file - "^.github/ISSUE_TEMPLATE/bug_report.md", - - # License texts and REUSE-specific files - ".reuse/dep5", - "LICENSES/.*" - + "zuul.d/playbooks/.zuul.ignore" => "", ); sub scanfile { @@ -122,13 +85,18 @@ sub scanfile { } sub checkfile { - my ($file) = @_; + my ($file, $skipped, $pattern) = @_; my $fine = 0; @copyright=(); $spdx = 0; my $found = scanfile($file); if($found < 1) { + if($skipped) { + # just move on + $skips{$pattern}++; + return 0; + } if(!$found) { print "$file:1: missing copyright range\n"; return 2; @@ -138,6 +106,11 @@ sub checkfile { return 1; } if(!$spdx) { + if($skipped) { + # move on + $skips{$pattern}++; + return 0; + } print "$file:1: missing SPDX-License-Identifier\n"; return 2; } @@ -162,14 +135,56 @@ sub checkfile { $copyright[0]{year} != $commityear) { printf "$file:%d: copyright year out of date, should be $commityear, " . 
"is $copyright[0]{year}\n", - $copyright[0]{line}; + $copyright[0]{line} if(!$skipped || $verbose); + $skips{$pattern}++ if($skipped); } else { $fine = 1; } + if($skipped && $fine) { + print "$file:1: ignored superfluously by $pattern\n" if($verbose); + $superf{$pattern}++; + } + return $fine; } +sub dep5 { + my ($file) = @_; + my @files; + my $copy; + open(F, "<$file") || die "can't open $file"; + my $line = 0; + while(<F>) { + $line++; + if(/^Files: (.*)/i) { + push @files, `git ls-files $1`; + } + elsif(/^Copyright: (.*)/i) { + $copy = $1; + } + elsif(/^License: (.*)/i) { + my $license = $1; + for my $f (@files) { + chomp $f; + if($f =~ /\.gitignore\z/) { + # ignore .gitignore + } + else { + if($skiplist{$f}) { + print STDERR "$f already skipped at $skiplist{$f}\n"; + } + $skiplist{$f} = "dep5:$line"; + } + } + undef @files; + } + } + close(F); +} + +dep5(".reuse/dep5"); + my @all; my $verbose; if($ARGV[0] eq "-v") { @@ -182,22 +197,26 @@ if($ARGV[0]) { else { @all = `git ls-files`; } + for my $f (@all) { chomp $f; my $skipped = 0; - for my $skip (@skiplist) { - #print "$f matches $skip ?\n"; - if($f =~ /$skip/) { - $skiplisted++; - $skipped = 1; - #print "$f: SKIPPED ($skip)\n"; - last; - } + my $miss; + my $wro; + my $pattern; + if($skiplist{$f}) { + $pattern = $skip; + $skiplisted++; + $skipped = 1; } + + my $r = checkfile($f, $skipped, $pattern); + $mis=1 if($r == 2); + $wro=1 if(!$r); + if(!$skipped) { - my $r = checkfile($f); - $missing++ if($r == 2); - $wrong++ if(!$r); + $missing += $mis; + $wrong += $wro; } } @@ -205,6 +224,16 @@ if($verbose) { print STDERR "$missing files have no copyright\n" if($missing); print STDERR "$wrong files have wrong copyright year\n" if ($wrong); print STDERR "$skiplisted files are skipped\n" if ($skiplisted); + + for my $s (@skiplist) { + if(!$skips{$s}) { + printf ("Never skipped pattern: %s\n", $s); + } + if($superf{$s}) { + printf ("%s was skipped superfluously %u times and legitimately %u times\n", + $s, $superf{$s}, 
$skips{$s}); + } + } } exit 1 if($missing || $wrong);