curl/scripts/copyright.pl
Daniel Stenberg 86d33001e4
reuse: add copyright + license info to individual docs/*.md files
Instead of use 'docs/*.md' in dep5. For clarity and avoiding a wide-
matching wildcard.

+ Remove mention of old files from .reuse/dep5
+ add info to .github/dependabot.yml
+ make scripts/copyright.pl warn on non-matching patterns

Closes #13245
2024-03-31 12:01:18 +02:00

234 lines
5.7 KiB
Perl
Executable File

#!/usr/bin/env perl
#***************************************************************************
# _ _ ____ _
# Project ___| | | | _ \| |
# / __| | | | |_) | |
# | (__| |_| | _ <| |___
# \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
#
# Invoke script in the root of the git checkout. Scans all files in git unless
# given a specific single file.
#
# Usage: copyright.pl [file]
#
my %skips;
# file names
my %skiplist = (
# REUSE-specific file
".reuse/dep5" => "<built-in>",
# License texts
"LICENSES/BSD-3-Clause.txt" => "<built-in>",
"LICENSES/BSD-4-Clause-UC.txt" => "<built-in>",
"LICENSES/ISC.txt" => "<built-in>",
"LICENSES/curl.txt" => "<built-in>",
"COPYING" => "<built-in>",
);
sub scanfile {
my ($f) = @_;
my $line=1;
my $found = 0;
open(F, "<$f") || return -1;
while (<F>) {
chomp;
my $l = $_;
# check for a copyright statement and save the years
if($l =~ /.* ?copyright .* (\d\d\d\d|)/i) {
my $count = 0;
while($l =~ /([\d]{4})/g) {
push @copyright, {
year => $1,
line => $line,
col => index($l, $1),
code => $l
};
$count++;
}
if(!$count) {
# year-less
push @copyright, {
year => -1,
line => $line,
col => index($l, $1),
code => $l
};
$count++;
}
$found = $count;
}
if($l =~ /SPDX-License-Identifier:/) {
$spdx = 1;
}
# allow within the first 100 lines
if(++$line > 100) {
last;
}
}
close(F);
return $found;
}
sub checkfile {
my ($file, $skipped, $pattern) = @_;
$spdx = 0;
my $found = scanfile($file);
if($found < 1) {
if($skipped) {
# just move on
$skips{$pattern}++;
return 0;
}
if(!$found) {
print "$file:1: missing copyright range\n";
return 2;
}
# this means the file couldn't open - it might not exist, consider
# that fine
return 1;
}
if(!$spdx) {
if($skipped) {
# move on
$skips{$pattern}++;
return 0;
}
print "$file:1: missing SPDX-License-Identifier\n";
return 2;
}
if($skipped) {
print "$file:1: ignored superfluously by $pattern\n" if($verbose);
$superf{$pattern}++;
}
return 1;
}
sub dep5 {
my ($file) = @_;
my @files;
my $copy;
open(F, "<$file") || die "can't open $file";
my $line = 0;
while(<F>) {
$line++;
if(/^Files: (.*)/i) {
my @all = `git ls-files $1`;
if(!$all[0]) {
print STDERR "$1 matches no files\n";
}
else {
push @files, @all;
}
}
elsif(/^Copyright: (.*)/i) {
$copy = $1;
}
elsif(/^License: (.*)/i) {
my $license = $1;
for my $f (@files) {
chomp $f;
if($f =~ /\.gitignore\z/) {
# ignore .gitignore
}
else {
if($skiplist{$f}) {
print STDERR "$f already skipped at $skiplist{$f}\n";
}
$skiplist{$f} = "dep5:$line";
}
}
undef @files;
}
}
close(F);
}
dep5(".reuse/dep5");
my $checkall = 0;
my @all;
my $verbose;
if($ARGV[0] eq "-v") {
$verbose = 1;
shift @ARGV;
}
if($ARGV[0]) {
push @all, @ARGV;
}
else {
@all = `git ls-files`;
$checkall = 1;
}
for my $f (@all) {
chomp $f;
my $skipped = 0;
my $miss;
my $wro;
my $pattern;
if($skiplist{$f}) {
$pattern = $skip;
$skiplisted++;
$skipped = 1;
$skip{$f}++;
}
my $r = checkfile($f, $skipped, $pattern);
$mis=1 if($r == 2);
$wro=1 if(!$r);
if(!$skipped) {
$missing += $mis;
$wrong += $wro;
}
}
if($verbose) {
print STDERR "$missing files have no copyright\n" if($missing);
print STDERR "$wrong files have wrong copyright year\n" if ($wrong);
print STDERR "$skiplisted files are skipped\n" if ($skiplisted);
for my $s (@skiplist) {
if(!$skips{$s}) {
printf ("Never skipped pattern: %s\n", $s);
}
if($superf{$s}) {
printf ("%s was skipped superfluously %u times and legitimately %u times\n",
$s, $superf{$s}, $skips{$s});
}
}
}
if($checkall) {
for(keys %skiplist) {
if(!$skip{$_}) {
printf STDERR "$_ is marked for SKIP but is missing!\n";
}
}
}
exit 1 if($missing || $wrong);