curl/scripts/cd2nroff
Daniel Stenberg 69b6c10568
cd2nroff: convert two warnings to errors
Since the warnings tend to get missed too easily and these are problems
we rather want addressed than letting slide.

Closes #13929
2024-06-12 13:19:00 +02:00

534 lines
14 KiB
Perl
Executable File

#!/usr/bin/env perl
#***************************************************************************
# _ _ ____ _
# Project ___| | | | _ \| |
# / __| | | | |_) | |
# | (__| |_| | _ <| |___
# \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
=begin comment
Converts a curldown file to nroff (man page).
=end comment
=cut
use strict;
use warnings;
my $cd2nroff = "0.1"; # to keep check
my $dir;
my $extension;
my $keepfilename;
while(@ARGV) {
if($ARGV[0] eq "-d") {
shift @ARGV;
$dir = shift @ARGV;
}
elsif($ARGV[0] eq "-e") {
shift @ARGV;
$extension = shift @ARGV;
}
elsif($ARGV[0] eq "-k") {
shift @ARGV;
$keepfilename = 1;
}
elsif($ARGV[0] eq "-h") {
print <<HELP
Usage: cd2nroff [options] [file.md]
-d <dir> Write the output to the file name from the meta-data in the
specified directory, instead of writing to stdout
-e <ext> If -d is used, this option can provide an added "extension", arbitrary
text really, to append to the file name.
-h This help text,
-v Show version then exit
HELP
;
exit 0;
}
elsif($ARGV[0] eq "-v") {
print "cd2nroff version $cd2nroff\n";
exit 0;
}
else {
last;
}
}
use POSIX qw(strftime);
my @ts;
if (defined($ENV{SOURCE_DATE_EPOCH})) {
@ts = gmtime($ENV{SOURCE_DATE_EPOCH});
} else {
@ts = localtime;
}
my $date = strftime "%Y-%m-%d", @ts;
sub outseealso {
my (@sa) = @_;
my $comma = 0;
my @o;
push @o, ".SH SEE ALSO\n";
for my $s (sort @sa) {
push @o, sprintf "%s.BR $s", $comma ? ",\n": "";
$comma = 1;
}
push @o, "\n";
return @o;
}
sub outprotocols {
my (@p) = @_;
my $comma = 0;
my @o;
push @o, ".SH PROTOCOLS\n";
if($p[0] eq "TLS") {
push @o, "All TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc.";
}
else {
my @s = sort @p;
for my $e (sort @s) {
push @o, sprintf "%s$e",
$comma ? (($e eq $s[-1]) ? " and " : ", "): "";
$comma = 1;
}
}
push @o, "\n";
return @o;
}
sub outtls {
my (@t) = @_;
my $comma = 0;
my @o;
if($t[0] eq "All") {
push @o, "\nAll TLS backends support this option.";
}
else {
push @o, "\nThis option works only with the following TLS backends:\n";
my @s = sort @t;
for my $e (@s) {
push @o, sprintf "%s$e",
$comma ? (($e eq $s[-1]) ? " and " : ", "): "";
$comma = 1;
}
}
push @o, "\n";
return @o;
}
my %knownprotos = (
'DICT' => 1,
'FILE' => 1,
'FTP' => 1,
'FTPS' => 1,
'GOPHER' => 1,
'GOPHERS' => 1,
'HTTP' => 1,
'HTTPS' => 1,
'IMAP' => 1,
'IMAPS' => 1,
'LDAP' => 1,
'LDAPS' => 1,
'MQTT' => 1,
'POP3' => 1,
'POP3S' => 1,
'RTMP' => 1,
'RTMPS' => 1,
'RTSP' => 1,
'SCP' => 1,
'SFTP' => 1,
'SMB' => 1,
'SMBS' => 1,
'SMTP' => 1,
'SMTPS' => 1,
'TELNET' => 1,
'TFTP' => 1,
'WS' => 1,
'WSS' => 1,
'TLS' => 1,
'TCP' => 1,
'All' => 1
);
my %knowntls = (
'BearSSL' => 1,
'GnuTLS' => 1,
'mbedTLS' => 1,
'OpenSSL' => 1,
'rustls' => 1,
'Schannel' => 1,
'Secure Transport' => 1,
'wolfSSL' => 1,
'All' => 1,
);
sub single {
my @seealso;
my @proto;
my @tls;
my $d;
my ($f)=@_;
my $copyright;
my $errors = 0;
my $fh;
my $line;
my $list;
my $tlslist;
my $section;
my $source;
my $spdx;
my $start = 0;
my $title;
if(defined($f)) {
if(!open($fh, "<:crlf", "$f")) {
print STDERR "cd2nroff failed to open '$f' for reading: $!\n";
return 1;
}
}
else {
$f = "STDIN";
$fh = \*STDIN;
binmode($fh, ":crlf");
}
while(<$fh>) {
$line++;
if(!$start) {
if(/^---/) {
# header starts here
$start = 1;
}
next;
}
if(/^Title: *(.*)/i) {
$title=$1;
}
elsif(/^Section: *(.*)/i) {
$section=$1;
}
elsif(/^Source: *(.*)/i) {
$source=$1;
}
elsif(/^See-also: +(.*)/i) {
$list = 1; # 1 for see-also
push @seealso, $1;
}
elsif(/^See-also: */i) {
if($seealso[0]) {
print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
return 2;
}
$list = 1; # 1 for see-also
}
elsif(/^Protocol:/i) {
$list = 2; # 2 for protocol
}
elsif(/^TLS-backend:/i) {
$list = 3; # 3 for TLS backend
}
elsif(/^ +- (.*)/i) {
# the only lists we support are see-also and protocol
if($list == 1) {
push @seealso, $1;
}
elsif($list == 2) {
push @proto, $1;
}
elsif($list == 3) {
push @tls, $1;
}
else {
print STDERR "$f:$line:1:ERROR: list item without owner?\n";
return 2;
}
}
# REUSE-IgnoreStart
elsif(/^C: (.*)/i) {
$copyright=$1;
}
elsif(/^SPDX-License-Identifier: (.*)/i) {
$spdx=$1;
}
# REUSE-IgnoreEnd
elsif(/^---/) {
# end of the header section
if(!$title) {
print STDERR "ERROR: no 'Title:' in $f\n";
return 1;
}
if(!$section) {
print STDERR "ERROR: no 'Section:' in $f\n";
return 2;
}
if(!$seealso[0]) {
print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
return 2;
}
if(!$copyright) {
print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
return 2;
}
if(!$spdx) {
print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
return 2;
}
if($section == 3) {
if(!$proto[0]) {
printf STDERR "$f:$line:1:ERROR: missing Protocol:\n";
exit 2;
}
my $tls = 0;
for my $p (@proto) {
if($p eq "TLS") {
$tls = 1;
}
if(!$knownprotos{$p}) {
printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n";
exit 2;
}
}
# This is for TLS, require TLS-backend:
if($tls) {
if(!$tls[0]) {
printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n";
exit 2;
}
for my $t (@tls) {
if(!$knowntls{$t}) {
printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n";
exit 2;
}
}
}
}
last;
}
else {
chomp;
print STDERR "$f:$line:1:ERROR: unrecognized header keyword: '$_'\n";
$errors++;
}
}
if(!$start) {
print STDERR "$f:$line:1:ERROR: no header present\n";
return 2;
}
my @desc;
my $quote = 0;
my $blankline = 0;
my $header = 0;
# cut off the leading path from the file name, if any
$f =~ s/^(.*[\\\/])//;
push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
push @desc, ".TH $title $section \"$date\" $source\n";
while(<$fh>) {
$line++;
$d = $_;
if($quote) {
if($quote == 4) {
# remove the indentation
if($d =~ /^ (.*)/) {
push @desc, "$1\n";
next;
}
else {
# end of quote
$quote = 0;
push @desc, ".fi\n";
next;
}
}
if(/^~~~/) {
# end of quote
$quote = 0;
push @desc, ".fi\n";
next;
}
# convert single backslahes to doubles
$d =~ s/\\/\\\\/g;
# lines starting with a period needs it escaped
$d =~ s/^\./\\&./;
push @desc, $d;
next;
}
# remove single line HTML comments
$d =~ s/<!--.*?-->//g;
# **bold**
$d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
# *italics*
$d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;
if($d =~ /[^\\][\<\>]/) {
print STDERR "$f:$line:1:ERROR: un-escaped < or > used\n";
$errors++;
}
# convert backslash-'<' or '> to just the second character
$d =~ s/\\([<>])/$1/g;
# mentions of curl symbols with man pages use italics by default
$d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;
# backticked becomes italics
$d =~ s/\`(.*?)\`/\\fI$1\\fP/g;
if(/^## (.*)/) {
my $word = $1;
# if there are enclosing quotes, remove them first
$word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/;
# enclose in double quotes if there is a space present
if($word =~ / /) {
push @desc, ".IP \"$word\"\n";
}
else {
push @desc, ".IP $word\n";
}
$header = 1;
}
elsif(/^##/) {
# end of IP sequence
push @desc, ".PP\n";
$header = 1;
}
elsif(/^# (.*)/) {
my $word = $1;
# if there are enclosing quotes, remove them first
$word =~ s/[\"\'](.*)[\"\']\z/$1/;
if($word eq "PROTOCOLS") {
print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n";
}
elsif($word eq "EXAMPLE") {
# insert the generated PROTOCOLS section before EXAMPLE
push @desc, outprotocols(@proto);
if($proto[0] eq "TLS") {
push @desc, outtls(@tls);
}
}
push @desc, ".SH $word\n";
$header = 1;
}
elsif(/^~~~c/) {
# start of a code section, not indented
$quote = 1;
push @desc, "\n" if($blankline && !$header);
$header = 0;
push @desc, ".nf\n";
}
elsif(/^~~~/) {
# start of a quote section; not code, not indented
$quote = 1;
push @desc, "\n" if($blankline && !$header);
$header = 0;
push @desc, ".nf\n";
}
elsif(/^ (.*)/) {
# quoted, indented by 4 space
$quote = 4;
push @desc, "\n" if($blankline && !$header);
$header = 0;
push @desc, ".nf\n$1\n";
}
elsif(/^[ \t]*\n/) {
# count and ignore blank lines
$blankline++;
}
else {
# don't output newlines if this is the first content after a
# header
push @desc, "\n" if($blankline && !$header);
$blankline = 0;
$header = 0;
# quote minuses in the output
$d =~ s/([^\\])-/$1\\-/g;
# replace single quotes
$d =~ s/\'/\\(aq/g;
# handle double quotes first on the line
$d =~ s/^(\s*)\"/$1\\&\"/;
# lines starting with a period needs it escaped
$d =~ s/^\./\\&./;
if($d =~ /^(.*) /) {
printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n",
length($1);
$errors++;
}
if($d =~ /^[ \t]*\n/) {
# replaced away all contents
$blankline= 1;
}
else {
push @desc, $d;
}
}
}
if($fh != \*STDIN) {
close($fh);
}
push @desc, outseealso(@seealso);
if($dir) {
if($keepfilename) {
$title = $f;
$title =~ s/\.[^.]*$//;
}
my $outfile = "$dir/$title.$section";
if(defined($extension)) {
$outfile .= $extension;
}
if(!open(O, ">", $outfile)) {
print STDERR "Failed to open $outfile : $!\n";
return 1;
}
print O @desc;
close(O);
}
else {
print @desc;
}
return $errors;
}
if(@ARGV) {
for my $f (@ARGV) {
my $r = single($f);
if($r) {
exit $r;
}
}
}
else {
exit single();
}