curl/scripts/cd2nroff
Daniel Stenberg e3fe020089
docs/libcurl: generate PROTOCOLS from meta-data
Remove the PROTOCOLS section from the source files completely and
instead generate them based on the header data in the curldown files.

It also generates TLS backend information for options marked for TLS as
protocol.

Closes #13175
2024-03-23 18:13:03 +01:00

527 lines
13 KiB
Perl
Executable File

#!/usr/bin/env perl
#***************************************************************************
# _ _ ____ _
# Project ___| | | | _ \| |
# / __| | | | |_) | |
# | (__| |_| | _ <| |___
# \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
=begin comment
Converts a curldown file to nroff (man page).
=end comment
=cut
use strict;
use warnings;
my $cd2nroff = "0.1"; # to keep check
my $dir;
my $extension;
my $keepfilename;
while(@ARGV) {
if($ARGV[0] eq "-d") {
shift @ARGV;
$dir = shift @ARGV;
}
elsif($ARGV[0] eq "-e") {
shift @ARGV;
$extension = shift @ARGV;
}
elsif($ARGV[0] eq "-k") {
shift @ARGV;
$keepfilename = 1;
}
elsif($ARGV[0] eq "-h") {
print <<HELP
Usage: cd2nroff [options] [file.md]
-d <dir> Write the output to the file name from the meta-data in the
specified directory, instead of writing to stdout
-e <ext> If -d is used, this option can provide an added "extension", arbitrary
text really, to append to the file name.
-h This help text,
-v Show version then exit
HELP
;
exit 0;
}
elsif($ARGV[0] eq "-v") {
print "cd2nroff version $cd2nroff\n";
exit 0;
}
else {
last;
}
}
use POSIX qw(strftime);
my @ts;
if (defined($ENV{SOURCE_DATE_EPOCH})) {
@ts = localtime($ENV{SOURCE_DATE_EPOCH});
} else {
@ts = localtime;
}
my $date = strftime "%B %d %Y", @ts;
sub outseealso {
my (@sa) = @_;
my $comma = 0;
my @o;
push @o, ".SH SEE ALSO\n";
for my $s (sort @sa) {
push @o, sprintf "%s.BR $s", $comma ? ",\n": "";
$comma = 1;
}
push @o, "\n";
return @o;
}
sub outprotocols {
my (@p) = @_;
my $comma = 0;
my @o;
push @o, ".SH PROTOCOLS\n";
if($p[0] eq "TLS") {
push @o, "All TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc.";
}
else {
my @s = sort @p;
for my $e (sort @s) {
push @o, sprintf "%s$e",
$comma ? (($e eq $s[-1]) ? " and " : ", "): "";
$comma = 1;
}
}
push @o, "\n";
return @o;
}
sub outtls {
my (@t) = @_;
my $comma = 0;
my @o;
if($t[0] eq "All") {
push @o, "\nAll TLS backends support this option.";
}
else {
push @o, "\nThis option works only with the following TLS backends:\n";
my @s = sort @t;
for my $e (@s) {
push @o, sprintf "%s$e",
$comma ? (($e eq $s[-1]) ? " and " : ", "): "";
$comma = 1;
}
}
push @o, "\n";
return @o;
}
my %knownprotos = (
'DICT' => 1,
'FILE' => 1,
'FTP' => 1,
'FTPS' => 1,
'GOPHER' => 1,
'GOPHERS' => 1,
'HTTP' => 1,
'HTTPS' => 1,
'IMAP' => 1,
'IMAPS' => 1,
'LDAP' => 1,
'LDAPS' => 1,
'MQTT' => 1,
'POP3' => 1,
'POP3S' => 1,
'RTMP' => 1,
'RTMPS' => 1,
'RTSP' => 1,
'SCP' => 1,
'SFTP' => 1,
'SMB' => 1,
'SMBS' => 1,
'SMTP' => 1,
'SMTPS' => 1,
'TELNET' => 1,
'TFTP' => 1,
'WS' => 1,
'WSS' => 1,
'TLS' => 1,
'TCP' => 1,
'All' => 1
);
my %knowntls = (
'BearSSL' => 1,
'GnuTLS' => 1,
'mbedTLS' => 1,
'OpenSSL' => 1,
'rustls' => 1,
'Schannel' => 1,
'Secure Transport' => 1,
'wolfSSL' => 1,
'All' => 1,
);
sub single {
my @seealso;
my @proto;
my @tls;
my $d;
my ($f)=@_;
my $copyright;
my $errors = 0;
my $fh;
my $line;
my $list;
my $tlslist;
my $section;
my $source;
my $spdx;
my $start = 0;
my $title;
if(defined($f)) {
if(!open($fh, "<:crlf", "$f")) {
print STDERR "cd2nroff failed to open '$f' for reading: $!\n";
return 1;
}
}
else {
$f = "STDIN";
$fh = \*STDIN;
binmode($fh, ":crlf");
}
while(<$fh>) {
$line++;
if(!$start) {
if(/^---/) {
# header starts here
$start = 1;
}
next;
}
if(/^Title: *(.*)/i) {
$title=$1;
}
elsif(/^Section: *(.*)/i) {
$section=$1;
}
elsif(/^Source: *(.*)/i) {
$source=$1;
}
elsif(/^See-also: +(.*)/i) {
$list = 1; # 1 for see-also
push @seealso, $1;
}
elsif(/^See-also: */i) {
if($seealso[0]) {
print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
return 2;
}
$list = 1; # 1 for see-also
}
elsif(/^Protocol:/i) {
$list = 2; # 2 for protocol
}
elsif(/^TLS-backend:/i) {
$list = 3; # 3 for TLS backend
}
elsif(/^ +- (.*)/i) {
# the only lists we support are see-also and protocol
if($list == 1) {
push @seealso, $1;
}
elsif($list == 2) {
push @proto, $1;
}
elsif($list == 3) {
push @tls, $1;
}
else {
print STDERR "$f:$line:1:ERROR: list item without owner?\n";
return 2;
}
}
# REUSE-IgnoreStart
elsif(/^C: (.*)/i) {
$copyright=$1;
}
elsif(/^SPDX-License-Identifier: (.*)/i) {
$spdx=$1;
}
# REUSE-IgnoreEnd
elsif(/^---/) {
# end of the header section
if(!$title) {
print STDERR "ERROR: no 'Title:' in $f\n";
return 1;
}
if(!$section) {
print STDERR "ERROR: no 'Section:' in $f\n";
return 2;
}
if(!$seealso[0]) {
print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
return 2;
}
if(!$copyright) {
print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
return 2;
}
if(!$spdx) {
print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
return 2;
}
if($section == 3) {
if(!$proto[0]) {
printf STDERR "$f:$line:1:ERROR: missing Protocol:\n";
exit 2;
}
my $tls = 0;
for my $p (@proto) {
if($p eq "TLS") {
$tls = 1;
}
if(!$knownprotos{$p}) {
printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n";
exit 2;
}
}
# This is for TLS, require TLS-backend:
if($tls) {
if(!$tls[0]) {
printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n";
exit 2;
}
for my $t (@tls) {
if(!$knowntls{$t}) {
printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n";
exit 2;
}
}
}
}
last;
}
else {
chomp;
print STDERR "WARN: unrecognized line in $f, ignoring:\n:'$_';"
}
}
if(!$start) {
print STDERR "$f:$line:1:ERROR: no header present\n";
return 2;
}
my @desc;
my $quote = 0;
my $blankline = 0;
my $header = 0;
# cut off the leading path from the file name, if any
$f =~ s/^(.*[\\\/])//;
push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
push @desc, ".TH $title $section \"$date\" $source\n";
while(<$fh>) {
$line++;
$d = $_;
if($quote) {
if($quote == 4) {
# remove the indentation
if($d =~ /^ (.*)/) {
push @desc, "$1\n";
next;
}
else {
# end of quote
$quote = 0;
push @desc, ".fi\n";
next;
}
}
if(/^~~~/) {
# end of quote
$quote = 0;
push @desc, ".fi\n";
next;
}
# convert single backslahes to doubles
$d =~ s/\\/\\\\/g;
# lines starting with a period needs it escaped
$d =~ s/^\./\\&./;
push @desc, $d;
next;
}
# remove single line HTML comments
$d =~ s/<!--.*?-->//g;
# **bold**
$d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
# *italics*
$d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;
if($d =~ /[^\\][\<\>]/) {
print STDERR "$f:$line:1:WARN: un-escaped < or > used\n";
}
# convert backslash-'<' or '> to just the second character
$d =~ s/\\([<>])/$1/g;
# mentions of curl symbols with man pages use italics by default
$d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;
# backticked becomes italics
$d =~ s/\`(.*?)\`/\\fI$1\\fP/g;
if(/^## (.*)/) {
my $word = $1;
# if there are enclosing quotes, remove them first
$word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/;
# enclose in double quotes if there is a space present
if($word =~ / /) {
push @desc, ".IP \"$word\"\n";
}
else {
push @desc, ".IP $word\n";
}
$header = 1;
}
elsif(/^# (.*)/) {
my $word = $1;
# if there are enclosing quotes, remove them first
$word =~ s/[\"\'](.*)[\"\']\z/$1/;
if($word eq "PROTOCOLS") {
print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n";
}
elsif($word eq "EXAMPLE") {
# insert the generated PROTOCOLS section before EXAMPLE
push @desc, outprotocols(@proto);
if($proto[0] eq "TLS") {
push @desc, outtls(@tls);
}
}
push @desc, ".SH $word\n";
$header = 1;
}
elsif(/^~~~c/) {
# start of a code section, not indented
$quote = 1;
push @desc, "\n" if($blankline && !$header);
$header = 0;
push @desc, ".nf\n";
}
elsif(/^~~~/) {
# start of a quote section; not code, not indented
$quote = 1;
push @desc, "\n" if($blankline && !$header);
$header = 0;
push @desc, ".nf\n";
}
elsif(/^ (.*)/) {
# quoted, indented by 4 space
$quote = 4;
push @desc, "\n" if($blankline && !$header);
$header = 0;
push @desc, ".nf\n$1\n";
}
elsif(/^[ \t]*\n/) {
# count and ignore blank lines
$blankline++;
}
else {
# don't output newlines if this is the first content after a
# header
push @desc, "\n" if($blankline && !$header);
$blankline = 0;
$header = 0;
# quote minuses in the output
$d =~ s/([^\\])-/$1\\-/g;
# replace single quotes
$d =~ s/\'/\\(aq/g;
# handle double quotes first on the line
$d =~ s/^(\s*)\"/$1\\&\"/;
# lines starting with a period needs it escaped
$d =~ s/^\./\\&./;
if($d =~ /^(.*) /) {
printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n",
length($1);
$errors++;
}
if($d =~ /^[ \t]*\n/) {
# replaced away all contents
$blankline= 1;
}
else {
push @desc, $d;
}
}
}
if($fh != \*STDIN) {
close($fh);
}
push @desc, outseealso(@seealso);
if($dir) {
if($keepfilename) {
$title = $f;
$title =~ s/\.[^.]*$//;
}
my $outfile = "$dir/$title.$section";
if(defined($extension)) {
$outfile .= $extension;
}
if(!open(O, ">", $outfile)) {
print STDERR "Failed to open $outfile : $!\n";
return 1;
}
print O @desc;
close(O);
}
else {
print @desc;
}
return $errors;
}
if(@ARGV) {
for my $f (@ARGV) {
my $r = single($f);
if($r) {
exit $r;
}
}
}
else {
exit single();
}