curl/scripts/cd2nroff
Daniel Stenberg eefcc1bda4
docs: introduce "curldown" for libcurl man page format
curldown is this new file format for libcurl man pages. It is markdown
inspired with differences:

- Each file has a set of leading headers with meta-data
- Supports a small subset of markdown
- Uses .md file extensions for editors/IDE/GitHub to treat them nicely
- Generates man pages very similar to the previous ones
- Generates man pages that still convert nicely to HTML on the website
- Detects and highlights mentions of curl symbols automatically (when
  their man page section is specified)

tools:

- cd2nroff: converts from curldown to nroff man page
- nroff2cd: convert an (old) nroff man page to curldown
- cdall: convert many nroff pages to curldown versions
- cd2cd: verifies and updates a curldown to latest curldown

This setup generates .3 versions of all the curldown versions at build time.

CI:

Since the documentation is now technically markdown in the eyes of many
things, the CI runs many more tests and checks on this documentation,
including proselint, link checkers and tests that make sure we capitalize the
first letter after a period...

Closes #12730
2024-01-23 00:29:02 +01:00

332 lines
8.7 KiB
Perl
Executable File

#!/usr/bin/env perl
#***************************************************************************
# _ _ ____ _
# Project ___| | | | _ \| |
# / __| | | | |_) | |
# | (__| |_| | _ <| |___
# \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
=begin comment
Converts a curldown file to nroff (man page).
=end comment
=cut
my $cd2nroff = "0.1"; # to keep check
my $dir;
my $extension;
while(1) {
if($ARGV[0] eq "-d") {
shift @ARGV;
$dir = shift @ARGV;
}
elsif($ARGV[0] eq "-e") {
shift @ARGV;
$extension = shift @ARGV;
}
elsif($ARGV[0] eq "-h") {
print <<HELP
Usage: cd2nroff [options] [file.md]
-d <dir> Write the output to the file name from the meta-data in the
specified directory, instead of writing to stdout
-e <ext> If -d is used, this option can provide an added "extension", arbitrary
text really, to append to the file name.
-h This help text,
-v Show version then exit
HELP
;
exit 0;
}
elsif($ARGV[0] eq "-v") {
print "cd2nroff version $cd2nroff\n";
exit 0;
}
else {
last;
}
}
use POSIX qw(strftime);
my @ts;
if (defined($ENV{SOURCE_DATE_EPOCH})) {
@ts = localtime($ENV{SOURCE_DATE_EPOCH});
} else {
@ts = localtime;
}
my $date = strftime "%B %d %Y", @ts;
sub outseealso {
my (@sa) = @_;
my $comma = 0;
my @o;
push @o, ".SH SEE ALSO\n";
for my $s (sort @sa) {
push @o, sprintf "%s.BR $s", $comma ? ",\n": "";
$comma = 1;
}
push @o, "\n";
return @o;
}
sub single {
my @seealso;
my ($f)=@_;
my $title;
my $section;
my $source;
my $start = 0;
my $errors;
my $fh;
if($f) {
open($fh, "<:crlf", "$f") || return 1;
}
else {
$fh = STDIN;
}
while(<$fh>) {
$line++;
if(!$start) {
if(/^---/) {
# header starts here
$start = 1;
}
next;
}
if(/^Title: *(.*)/i) {
$title=$1;
}
elsif(/^Section: *(.*)/i) {
$section=$1;
}
elsif(/^Source: *(.*)/i) {
$source=$1;
}
elsif(/^See-also: +(.*)/i) {
$salist = 0;
push @seealso, $1;
}
elsif(/^See-also: */i) {
if($seealso[0]) {
print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
return 2;
}
$salist = 1;
}
elsif(/^ +- (.*)/i) {
# the only list we support is the see-also
if($salist) {
push @seealso, $1;
}
}
# REUSE-IgnoreStart
elsif(/^C: (.*)/i) {
$copyright=$1;
}
elsif(/^SPDX-License-Identifier: (.*)/i) {
$spdx=$1;
}
# REUSE-IgnoreEnd
elsif(/^---/) {
# end of the header section
if(!$title) {
print STDERR "ERROR: no 'Title:' in $f\n";
return 1;
}
if(!$section) {
print STDERR "ERROR: no 'Section:' in $f\n";
return 2;
}
if(!$seealso[0]) {
print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
return 2;
}
if(!$copyright) {
print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
return 2;
}
if(!$spdx) {
print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
return 2;
}
last;
}
else {
chomp;
print STDERR "WARN: unrecognized line in $f, ignoring:\n:'$_';"
}
}
if(!$start) {
print STDERR "$f:$line:1:ERROR: no header present\n";
return 2;
}
my @desc;
my $quote = 0;
my $blankline = 0;
my $header = 0;
# cut off the leading path from the file name, if any
$f =~ s/^(.*[\\\/])//;
push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
push @desc, ".TH $title $section \"$date\" $source\n";
while(<$fh>) {
$line++;
$d = $_;
if($quote) {
if($quote == 4) {
# remove the indentation
if($d =~ /^ (.*)/) {
push @desc, "$1\n";
next;
}
else {
# end of quote
$quote = 0;
push @desc, ".fi\n";
next;
}
}
if(/^~~~/) {
# end of quote
$quote = 0;
push @desc, ".fi\n";
next;
}
# convert single backslahes to doubles
$d =~ s/\\/\\\\/g;
# lines starting with a period needs it escaped
$d =~ s/^\./\\&./;
push @desc, $d;
next;
}
# **bold**
$d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
# *italics*
$d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;
# mentions of curl symbols with man pages use italics by default
$d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;
# backticked becomes italics
$d =~ s/\`(.*?)\`/\\fI$1\\fP/g;
if(/^## (.*)/) {
my $word = $1;
# if there are enclosing quotes, remove them first
$word =~ s/[\"\'](.*)[\"\']\z/$1/;
# enclose in double quotes if there is a space present
if($word =~ / /) {
push @desc, ".IP \"$word\"\n";
}
else {
push @desc, ".IP $word\n";
}
$header = 1;
}
elsif(/^# (.*)/) {
my $word = $1;
# if there are enclosing quotes, remove them first
$word =~ s/[\"\'](.*)[\"\']\z/$1/;
push @desc, ".SH $word\n";
$header = 1;
}
elsif(/^~~~c/) {
# start of a code section, not indented
$quote = 1;
push @desc, "\n" if($blankline && !$header);
$header = 0;
push @desc, ".nf\n";
}
elsif(/^~~~/) {
# start of a quote section; not code, not indented
$quote = 1;
push @desc, "\n" if($blankline && !$header);
$header = 0;
push @desc, ".nf\n";
}
elsif(/^ (.*)/) {
# quoted, indented by 4 space
$quote = 4;
push @desc, "\n" if($blankline && !$header);
$header = 0;
push @desc, ".nf\n$1\n";
}
elsif(/^[ \t]*\n/) {
# count and ignore blank lines
$blankline++;
}
else {
# don't output newlines if this is the first content after a
# header
push @desc, "\n" if($blankline && !$header);
$blankline = 0;
$header = 0;
# remove single line HTML comments
$d =~ s/<!--.*?-->//g;
# quote minuses in the output
$d =~ s/([^\\])-/$1\\-/g;
# replace single quotes
$d =~ s/\'/\\(aq/g;
# handle double quotes first on the line
$d =~ s/^(\s*)\"/$1\\&\"/;
# lines starting with a period needs it escaped
$d =~ s/^\./\\&./;
if($d =~ /^(.*) /) {
printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n",
length($1);
$errors++;
}
if($d =~ /^[ \t]*\n/) {
# replaced away all contents
$blankline= 1;
}
else {
push @desc, $d;
}
}
}
close($fh);
push @desc, outseealso(@seealso);
if($dir) {
open(O, ">$dir/$title.$section$extension");
print O @desc;
close(O);
}
else {
print @desc;
}
return $errors;
}
exit single($ARGV[0]);