GHA: spellcheck

This spellchecker checks markdown files. For this reason this job
converts all man pages in the repository to markdown with pandoc before
the check runs.

The perl script 'cleanspell' filters out details from the man pages in
the process, so that the spellchecker does not try to check things it
cannot handle, such as curl-specific symbols and the SYNOPSIS and
EXAMPLE sections of libcurl man pages.

The spell checker does not check words in sections that are within pre,
strong and em tags.

'spellcheck.words' is a custom word list with additional accepted words.

Closes #9523
This commit is contained in:
Daniel Stenberg 2022-09-20 23:30:02 +02:00
parent 6267244161
commit 72c41f7c8b
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
4 changed files with 1025 additions and 0 deletions

79
.github/scripts/cleanspell.pl vendored Executable file
View File

@ -0,0 +1,79 @@
#!/usr/bin/perl
# Copyright (C) 2022 Daniel Stenberg, <daniel@haxx.se>, et al.
#
# SPDX-License-Identifier: curl
#
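# Usage: cleanspell.pl <input> <output>
#
# Input: a libcurl nroff man page
# Output: the same file, minus the SYNOPSIS, EXAMPLE and SEE ALSO sections,
# and with libcurl symbol names and URLs removed from the remaining text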
#
use strict;
use warnings;

# Copies the man page named by the first argument to the file named by the
# second argument, line by line, while:
#  - skipping every line of the SYNOPSIS, EXAMPLE and SEE ALSO sections, and
#  - deleting libcurl symbol names, a few function names and URLs from the
#    lines that are kept, since a spellchecker cannot judge those.
my ($f, $o) = @ARGV;
if(!defined($f) || !defined($o)) {
    die "Usage: $0 <input man page> <output file>\n";
}

# Three-argument open with lexical handles: the file name can never be
# misread as an open mode, and the handles are scoped to this script.
open(my $in, '<', $f) or die "$0: cannot open '$f' for reading: $!\n";
open(my $out, '>', $o) or die "$0: cannot open '$o' for writing: $!\n";

# Patterns removed from every kept line. They are applied one after the
# other in exactly this order; the URL pattern must stay last so that it
# sees the line after the symbol names have already been removed.
my @filters = (
    qr/CURL(M|SH|U|H)code/,
    qr/CURL_(READ|WRITE)FUNC_[A-Z0-9_]*/,
    qr/CURL_CSELECT_[A-Z0-9_]*/,
    qr/CURL_DISABLE_[A-Z0-9_]*/,
    qr/CURL_FORMADD_[A-Z0-9_]*/,
    qr/CURL_HET_DEFAULT/,
    qr/CURL_IPRESOLVE_[A-Z0-9_]*/,
    qr/CURL_PROGRESSFUNC_CONTINUE/,
    qr/CURL_REDIR_[A-Z0-9_]*/,
    qr/CURL_RTSPREQ_[A-Z0-9_]*/,
    qr/CURL_TIMECOND_[A-Z0-9_]*/,
    qr/CURL_VERSION_[A-Z0-9_]*/,
    qr/CURLALTSVC_[A-Z0-9_]*/,
    qr/CURLAUTH_[A-Z0-9_]*/,
    qr/CURLE_[A-Z0-9_]*/,
    qr/CURLFORM_[A-Z0-9_]*/,
    qr/CURLFTP_[A-Z0-9_]*/,
    qr/CURLFTPAUTH_[A-Z0-9_]*/,
    qr/CURLFTPMETHOD_[A-Z0-9_]*/,
    qr/CURLFTPSSL_[A-Z0-9_]*/,
    qr/CURLGSSAPI_[A-Z0-9_]*/,
    qr/CURLHEADER_[A-Z0-9_]*/,
    qr/CURLINFO_[A-Z0-9_]*/,
    qr/CURLM_[A-Z0-9_]*/,
    qr/CURLMIMEOPT_[A-Z0-9_]*/,
    qr/CURLMOPT_[A-Z0-9_]*/,
    qr/CURLOPT_[A-Z0-9_]*/,
    qr/CURLPIPE_[A-Z0-9_]*/,
    qr/CURLPROTO_[A-Z0-9_]*/,
    qr/CURLPROXY_[A-Z0-9_]*/,
    qr/CURLPX_[A-Z0-9_]*/,
    qr/CURLSHE_[A-Z0-9_]*/,
    qr/CURLSHOPT_[A-Z0-9_]*/,
    qr/CURLSSH_[A-Z0-9_]*/,
    qr/CURLSSLBACKEND_[A-Z0-9_]*/,
    qr/CURLU_[A-Z0-9_]*/,
    qr/CURLUE_[A-Z0-9_]*/,
    qr/CURLUPART_[A-Z0-9_]*/,
    qr/CURLUSESSL_[A-Z0-9_]*/,
    qr/curl_global_(init_mem|sslset|cleanup)/,
    qr/curl_(strequal|strnequal|formadd|waitfd|formget|getdate|formfree)/,
    qr/curl_easy_(nextheader|duphandle)/,
    qr/curl_multi_fdset/,
    qr/curl_mime_(subparts|addpart|filedata|data_cb)/,
    qr/curl_ws_(send|recv|meta)/,
    qr/curl_url_(dup)/,
    qr/libcurl-env/,
    # URLs; the non-word character in front of the URL (if any) is part of
    # the match and is removed together with it
    qr/(^|\W)((tftp|https|http|ftp):\/\/[a-z0-9\-._~%:\/?\#\[\]\@!\$&'()*+,;=]+)/i,
);

# true while the current line belongs to a section that must be dropped
my $ignore = 0;

while(my $line = <$in>) {
    # The dot of the .SH macro is escaped so that it only matches a literal
    # dot at the start of the line.
    if($line =~ /^\.SH (SYNOPSIS|EXAMPLE|\"SEE ALSO\"|SEE ALSO)/) {
        # start (or continue) dropping lines, including this header
        $ignore = 1;
    }
    elsif($ignore && ($line =~ /^\.SH/)) {
        # Any other section header ends the dropped region. As before, this
        # header line itself is consumed and not written to the output.
        $ignore = 0;
    }
    elsif(!$ignore) {
        for my $re (@filters) {
            $line =~ s/$re//g;
        }
        print {$out} $line or die "$0: write to '$o' failed: $!\n";
    }
}

close($in);
# Buffered write errors only surface at close time, so check it.
close($out) or die "$0: closing '$o' failed: $!\n";

864
.github/scripts/spellcheck.words vendored Normal file
View File

@ -0,0 +1,864 @@
# Copyright (C) 2022 Daniel Stenberg, <daniel@haxx.se>, et al.
#
# SPDX-License-Identifier: curl
#
ABI
accessor
ACK
AES
AIA
AIX
al
Alessandro
allocator
alnum
ALPN
Altera
ALTSVC
amiga
AmigaOS
AmiSSL
anyauth
anycast
API
APIs
APOP
AppVeyor
archivers
Archos
Arntsen
Aros
ascii
asynch
AsynchDNS
atime
auth
autobuild
autobuilds
Autoconf
Automake
Autotools
autotools
AVR
axTLS
backend
backends
backoff
backticks
Baratov
basename
bashrc
BDFL
BearSSL
Benoit
BeOS
bitmask
bitwise
Björn
Bjørn
bool
boolean
BoringSSL
boringssl
Boukris
Broadcom
brotli
bufref
bugfix
bugfixes
buildable
buildbot
buildconf
Caddy
calloc
CAPA
CCC
CDN
CentOS
CFLAGS
CGI's
CHACHA
chacha
Chaffraix
changelog
changeset
CharConv
charset
charsets
checksrc
checksums
chgrp
chmod
chown
ChromeOS
CI's
CIFS
CLA
CLAs
cleartext
CLI
clientp
cliget
closesocket
CMake
cmake
cmake's
CMakeLists
CodeQL
codeql
CODESET
codeset
Comcast
Config
config
conncache
connectdata
CookieInfo
Coverity
CPUs
CR
CRL
CRLF
crt
crypto
cryptographic
cryptographically
CSEQ
CSeq
csh
cshrc
CTRL
cURL
CURLcode
CURLE
CURLH
curlimages
curlrc
customizable
CVE
CVSS
CWD
CWE
cyassl
Cygwin
daniel
datatracker
decrypt
deepcode
DELE
DER
deselectable
destructor
detections
dev
devcpp
DevOps
devtools
DHCP
dir
distro
distro's
distros
dlist
DLL
dll
DLLs
DNS
dns
dnsop
DoH
doxygen
drftpd
dsa
Dudka
Dymond
dynbuf
EAGAIN
EBCDIC
ECC
ECDHE
ECH
eCOS
EFnet
EGD
EHLO
EINTR
else's
encodings
enctype
endianness
Engler
enum
epoll
EPRT
EPSV
ERRNO
errno
ESNI
et
etag
ETag
ETags
exe
executables
EXPN
extensibility
failsafe
Falkeborn
Fandrich
Fastly
fcpp
Feltzing
ffi
filesize
filesystem
FLOSS
fnmatch
formpost
formposts
Fortnite
FOSS
FPL
fread
FreeBSD
FreeDOS
FreeRTOS
freshmeat
Frexx
fseek
FTPing
fuzzer
fwrite
Garmin
gcc
GCM
gdb
Genode
Gergely
getaddrinfo
getenv
gethostbyname
gethostname
Getinfo
getinfo
GETing
getpwuid
ggcov
Ghedini
Gisle
github
Glesys
globbing
gmail
GnuTLS
gnutls
Golemon
GOST
GPG
GPL
GPLed
Greear
groff
GSKit
gskit
GSS
GSSAPI
Guenter
Gunderson
Gustafsson
gzip
Gzipped
gzipped
HackerOne
HackerOne's
HAProxy
HardenedBSD
Hards
Haxx
haxx
Heimdal
HELO
HH
HMAC
Hoersken
Holme
homebrew
hostname
hostnames
Housley
Hruska
HSTS
hsts
HTC
html
http
HTTPAUTH
httpget
HttpGet
HTTPS
https
hyper's
Högskolan
IANA
Icecast
ICONV
iconv
IDN
IDNA
IETF
ietf
ifdef
ifdefed
Ifdefs
ifdefs
ILE
Illumos
IMAP
imap
IMAPS
imaps
impacket
init
initializer
inlined
interoperable
interoperates
IoT
ipadOS
IPCXN
IPv
IRIs
IRIX
Itanium
iX
Jakub
Jiri
jo
jpeg
jq
JSON
json
Julien
Kamil
Kaufmann
kB
KDE
keepalive
Keil
kerberos
Keychain
KiB
kickstart
Kirei
Knauf
kqueue
Krb
krb
Kubernetes
Kuhrt
Kungliga
Largefile
LDAP
ldap
LDAPS
ldaps
LF
LGTM
libbrotlidec
libc
libcurl
libcurl's
libcurls
libera
libev
libevent
libgsasl
libidn
libnssckbi
libnsspem
libpsl
Libre
libre
LibreSSL
libressl
librtmp
libs
libssh
libSSH
libssh2
Libtool
libuv
libWebSocket
libz
libzstd
LineageOS
linux
ln
localhost
logfile
lookups
loopback
LPRT
LSB
lseek
Lua
lwIP
macdef
macOS
macos
Makefile
makefiles
malloc
mallocs
maprintf
Marek
Mavrogiannopoulos
Mbed
mbedTLS
Meglio
memdebug
MesaLink
mesalink
Metalink
mfprintf
Michal
Micrium
MicroBlaze
MicroOS
mingw
MingW
MINIX
misconfigured
Mishyn
mitigations
MITM
mk
mkdir
mktime
Monnerat
monospace
MorphOS
MPE
MPL
mprintf
MQTT
mqtt
mqtts
MSB
MSGSENT
msh
MSIE
msnprintf
msprintf
msquic
mstate
MSVC
MSYS
msys
mtime
mTLS
MUA
multicwd
multiparts
MultiSSL
mumbo
musedev
mutex
mvaprintf
mvfprintf
mvprintf
mvsnprintf
mvsprintf
MX
Nagel
Nagle
NAMELOOKUP
Natively
NATs
nc
NCR
NDK
NEC
Necko
NetBSD
netrc
Netware
NFS
nghttp
nghttpx
ngtcp
Nikos
Nios
nitems
NixOS
NLST
nmake
nmemb
nocwd
NODELAY
NonStop
NOOP
Novell
NPN
nroff
nslookup
NSS
nss
NTLM
NTLMUSER
NTLMv
NUM
NuttX
OAuth
objcopy
OCSP
Ok
OpenBSD
OpenLDAP
OpenRISC
OpenSSF
OpenSSF's
OpenSSH
OpenSSL
OpenStep
openSUSE
openwall
Orbis
ORing
Osipov
OSS
pac
pacman
parser's
parsers
PASE
PASV
PEM
pem
perl
permafailing
PINGs
pipelining
PKCS
PKGBUILD
PKI
pluggable
PolarSSL
Polhem
pollset
POSIX
Postfix
POSTing
POSTs
PowerShell
pre
prebuilt
precompiled
prepend
prepended
prepending
prepends
preprocess
preprocessed
Preprocessing
preprocessor
Prereq
PRET
pretransfer
printf
printf's
PSL
pthreads
PTR
ptr
py
pycurl
QNX
QoS
Qubes
QUIC
quictls
quicwg
Raad
radix
RAS
RBS
ReactOS
README
realloc
rebase
RECV
recv
Redhat
redirections
redirs
redistributable
Redox
reentrant
Referer
referer
reinitializes
Relatedly
repo
reprioritized
resending
RETR
retransmit
retrigger
RICS
Rikard
rmdir
ROADMAP
Roadmap
Rockbox
roffit
RPG
RSA
RTMP
rtmp
RTMPE
RTMPS
RTMPT
RTMPTE
RTMPTS
RTOS
RTP
RTSP
rtsp
RTT
runtests
runtime
Ruslan
rustc
rustls
Sagula
SanDisk
SAS
SASL
Satiro
Schannel
Schindelin
SCO
SCP
scp
SDK
se
SEB
SEK
selectable
Serv
setopt
setsockopt
setuid
SFTP
sftp
sha
SHOUTcast
SIGALRM
SIGCHLD
SIGPIPE
singlecwd
SINIX
Sintonen
sizeof
SLE
slist
sln
SMB
smb
SMBS
smbs
SMBv
SMTP
smtp
smtps
SMTPS
SNI
socketopen
socketpair
sockopt
SOCKOPT
SOCKSv
Solaris
SONAME
Soref
SPARC
SPDX
SPNEGO
Spotify
sprintf
src
SRP
SRWLOCK
SSL
ssl
SSLeay
SSLKEYLOGFILE
sslv
SSLv
SSLVERSION
SSPI
stackoverflow
STARTTLS
STARTTRANSFER
stateful
statvfs
stderr
stdin
stdout
Steinar
Stenberg
STOR
strcat
strcpy
strdup
strerror
strlen
strncat
struct
structs
Structs
stunnel
subdirectories
subdirectory
submitters
substring
substrings
SunOS
SunSSH
superset
svc
svcb
Svyatoslav
Swisscom
sws
Symbian
symlink
symlinks
syntaxes
Szakats
TABs
Tatsuhiro
TBD
TCP
tcpdump
Tekniska
testability
TFTP
tftp
Tizen
TLS
tlsv
TLSv
TODO
Tomtom
toolchain
toolchains
toolset
TPF
TrackMemory
Tru
Tse
Tsujikawa
TTL
tvOS
txt
typedef
typedefed
ucLinux
UDP
UI
UID
UIDL
Ultrix
Unary
unassign
uncompress
unencrypted
unescape
UNICOS
unix
UnixSockets
UnixWare
unlink
unpause
unpaused
unpauses
unpausing
unsanitized
Unshare
unsharing
untrusted
UPN
upstreaming
URI
URIs
url
URL's
urlencoded
urlget
USD
userdata
Userinfo
userinfo
UTF
UX
valgrind
Vanem
vararg
VC
vcpkg
vexxhost
Viktor
VM
VMS
VMware
VSE
vsprintf
vt
vtls
vxWorks
wakeup
Warta
watchOS
WAV
WB
web page
WebOS
WebSocket
WEBSOCKET
WHATWG
whitespace
Whitespaces
winbind
WinBind
winbuild
winidn
WinIDN
WinLDAP
WinSock
winsock
WinSSL
winssl
Wireshark
wolfSSH
wolfSSL
WS
WSS
www
Xbox
xdigit
Xilinx
XP
Xtensa
XYZ
Youtube
YYYY
YYYYMMDD
Zakrzewski
Zitzmann
zlib
zsh
zstd
Zuul
zuul

32
.github/scripts/spellcheck.yaml vendored Normal file
View File

@ -0,0 +1,32 @@
# Copyright (C) 2022 Daniel Stenberg, <daniel@haxx.se>, et al.
#
# SPDX-License-Identifier: curl
#
# Docs: https://github.com/UnicornGlobal/spellcheck-github-actions
# pyspelling configuration used by the spellcheck GitHub action.
# A single task: spellcheck every markdown file in the repository.
matrix:
  - name: Markdown
    # NOTE(review): presumably this keeps the task from failing when the
    # 'sources' patterns match no file - confirm against the pyspelling docs
    expect_match: false
    # options handed to aspell; 'lang' selects the dictionary language
    aspell:
      lang: en
    dictionary:
      # custom list of additionally accepted words; the workflow generates
      # wordlist.txt from .github/scripts/spellcheck.words
      wordlists:
        - wordlist.txt
      output: wordlist.dic
      encoding: utf-8
    pipeline:
      # markdown is first rendered to HTML ...
      - pyspelling.filters.markdown:
          markdown_extensions:
            - markdown.extensions.extra:
      # ... and the HTML filter then selects the text to check
      - pyspelling.filters.html:
          comments: true
          attributes:
            - title
            - alt
          # text inside these elements is not spellchecked
          ignores:
            - ':matches(code, pre)'
            - 'code'
            - 'pre'
            - 'strong'
            - 'em'
    sources:
      # all markdown files except docs/BINDINGS.md
      - '**/*.md|!docs/BINDINGS.md'

50
.github/workflows/spellcheck.yml vendored Normal file
View File

@ -0,0 +1,50 @@
# Copyright (C) 2022 Daniel Stenberg, <daniel@haxx.se>, et al.
#
# SPDX-License-Identifier: curl
# Spellchecks the markdown files and man pages of the repository.
# The checker only understands markdown, so the man pages are converted to
# markdown with pandoc before the check runs.
name: spell

on:
  push:
    branches:
      - master
    paths:
      - '**.md'
      - '**.3'
      - '**.1'
      - '.github/**'
  pull_request:
    branches:
      - master
    paths:
      - '**.md'
      - '**.3'
      - '**.1'
      - '.github/**'

# the job only needs to read the repository contents
permissions:
  contents: read

jobs:
  check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: install pandoc
        # refresh the package index first and never wait for confirmation
        run: sudo apt-get update && sudo apt-get install -y pandoc

      # Every step below only considers files larger than 40 bytes
      # (-size +40c). The sed expressions are anchored with '$' so that only
      # the file name suffix is stripped, never a '.3' or '.1' earlier in
      # the path.
      - name: strip "uncheckable" sections from .3 pages
        # writes the filtered copy of FILE.3 to FILE.33
        run: find docs -name "*.3" -size +40c | sed 's/\.3$//' | xargs -t -I OO ./.github/scripts/cleanspell.pl OO.3 OO.33

      - name: convert .3 man pages to markdown
        run: find docs -name "*.33" -size +40c | sed 's/\.33$//' | xargs -t -I OO pandoc -f man -t markdown OO.33 -o OO.md

      - name: convert .1 man pages to markdown
        run: find docs -name "*.1" -size +40c | sed 's/\.1$//' | xargs -t -I OO pandoc -f man -t markdown OO.1 -o OO.md

      - name: setup the custom wordlist
        # drop the comment lines; the rest is the list of accepted words
        run: grep -v '^#' .github/scripts/spellcheck.words > wordlist.txt

      - name: setup the spellcheck config
        run: cp .github/scripts/spellcheck.yaml .

      - name: Check Spelling
        uses: UnicornGlobal/spellcheck-github-actions@master