url: find scheme with a "perfect hash"

Instead of looping over the potentially 30+ scheme names, this uses a
"perfect hash" table. This works because the set of schemes is known and
cannot change within a build. The hash algorithm and table size are chosen
so that each table entry maps to at most one scheme.

The perfect hash is generated by a separate tool (scripts/schemetable.c).

Closes #12347
This commit is contained in:
Daniel Stenberg 2023-11-17 13:04:13 +01:00
parent d6447b1bd2
commit b2d8f3f7f9
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2

343
lib/url.c
View File

@ -168,130 +168,6 @@ static curl_prot_t get_protocol_family(const struct Curl_handler *h)
return h->family;
}
/*
 * Protocol table. Schemes (roughly) in 2019 popularity order:
 *
 * HTTPS, HTTP, FTP, FTPS, SFTP, FILE, SCP, SMTP, LDAP, IMAPS, TELNET, IMAP,
 * LDAPS, SMTPS, TFTP, SMB, POP3, GOPHER, POP3S, RTSP, RTMP, SMBS, DICT
 *
 * Each handler is only listed when the preprocessor conditions guarding it
 * are met. The table ends with a NULL entry.
 */
static const struct Curl_handler * const protocols[] = {
#if defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_https,
#endif
#ifndef CURL_DISABLE_HTTP
&Curl_handler_http,
#endif
#ifdef USE_WEBSOCKETS
#if defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_wss,
#endif
#ifndef CURL_DISABLE_HTTP
&Curl_handler_ws,
#endif
#endif
#ifndef CURL_DISABLE_FTP
&Curl_handler_ftp,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_FTP)
&Curl_handler_ftps,
#endif
#if defined(USE_SSH)
&Curl_handler_sftp,
#endif
#ifndef CURL_DISABLE_FILE
&Curl_handler_file,
#endif
#if defined(USE_SSH) && !defined(USE_WOLFSSH)
&Curl_handler_scp,
#endif
#ifndef CURL_DISABLE_SMTP
&Curl_handler_smtp,
#ifdef USE_SSL
&Curl_handler_smtps,
#endif
#endif
#ifndef CURL_DISABLE_LDAP
&Curl_handler_ldap,
#if !defined(CURL_DISABLE_LDAPS) && \
((defined(USE_OPENLDAP) && defined(USE_SSL)) || \
(!defined(USE_OPENLDAP) && defined(HAVE_LDAP_SSL)))
&Curl_handler_ldaps,
#endif
#endif
#ifndef CURL_DISABLE_IMAP
&Curl_handler_imap,
#ifdef USE_SSL
&Curl_handler_imaps,
#endif
#endif
#ifndef CURL_DISABLE_TELNET
&Curl_handler_telnet,
#endif
#ifndef CURL_DISABLE_TFTP
&Curl_handler_tftp,
#endif
#ifndef CURL_DISABLE_POP3
&Curl_handler_pop3,
#ifdef USE_SSL
&Curl_handler_pop3s,
#endif
#endif
#if !defined(CURL_DISABLE_SMB) && defined(USE_CURL_NTLM_CORE) && \
(SIZEOF_CURL_OFF_T > 4)
&Curl_handler_smb,
#ifdef USE_SSL
&Curl_handler_smbs,
#endif
#endif
#ifndef CURL_DISABLE_RTSP
&Curl_handler_rtsp,
#endif
#ifndef CURL_DISABLE_MQTT
&Curl_handler_mqtt,
#endif
#ifndef CURL_DISABLE_GOPHER
&Curl_handler_gopher,
#ifdef USE_SSL
&Curl_handler_gophers,
#endif
#endif
#ifdef USE_LIBRTMP
&Curl_handler_rtmp,
&Curl_handler_rtmpt,
&Curl_handler_rtmpe,
&Curl_handler_rtmpte,
&Curl_handler_rtmps,
&Curl_handler_rtmpts,
#endif
#ifndef CURL_DISABLE_DICT
&Curl_handler_dict,
#endif
/* end-of-table marker */
NULL
};
void Curl_freeset(struct Curl_easy *data)
{
/* Free all dynamic strings stored in the data->set substructure. */
@ -1593,17 +1469,216 @@ const struct Curl_handler *Curl_get_scheme_handler(const char *scheme)
const struct Curl_handler *Curl_getn_scheme_handler(const char *scheme,
size_t len)
{
size_t i;
/* Scan protocol handler table and match against 'scheme'. The handler may
be changed later when the protocol specific setup function is called. */
for(i = 0; i < ARRAYSIZE(protocols) - 1; ++i)
if(strncasecompare(protocols[i]->scheme, scheme, len) &&
!protocols[i]->scheme[len])
/* Protocol found in table. */
return protocols[i];
return NULL; /* not found */
}
/* table generated by schemetable.c:
   1. gcc schemetable.c && ./a.out
   2. check how small the table gets
   3. tweak the hash algorithm, then rerun from 1
   4. repeat until the table is good enough
   5. copy the table into this source code
   6. make sure this function uses the same hash function that worked for
      schemetable.c
   7. if needed, adjust the #ifdefs in schemetable.c and rerun

   The table has 67 slots. A scheme's slot is the hash computed below,
   modulo 67, so the position of every entry matters: do not reorder or
   insert entries by hand. A slot holds NULL when it is unused or when the
   preprocessor conditions for its handler are not met.
*/
static const struct Curl_handler * const protocols[67] = {
#ifndef CURL_DISABLE_FILE
&Curl_handler_file,
#else
NULL,
#endif
NULL, NULL,
#if defined(USE_SSL) && !defined(CURL_DISABLE_GOPHER)
&Curl_handler_gophers,
#else
NULL,
#endif
NULL,
#ifdef USE_LIBRTMP
&Curl_handler_rtmpe,
#else
NULL,
#endif
#ifndef CURL_DISABLE_SMTP
&Curl_handler_smtp,
#else
NULL,
#endif
#if defined(USE_SSH)
&Curl_handler_sftp,
#else
NULL,
#endif
#if !defined(CURL_DISABLE_SMB) && defined(USE_CURL_NTLM_CORE) && \
(SIZEOF_CURL_OFF_T > 4)
&Curl_handler_smb,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_SMTP)
&Curl_handler_smtps,
#else
NULL,
#endif
#ifndef CURL_DISABLE_TELNET
&Curl_handler_telnet,
#else
NULL,
#endif
#ifndef CURL_DISABLE_GOPHER
&Curl_handler_gopher,
#else
NULL,
#endif
#ifndef CURL_DISABLE_TFTP
&Curl_handler_tftp,
#else
NULL,
#endif
NULL, NULL, NULL,
#if defined(USE_SSL) && !defined(CURL_DISABLE_FTP)
&Curl_handler_ftps,
#else
NULL,
#endif
#ifndef CURL_DISABLE_HTTP
&Curl_handler_http,
#else
NULL,
#endif
#ifndef CURL_DISABLE_IMAP
&Curl_handler_imap,
#else
NULL,
#endif
#ifdef USE_LIBRTMP
&Curl_handler_rtmps,
#else
NULL,
#endif
#ifdef USE_LIBRTMP
&Curl_handler_rtmpt,
#else
NULL,
#endif
NULL, NULL, NULL,
#if !defined(CURL_DISABLE_LDAP) && \
!defined(CURL_DISABLE_LDAPS) && \
((defined(USE_OPENLDAP) && defined(USE_SSL)) || \
(!defined(USE_OPENLDAP) && defined(HAVE_LDAP_SSL)))
&Curl_handler_ldaps,
#else
NULL,
#endif
#if defined(USE_WEBSOCKETS) && \
defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_wss,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_https,
#else
NULL,
#endif
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
#ifndef CURL_DISABLE_RTSP
&Curl_handler_rtsp,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_SMB) && \
defined(USE_CURL_NTLM_CORE) && (SIZEOF_CURL_OFF_T > 4)
&Curl_handler_smbs,
#else
NULL,
#endif
#if defined(USE_SSH) && !defined(USE_WOLFSSH)
&Curl_handler_scp,
#else
NULL,
#endif
NULL, NULL, NULL,
#ifndef CURL_DISABLE_POP3
&Curl_handler_pop3,
#else
NULL,
#endif
NULL, NULL,
#ifdef USE_LIBRTMP
&Curl_handler_rtmp,
#else
NULL,
#endif
NULL, NULL, NULL,
#ifdef USE_LIBRTMP
&Curl_handler_rtmpte,
#else
NULL,
#endif
NULL, NULL, NULL,
#ifndef CURL_DISABLE_DICT
&Curl_handler_dict,
#else
NULL,
#endif
NULL, NULL, NULL,
#ifndef CURL_DISABLE_MQTT
&Curl_handler_mqtt,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_POP3)
&Curl_handler_pop3s,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_IMAP)
&Curl_handler_imaps,
#else
NULL,
#endif
NULL,
#if defined(USE_WEBSOCKETS) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_ws,
#else
NULL,
#endif
NULL,
#ifdef USE_LIBRTMP
&Curl_handler_rtmpts,
#else
NULL,
#endif
#ifndef CURL_DISABLE_LDAP
&Curl_handler_ldap,
#else
NULL,
#endif
NULL, NULL,
#ifndef CURL_DISABLE_FTP
&Curl_handler_ftp,
#else
NULL,
#endif
};
/* Names that are empty or longer than 7 bytes are rejected without hashing.
   NOTE(review): 7 is presumably the length of the longest scheme name in
   the table - confirm against schemetable.c when adding a scheme. */
if(len && (len <= 7)) {
const char *s = scheme;
size_t l = len;
const struct Curl_handler *h;
/* hash seed; the seed, the shift and the modulus must stay identical to
   the ones schemetable.c used when it generated the table */
unsigned int c = 978;
/* fold the first 'len' bytes of the name into the hash, each one passed
   through Curl_raw_tolower(): c = c * 32 + byte. 'c' is unsigned, so bits
   shifted out at the top are simply discarded. */
while(l) {
c <<= 5;
c += Curl_raw_tolower(*s);
s++;
l--;
}
/* 67 is the number of slots in the table above */
h = protocols[c % 67];
/* The hash only selects a candidate slot. Accept the candidate only when
   the slot is populated, its scheme name matches the first 'len' bytes of
   the input according to strncasecompare(), and the handler's name ends
   exactly at 'len'. */
if(h && strncasecompare(scheme, h->scheme, len) && !h->scheme[len])
return h;
}
/* no handler is known for this scheme */
return NULL;
}
static CURLcode findprotocol(struct Curl_easy *data,
struct connectdata *conn,