url: find scheme with a "perfect hash"

Instead of looping over the potentially 30+ scheme names, this
uses a "perfect hash" table. This works because the set of schemes
is known and cannot change within a build. The hash algorithm and table
size are chosen so that no two schemes map to the same table entry.

The perfect hash table is generated by a separate tool (scripts/schemetable.c).

Closes #12347
This commit is contained in:
Daniel Stenberg 2023-11-17 13:04:13 +01:00
parent d6447b1bd2
commit b2d8f3f7f9
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2

343
lib/url.c
View File

@ -168,130 +168,6 @@ static curl_prot_t get_protocol_family(const struct Curl_handler *h)
return h->family; return h->family;
} }
/*
* Protocol table. Schemes (roughly) in 2019 popularity order:
*
* HTTPS, HTTP, FTP, FTPS, SFTP, FILE, SCP, SMTP, LDAP, IMAPS, TELNET, IMAP,
* LDAPS, SMTPS, TFTP, SMB, POP3, GOPHER, POP3S, RTSP, RTMP, SMBS, DICT
*
* Each handler is listed only when the preprocessor conditions guarding it
* hold, so the number of entries and the position of any given handler vary
* between build configurations. The unconditional NULL at the end is the
* last element in every build, which also means the array always has at
* least one element.
*/
static const struct Curl_handler * const protocols[] = {
#if defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_https,
#endif
#ifndef CURL_DISABLE_HTTP
&Curl_handler_http,
#endif
#ifdef USE_WEBSOCKETS
/* The two WebSocket handlers use the same conditions as the HTTPS and HTTP
handlers above, nested inside USE_WEBSOCKETS. */
#if defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_wss,
#endif
#ifndef CURL_DISABLE_HTTP
&Curl_handler_ws,
#endif
#endif
#ifndef CURL_DISABLE_FTP
&Curl_handler_ftp,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_FTP)
&Curl_handler_ftps,
#endif
#if defined(USE_SSH)
&Curl_handler_sftp,
#endif
#ifndef CURL_DISABLE_FILE
&Curl_handler_file,
#endif
/* Unlike SFTP, the SCP handler is left out when USE_WOLFSSH is defined. */
#if defined(USE_SSH) && !defined(USE_WOLFSSH)
&Curl_handler_scp,
#endif
#ifndef CURL_DISABLE_SMTP
&Curl_handler_smtp,
#ifdef USE_SSL
&Curl_handler_smtps,
#endif
#endif
#ifndef CURL_DISABLE_LDAP
&Curl_handler_ldap,
/* LDAPS is nested in the LDAP block and additionally needs either
USE_OPENLDAP together with USE_SSL, or HAVE_LDAP_SSL without USE_OPENLDAP. */
#if !defined(CURL_DISABLE_LDAPS) && \
((defined(USE_OPENLDAP) && defined(USE_SSL)) || \
(!defined(USE_OPENLDAP) && defined(HAVE_LDAP_SSL)))
&Curl_handler_ldaps,
#endif
#endif
#ifndef CURL_DISABLE_IMAP
&Curl_handler_imap,
#ifdef USE_SSL
&Curl_handler_imaps,
#endif
#endif
#ifndef CURL_DISABLE_TELNET
&Curl_handler_telnet,
#endif
#ifndef CURL_DISABLE_TFTP
&Curl_handler_tftp,
#endif
#ifndef CURL_DISABLE_POP3
&Curl_handler_pop3,
#ifdef USE_SSL
&Curl_handler_pop3s,
#endif
#endif
/* SMB requires USE_CURL_NTLM_CORE and SIZEOF_CURL_OFF_T greater than 4;
SMBS is nested inside the same block and also needs USE_SSL. */
#if !defined(CURL_DISABLE_SMB) && defined(USE_CURL_NTLM_CORE) && \
(SIZEOF_CURL_OFF_T > 4)
&Curl_handler_smb,
#ifdef USE_SSL
&Curl_handler_smbs,
#endif
#endif
#ifndef CURL_DISABLE_RTSP
&Curl_handler_rtsp,
#endif
#ifndef CURL_DISABLE_MQTT
&Curl_handler_mqtt,
#endif
#ifndef CURL_DISABLE_GOPHER
&Curl_handler_gopher,
#ifdef USE_SSL
&Curl_handler_gophers,
#endif
#endif
/* All six RTMP variants are enabled together by USE_LIBRTMP. */
#ifdef USE_LIBRTMP
&Curl_handler_rtmp,
&Curl_handler_rtmpt,
&Curl_handler_rtmpe,
&Curl_handler_rtmpte,
&Curl_handler_rtmps,
&Curl_handler_rtmpts,
#endif
#ifndef CURL_DISABLE_DICT
&Curl_handler_dict,
#endif
/* Always-present final entry. */
NULL
};
void Curl_freeset(struct Curl_easy *data) void Curl_freeset(struct Curl_easy *data)
{ {
/* Free all dynamic strings stored in the data->set substructure. */ /* Free all dynamic strings stored in the data->set substructure. */
@ -1593,17 +1469,216 @@ const struct Curl_handler *Curl_get_scheme_handler(const char *scheme)
/*
 * Curl_getn_scheme_handler() returns the protocol handler whose scheme name
 * equals the 'len' bytes starting at 'scheme', or NULL when no handler
 * matches. The hash loop below reads exactly 'len' bytes of 'scheme'.
 *
 * The lookup hashes the name to a single slot of a fixed 67-entry table and
 * then confirms the candidate with a name comparison, so at most one handler
 * is ever compared. Names that are empty or longer than 7 bytes are rejected
 * without hashing (presumably 7 is the longest supported scheme name --
 * confirm against schemetable.c).
 *
 * NOTE(review): in this rendering the lines from the signature down to the
 * numbered steps show the removed linear-scan implementation and the added
 * text side by side on the same lines; they are left untouched here.
 */
const struct Curl_handler *Curl_getn_scheme_handler(const char *scheme, const struct Curl_handler *Curl_getn_scheme_handler(const char *scheme,
size_t len) size_t len)
{ {
size_t i; /* table generated by schemetable.c:
/* Scan protocol handler table and match against 'scheme'. The handler may 1. gcc schemetable.c && ./a.out
be changed later when the protocol specific setup function is called. */ 2. check how small the table gets
for(i = 0; i < ARRAYSIZE(protocols) - 1; ++i) 3. tweak the hash algorithm, then rerun from 1
if(strncasecompare(protocols[i]->scheme, scheme, len) && 4. when the table is good enough
!protocols[i]->scheme[len]) 5. copy the table into this source code
/* Protocol found in table. */ 6. make sure this function uses the same hash function that worked for
return protocols[i]; schemetable.c
return NULL; /* not found */ 7. if needed, adjust the #ifdefs in schemetable.c and rerun
} */
/* 67 slots, indexed by (hash % 67). Every conditional entry has an #else
branch that stores NULL, so a handler keeps the same slot number no matter
which protocols are compiled in. The runs of plain NULLs are slots that no
handler occupies. Do not reorder, insert or remove entries by hand: the
positions must match what the generator tool produced. */
static const struct Curl_handler * const protocols[67] = {
#ifndef CURL_DISABLE_FILE
&Curl_handler_file,
#else
NULL,
#endif
NULL, NULL,
#if defined(USE_SSL) && !defined(CURL_DISABLE_GOPHER)
&Curl_handler_gophers,
#else
NULL,
#endif
NULL,
#ifdef USE_LIBRTMP
&Curl_handler_rtmpe,
#else
NULL,
#endif
#ifndef CURL_DISABLE_SMTP
&Curl_handler_smtp,
#else
NULL,
#endif
#if defined(USE_SSH)
&Curl_handler_sftp,
#else
NULL,
#endif
#if !defined(CURL_DISABLE_SMB) && defined(USE_CURL_NTLM_CORE) && \
(SIZEOF_CURL_OFF_T > 4)
&Curl_handler_smb,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_SMTP)
&Curl_handler_smtps,
#else
NULL,
#endif
#ifndef CURL_DISABLE_TELNET
&Curl_handler_telnet,
#else
NULL,
#endif
#ifndef CURL_DISABLE_GOPHER
&Curl_handler_gopher,
#else
NULL,
#endif
#ifndef CURL_DISABLE_TFTP
&Curl_handler_tftp,
#else
NULL,
#endif
NULL, NULL, NULL,
#if defined(USE_SSL) && !defined(CURL_DISABLE_FTP)
&Curl_handler_ftps,
#else
NULL,
#endif
#ifndef CURL_DISABLE_HTTP
&Curl_handler_http,
#else
NULL,
#endif
#ifndef CURL_DISABLE_IMAP
&Curl_handler_imap,
#else
NULL,
#endif
#ifdef USE_LIBRTMP
&Curl_handler_rtmps,
#else
NULL,
#endif
#ifdef USE_LIBRTMP
&Curl_handler_rtmpt,
#else
NULL,
#endif
NULL, NULL, NULL,
#if !defined(CURL_DISABLE_LDAP) && \
!defined(CURL_DISABLE_LDAPS) && \
((defined(USE_OPENLDAP) && defined(USE_SSL)) || \
(!defined(USE_OPENLDAP) && defined(HAVE_LDAP_SSL)))
&Curl_handler_ldaps,
#else
NULL,
#endif
#if defined(USE_WEBSOCKETS) && \
defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_wss,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_https,
#else
NULL,
#endif
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
#ifndef CURL_DISABLE_RTSP
&Curl_handler_rtsp,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_SMB) && \
defined(USE_CURL_NTLM_CORE) && (SIZEOF_CURL_OFF_T > 4)
&Curl_handler_smbs,
#else
NULL,
#endif
#if defined(USE_SSH) && !defined(USE_WOLFSSH)
&Curl_handler_scp,
#else
NULL,
#endif
NULL, NULL, NULL,
#ifndef CURL_DISABLE_POP3
&Curl_handler_pop3,
#else
NULL,
#endif
NULL, NULL,
#ifdef USE_LIBRTMP
&Curl_handler_rtmp,
#else
NULL,
#endif
NULL, NULL, NULL,
#ifdef USE_LIBRTMP
&Curl_handler_rtmpte,
#else
NULL,
#endif
NULL, NULL, NULL,
#ifndef CURL_DISABLE_DICT
&Curl_handler_dict,
#else
NULL,
#endif
NULL, NULL, NULL,
#ifndef CURL_DISABLE_MQTT
&Curl_handler_mqtt,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_POP3)
&Curl_handler_pop3s,
#else
NULL,
#endif
#if defined(USE_SSL) && !defined(CURL_DISABLE_IMAP)
&Curl_handler_imaps,
#else
NULL,
#endif
NULL,
#if defined(USE_WEBSOCKETS) && !defined(CURL_DISABLE_HTTP)
&Curl_handler_ws,
#else
NULL,
#endif
NULL,
#ifdef USE_LIBRTMP
&Curl_handler_rtmpts,
#else
NULL,
#endif
#ifndef CURL_DISABLE_LDAP
&Curl_handler_ldap,
#else
NULL,
#endif
NULL, NULL,
#ifndef CURL_DISABLE_FTP
&Curl_handler_ftp,
#else
NULL,
#endif
};
/* Only names of 1 to 7 bytes are hashed; anything else falls through to the
final "return NULL". */
if(len && (len <= 7)) {
const char *s = scheme;
size_t l = len;
const struct Curl_handler *h;
/* Hash: start from the seed 978 and, for each input byte, shift the
accumulator left by 5 bits and add the byte after Curl_raw_tolower()
(presumably ASCII lower-casing, which would make the hash
case-insensitive -- confirm). 'c' is unsigned, so bits shifted past its
width are simply discarded. The seed, the shift and the modulus 67 must
stay identical to the hash used by schemetable.c when it generated the
table above. */
unsigned int c = 978;
while(l) {
c <<= 5;
c += Curl_raw_tolower(*s);
s++;
l--;
}
/* 'c % 67' is always a valid index into the 67-entry table. */
h = protocols[c % 67];
/* The hash only selects a candidate slot. The slot may be NULL, and an
arbitrary input can land on an occupied slot, so the name is verified:
the first 'len' bytes must match (strncasecompare() is assumed to compare
without regard to case -- confirm) and the handler's scheme must end
exactly at 'len', which rejects inputs that are only a prefix of it. */
if(h && strncasecompare(scheme, h->scheme, len) && !h->scheme[len])
return h;
}
return NULL;
}
static CURLcode findprotocol(struct Curl_easy *data, static CURLcode findprotocol(struct Curl_easy *data,
struct connectdata *conn, struct connectdata *conn,