diff --git a/docs/libcurl/curl_url_get.md b/docs/libcurl/curl_url_get.md index 2f3b89b5e5..100eea7f52 100644 --- a/docs/libcurl/curl_url_get.md +++ b/docs/libcurl/curl_url_get.md @@ -113,6 +113,18 @@ punycode. (Added in curl 8.3.0) +## CURLU_GET_EMPTY + +When this flag is used in curl_url_get(), it makes the function return empty +query and fragment parts, individually or in the full URL. By default, libcurl +otherwise considers empty parts non-existing. + +An empty query part is one where there is nothing following the question mark +(before the possible fragment). An empty fragment part is one where there is +nothing following the hash sign. + +(Added in curl 8.8.0) + # PARTS ## CURLUPART_URL diff --git a/docs/libcurl/symbols-in-versions b/docs/libcurl/symbols-in-versions index 6b37e52317..e5531df92c 100644 --- a/docs/libcurl/symbols-in-versions +++ b/docs/libcurl/symbols-in-versions @@ -1064,6 +1064,7 @@ CURLU_APPENDQUERY 7.62.0 CURLU_DEFAULT_PORT 7.62.0 CURLU_DEFAULT_SCHEME 7.62.0 CURLU_DISALLOW_USER 7.62.0 +CURLU_GET_EMPTY 8.8.0 CURLU_GUESS_SCHEME 7.62.0 CURLU_NO_AUTHORITY 7.67.0 CURLU_NO_DEFAULT_PORT 7.62.0 diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h index 91f8c4548a..19388c3c01 100644 --- a/include/curl/urlapi.h +++ b/include/curl/urlapi.h @@ -99,6 +99,9 @@ typedef enum { #define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */ #define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */ #define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */ +#define CURLU_GET_EMPTY (1<<14) /* allow empty queries and fragments + when extracting the URL or the + components */ typedef struct Curl_URL CURLU; diff --git a/lib/urlapi.c b/lib/urlapi.c index ab96ae218a..c6d2370116 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -79,7 +79,9 @@ struct Curl_URL { char *path; char *query; char *fragment; - long portnum; /* the numerical version */ + unsigned short portnum; /* the numerical version */ + BIT(query_present); /* to support blank */ + 
BIT(fragment_present); /* to support blank */ }; #define DEFAULT_SCHEME "https" @@ -561,7 +563,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host, if(rest[0]) return CURLUE_BAD_PORT_NUMBER; - u->portnum = port; + u->portnum = (unsigned short) port; /* generate a new port number string to get rid of leading zeroes etc */ free(u->port); u->port = aprintf("%ld", port); @@ -1245,6 +1247,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) fragment = strchr(path, '#'); if(fragment) { fraglen = pathlen - (fragment - path); + u->fragment_present = TRUE; if(fraglen > 1) { /* skip the leading '#' in the copy but include the terminating null */ if(flags & CURLU_URLENCODE) { @@ -1272,6 +1275,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) size_t qlen = fragment ? (size_t)(fragment - query) : pathlen - (query - path); pathlen -= qlen; + u->query_present = TRUE; if(qlen > 1) { if(flags & CURLU_URLENCODE) { struct dynbuf enc; @@ -1407,6 +1411,8 @@ CURLU *curl_url_dup(const CURLU *in) DUP(u, in, fragment); DUP(u, in, zoneid); u->portnum = in->portnum; + u->fragment_present = in->fragment_present; + u->query_present = in->query_present; } return u; fail: @@ -1491,10 +1497,16 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, ptr = u->query; ifmissing = CURLUE_NO_QUERY; plusdecode = urldecode; + if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY)) + /* there was a blank query and the user do not ask for it */ + ptr = NULL; break; case CURLUPART_FRAGMENT: ptr = u->fragment; ifmissing = CURLUE_NO_FRAGMENT; + if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY) + /* there was a blank fragment and the user asks for it */ + ptr = ""; break; case CURLUPART_URL: { char *url; @@ -1502,13 +1514,18 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, char *options = u->options; char *port = u->port; char *allochost = NULL; + bool show_fragment = + u->fragment || (u->fragment_present && flags & 
CURLU_GET_EMPTY); + bool show_query = + (u->query && u->query[0]) || + (u->query_present && flags & CURLU_GET_EMPTY); punycode = (flags & CURLU_PUNYCODE)?1:0; depunyfy = (flags & CURLU_PUNY2IDN)?1:0; if(u->scheme && strcasecompare("file", u->scheme)) { url = aprintf("file://%s%s%s", u->path, - u->fragment? "#": "", - u->fragment? u->fragment : ""); + show_fragment ? "#": "", + u->fragment ? u->fragment : ""); } else if(!u->host) return CURLUE_NO_HOST; @@ -1596,9 +1613,9 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, port ? ":": "", port ? port : "", u->path ? u->path : "/", - (u->query && u->query[0]) ? "?": "", - (u->query && u->query[0]) ? u->query : "", - u->fragment? "#": "", + show_query ? "?": "", + u->query ? u->query : "", + show_fragment ? "#": "", u->fragment? u->fragment : ""); free(allochost); } @@ -1733,9 +1750,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, break; case CURLUPART_QUERY: storep = &u->query; + u->query_present = FALSE; break; case CURLUPART_FRAGMENT: storep = &u->fragment; + u->fragment_present = FALSE; break; default: return CURLUE_UNKNOWN_PART; @@ -1819,9 +1838,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, appendquery = (flags & CURLU_APPENDQUERY)?1:0; equalsencode = appendquery; storep = &u->query; + u->query_present = TRUE; break; case CURLUPART_FRAGMENT: storep = &u->fragment; + u->fragment_present = TRUE; break; case CURLUPART_URL: { /* @@ -1972,6 +1993,6 @@ nomem: /* set after the string, to make it not assigned if the allocation above fails */ if(port) - u->portnum = port; + u->portnum = (unsigned short)port; return CURLUE_OK; } diff --git a/tests/data/test1201 b/tests/data/test1201 index 296e7fa6a0..f4c1a48386 100644 --- a/tests/data/test1201 +++ b/tests/data/test1201 @@ -25,7 +25,7 @@ gopher Gopher selector -gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER? 
+gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER @@ -33,7 +33,7 @@ gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER? # Verify data after the test has been "shot" -/selector/SELECTOR/%TESTNUMBER? +/selector/SELECTOR/%TESTNUMBER diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index 5e693390d1..0a872e9496 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -151,6 +151,21 @@ struct clearurlcase { }; static const struct testcase get_parts_list[] ={ + {"https://curl.se/#", + "https | [11] | [12] | [13] | curl.se | [15] | / | [16] | ", + 0, CURLU_GET_EMPTY, CURLUE_OK}, + {"https://curl.se/?#", + "https | [11] | [12] | [13] | curl.se | [15] | / | | ", + 0, CURLU_GET_EMPTY, CURLUE_OK}, + {"https://curl.se/?", + "https | [11] | [12] | [13] | curl.se | [15] | / | | [17]", + 0, CURLU_GET_EMPTY, CURLUE_OK}, + {"https://curl.se/?", + "https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]", + 0, 0, CURLUE_OK}, + {"https://curl.se/?#", + "https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]", + 0, 0, CURLUE_OK}, {"https://curl.se/# ", "https | [11] | [12] | [13] | curl.se | [15] | / | [16] | %20%20", CURLU_URLENCODE|CURLU_ALLOW_SPACE, 0, CURLUE_OK}, @@ -508,6 +523,9 @@ static const struct testcase get_parts_list[] ={ }; static const struct urltestcase get_url_list[] = { + {"http://user@example.com?#", + "http://user@example.com/?#", + 0, CURLU_GET_EMPTY, CURLUE_OK}, /* WHATWG disgrees, it wants "https:/0.0.0.0/" */ {"https://0x.0x.0", "https://0x.0x.0/", 0, 0, CURLUE_OK}, @@ -781,6 +799,18 @@ static int checkurl(const char *org, const char *url, const char *out) 3. 
Extract all components (not URL) */ static const struct setgetcase setget_parts_list[] = { + {"https://example.com/", + "query=\"\",", + "https | [11] | [12] | [13] | example.com | [15] | / | | [17]", + 0, 0, CURLU_GET_EMPTY, CURLUE_OK}, + {"https://example.com/", + "fragment=\"\",", + "https | [11] | [12] | [13] | example.com | [15] | / | [16] | ", + 0, 0, CURLU_GET_EMPTY, CURLUE_OK}, + {"https://example.com/", + "query=\"\",", + "https | [11] | [12] | [13] | example.com | [15] | / | [16] | [17]", + 0, 0, 0, CURLUE_OK}, {"https://example.com", "path=get,", "https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",