diff --git a/docs/libcurl/curl_url_get.md b/docs/libcurl/curl_url_get.md
index 2f3b89b5e5..100eea7f52 100644
--- a/docs/libcurl/curl_url_get.md
+++ b/docs/libcurl/curl_url_get.md
@@ -113,6 +113,18 @@ punycode.
(Added in curl 8.3.0)
+## CURLU_GET_EMPTY
+
+When this flag is used in curl_url_get(), it makes the function return empty
+query and fragment parts, both as individual parts and as part of the full
+URL. By default, libcurl otherwise considers empty parts non-existing.
+
+An empty query part is one where there is nothing following the question mark
+(before the possible fragment). An empty fragment part is one where there is
+nothing following the hash sign.
+
+(Added in curl 8.8.0)
+
# PARTS
## CURLUPART_URL
diff --git a/docs/libcurl/symbols-in-versions b/docs/libcurl/symbols-in-versions
index 6b37e52317..e5531df92c 100644
--- a/docs/libcurl/symbols-in-versions
+++ b/docs/libcurl/symbols-in-versions
@@ -1064,6 +1064,7 @@ CURLU_APPENDQUERY 7.62.0
CURLU_DEFAULT_PORT 7.62.0
CURLU_DEFAULT_SCHEME 7.62.0
CURLU_DISALLOW_USER 7.62.0
+CURLU_GET_EMPTY 8.8.0
CURLU_GUESS_SCHEME 7.62.0
CURLU_NO_AUTHORITY 7.67.0
CURLU_NO_DEFAULT_PORT 7.62.0
diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h
index 91f8c4548a..19388c3c01 100644
--- a/include/curl/urlapi.h
+++ b/include/curl/urlapi.h
@@ -99,6 +99,9 @@ typedef enum {
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */
+#define CURLU_GET_EMPTY (1<<14) /* allow empty queries and fragments
+ when extracting the URL or the
+ components */
typedef struct Curl_URL CURLU;
diff --git a/lib/urlapi.c b/lib/urlapi.c
index ab96ae218a..c6d2370116 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -79,7 +79,9 @@ struct Curl_URL {
char *path;
char *query;
char *fragment;
- long portnum; /* the numerical version */
+ unsigned short portnum; /* the numerical version */
+ BIT(query_present); /* to support blank */
+ BIT(fragment_present); /* to support blank */
};
#define DEFAULT_SCHEME "https"
@@ -561,7 +563,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
if(rest[0])
return CURLUE_BAD_PORT_NUMBER;
- u->portnum = port;
+ u->portnum = (unsigned short) port;
/* generate a new port number string to get rid of leading zeroes etc */
free(u->port);
u->port = aprintf("%ld", port);
@@ -1245,6 +1247,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
fragment = strchr(path, '#');
if(fragment) {
fraglen = pathlen - (fragment - path);
+ u->fragment_present = TRUE;
if(fraglen > 1) {
/* skip the leading '#' in the copy but include the terminating null */
if(flags & CURLU_URLENCODE) {
@@ -1272,6 +1275,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
size_t qlen = fragment ? (size_t)(fragment - query) :
pathlen - (query - path);
pathlen -= qlen;
+ u->query_present = TRUE;
if(qlen > 1) {
if(flags & CURLU_URLENCODE) {
struct dynbuf enc;
@@ -1407,6 +1411,8 @@ CURLU *curl_url_dup(const CURLU *in)
DUP(u, in, fragment);
DUP(u, in, zoneid);
u->portnum = in->portnum;
+ u->fragment_present = in->fragment_present;
+ u->query_present = in->query_present;
}
return u;
fail:
@@ -1491,10 +1497,16 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
ptr = u->query;
ifmissing = CURLUE_NO_QUERY;
plusdecode = urldecode;
+ if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
+ /* there was a blank query and the user does not ask for it */
+ ptr = NULL;
break;
case CURLUPART_FRAGMENT:
ptr = u->fragment;
ifmissing = CURLUE_NO_FRAGMENT;
+ if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
+ /* there was a blank fragment and the user asks for it */
+ ptr = "";
break;
case CURLUPART_URL: {
char *url;
@@ -1502,13 +1514,18 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
char *options = u->options;
char *port = u->port;
char *allochost = NULL;
+ bool show_fragment =
+ u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
+ bool show_query =
+ (u->query && u->query[0]) ||
+ (u->query_present && flags & CURLU_GET_EMPTY);
punycode = (flags & CURLU_PUNYCODE)?1:0;
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s",
u->path,
- u->fragment? "#": "",
- u->fragment? u->fragment : "");
+ show_fragment ? "#": "",
+ u->fragment ? u->fragment : "");
}
else if(!u->host)
return CURLUE_NO_HOST;
@@ -1596,9 +1613,9 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
port ? ":": "",
port ? port : "",
u->path ? u->path : "/",
- (u->query && u->query[0]) ? "?": "",
- (u->query && u->query[0]) ? u->query : "",
- u->fragment? "#": "",
+ show_query ? "?": "",
+ u->query ? u->query : "",
+ show_fragment ? "#": "",
u->fragment? u->fragment : "");
free(allochost);
}
@@ -1733,9 +1750,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
break;
case CURLUPART_QUERY:
storep = &u->query;
+ u->query_present = FALSE;
break;
case CURLUPART_FRAGMENT:
storep = &u->fragment;
+ u->fragment_present = FALSE;
break;
default:
return CURLUE_UNKNOWN_PART;
@@ -1819,9 +1838,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
appendquery = (flags & CURLU_APPENDQUERY)?1:0;
equalsencode = appendquery;
storep = &u->query;
+ u->query_present = TRUE;
break;
case CURLUPART_FRAGMENT:
storep = &u->fragment;
+ u->fragment_present = TRUE;
break;
case CURLUPART_URL: {
/*
@@ -1972,6 +1993,6 @@ nomem:
/* set after the string, to make it not assigned if the allocation above
fails */
if(port)
- u->portnum = port;
+ u->portnum = (unsigned short)port;
return CURLUE_OK;
}
diff --git a/tests/data/test1201 b/tests/data/test1201
index 296e7fa6a0..f4c1a48386 100644
--- a/tests/data/test1201
+++ b/tests/data/test1201
@@ -25,7 +25,7 @@ gopher
Gopher selector
-gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER?
+gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER
@@ -33,7 +33,7 @@ gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER?
# Verify data after the test has been "shot"
-/selector/SELECTOR/%TESTNUMBER?
+/selector/SELECTOR/%TESTNUMBER
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c
index 5e693390d1..0a872e9496 100644
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -151,6 +151,21 @@ struct clearurlcase {
};
static const struct testcase get_parts_list[] ={
+ {"https://curl.se/#",
+ "https | [11] | [12] | [13] | curl.se | [15] | / | [16] | ",
+ 0, CURLU_GET_EMPTY, CURLUE_OK},
+ {"https://curl.se/?#",
+ "https | [11] | [12] | [13] | curl.se | [15] | / | | ",
+ 0, CURLU_GET_EMPTY, CURLUE_OK},
+ {"https://curl.se/?",
+ "https | [11] | [12] | [13] | curl.se | [15] | / | | [17]",
+ 0, CURLU_GET_EMPTY, CURLUE_OK},
+ {"https://curl.se/?",
+ "https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
+ 0, 0, CURLUE_OK},
+ {"https://curl.se/?#",
+ "https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
+ 0, 0, CURLUE_OK},
{"https://curl.se/# ",
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | %20%20",
CURLU_URLENCODE|CURLU_ALLOW_SPACE, 0, CURLUE_OK},
@@ -508,6 +523,9 @@ static const struct testcase get_parts_list[] ={
};
static const struct urltestcase get_url_list[] = {
+ {"http://user@example.com?#",
+ "http://user@example.com/?#",
+ 0, CURLU_GET_EMPTY, CURLUE_OK},
/* WHATWG disgrees, it wants "https:/0.0.0.0/" */
{"https://0x.0x.0", "https://0x.0x.0/", 0, 0, CURLUE_OK},
@@ -781,6 +799,18 @@ static int checkurl(const char *org, const char *url, const char *out)
3. Extract all components (not URL)
*/
static const struct setgetcase setget_parts_list[] = {
+ {"https://example.com/",
+ "query=\"\",",
+ "https | [11] | [12] | [13] | example.com | [15] | / | | [17]",
+ 0, 0, CURLU_GET_EMPTY, CURLUE_OK},
+ {"https://example.com/",
+ "fragment=\"\",",
+ "https | [11] | [12] | [13] | example.com | [15] | / | [16] | ",
+ 0, 0, CURLU_GET_EMPTY, CURLUE_OK},
+ {"https://example.com/",
+ "query=\"\",",
+ "https | [11] | [12] | [13] | example.com | [15] | / | [16] | [17]",
+ 0, 0, 0, CURLUE_OK},
{"https://example.com",
"path=get,",
"https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",