urlapi: fix parsing URL without slash with CURLU_URLENCODE

When CURLU_URLENCODE is set, the parser would mistreat the path component if the URL was specified without a slash, like in http://local.test:80?-123

Extended test 1560 to reproduce and verify the fix.

Reported-by: Trail of Bits
Closes #9763
2025-03-01 15:15:34 +08:00 · 2022-10-18 15:54:06 +02:00 · 2022-10-18 15:54:06 +02:00 · 7d6cf06f57
commit 7d6cf06f57
parent 81094cb492
2 changed files with 65 additions and 30 deletions
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@ -116,26 +116,26 @@ static const char *find_host_sep(const char *url)
 }
 /*
- * Decide in an encoding-independent manner whether a character in a
+ * Decide in an encoding-independent manner whether a character in a URL must
- * URL must be escaped. The same criterion must be used in strlen_url()
+ * be escaped. This is used in urlencode_str().
 * and strcpy_url().
 */
 static bool urlchar_needs_escaping(int c)
 {
  return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
 }
-/* strcpy_url() creates a url in an output dynbuf and URL-encodes the spaces
+/* urlencode_str() writes data into an output dynbuf and URL-encodes the
- * in the source URL accordingly.
+ * spaces in the source URL accordingly.
 *
 * URL encoding should be skipped for host names, otherwise IDN resolution
 * will fail.
 *
 */
-static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
+static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
                               size_t len, bool relative,
                               bool query)
 {
  /* we must add this with whitespace-replacing */
-  bool left = TRUE;
+  bool left = !query;
  const unsigned char *iptr;
  const unsigned char *host_sep = (const unsigned char *) url;
@ -143,8 +143,7 @@ static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
    host_sep = (const unsigned char *) find_host_sep(url);
  for(iptr = (unsigned char *)url;    /* read from here */
-      *iptr;         /* until zero byte */
+      len; iptr++, len--) {
      iptr++) {
    if(iptr < host_sep) {
      if(Curl_dyn_addn(o, iptr, 1))
@ -361,7 +360,7 @@ static char *concat_url(char *base, const char *relurl)
  }
  /* then append the new piece on the right side */
-  strcpy_url(&newest, useurl, !host_changed);
+  urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
  return Curl_dyn_ptr(&newest);
 }
@ -1130,16 +1129,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
    }
  }
  if(*path && (flags & CURLU_URLENCODE)) {
    struct dynbuf enc;
    Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    if(strcpy_url(&enc, path, TRUE)) { /* consider it relative */
      result = CURLUE_OUT_OF_MEMORY;
      goto fail;
    }
    path = u->path = Curl_dyn_ptr(&enc);
  }
  fragment = strchr(path, '#');
  if(fragment) {
    fraglen = strlen(fragment);
@ -1163,12 +1152,25 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
    size_t qlen = strlen(query) - fraglen; /* includes '?' */
    pathlen = strlen(path) - qlen - fraglen;
    if(qlen > 1) {
-      u->query = Curl_memdup(query + 1, qlen);
+      if(qlen && (flags & CURLU_URLENCODE)) {
-      if(!u->query) {
+        struct dynbuf enc;
-        result = CURLUE_OUT_OF_MEMORY;
+        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
-        goto fail;
+        /* skip the leading question mark */
        if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
        qlen = Curl_dyn_len(&enc);
        query = u->query = Curl_dyn_ptr(&enc);
      }
      else {
        u->query = Curl_memdup(query + 1, qlen);
        if(!u->query) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
        u->query[qlen - 1] = 0;
      }
      u->query[qlen - 1] = 0;
      if(junkscan(u->query, flags)) {
        result = CURLUE_BAD_QUERY;
@ -1187,6 +1189,17 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
  else
    pathlen = strlen(path) - fraglen;
  if(pathlen && (flags & CURLU_URLENCODE)) {
    struct dynbuf enc;
    Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
      result = CURLUE_OUT_OF_MEMORY;
      goto fail;
    }
    pathlen = Curl_dyn_len(&enc);
    path = u->path = Curl_dyn_ptr(&enc);
  }
  if(!pathlen) {
    /* there is no path left, unset */
    path = NULL;
@ -1563,13 +1576,15 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
    break;
  }
  if(ptr) {
-    *part = strdup(ptr);
+    size_t partlen = strlen(ptr);
    size_t i = 0;
    *part = Curl_memdup(ptr, partlen + 1);
    if(!*part)
      return CURLUE_OUT_OF_MEMORY;
    if(plusdecode) {
      /* convert + to space */
-      char *plus;
+      char *plus = *part;
-      for(plus = *part; *plus; ++plus) {
+      for(i = 0; i < partlen; ++plus, i++) {
        if(*plus == '+')
          *plus = ' ';
      }
@ -1586,11 +1601,13 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
        return CURLUE_URLDECODE;
      }
      *part = decoded;
      partlen = dlen;
    }
    if(urlencode) {
      struct dynbuf enc;
      Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
-      if(strcpy_url(&enc, *part, TRUE)) /* consider it relative */
+      if(urlencode_str(&enc, *part, partlen, TRUE,
                       what == CURLUPART_QUERY))
        return CURLUE_OUT_OF_MEMORY;
      free(*part);
      *part = Curl_dyn_ptr(&enc);
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@ -138,6 +138,12 @@ struct clearurlcase {
 };
 static const struct testcase get_parts_list[] ={
  {"https://user@example.net?he l lo",
   "https | user | [12] | [13] | example.net | [15] | / | he+l+lo | [17]",
   CURLU_ALLOW_SPACE, CURLU_URLENCODE, CURLUE_OK},
  {"https://user@example.net?he l lo",
   "https | user | [12] | [13] | example.net | [15] | / | he l lo | [17]",
   CURLU_ALLOW_SPACE, 0, CURLUE_OK},
  {"https://exam{}[]ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
  {"https://exam{ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
  {"https://exam}ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
@ -849,6 +855,18 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags)
 }
 static const struct redircase set_url_list[] = {
  {"http://example.com/please/../gimme/%TESTNUMBER?foobar#hello",
   "http://example.net/there/it/is/../../tes t case=/%TESTNUMBER0002? yes no",
   "http://example.net/there/tes%20t%20case=/%TESTNUMBER0002?+yes+no",
   0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
  {"http://local.test?redirect=http://local.test:80?-321",
   "http://local.test:80?-123",
   "http://local.test:80/?-123",
   0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
  {"http://local.test?redirect=http://local.test:80?-321",
   "http://local.test:80?-123",
   "http://local.test:80/?-123",
   0, 0, CURLUE_OK},
  {"http://example.org/static/favicon/wikipedia.ico",
   "//fake.example.com/licenses/by-sa/3.0/",
   "http://fake.example.com/licenses/by-sa/3.0/",