2018-08-05 17:51:07 +08:00
|
|
|
.\" **************************************************************************
|
|
|
|
.\" * _ _ ____ _
|
|
|
|
.\" * Project ___| | | | _ \| |
|
|
|
|
.\" * / __| | | | |_) | |
|
|
|
|
.\" * | (__| |_| | _ <| |___
|
|
|
|
.\" * \___|\___/|_| \_\_____|
|
|
|
|
.\" *
|
2023-01-02 20:51:48 +08:00
|
|
|
.\" * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
|
2018-08-05 17:51:07 +08:00
|
|
|
.\" *
|
|
|
|
.\" * This software is licensed as described in the file COPYING, which
|
|
|
|
.\" * you should have received as part of this distribution. The terms
|
2020-11-04 21:02:01 +08:00
|
|
|
.\" * are also available at https://curl.se/docs/copyright.html.
|
2018-08-05 17:51:07 +08:00
|
|
|
.\" *
|
|
|
|
.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell
|
|
|
|
.\" * copies of the Software, and permit persons to whom the Software is
|
|
|
|
.\" * furnished to do so, under the terms of the COPYING file.
|
|
|
|
.\" *
|
|
|
|
.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
|
|
.\" * KIND, either express or implied.
|
|
|
|
.\" *
|
|
|
|
.\" * SPDX-License-Identifier: curl
|
2022-05-17 17:16:50 +08:00
|
|
|
.\" *
|
2018-08-05 17:51:07 +08:00
|
|
|
.\" **************************************************************************
|
|
|
|
.TH curl_url_get 3 "6 Aug 2018" "libcurl" "libcurl Manual"
|
|
|
|
.SH NAME
|
|
|
|
curl_url_get - extract a part from a URL
|
|
|
|
.SH SYNOPSIS
|
|
|
|
.nf
|
2021-11-26 21:20:18 +08:00
|
|
|
#include <curl/curl.h>
|
|
|
|
|
2018-08-05 17:51:07 +08:00
|
|
|
CURLUcode curl_url_get(CURLU *url,
|
|
|
|
CURLUPart what,
|
|
|
|
char **part,
|
|
|
|
unsigned int flags)
|
|
|
|
.fi
|
|
|
|
.SH DESCRIPTION
|
|
|
|
Given the \fIurl\fP handle of an already parsed URL, this function lets the
|
|
|
|
user extract individual pieces from it.
|
|
|
|
|
|
|
|
The \fIwhat\fP argument should be the particular part to extract (see list
|
|
|
|
below) and \fIpart\fP points to a 'char *' to get updated to point to a newly
|
|
|
|
allocated string with the contents.
|
|
|
|
|
|
|
|
The \fIflags\fP argument is a bitmask with individual features.
|
|
|
|
|
|
|
|
The returned part pointer must be freed with \fIcurl_free(3)\fP after use.
|
|
|
|
.SH FLAGS
|
|
|
|
The flags argument is zero, one or more bits set in a bitmask.
|
|
|
|
.IP CURLU_DEFAULT_PORT
|
|
|
|
If the handle has no port stored, this option will make \fIcurl_url_get(3)\fP
|
|
|
|
return the default port for the used scheme.
|
|
|
|
.IP CURLU_DEFAULT_SCHEME
|
|
|
|
If the handle has no scheme stored, this option will make
|
|
|
|
\fIcurl_url_get(3)\fP return the default scheme instead of error.
|
|
|
|
.IP CURLU_NO_DEFAULT_PORT
|
|
|
|
Instructs \fIcurl_url_get(3)\fP to not return a port number if it matches the
|
|
|
|
default port for the scheme.
|
|
|
|
.IP CURLU_URLDECODE
|
|
|
|
Asks \fIcurl_url_get(3)\fP to URL decode the contents before returning them. It
|
|
|
|
will not attempt to decode the scheme, the port number or the full URL.
|
2022-12-15 19:57:48 +08:00
|
|
|
|
2018-10-09 03:37:40 +08:00
|
|
|
The query component will also get plus-to-space conversion as a bonus when
|
2018-08-05 17:51:07 +08:00
|
|
|
this bit is set.
|
|
|
|
|
|
|
|
Note that this URL decoding is charset unaware and you will get a zero
|
|
|
|
terminated string back with data that could be intended for a particular
|
|
|
|
encoding.
|
|
|
|
|
|
|
|
If there are any byte values lower than 32 in the decoded string, the get
|
|
|
|
operation will return an error instead.
|
2021-10-09 02:12:26 +08:00
|
|
|
.IP CURLU_URLENCODE
|
|
|
|
If set, will make \fIcurl_url_get(3)\fP URL encode the host name part when a
|
|
|
|
full URL is retrieved. If not set (default), libcurl returns the URL with the
|
|
|
|
host name "raw" to allow IDN names to appear as-is. IDN host names are
|
|
|
|
typically using non-ASCII bytes that would otherwise be percent-encoded.
|
|
|
|
|
|
|
|
Note that even when not asking for URL encoding, the '%' (byte 37) will be URL
|
|
|
|
encoded to make sure the host name remains valid.
|
2022-12-26 17:58:37 +08:00
|
|
|
.IP CURLU_PUNYCODE
|
|
|
|
If set and \fICURLU_URLENCODE\fP is not set, and asked to retrieve the
|
|
|
|
\fBCURLUPART_HOST\fP or \fBCURLUPART_URL\fP parts, libcurl returns the host
|
|
|
|
name in its punycode version if it contains any non-ASCII octets (and is an
|
|
|
|
IDN name).
|
|
|
|
|
|
|
|
If libcurl is built without IDN capabilities, using this bit will make
|
|
|
|
\fIcurl_url_get(3)\fP return \fICURLUE_LACKS_IDN\fP if the host name contains
|
|
|
|
anything outside the ASCII range.
|
|
|
|
|
|
|
|
(Added in curl 7.88.0)
|
2018-08-05 17:51:07 +08:00
|
|
|
.SH PARTS
|
|
|
|
.IP CURLUPART_URL
|
|
|
|
When asked to return the full URL, \fIcurl_url_get(3)\fP will return a
|
|
|
|
normalized and possibly cleaned up version of what was previously parsed.
|
|
|
|
.IP CURLUPART_SCHEME
|
|
|
|
Scheme cannot be URL decoded on get.
|
|
|
|
.IP CURLUPART_USER
|
|
|
|
.IP CURLUPART_PASSWORD
|
|
|
|
.IP CURLUPART_OPTIONS
|
|
|
|
.IP CURLUPART_HOST
|
2022-09-21 05:30:19 +08:00
|
|
|
The host name. If it is an IPv6 numeric address, the zone id will not be part
|
2019-09-26 07:12:27 +08:00
|
|
|
of it but is provided separately in \fICURLUPART_ZONEID\fP. IPv6 numerical
|
|
|
|
addresses are returned within brackets ([]).
|
2019-05-03 19:18:12 +08:00
|
|
|
.IP CURLUPART_ZONEID
|
|
|
|
If the host name is a numeric IPv6 address, this field might also be set.
|
2018-08-05 17:51:07 +08:00
|
|
|
.IP CURLUPART_PORT
|
|
|
|
Port cannot be URL decoded on get.
|
|
|
|
.IP CURLUPART_PATH
|
2021-08-13 15:22:05 +08:00
|
|
|
\fIpart\fP will be '/' even if no path is supplied in the URL.
|
2018-08-05 17:51:07 +08:00
|
|
|
.IP CURLUPART_QUERY
|
2021-08-13 15:22:05 +08:00
|
|
|
The initial question mark that denotes the beginning of the query part is
|
|
|
|
a delimiter only.
|
|
|
|
It is not part of the query contents.
|
|
|
|
|
|
|
|
|
|
|
|
An absent query will lead \fIpart\fP to be set to NULL.
|
|
|
|
A zero-length query will lead \fIpart\fP to be set to a zero-length string.
|
|
|
|
|
2018-08-05 17:51:07 +08:00
|
|
|
The query part will also get pluses converted to space when asked to URL
|
|
|
|
decode on get with the CURLU_URLDECODE bit.
|
|
|
|
.IP CURLUPART_FRAGMENT
|
|
|
|
.SH EXAMPLE
|
|
|
|
.nf
|
|
|
|
CURLUcode rc;
|
|
|
|
CURLU *url = curl_url();
|
|
|
|
rc = curl_url_set(url, CURLUPART_URL, "https://example.com", 0);
|
|
|
|
if(!rc) {
|
|
|
|
char *scheme;
|
|
|
|
rc = curl_url_get(url, CURLUPART_SCHEME, &scheme, 0);
|
|
|
|
if(!rc) {
|
2018-09-10 21:39:33 +08:00
|
|
|
printf("the scheme is %s\\n", scheme);
|
2018-08-05 17:51:07 +08:00
|
|
|
curl_free(scheme);
|
|
|
|
}
|
|
|
|
curl_url_cleanup(url);
|
|
|
|
}
|
|
|
|
.fi
|
|
|
|
.SH AVAILABILITY
|
2021-10-25 17:45:09 +08:00
|
|
|
Added in 7.62.0. CURLUPART_ZONEID was added in 7.65.0.
|
2021-10-25 14:54:08 +08:00
|
|
|
.SH RETURN VALUE
|
|
|
|
Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went
|
2021-11-26 15:46:59 +08:00
|
|
|
fine. See the \fIlibcurl-errors(3)\fP man page for the full list with
|
2021-10-25 14:54:08 +08:00
|
|
|
descriptions.
|
|
|
|
|
|
|
|
If this function returns an error, no URL part is returned.
|
2018-08-05 17:51:07 +08:00
|
|
|
.SH "SEE ALSO"
|
|
|
|
.BR curl_url_cleanup "(3), " curl_url "(3), " curl_url_set "(3), "
|
2021-09-27 14:22:54 +08:00
|
|
|
.BR curl_url_dup "(3), " curl_url_strerror "(3), " CURLOPT_CURLU "(3)"
|