curl/lib/curl_multibyte.c
Jay Satiro 09363500b9 curl_multibyte: always return a heap-allocated copy of string
- Change the Windows char <-> UTF-8 conversion functions to return an
  allocated copy of the passed in string instead of the original.

Prior to this change the curlx_convert_ functions would, as what I
assume was an optimization, not make a copy of the passed in string if
no conversion was required. No conversion is required in non-UNICODE
Windows builds since our tchar strings are type char and remain in
whatever the passed in encoding is, which is assumed to be UTF-8 but may
be other encoding.

In contrast the UNICODE Windows builds require conversion
(wchar <-> char) and do return a copy. That inconsistency could lead to
programming errors where the developer expects a copy, and does not
realize that won't happen in all cases.

Closes https://github.com/curl/curl/pull/6602
2021-02-20 14:39:39 -05:00

172 lines
4.2 KiB
C

/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at https://curl.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
***************************************************************************/
/*
* This file is 'mem-include-scan' clean, which means memdebug.h and
* curl_memory.h are purposely not included in this file. See test 1132.
*
* The functions in this file are curlx functions which are not tracked by the
* curl memory tracker memdebug.
*/
#include "curl_setup.h"
#if defined(WIN32)
#include "curl_multibyte.h"
/*
* MultiByte conversions using Windows kernel32 library.
*/
wchar_t *curlx_convert_UTF8_to_wchar(const char *str_utf8)
{
wchar_t *str_w = NULL;
if(str_utf8) {
int str_w_len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
str_utf8, -1, NULL, 0);
if(str_w_len > 0) {
str_w = malloc(str_w_len * sizeof(wchar_t));
if(str_w) {
if(MultiByteToWideChar(CP_UTF8, 0, str_utf8, -1, str_w,
str_w_len) == 0) {
free(str_w);
return NULL;
}
}
}
}
return str_w;
}
char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w)
{
char *str_utf8 = NULL;
if(str_w) {
int bytes = WideCharToMultiByte(CP_UTF8, 0, str_w, -1,
NULL, 0, NULL, NULL);
if(bytes > 0) {
str_utf8 = malloc(bytes);
if(str_utf8) {
if(WideCharToMultiByte(CP_UTF8, 0, str_w, -1, str_utf8, bytes,
NULL, NULL) == 0) {
free(str_utf8);
return NULL;
}
}
}
}
return str_utf8;
}
#endif /* WIN32 */
#if defined(USE_WIN32_LARGE_FILES) || defined(USE_WIN32_SMALL_FILES)
int curlx_win32_open(const char *filename, int oflag, ...)
{
int pmode = 0;
#ifdef _UNICODE
int result = -1;
wchar_t *filename_w = curlx_convert_UTF8_to_wchar(filename);
#endif
va_list param;
va_start(param, oflag);
if(oflag & O_CREAT)
pmode = va_arg(param, int);
va_end(param);
#ifdef _UNICODE
if(filename_w)
result = _wopen(filename_w, oflag, pmode);
free(filename_w);
if(result != -1)
return result;
#endif
return (_open)(filename, oflag, pmode);
}
FILE *curlx_win32_fopen(const char *filename, const char *mode)
{
#ifdef _UNICODE
FILE *result = NULL;
wchar_t *filename_w = curlx_convert_UTF8_to_wchar(filename);
wchar_t *mode_w = curlx_convert_UTF8_to_wchar(mode);
if(filename_w && mode_w)
result = _wfopen(filename_w, mode_w);
free(filename_w);
free(mode_w);
if(result)
return result;
#endif
return (fopen)(filename, mode);
}
int curlx_win32_stat(const char *path, struct_stat *buffer)
{
int result = -1;
#ifdef _UNICODE
wchar_t *path_w = curlx_convert_UTF8_to_wchar(path);
if(path_w) {
#if defined(USE_WIN32_SMALL_FILES)
result = _wstat(path_w, buffer);
#else
result = _wstati64(path_w, buffer);
#endif
free(path_w);
if(result != -1)
return result;
}
#endif /* _UNICODE */
#if defined(USE_WIN32_SMALL_FILES)
result = _stat(path, buffer);
#else
result = _stati64(path, buffer);
#endif
return result;
}
int curlx_win32_access(const char *path, int mode)
{
#if defined(_UNICODE)
wchar_t *path_w = curlx_convert_UTF8_to_wchar(path);
if(path_w) {
int result = _waccess(path_w, mode);
free(path_w);
if(result != -1)
return result;
}
#endif /* _UNICODE */
return _access(path, mode);
}
#endif /* USE_WIN32_LARGE_FILES || USE_WIN32_SMALL_FILES */