Merge pull request #100132 from Ivorforce/string-compile-time-strlen

Optimize String construction from statically known strings by evaluating `strlen` at compile-time.
This commit is contained in:
Thaddeus Crews 2024-12-10 14:15:49 -06:00
commit 78215f3cc6
No known key found for this signature in database
GPG Key ID: 62181B86FE9E5D84
2 changed files with 177 additions and 179 deletions

View File

@ -65,15 +65,6 @@ const char16_t Char16String::_null = 0;
const char32_t String::_null = 0;
const char32_t String::_replacement_char = 0xfffd;
// Counts the char32_t units that precede the terminating zero of p_str.
// p_str is dereferenced unconditionally, so it must be a valid zero-terminated string.
_FORCE_INLINE_ size_t strlen(const char32_t *p_str) {
	size_t count = 0;
	for (; p_str[count] != 0; ++count) {
	}
	return count;
}
bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
const String &s = p_s;
int beg = CLAMP(p_col, 0, s.length());
@ -313,80 +304,32 @@ Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r
return OK;
}
void String::copy_from(const char *p_cstr) {
// copy Latin-1 encoded c-string directly
if (!p_cstr) {
void String::copy_from(const StrRange<char> &p_cstr) {
if (p_cstr.len == 0) {
resize(0);
return;
}
const size_t len = strlen(p_cstr);
resize(p_cstr.len + 1); // include 0
if (len == 0) {
resize(0);
return;
}
resize(len + 1); // include 0
const char *end = p_cstr + len;
const char *src = p_cstr.c_str;
const char *end = src + p_cstr.len;
char32_t *dst = ptrw();
for (; p_cstr < end; ++p_cstr, ++dst) {
for (; src < end; ++src, ++dst) {
// If char is int8_t, a set sign bit will be reinterpreted as 256 - val implicitly.
*dst = static_cast<uint8_t>(*p_cstr);
*dst = static_cast<uint8_t>(*src);
}
*dst = 0;
}
void String::copy_from(const char *p_cstr, const int p_clip_to) {
// copy Latin-1 encoded c-string directly
if (!p_cstr) {
void String::copy_from(const StrRange<char32_t> &p_cstr) {
if (p_cstr.len == 0) {
resize(0);
return;
}
int len = 0;
const char *ptr = p_cstr;
while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) {
len++;
}
if (len == 0) {
resize(0);
return;
}
resize(len + 1); // include 0
const char *end = p_cstr + len;
char32_t *dst = ptrw();
for (; p_cstr < end; ++p_cstr, ++dst) {
// If char is int8_t, a set sign bit will be reinterpreted as 256 - val implicitly.
*dst = static_cast<uint8_t>(*p_cstr);
}
*dst = 0;
}
// Copies a zero-terminated wide string; the path taken is selected by WINDOWS_ENABLED.
void String::copy_from(const wchar_t *p_cstr) {
#ifndef WINDOWS_ENABLED
	// wchar_t is 32-bit here: reuse the char32_t overload on the same characters.
	copy_from(reinterpret_cast<const char32_t *>(p_cstr));
#else
	// wchar_t is 16-bit here: hand the characters to the UTF-16 parser.
	parse_utf16(reinterpret_cast<const char16_t *>(p_cstr));
#endif
}
void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
#ifdef WINDOWS_ENABLED
// wchar_t is 16-bit, parse as UTF-16
parse_utf16((const char16_t *)p_cstr, p_clip_to);
#else
// wchar_t is 32-bit, copy directly
copy_from((const char32_t *)p_cstr, p_clip_to);
#endif
copy_from_unchecked(p_cstr.c_str, p_cstr.len);
}
void String::copy_from(const char32_t &p_char) {
@ -412,42 +355,6 @@ void String::copy_from(const char32_t &p_char) {
dst[1] = 0;
}
// Copies a zero-terminated char32_t string.
// A null pointer and an empty string both resize the String to 0.
void String::copy_from(const char32_t *p_cstr) {
	const int char_count = p_cstr ? strlen(p_cstr) : 0;
	if (char_count == 0) {
		resize(0);
		return;
	}

	copy_from_unchecked(p_cstr, char_count);
}
// Copies at most p_clip_to characters of a zero-terminated char32_t string.
// A negative p_clip_to disables the limit. A null pointer or a zero-length result
// resizes the String to 0.
void String::copy_from(const char32_t *p_cstr, const int p_clip_to) {
	int char_count = 0;
	if (p_cstr) {
		// Stop at the terminator, or once the clip limit (when active) is reached.
		while ((p_clip_to < 0 || char_count < p_clip_to) && p_cstr[char_count] != 0) {
			++char_count;
		}
	}

	if (char_count == 0) {
		resize(0);
		return;
	}

	copy_from_unchecked(p_cstr, char_count);
}
// assumes the following have already been validated:
// p_char != nullptr
// p_length > 0
@ -475,18 +382,6 @@ void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
*dst = 0;
}
// Assignment from a char C string: forwards to copy_from(const char *).
void String::operator=(const char *p_str) {
copy_from(p_str);
}
// Assignment from a char32_t C string: forwards to copy_from(const char32_t *).
void String::operator=(const char32_t *p_str) {
copy_from(p_str);
}
// Assignment from a wchar_t C string: forwards to copy_from(const wchar_t *).
void String::operator=(const wchar_t *p_str) {
copy_from(p_str);
}
String String::operator+(const String &p_str) const {
String res = *this;
res += p_str;
@ -697,7 +592,7 @@ bool String::operator==(const String &p_str) const {
return true;
}
bool String::operator==(const StrRange &p_str_range) const {
bool String::operator==(const StrRange<char32_t> &p_str_range) const {
int len = p_str_range.len;
if (length() != len) {
@ -2512,37 +2407,6 @@ Char16String String::utf16() const {
return utf16s;
}
// Construct from a char C string; the work is done by copy_from(const char *).
String::String(const char *p_str) {
copy_from(p_str);
}
// Construct from a wchar_t C string; the work is done by copy_from(const wchar_t *).
String::String(const wchar_t *p_str) {
copy_from(p_str);
}
// Construct from a char32_t C string; the work is done by copy_from(const char32_t *).
String::String(const char32_t *p_str) {
copy_from(p_str);
}
// Construct from a char C string, passing p_clip_to_len on to the two-argument copy_from.
String::String(const char *p_str, int p_clip_to_len) {
copy_from(p_str, p_clip_to_len);
}
// Construct from a wchar_t C string, passing p_clip_to_len on to the two-argument copy_from.
String::String(const wchar_t *p_str, int p_clip_to_len) {
copy_from(p_str, p_clip_to_len);
}
// Construct from a char32_t C string, passing p_clip_to_len on to the two-argument copy_from.
String::String(const char32_t *p_str, int p_clip_to_len) {
copy_from(p_str, p_clip_to_len);
}
// Construct from an explicit pointer/length pair.
String::String(const StrRange &p_range) {
	// A range without a pointer leaves the String as default-constructed.
	if (p_range.c_str != nullptr) {
		copy_from(p_range.c_str, p_range.len);
	}
}
int64_t String::hex_to_int() const {
int len = length();
if (len == 0) {

View File

@ -39,6 +39,89 @@
#include "core/typedefs.h"
#include "core/variant/array.h"
/*************************************************************************/
/* Utility Functions */
/*************************************************************************/
// Not defined by std.
// strlen counterpart for char16_t strings: number of char16_t units before the
// terminating zero. p_str is dereferenced unconditionally and must not be null.
constexpr size_t strlen(const char16_t *p_str) {
	size_t count = 0;
	while (p_str[count] != 0) {
		++count;
	}
	return count;
}
// strlen counterpart for char32_t strings: number of char32_t units before the
// terminating zero. p_str is dereferenced unconditionally and must not be null.
constexpr size_t strlen(const char32_t *p_str) {
	size_t count = 0;
	while (p_str[count] != 0) {
		++count;
	}
	return count;
}
// strlen equivalent function for wchar_t * arguments.
// Returns the number of wchar_t units before the terminating zero, so the count is in
// 16-bit units where wchar_t is 16-bit and in 32-bit units where it is 32-bit.
// The string is walked as wchar_t directly rather than being converted through
// `const void *` to char16_t/char32_t: that conversion is not permitted inside a
// constant expression and reads the characters through an unrelated type.
// str is dereferenced unconditionally and must not be null.
constexpr size_t strlen(const wchar_t *str) {
	const wchar_t *ptr = str;
	while (*ptr != 0) {
		++ptr;
	}
	return ptr - str;
}
// Length of the zero-terminated string p_str, capped at p_clip_to_len characters.
// A negative p_clip_to_len means "no cap" and defers to strlen.
constexpr size_t _strlen_clipped(const char *p_str, int p_clip_to_len) {
	if (p_clip_to_len < 0) {
		return strlen(p_str);
	}

	const size_t limit = static_cast<size_t>(p_clip_to_len);
	size_t count = 0;
	// Characters at or beyond the cap are never read.
	for (; count < limit && p_str[count] != 0; ++count) {
	}
	return count;
}
// Length of the zero-terminated char32_t string p_str, capped at p_clip_to_len characters.
// A negative p_clip_to_len means "no cap" and defers to strlen.
constexpr size_t _strlen_clipped(const char32_t *p_str, int p_clip_to_len) {
	if (p_clip_to_len < 0) {
		return strlen(p_str);
	}

	const size_t limit = static_cast<size_t>(p_clip_to_len);
	size_t count = 0;
	// Characters at or beyond the cap are never read.
	for (; count < limit && p_str[count] != 0; ++count) {
	}
	return count;
}
/*************************************************************************/
/* StrRange */
/*************************************************************************/
// Non-owning pointer + length pair describing a run of `Element` characters.
// `len` is the number of elements in the run; when the range is built from an array
// or through from_c_str() it is the strlen of the data, i.e. the terminator is excluded.
// All constructors are explicit, so a range is never created by implicit conversion.
template <typename Element>
struct StrRange {
	const Element *c_str;
	size_t len;

	// Empty range: null pointer, zero length.
	constexpr explicit StrRange(const std::nullptr_t p_cstring) :
			c_str(nullptr), len(0) {}

	// Range over p_len elements starting at p_cstring; p_len is stored as given.
	constexpr explicit StrRange(const Element *p_cstring, const size_t p_len) :
			c_str(p_cstring), len(p_len) {}

	// Range over a character array (e.g. a string literal). The length is measured with
	// strlen rather than taken from the array extent, so it stops at the first zero.
	// The extent parameter is named N so it does not shadow the `len` member.
	template <size_t N>
	constexpr explicit StrRange(const Element (&p_cstring)[N]) :
			c_str(p_cstring), len(strlen(p_cstring)) {}

	// Range over a zero-terminated string; a null pointer yields a zero-length range.
	static constexpr StrRange from_c_str(const Element *p_cstring) {
		return StrRange(p_cstring, p_cstring ? strlen(p_cstring) : 0);
	}
};
/*************************************************************************/
/* CharProxy */
/*************************************************************************/
@ -119,6 +202,7 @@ public:
int length() const { return size() ? size() - 1 : 0; }
const char16_t *get_data() const;
operator const char16_t *() const { return get_data(); }
// Explicit conversion to a StrRange built from get_data() and length().
explicit operator StrRange<char16_t>() const { return StrRange(get_data(), length()); }
protected:
void copy_from(const char16_t *p_cstr);
@ -161,6 +245,7 @@ public:
int length() const { return size() ? size() - 1 : 0; }
const char *get_data() const;
operator const char *() const { return get_data(); }
// Explicit conversion to a StrRange built from get_data() and length().
explicit operator StrRange<char>() const { return StrRange(get_data(), length()); }
protected:
void copy_from(const char *p_cstr);
@ -170,31 +255,59 @@ protected:
/* String */
/*************************************************************************/
// Pointer + length pair describing a run of char32_t characters.
// Both constructor arguments are optional; the defaults give a null pointer and length 0.
struct StrRange {
	const char32_t *c_str;
	int len;

	StrRange(const char32_t *p_c_str = nullptr, int p_len = 0) :
			c_str(p_c_str), len(p_len) {}
};
class String {
CowData<char32_t> _cowdata;
static const char32_t _null;
static const char32_t _replacement_char;
void copy_from(const char *p_cstr);
void copy_from(const char *p_cstr, const int p_clip_to);
void copy_from(const wchar_t *p_cstr);
void copy_from(const wchar_t *p_cstr, const int p_clip_to);
void copy_from(const char32_t *p_cstr);
void copy_from(const char32_t *p_cstr, const int p_clip_to);
// Known-length copy.
void copy_from(const StrRange<char> &p_cstr);
void copy_from(const StrRange<char32_t> &p_cstr);
void copy_from(const char32_t &p_char);
void copy_from_unchecked(const char32_t *p_char, int p_length);
void copy_from_unchecked(const char32_t *p_char, const int p_length);
// NULL-terminated c string copy - automatically parse the string to find the length.
void copy_from(const char *p_cstr) {
copy_from(StrRange<char>::from_c_str(p_cstr));
}
// Copies from p_cstr using the length reported by _strlen_clipped(p_cstr, p_clip_to).
// A null p_cstr is not measured; it yields a zero-length range.
void copy_from(const char *p_cstr, int p_clip_to) {
	const size_t clipped_len = p_cstr ? _strlen_clipped(p_cstr, p_clip_to) : 0;
	copy_from(StrRange(p_cstr, clipped_len));
}
void copy_from(const char32_t *p_cstr) {
copy_from(StrRange<char32_t>::from_c_str(p_cstr));
}
// Copies from p_cstr using the length reported by _strlen_clipped(p_cstr, p_clip_to).
// A null p_cstr is not measured; it yields a zero-length range.
void copy_from(const char32_t *p_cstr, int p_clip_to) {
	const size_t clipped_len = p_cstr ? _strlen_clipped(p_cstr, p_clip_to) : 0;
	copy_from(StrRange(p_cstr, clipped_len));
}
// wchar_t copy_from depends on the platform.
// Known-length copy from a wide-character range.
void copy_from(const StrRange<wchar_t> &p_cstr) {
#ifdef WINDOWS_ENABLED
	// wchar_t is 16-bit, parse as UTF-16
	parse_utf16((const char16_t *)p_cstr.c_str, p_cstr.len);
#else
	// wchar_t is 32-bit, copy directly.
	// Build a real StrRange<char32_t> over the same characters instead of casting the
	// StrRange<wchar_t> object itself: that cast dropped the const qualifier and
	// treated an object of one class type as another, unrelated class type.
	copy_from(StrRange<char32_t>(reinterpret_cast<const char32_t *>(p_cstr.c_str), p_cstr.len));
#endif
}
// Copies a zero-terminated wide string; the path taken is selected by WINDOWS_ENABLED.
void copy_from(const wchar_t *p_cstr) {
#ifndef WINDOWS_ENABLED
	// wchar_t is 32-bit here: reuse the char32_t overload on the same characters.
	copy_from(reinterpret_cast<const char32_t *>(p_cstr));
#else
	// wchar_t is 16-bit here: hand the characters to the UTF-16 parser.
	parse_utf16(reinterpret_cast<const char16_t *>(p_cstr));
#endif
}
// Copies a zero-terminated wide string, forwarding p_clip_to unchanged; the path taken
// is selected by WINDOWS_ENABLED.
void copy_from(const wchar_t *p_cstr, int p_clip_to) {
#ifndef WINDOWS_ENABLED
	// wchar_t is 32-bit here: reuse the clipped char32_t overload on the same characters.
	copy_from(reinterpret_cast<const char32_t *>(p_cstr), p_clip_to);
#else
	// wchar_t is 16-bit here: hand the characters and the clip value to the UTF-16 parser.
	parse_utf16(reinterpret_cast<const char16_t *>(p_cstr), p_clip_to);
#endif
}
bool _base_is_subsequence_of(const String &p_string, bool case_insensitive) const;
int _count(const String &p_string, int p_from, int p_to, bool p_case_insensitive) const;
@ -227,6 +340,8 @@ public:
}
_FORCE_INLINE_ CharProxy<char32_t> operator[](int p_index) { return CharProxy<char32_t>(p_index, _cowdata); }
/* Compatibility Operators */
bool operator==(const String &p_str) const;
bool operator!=(const String &p_str) const;
String operator+(const String &p_str) const;
@ -238,16 +353,10 @@ public:
String &operator+=(const wchar_t *p_str);
String &operator+=(const char32_t *p_str);
/* Compatibility Operators */
void operator=(const char *p_str);
void operator=(const wchar_t *p_str);
void operator=(const char32_t *p_str);
bool operator==(const char *p_str) const;
bool operator==(const wchar_t *p_str) const;
bool operator==(const char32_t *p_str) const;
bool operator==(const StrRange &p_str_range) const;
bool operator==(const StrRange<char32_t> &p_str_range) const;
bool operator!=(const char *p_str) const;
bool operator!=(const wchar_t *p_str) const;
@ -493,13 +602,38 @@ public:
Vector<uint8_t> to_utf32_buffer() const;
Vector<uint8_t> to_wchar_buffer() const;
String(const char *p_str);
String(const wchar_t *p_str);
String(const char32_t *p_str);
String(const char *p_str, int p_clip_to_len);
String(const wchar_t *p_str, int p_clip_to_len);
String(const char32_t *p_str, int p_clip_to_len);
String(const StrRange &p_range);
// Constructors for NULL terminated C strings.
// Each one is defined inline and simply forwards its arguments to the copy_from
// overload with the same parameter types.
String(const char *p_cstr) {
copy_from(p_cstr);
}
String(const wchar_t *p_cstr) {
copy_from(p_cstr);
}
String(const char32_t *p_cstr) {
copy_from(p_cstr);
}
// Clipped variants: p_clip_to_len is passed through as copy_from's second argument.
String(const char *p_cstr, int p_clip_to_len) {
copy_from(p_cstr, p_clip_to_len);
}
String(const wchar_t *p_cstr, int p_clip_to_len) {
copy_from(p_cstr, p_clip_to_len);
}
String(const char32_t *p_cstr, int p_clip_to_len) {
copy_from(p_cstr, p_clip_to_len);
}
// Copy assignment for NULL terminated C strings.
// Each overload is defined inline and forwards to the copy_from overload taking the
// same pointer type; nothing is returned (the operators are declared void).
void operator=(const char *p_cstr) {
copy_from(p_cstr);
}
void operator=(const wchar_t *p_cstr) {
copy_from(p_cstr);
}
void operator=(const char32_t *p_cstr) {
copy_from(p_cstr);
}
// Explicit conversion to a StrRange built from get_data() and length().
explicit operator StrRange<char32_t>() const { return StrRange(get_data(), length()); }
};
bool operator==(const char *p_chr, const String &p_str);