mirror of
https://github.com/godotengine/godot.git
synced 2025-03-07 23:32:58 +08:00
Merge pull request #73973 from dalexeev/fix-regex-sub
RegEx: Fix handling of unset/unknown capture groups
This commit is contained in:
commit
867806954f
@ -289,25 +289,17 @@ TypedArray<RegExMatch> RegEx::search_all(const String &p_subject, int p_offset,
|
||||
return result;
|
||||
}
|
||||
|
||||
String RegEx::sub(const String &p_subject, const String &p_replacement, bool p_all, int p_offset, int p_end) const {
|
||||
ERR_FAIL_COND_V(!is_valid(), String());
|
||||
ERR_FAIL_COND_V_MSG(p_offset < 0, String(), "RegEx sub offset must be >= 0");
|
||||
|
||||
// safety_zone is the number of chars we allocate in addition to the number of chars expected in order to
|
||||
// guard against the PCRE API writing one additional \0 at the end. PCRE's API docs are unclear on whether
|
||||
// PCRE understands outlength in pcre2_substitute() as counting an implicit additional terminating char or
|
||||
// not. always allocating one char more than telling PCRE has us on the safe side.
|
||||
int RegEx::_sub(const String &p_subject, const String &p_replacement, int p_offset, int p_end, uint32_t p_flags, String &r_output) const {
|
||||
// `safety_zone` is the number of chars we allocate in addition to the number of chars expected in order to
|
||||
// guard against the PCRE API writing one additional `\0` at the end. PCRE's API docs are unclear on whether
|
||||
// PCRE understands outlength in `pcre2_substitute()` as counting an implicit additional terminating char or
|
||||
// not. Always allocating one char more than telling PCRE has us on the safe side.
|
||||
const int safety_zone = 1;
|
||||
|
||||
PCRE2_SIZE olength = p_subject.length() + 1; // space for output string and one terminating \0 character
|
||||
PCRE2_SIZE olength = p_subject.length() + 1; // Space for output string and one terminating `\0` character.
|
||||
Vector<char32_t> output;
|
||||
output.resize(olength + safety_zone);
|
||||
|
||||
uint32_t flags = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
|
||||
if (p_all) {
|
||||
flags |= PCRE2_SUBSTITUTE_GLOBAL;
|
||||
}
|
||||
|
||||
PCRE2_SIZE length = p_subject.length();
|
||||
if (p_end >= 0 && (uint32_t)p_end < length) {
|
||||
length = p_end;
|
||||
@ -322,22 +314,49 @@ String RegEx::sub(const String &p_subject, const String &p_replacement, bool p_a
|
||||
|
||||
pcre2_match_data_32 *match = pcre2_match_data_create_from_pattern_32(c, gctx);
|
||||
|
||||
int res = pcre2_substitute_32(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength);
|
||||
int res = pcre2_substitute_32(c, s, length, p_offset, p_flags, match, mctx, r, p_replacement.length(), o, &olength);
|
||||
|
||||
if (res == PCRE2_ERROR_NOMEMORY) {
|
||||
output.resize(olength + safety_zone);
|
||||
o = (PCRE2_UCHAR32 *)output.ptrw();
|
||||
res = pcre2_substitute_32(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength);
|
||||
res = pcre2_substitute_32(c, s, length, p_offset, p_flags, match, mctx, r, p_replacement.length(), o, &olength);
|
||||
}
|
||||
|
||||
pcre2_match_data_free_32(match);
|
||||
pcre2_match_context_free_32(mctx);
|
||||
|
||||
if (res < 0) {
|
||||
return String();
|
||||
if (res >= 0) {
|
||||
r_output = String(output.ptr(), olength) + p_subject.substr(length);
|
||||
}
|
||||
|
||||
return String(output.ptr(), olength) + p_subject.substr(length);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Substitutes matches of this pattern in `p_subject` with `p_replacement` and returns the resulting string.
//
// p_all:    when true, every match is replaced (PCRE2_SUBSTITUTE_GLOBAL) instead of only the first one.
// p_offset: position to start from; must be >= 0, otherwise an error is raised and an empty string is returned.
// p_end:    forwarded unchanged to `_sub()`.
//
// If the substitution fails, the PCRE2 error message is printed. When the failure is caused by the replacement
// referencing a capture group that does not exist (PCRE2_ERROR_NOSUBSTRING), the substitution is attempted once
// more with unknown groups treated as unset, so that such references expand to an empty string.
String RegEx::sub(const String &p_subject, const String &p_replacement, bool p_all, int p_offset, int p_end) const {
	ERR_FAIL_COND_V(!is_valid(), String());
	ERR_FAIL_COND_V_MSG(p_offset < 0, String(), "RegEx sub offset must be >= 0");

	// Unset capture groups always expand to an empty string.
	uint32_t sub_flags = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_UNSET_EMPTY;
	if (p_all) {
		sub_flags |= PCRE2_SUBSTITUTE_GLOBAL;
	}

	String result;
	const int status = _sub(p_subject, p_replacement, p_offset, p_end, sub_flags, result);
	if (status >= 0) {
		return result;
	}

	// Report the failure using PCRE2's own description of the error code.
	PCRE2_UCHAR32 error_text[256];
	pcre2_get_error_message_32(status, error_text, 256);
	String message = "PCRE2 Error: " + String((const char32_t *)error_text);
	ERR_PRINT(message.utf8());

	if (status == PCRE2_ERROR_NOSUBSTRING) {
		// The replacement refers to an unknown group: retry, handling unknown groups like unset ones.
		sub_flags |= PCRE2_SUBSTITUTE_UNKNOWN_UNSET;
		_sub(p_subject, p_replacement, p_offset, p_end, sub_flags, result);
	}

	return result;
}
|
||||
|
||||
bool RegEx::is_valid() const {
|
||||
|
@ -78,6 +78,8 @@ class RegEx : public RefCounted {
|
||||
|
||||
void _pattern_info(uint32_t what, void *where) const;
|
||||
|
||||
int _sub(const String &p_subject, const String &p_replacement, int p_offset, int p_end, uint32_t p_flags, String &r_output) const;
|
||||
|
||||
protected:
|
||||
static void _bind_methods();
|
||||
|
||||
|
@ -145,6 +145,15 @@ TEST_CASE("[RegEx] Substitution") {
|
||||
CHECK(re5.sub(s5, "cc", true, 0, 2) == "ccccaa");
|
||||
CHECK(re5.sub(s5, "cc", true, 1, 3) == "acccca");
|
||||
CHECK(re5.sub(s5, "", true, 0, 2) == "aa");
|
||||
|
||||
const String s6 = "property get_property set_property";
|
||||
|
||||
RegEx re6("(get_|set_)?property");
|
||||
REQUIRE(re6.is_valid());
|
||||
CHECK(re6.sub(s6, "$1new_property", true) == "new_property get_new_property set_new_property");
|
||||
ERR_PRINT_OFF;
|
||||
CHECK(re6.sub(s6, "$5new_property", true) == "new_property new_property new_property");
|
||||
ERR_PRINT_ON;
|
||||
}
|
||||
|
||||
TEST_CASE("[RegEx] Substitution with empty input and/or replacement") {
|
||||
|
Loading…
Reference in New Issue
Block a user