Optimize escaping of JSON strings

There were quite a few places where we either had a non-NUL-terminated
string or a text Datum which we needed to call escape_json() on.  Many of
these places required that a temporary string was created due to the fact
that escape_json() needs a NUL-terminated cstring.  For text types, those
first had to be converted to cstring before calling escape_json() on them.

Here we introduce two new functions to make escaping JSON more efficient:

escape_json_text() can be given a text Datum to append onto the given
buffer.  This is more efficient as it forgoes the need to convert the text
Datum into a cstring.  A temporary allocation is only required if the text
Datum needs to be detoasted.

escape_json_with_len() can be used when the length of the cstring is
already known or the given string isn't NUL-terminated.  Having this
allows various places which were creating a temporary NUL-terminated
string to just call escape_json_with_len() without any temporary memory
allocations.

Discussion: https://postgr.es/m/CAApHDvpLXwMZvbCKcdGfU9XQjGCDm7tFpRdTXuB9PVgpNUYfEQ@mail.gmail.com
Reviewed-by: Melih Mutlu, Heikki Linnakangas
This commit is contained in:
David Rowley 2024-07-27 23:46:07 +12:00
parent 67427f1009
commit 17a5871d9d
7 changed files with 151 additions and 101 deletions

View File

@ -1343,23 +1343,20 @@ hstore_to_json_loose(PG_FUNCTION_ARGS)
int count = HS_COUNT(in); int count = HS_COUNT(in);
char *base = STRPTR(in); char *base = STRPTR(in);
HEntry *entries = ARRPTR(in); HEntry *entries = ARRPTR(in);
StringInfoData tmp, StringInfoData dst;
dst;
if (count == 0) if (count == 0)
PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2)); PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
initStringInfo(&tmp);
initStringInfo(&dst); initStringInfo(&dst);
appendStringInfoChar(&dst, '{'); appendStringInfoChar(&dst, '{');
for (i = 0; i < count; i++) for (i = 0; i < count; i++)
{ {
resetStringInfo(&tmp); escape_json_with_len(&dst,
appendBinaryStringInfo(&tmp, HSTORE_KEY(entries, base, i), HSTORE_KEY(entries, base, i),
HSTORE_KEYLEN(entries, i)); HSTORE_KEYLEN(entries, i));
escape_json(&dst, tmp.data);
appendStringInfoString(&dst, ": "); appendStringInfoString(&dst, ": ");
if (HSTORE_VALISNULL(entries, i)) if (HSTORE_VALISNULL(entries, i))
appendStringInfoString(&dst, "null"); appendStringInfoString(&dst, "null");
@ -1372,13 +1369,13 @@ hstore_to_json_loose(PG_FUNCTION_ARGS)
appendStringInfoString(&dst, "false"); appendStringInfoString(&dst, "false");
else else
{ {
resetStringInfo(&tmp); char *str = HSTORE_VAL(entries, base, i);
appendBinaryStringInfo(&tmp, HSTORE_VAL(entries, base, i), int len = HSTORE_VALLEN(entries, i);
HSTORE_VALLEN(entries, i));
if (IsValidJsonNumber(tmp.data, tmp.len)) if (IsValidJsonNumber(str, len))
appendBinaryStringInfo(&dst, tmp.data, tmp.len); appendBinaryStringInfo(&dst, str, len);
else else
escape_json(&dst, tmp.data); escape_json_with_len(&dst, str, len);
} }
if (i + 1 != count) if (i + 1 != count)
@ -1398,32 +1395,28 @@ hstore_to_json(PG_FUNCTION_ARGS)
int count = HS_COUNT(in); int count = HS_COUNT(in);
char *base = STRPTR(in); char *base = STRPTR(in);
HEntry *entries = ARRPTR(in); HEntry *entries = ARRPTR(in);
StringInfoData tmp, StringInfoData dst;
dst;
if (count == 0) if (count == 0)
PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2)); PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
initStringInfo(&tmp);
initStringInfo(&dst); initStringInfo(&dst);
appendStringInfoChar(&dst, '{'); appendStringInfoChar(&dst, '{');
for (i = 0; i < count; i++) for (i = 0; i < count; i++)
{ {
resetStringInfo(&tmp); escape_json_with_len(&dst,
appendBinaryStringInfo(&tmp, HSTORE_KEY(entries, base, i), HSTORE_KEY(entries, base, i),
HSTORE_KEYLEN(entries, i)); HSTORE_KEYLEN(entries, i));
escape_json(&dst, tmp.data);
appendStringInfoString(&dst, ": "); appendStringInfoString(&dst, ": ");
if (HSTORE_VALISNULL(entries, i)) if (HSTORE_VALISNULL(entries, i))
appendStringInfoString(&dst, "null"); appendStringInfoString(&dst, "null");
else else
{ {
resetStringInfo(&tmp); escape_json_with_len(&dst,
appendBinaryStringInfo(&tmp, HSTORE_VAL(entries, base, i), HSTORE_VAL(entries, base, i),
HSTORE_VALLEN(entries, i)); HSTORE_VALLEN(entries, i));
escape_json(&dst, tmp.data);
} }
if (i + 1 != count) if (i + 1 != count)

View File

@ -148,7 +148,7 @@ AddFileToBackupManifest(backup_manifest_info *manifest, Oid spcoid,
pg_verify_mbstr(PG_UTF8, pathname, pathlen, true)) pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
{ {
appendStringInfoString(&buf, "{ \"Path\": "); appendStringInfoString(&buf, "{ \"Path\": ");
escape_json(&buf, pathname); escape_json_with_len(&buf, pathname, pathlen);
appendStringInfoString(&buf, ", "); appendStringInfoString(&buf, ", ");
} }
else else

View File

@ -23,6 +23,7 @@
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/date.h" #include "utils/date.h"
#include "utils/datetime.h" #include "utils/datetime.h"
#include "utils/fmgroids.h"
#include "utils/json.h" #include "utils/json.h"
#include "utils/jsonfuncs.h" #include "utils/jsonfuncs.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
@ -285,9 +286,16 @@ datum_to_json_internal(Datum val, bool is_null, StringInfo result,
pfree(jsontext); pfree(jsontext);
break; break;
default: default:
outputstr = OidOutputFunctionCall(outfuncoid, val); /* special-case text types to save useless palloc/memcpy cycles */
escape_json(result, outputstr); if (outfuncoid == F_TEXTOUT || outfuncoid == F_VARCHAROUT ||
pfree(outputstr); outfuncoid == F_BPCHAROUT)
escape_json_text(result, (text *) DatumGetPointer(val));
else
{
outputstr = OidOutputFunctionCall(outfuncoid, val);
escape_json(result, outputstr);
pfree(outputstr);
}
break; break;
} }
} }
@ -1391,7 +1399,6 @@ json_object(PG_FUNCTION_ARGS)
count, count,
i; i;
text *rval; text *rval;
char *v;
switch (ndims) switch (ndims)
{ {
@ -1434,19 +1441,16 @@ json_object(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("null value not allowed for object key"))); errmsg("null value not allowed for object key")));
v = TextDatumGetCString(in_datums[i * 2]);
if (i > 0) if (i > 0)
appendStringInfoString(&result, ", "); appendStringInfoString(&result, ", ");
escape_json(&result, v); escape_json_text(&result, (text *) DatumGetPointer(in_datums[i * 2]));
appendStringInfoString(&result, " : "); appendStringInfoString(&result, " : ");
pfree(v);
if (in_nulls[i * 2 + 1]) if (in_nulls[i * 2 + 1])
appendStringInfoString(&result, "null"); appendStringInfoString(&result, "null");
else else
{ {
v = TextDatumGetCString(in_datums[i * 2 + 1]); escape_json_text(&result,
escape_json(&result, v); (text *) DatumGetPointer(in_datums[i * 2 + 1]));
pfree(v);
} }
} }
@ -1483,7 +1487,6 @@ json_object_two_arg(PG_FUNCTION_ARGS)
val_count, val_count,
i; i;
text *rval; text *rval;
char *v;
if (nkdims > 1 || nkdims != nvdims) if (nkdims > 1 || nkdims != nvdims)
ereport(ERROR, ereport(ERROR,
@ -1512,20 +1515,15 @@ json_object_two_arg(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("null value not allowed for object key"))); errmsg("null value not allowed for object key")));
v = TextDatumGetCString(key_datums[i]);
if (i > 0) if (i > 0)
appendStringInfoString(&result, ", "); appendStringInfoString(&result, ", ");
escape_json(&result, v); escape_json_text(&result, (text *) DatumGetPointer(key_datums[i]));
appendStringInfoString(&result, " : "); appendStringInfoString(&result, " : ");
pfree(v);
if (val_nulls[i]) if (val_nulls[i])
appendStringInfoString(&result, "null"); appendStringInfoString(&result, "null");
else else
{ escape_json_text(&result,
v = TextDatumGetCString(val_datums[i]); (text *) DatumGetPointer(val_datums[i]));
escape_json(&result, v);
pfree(v);
}
} }
appendStringInfoChar(&result, '}'); appendStringInfoChar(&result, '}');
@ -1541,50 +1539,100 @@ json_object_two_arg(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(rval); PG_RETURN_TEXT_P(rval);
} }
/*
 * escape_json_char
 *		Inline helper for the escape_json* functions: append the JSON-escaped
 *		form of the single byte 'c' onto 'buf'.
 *
 * Backspace, form feed, newline, carriage return, tab, double quote and
 * backslash are emitted as their two-character backslash escapes.  Any other
 * byte below ' ' is emitted in \uXXXX form.  Everything else is appended
 * unchanged.  The surrounding double quotes are not written here.
 */
static pg_attribute_always_inline void
escape_json_char(StringInfo buf, char c)
{
	const char *escaped = NULL;

	/* look up the short escape sequence, if this character has one */
	switch (c)
	{
		case '\b':
			escaped = "\\b";
			break;
		case '\f':
			escaped = "\\f";
			break;
		case '\n':
			escaped = "\\n";
			break;
		case '\r':
			escaped = "\\r";
			break;
		case '\t':
			escaped = "\\t";
			break;
		case '"':
			escaped = "\\\"";
			break;
		case '\\':
			escaped = "\\\\";
			break;
		default:
			break;
	}

	if (escaped != NULL)
		appendStringInfoString(buf, escaped);
	else if ((unsigned char) c < ' ')
	{
		/* remaining control characters have no short form */
		appendStringInfo(buf, "\\u%04x", (int) c);
	}
	else
		appendStringInfoCharMacro(buf, c);
}
/* /*
* Produce a JSON string literal, properly escaping characters in the text. * escape_json
* Produce a JSON string literal, properly escaping the NUL-terminated
* cstring.
*/ */
void void
escape_json(StringInfo buf, const char *str) escape_json(StringInfo buf, const char *str)
{ {
const char *p; appendStringInfoCharMacro(buf, '"');
for (; *str != '\0'; str++)
escape_json_char(buf, *str);
appendStringInfoCharMacro(buf, '"'); appendStringInfoCharMacro(buf, '"');
for (p = str; *p; p++) }
{
switch (*p) /*
{ * escape_json_with_len
case '\b': * Produce a JSON string literal, properly escaping the possibly not
appendStringInfoString(buf, "\\b"); * NUL-terminated characters in 'str'. 'len' defines the number of bytes
break; * from 'str' to process.
case '\f': */
appendStringInfoString(buf, "\\f"); void
break; escape_json_with_len(StringInfo buf, const char *str, int len)
case '\n': {
appendStringInfoString(buf, "\\n");
break;
case '\r':
appendStringInfoString(buf, "\\r");
break;
case '\t':
appendStringInfoString(buf, "\\t");
break;
case '"':
appendStringInfoString(buf, "\\\"");
break;
case '\\':
appendStringInfoString(buf, "\\\\");
break;
default:
if ((unsigned char) *p < ' ')
appendStringInfo(buf, "\\u%04x", (int) *p);
else
appendStringInfoCharMacro(buf, *p);
break;
}
}
appendStringInfoCharMacro(buf, '"'); appendStringInfoCharMacro(buf, '"');
for (int i = 0; i < len; i++)
escape_json_char(buf, str[i]);
appendStringInfoCharMacro(buf, '"');
}
/*
* escape_json_text
* Append 'txt' onto 'buf' and escape using escape_json_with_len.
*
* This is more efficient than calling text_to_cstring and appending the
* result as that could require an additional palloc and memcpy.
*/
void
escape_json_text(StringInfo buf, const text *txt)
{
/* must cast away the const, unfortunately */
text *tunpacked = pg_detoast_datum_packed(unconstify(text *, txt));
int len = VARSIZE_ANY_EXHDR(tunpacked);
char *str;
str = VARDATA_ANY(tunpacked);
escape_json_with_len(buf, str, len);
/* pfree any detoasted values */
if (tunpacked != txt)
pfree(tunpacked);
} }
/* Semantic actions for key uniqueness check */ /* Semantic actions for key uniqueness check */

View File

@ -354,7 +354,7 @@ jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal)
appendBinaryStringInfo(out, "null", 4); appendBinaryStringInfo(out, "null", 4);
break; break;
case jbvString: case jbvString:
escape_json(out, pnstrdup(scalarVal->val.string.val, scalarVal->val.string.len)); escape_json_with_len(out, scalarVal->val.string.val, scalarVal->val.string.len);
break; break;
case jbvNumeric: case jbvNumeric:
appendStringInfoString(out, appendStringInfoString(out,

View File

@ -3133,18 +3133,6 @@ populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv,
json = jsv->val.json.str; json = jsv->val.json.str;
Assert(json); Assert(json);
if (len >= 0)
{
/* Need to copy non-null-terminated string */
str = palloc(len + 1 * sizeof(char));
memcpy(str, json, len);
str[len] = '\0';
}
else
{
/* string is already null-terminated */
str = unconstify(char *, json);
}
/* If converting to json/jsonb, make string into valid JSON literal */ /* If converting to json/jsonb, make string into valid JSON literal */
if ((typid == JSONOID || typid == JSONBOID) && if ((typid == JSONOID || typid == JSONBOID) &&
@ -3153,12 +3141,24 @@ populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv,
StringInfoData buf; StringInfoData buf;
initStringInfo(&buf); initStringInfo(&buf);
escape_json(&buf, str); if (len >= 0)
/* free temporary buffer */ escape_json_with_len(&buf, json, len);
if (str != json) else
pfree(str); escape_json(&buf, json);
str = buf.data; str = buf.data;
} }
else if (len >= 0)
{
/* create a NUL-terminated version */
str = palloc(len + 1);
memcpy(str, json, len);
str[len] = '\0';
}
else
{
/* string is already NUL-terminated */
str = unconstify(char *, json);
}
} }
else else
{ {
@ -5936,7 +5936,7 @@ transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype
{ {
text *out = _state->action(_state->action_state, token, strlen(token)); text *out = _state->action(_state->action_state, token, strlen(token));
escape_json(_state->strval, text_to_cstring(out)); escape_json_text(_state->strval, out);
} }
else else
appendStringInfoString(_state->strval, token); appendStringInfoString(_state->strval, token);

View File

@ -523,6 +523,8 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
{ {
JsonPathItem elem; JsonPathItem elem;
int i; int i;
int32 len;
char *str;
check_stack_depth(); check_stack_depth();
CHECK_FOR_INTERRUPTS(); CHECK_FOR_INTERRUPTS();
@ -533,7 +535,8 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
appendStringInfoString(buf, "null"); appendStringInfoString(buf, "null");
break; break;
case jpiString: case jpiString:
escape_json(buf, jspGetString(v, NULL)); str = jspGetString(v, &len);
escape_json_with_len(buf, str, len);
break; break;
case jpiNumeric: case jpiNumeric:
if (jspHasNext(v)) if (jspHasNext(v))
@ -662,7 +665,8 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
case jpiKey: case jpiKey:
if (inKey) if (inKey)
appendStringInfoChar(buf, '.'); appendStringInfoChar(buf, '.');
escape_json(buf, jspGetString(v, NULL)); str = jspGetString(v, &len);
escape_json_with_len(buf, str, len);
break; break;
case jpiCurrent: case jpiCurrent:
Assert(!inKey); Assert(!inKey);
@ -674,7 +678,8 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
break; break;
case jpiVariable: case jpiVariable:
appendStringInfoChar(buf, '$'); appendStringInfoChar(buf, '$');
escape_json(buf, jspGetString(v, NULL)); str = jspGetString(v, &len);
escape_json_with_len(buf, str, len);
break; break;
case jpiFilter: case jpiFilter:
appendStringInfoString(buf, "?("); appendStringInfoString(buf, "?(");
@ -732,7 +737,9 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
appendStringInfoString(buf, " like_regex "); appendStringInfoString(buf, " like_regex ");
escape_json(buf, v->content.like_regex.pattern); escape_json_with_len(buf,
v->content.like_regex.pattern,
v->content.like_regex.patternlen);
if (v->content.like_regex.flags) if (v->content.like_regex.flags)
{ {

View File

@ -18,6 +18,8 @@
/* functions in json.c */ /* functions in json.c */
extern void escape_json(StringInfo buf, const char *str); extern void escape_json(StringInfo buf, const char *str);
extern void escape_json_with_len(StringInfo buf, const char *str, int len);
extern void escape_json_text(StringInfo buf, const text *txt);
extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid, extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid,
const int *tzp); const int *tzp);
extern bool to_json_is_immutable(Oid typoid); extern bool to_json_is_immutable(Oid typoid);