Convert jsonpath's input function to report errors softly

Reviewed by Tom Lane

Discussion: https://postgr.es/m/a8dc5700-c341-3ba8-0507-cc09881e6200@dunslane.net
This commit is contained in:
Andrew Dunstan 2022-12-24 15:19:14 -05:00
parent 780ec9f1b2
commit e37fe1db6e
8 changed files with 323 additions and 126 deletions

View File: src/backend/utils/adt/jsonpath.c

@ -66,16 +66,19 @@
#include "funcapi.h" #include "funcapi.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "libpq/pqformat.h" #include "libpq/pqformat.h"
#include "nodes/miscnodes.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/json.h" #include "utils/json.h"
#include "utils/jsonpath.h" #include "utils/jsonpath.h"
static Datum jsonPathFromCstring(char *in, int len); static Datum jsonPathFromCstring(char *in, int len, struct Node *escontext);
static char *jsonPathToCstring(StringInfo out, JsonPath *in, static char *jsonPathToCstring(StringInfo out, JsonPath *in,
int estimated_len); int estimated_len);
static int flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item, static bool flattenJsonPathParseItem(StringInfo buf, int *result,
struct Node *escontext,
JsonPathParseItem *item,
int nestingLevel, bool insideArraySubscript); int nestingLevel, bool insideArraySubscript);
static void alignStringInfoInt(StringInfo buf); static void alignStringInfoInt(StringInfo buf);
static int32 reserveSpaceForItemPointer(StringInfo buf); static int32 reserveSpaceForItemPointer(StringInfo buf);
@ -95,7 +98,7 @@ jsonpath_in(PG_FUNCTION_ARGS)
char *in = PG_GETARG_CSTRING(0); char *in = PG_GETARG_CSTRING(0);
int len = strlen(in); int len = strlen(in);
return jsonPathFromCstring(in, len); return jsonPathFromCstring(in, len, fcinfo->context);
} }
/* /*
@ -119,7 +122,7 @@ jsonpath_recv(PG_FUNCTION_ARGS)
else else
elog(ERROR, "unsupported jsonpath version number: %d", version); elog(ERROR, "unsupported jsonpath version number: %d", version);
return jsonPathFromCstring(str, nbytes); return jsonPathFromCstring(str, nbytes, NULL);
} }
/* /*
@ -165,24 +168,29 @@ jsonpath_send(PG_FUNCTION_ARGS)
* representation of jsonpath. * representation of jsonpath.
*/ */
static Datum static Datum
jsonPathFromCstring(char *in, int len) jsonPathFromCstring(char *in, int len, struct Node *escontext)
{ {
JsonPathParseResult *jsonpath = parsejsonpath(in, len); JsonPathParseResult *jsonpath = parsejsonpath(in, len, escontext);
JsonPath *res; JsonPath *res;
StringInfoData buf; StringInfoData buf;
if (SOFT_ERROR_OCCURRED(escontext))
return (Datum) 0;
if (!jsonpath)
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"", "jsonpath",
in)));
initStringInfo(&buf); initStringInfo(&buf);
enlargeStringInfo(&buf, 4 * len /* estimation */ ); enlargeStringInfo(&buf, 4 * len /* estimation */ );
appendStringInfoSpaces(&buf, JSONPATH_HDRSZ); appendStringInfoSpaces(&buf, JSONPATH_HDRSZ);
if (!jsonpath) if (!flattenJsonPathParseItem(&buf, NULL, escontext,
ereport(ERROR, jsonpath->expr, 0, false))
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), return (Datum) 0;
errmsg("invalid input syntax for type %s: \"%s\"", "jsonpath",
in)));
flattenJsonPathParseItem(&buf, jsonpath->expr, 0, false);
res = (JsonPath *) buf.data; res = (JsonPath *) buf.data;
SET_VARSIZE(res, buf.len); SET_VARSIZE(res, buf.len);
@ -225,9 +233,10 @@ jsonPathToCstring(StringInfo out, JsonPath *in, int estimated_len)
* Recursive function converting given jsonpath parse item and all its * Recursive function converting given jsonpath parse item and all its
* children into a binary representation. * children into a binary representation.
*/ */
static int static bool
flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item, flattenJsonPathParseItem(StringInfo buf, int *result, struct Node *escontext,
int nestingLevel, bool insideArraySubscript) JsonPathParseItem *item, int nestingLevel,
bool insideArraySubscript)
{ {
/* position from beginning of jsonpath data */ /* position from beginning of jsonpath data */
int32 pos = buf->len - JSONPATH_HDRSZ; int32 pos = buf->len - JSONPATH_HDRSZ;
@ -295,16 +304,22 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
int32 left = reserveSpaceForItemPointer(buf); int32 left = reserveSpaceForItemPointer(buf);
int32 right = reserveSpaceForItemPointer(buf); int32 right = reserveSpaceForItemPointer(buf);
chld = !item->value.args.left ? pos : if (!item->value.args.left)
flattenJsonPathParseItem(buf, item->value.args.left, chld = pos;
else if (! flattenJsonPathParseItem(buf, &chld, escontext,
item->value.args.left,
nestingLevel + argNestingLevel, nestingLevel + argNestingLevel,
insideArraySubscript); insideArraySubscript))
return false;
*(int32 *) (buf->data + left) = chld - pos; *(int32 *) (buf->data + left) = chld - pos;
chld = !item->value.args.right ? pos : if (!item->value.args.right)
flattenJsonPathParseItem(buf, item->value.args.right, chld = pos;
else if (! flattenJsonPathParseItem(buf, &chld, escontext,
item->value.args.right,
nestingLevel + argNestingLevel, nestingLevel + argNestingLevel,
insideArraySubscript); insideArraySubscript))
return false;
*(int32 *) (buf->data + right) = chld - pos; *(int32 *) (buf->data + right) = chld - pos;
} }
break; break;
@ -323,9 +338,11 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
item->value.like_regex.patternlen); item->value.like_regex.patternlen);
appendStringInfoChar(buf, '\0'); appendStringInfoChar(buf, '\0');
chld = flattenJsonPathParseItem(buf, item->value.like_regex.expr, if (! flattenJsonPathParseItem(buf, &chld, escontext,
item->value.like_regex.expr,
nestingLevel, nestingLevel,
insideArraySubscript); insideArraySubscript))
return false;
*(int32 *) (buf->data + offs) = chld - pos; *(int32 *) (buf->data + offs) = chld - pos;
} }
break; break;
@ -341,10 +358,13 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
{ {
int32 arg = reserveSpaceForItemPointer(buf); int32 arg = reserveSpaceForItemPointer(buf);
chld = !item->value.arg ? pos : if (!item->value.arg)
flattenJsonPathParseItem(buf, item->value.arg, chld = pos;
else if (! flattenJsonPathParseItem(buf, &chld, escontext,
item->value.arg,
nestingLevel + argNestingLevel, nestingLevel + argNestingLevel,
insideArraySubscript); insideArraySubscript))
return false;
*(int32 *) (buf->data + arg) = chld - pos; *(int32 *) (buf->data + arg) = chld - pos;
} }
break; break;
@ -357,13 +377,13 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
break; break;
case jpiCurrent: case jpiCurrent:
if (nestingLevel <= 0) if (nestingLevel <= 0)
ereport(ERROR, ereturn(escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("@ is not allowed in root expressions"))); errmsg("@ is not allowed in root expressions")));
break; break;
case jpiLast: case jpiLast:
if (!insideArraySubscript) if (!insideArraySubscript)
ereport(ERROR, ereturn(escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("LAST is allowed only in array subscripts"))); errmsg("LAST is allowed only in array subscripts")));
break; break;
@ -383,15 +403,22 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
{ {
int32 *ppos; int32 *ppos;
int32 topos; int32 topos;
int32 frompos = int32 frompos;
flattenJsonPathParseItem(buf,
if (! flattenJsonPathParseItem(buf, &frompos, escontext,
item->value.array.elems[i].from, item->value.array.elems[i].from,
nestingLevel, true) - pos; nestingLevel, true))
return false;
frompos -= pos;
if (item->value.array.elems[i].to) if (item->value.array.elems[i].to)
topos = flattenJsonPathParseItem(buf, {
if (! flattenJsonPathParseItem(buf, &topos, escontext,
item->value.array.elems[i].to, item->value.array.elems[i].to,
nestingLevel, true) - pos; nestingLevel, true))
return false;
topos -= pos;
}
else else
topos = 0; topos = 0;
@ -424,12 +451,17 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
if (item->next) if (item->next)
{ {
chld = flattenJsonPathParseItem(buf, item->next, nestingLevel, if (! flattenJsonPathParseItem(buf, &chld, escontext,
insideArraySubscript) - pos; item->next, nestingLevel,
insideArraySubscript))
return false;
chld -= pos;
*(int32 *) (buf->data + next) = chld; *(int32 *) (buf->data + next) = chld;
} }
return pos; if (result)
*result = pos;
return true;
} }
/* /*

View File: src/backend/utils/adt/jsonpath_exec.c

@ -1721,7 +1721,8 @@ executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg,
cxt->regex = cxt->regex =
cstring_to_text_with_len(jsp->content.like_regex.pattern, cstring_to_text_with_len(jsp->content.like_regex.pattern,
jsp->content.like_regex.patternlen); jsp->content.like_regex.patternlen);
cxt->cflags = jspConvertRegexFlags(jsp->content.like_regex.flags); (void) jspConvertRegexFlags(jsp->content.like_regex.flags,
&(cxt->cflags), NULL);
} }
if (RE_compile_and_execute(cxt->regex, str->val.string.val, if (RE_compile_and_execute(cxt->regex, str->val.string.val,

View File: src/backend/utils/adt/jsonpath_gram.y

@ -38,9 +38,11 @@ static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
static JsonPathParseItem *makeItemList(List *list); static JsonPathParseItem *makeItemList(List *list);
static JsonPathParseItem *makeIndexArray(List *list); static JsonPathParseItem *makeIndexArray(List *list);
static JsonPathParseItem *makeAny(int first, int last); static JsonPathParseItem *makeAny(int first, int last);
static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr, static bool makeItemLikeRegex(JsonPathParseItem *expr,
JsonPathString *pattern, JsonPathString *pattern,
JsonPathString *flags); JsonPathString *flags,
JsonPathParseItem ** result,
struct Node *escontext);
/* /*
* Bison doesn't allocate anything that needs to live across parser calls, * Bison doesn't allocate anything that needs to live across parser calls,
@ -57,6 +59,9 @@ static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
%expect 0 %expect 0
%name-prefix="jsonpath_yy" %name-prefix="jsonpath_yy"
%parse-param {JsonPathParseResult **result} %parse-param {JsonPathParseResult **result}
%parse-param {struct Node *escontext}
%lex-param {JsonPathParseResult **result}
%lex-param {struct Node *escontext}
%union %union
{ {
@ -163,9 +168,20 @@ predicate:
{ $$ = makeItemUnary(jpiIsUnknown, $2); } { $$ = makeItemUnary(jpiIsUnknown, $2); }
| expr STARTS_P WITH_P starts_with_initial | expr STARTS_P WITH_P starts_with_initial
{ $$ = makeItemBinary(jpiStartsWith, $1, $4); } { $$ = makeItemBinary(jpiStartsWith, $1, $4); }
| expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); } | expr LIKE_REGEX_P STRING_P
{
JsonPathParseItem *jppitem;
if (! makeItemLikeRegex($1, &$3, NULL, &jppitem, escontext))
YYABORT;
$$ = jppitem;
}
| expr LIKE_REGEX_P STRING_P FLAG_P STRING_P | expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
{ $$ = makeItemLikeRegex($1, &$3, &$5); } {
JsonPathParseItem *jppitem;
if (! makeItemLikeRegex($1, &$3, &$5, &jppitem, escontext))
YYABORT;
$$ = jppitem;
}
; ;
starts_with_initial: starts_with_initial:
@ -472,9 +488,10 @@ makeAny(int first, int last)
return v; return v;
} }
static JsonPathParseItem * static bool
makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
JsonPathString *flags) JsonPathString *flags, JsonPathParseItem ** result,
struct Node *escontext)
{ {
JsonPathParseItem *v = makeItemType(jpiLikeRegex); JsonPathParseItem *v = makeItemType(jpiLikeRegex);
int i; int i;
@ -506,7 +523,7 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
v->value.like_regex.flags |= JSP_REGEX_QUOTE; v->value.like_regex.flags |= JSP_REGEX_QUOTE;
break; break;
default: default:
ereport(ERROR, ereturn(escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid input syntax for type %s", "jsonpath"), errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.", errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.",
@ -515,22 +532,48 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
} }
} }
/* Convert flags to what RE_compile_and_cache needs */ /* Convert flags to what pg_regcomp needs */
cflags = jspConvertRegexFlags(v->value.like_regex.flags); if ( !jspConvertRegexFlags(v->value.like_regex.flags, &cflags, escontext))
return false;
/* check regex validity */ /* check regex validity */
(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val, {
pattern->len), regex_t re_tmp;
cflags, DEFAULT_COLLATION_OID); pg_wchar *wpattern;
int wpattern_len;
int re_result;
return v; wpattern = (pg_wchar *) palloc((pattern->len + 1) * sizeof(pg_wchar));
wpattern_len = pg_mb2wchar_with_len(pattern->val,
wpattern,
pattern->len);
if ((re_result = pg_regcomp(&re_tmp, wpattern, wpattern_len, cflags,
DEFAULT_COLLATION_OID)) != REG_OKAY)
{
char errMsg[100];
/* See regexp.c for explanation */
CHECK_FOR_INTERRUPTS();
pg_regerror(re_result, &re_tmp, errMsg, sizeof(errMsg));
ereturn(escontext, false,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("invalid regular expression: %s", errMsg)));
}
pg_regfree(&re_tmp);
}
*result = v;
return true;
} }
/* /*
* Convert from XQuery regex flags to those recognized by our regex library. * Convert from XQuery regex flags to those recognized by our regex library.
*/ */
int bool
jspConvertRegexFlags(uint32 xflags) jspConvertRegexFlags(uint32 xflags, int *result, struct Node *escontext)
{ {
/* By default, XQuery is very nearly the same as Spencer's AREs */ /* By default, XQuery is very nearly the same as Spencer's AREs */
int cflags = REG_ADVANCED; int cflags = REG_ADVANCED;
@ -561,18 +604,12 @@ jspConvertRegexFlags(uint32 xflags)
* XQuery-style ignore-whitespace mode. * XQuery-style ignore-whitespace mode.
*/ */
if (xflags & JSP_REGEX_WSPACE) if (xflags & JSP_REGEX_WSPACE)
ereport(ERROR, ereturn(escontext, false,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented"))); errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
} }
/* *result = cflags;
* We'll never need sub-match details at execution. While
* RE_compile_and_execute would set this flag anyway, force it on here to
* ensure that the regex cache entries created by makeItemLikeRegex are
* useful.
*/
cflags |= REG_NOSUB;
return cflags; return true;
} }

View File: src/backend/utils/adt/jsonpath_internal.h

@ -25,8 +25,14 @@ typedef struct JsonPathString
#include "utils/jsonpath.h" #include "utils/jsonpath.h"
#include "jsonpath_gram.h" #include "jsonpath_gram.h"
extern int jsonpath_yylex(YYSTYPE *yylval_param); #define YY_DECL extern int jsonpath_yylex(YYSTYPE *yylval_param, \
extern int jsonpath_yyparse(JsonPathParseResult **result); JsonPathParseResult **result, \
extern void jsonpath_yyerror(JsonPathParseResult **result, const char *message); struct Node *escontext)
YY_DECL;
extern int jsonpath_yyparse(JsonPathParseResult **result,
struct Node *escontext);
extern void jsonpath_yyerror(JsonPathParseResult **result,
struct Node *escontext,
const char *message);
#endif /* JSONPATH_INTERNAL_H */ #endif /* JSONPATH_INTERNAL_H */

View File: src/backend/utils/adt/jsonpath_scan.l

@ -25,6 +25,7 @@
#include "jsonpath_gram.h" #include "jsonpath_gram.h"
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
#include "nodes/miscnodes.h"
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
} }
@ -39,8 +40,8 @@ static int scanbuflen;
static void addstring(bool init, char *s, int l); static void addstring(bool init, char *s, int l);
static void addchar(bool init, char c); static void addchar(bool init, char c);
static enum yytokentype checkKeyword(void); static enum yytokentype checkKeyword(void);
static void parseUnicode(char *s, int l); static bool parseUnicode(char *s, int l, struct Node *escontext);
static void parseHexChar(char *s); static bool parseHexChar(char *s, struct Node *escontext);
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
#undef fprintf #undef fprintf
@ -147,25 +148,48 @@ hex_fail \\x{hex_dig}{0,1}
<xnq,xq,xvq>\\v { addchar(false, '\v'); } <xnq,xq,xvq>\\v { addchar(false, '\v'); }
<xnq,xq,xvq>{unicode}+ { parseUnicode(yytext, yyleng); } <xnq,xq,xvq>{unicode}+ {
if (!parseUnicode(yytext, yyleng, escontext))
yyterminate();
}
<xnq,xq,xvq>{hex_char} { parseHexChar(yytext); } <xnq,xq,xvq>{hex_char} {
if (!parseHexChar(yytext, escontext))
yyterminate();
}
<xnq,xq,xvq>{unicode}*{unicodefail} { jsonpath_yyerror(NULL, "invalid unicode sequence"); } <xnq,xq,xvq>{unicode}*{unicodefail} {
jsonpath_yyerror(NULL, escontext,
"invalid unicode sequence");
yyterminate();
}
<xnq,xq,xvq>{hex_fail} { jsonpath_yyerror(NULL, "invalid hex character sequence"); } <xnq,xq,xvq>{hex_fail} {
jsonpath_yyerror(NULL, escontext,
"invalid hex character sequence");
yyterminate();
}
<xnq,xq,xvq>{unicode}+\\ { <xnq,xq,xvq>{unicode}+\\ {
/* throw back the \\, and treat as unicode */ /* throw back the \\, and treat as unicode */
yyless(yyleng - 1); yyless(yyleng - 1);
parseUnicode(yytext, yyleng); if (!parseUnicode(yytext, yyleng, escontext))
yyterminate();
} }
<xnq,xq,xvq>\\. { addchar(false, yytext[1]); } <xnq,xq,xvq>\\. { addchar(false, yytext[1]); }
<xnq,xq,xvq>\\ { jsonpath_yyerror(NULL, "unexpected end after backslash"); } <xnq,xq,xvq>\\ {
jsonpath_yyerror(NULL, escontext,
"unexpected end after backslash");
yyterminate();
}
<xq,xvq><<EOF>> { jsonpath_yyerror(NULL, "unexpected end of quoted string"); } <xq,xvq><<EOF>> {
jsonpath_yyerror(NULL, escontext,
"unexpected end of quoted string");
yyterminate();
}
<xq>\" { <xq>\" {
yylval->str = scanstring; yylval->str = scanstring;
@ -187,8 +211,12 @@ hex_fail \\x{hex_dig}{0,1}
<xc>\* { } <xc>\* { }
<xc><<EOF>> { jsonpath_yyerror(NULL, "unexpected end of comment"); } <xc><<EOF>> {
jsonpath_yyerror(
NULL, escontext,
"unexpected end of comment");
yyterminate();
}
\&\& { return AND_P; } \&\& { return AND_P; }
\|\| { return OR_P; } \|\| { return OR_P; }
@ -253,11 +281,30 @@ hex_fail \\x{hex_dig}{0,1}
return INT_P; return INT_P;
} }
{realfail} { jsonpath_yyerror(NULL, "invalid numeric literal"); } {realfail} {
{integer_junk} { jsonpath_yyerror(NULL, "trailing junk after numeric literal"); } jsonpath_yyerror(
{decimal_junk} { jsonpath_yyerror(NULL, "trailing junk after numeric literal"); } NULL, escontext,
{real_junk} { jsonpath_yyerror(NULL, "trailing junk after numeric literal"); } "invalid numeric literal");
yyterminate();
}
{integer_junk} {
jsonpath_yyerror(
NULL, escontext,
"trailing junk after numeric literal");
yyterminate();
}
{decimal_junk} {
jsonpath_yyerror(
NULL, escontext,
"trailing junk after numeric literal");
yyterminate();
}
{real_junk} {
jsonpath_yyerror(
NULL, escontext,
"trailing junk after numeric literal");
yyterminate();
}
\" { \" {
addchar(true, '\0'); addchar(true, '\0');
BEGIN xq; BEGIN xq;
@ -281,18 +328,23 @@ hex_fail \\x{hex_dig}{0,1}
/* LCOV_EXCL_STOP */ /* LCOV_EXCL_STOP */
void void
jsonpath_yyerror(JsonPathParseResult **result, const char *message) jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext,
const char *message)
{ {
/* don't overwrite escontext if it's already been set */
if (SOFT_ERROR_OCCURRED(escontext))
return;
if (*yytext == YY_END_OF_BUFFER_CHAR) if (*yytext == YY_END_OF_BUFFER_CHAR)
{ {
ereport(ERROR, errsave(escontext,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
/* translator: %s is typically "syntax error" */ /* translator: %s is typically "syntax error" */
errmsg("%s at end of jsonpath input", _(message)))); errmsg("%s at end of jsonpath input", _(message))));
} }
else else
{ {
ereport(ERROR, errsave(escontext,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
/* translator: first %s is typically "syntax error" */ /* translator: first %s is typically "syntax error" */
errmsg("%s at or near \"%s\" of jsonpath input", errmsg("%s at or near \"%s\" of jsonpath input",
@ -463,14 +515,14 @@ addchar(bool init, char c)
/* Interface to jsonpath parser */ /* Interface to jsonpath parser */
JsonPathParseResult * JsonPathParseResult *
parsejsonpath(const char *str, int len) parsejsonpath(const char *str, int len, struct Node *escontext)
{ {
JsonPathParseResult *parseresult; JsonPathParseResult *parseresult;
jsonpath_scanner_init(str, len); jsonpath_scanner_init(str, len);
if (jsonpath_yyparse((void *) &parseresult) != 0) if (jsonpath_yyparse((void *) &parseresult, escontext) != 0)
jsonpath_yyerror(NULL, "bogus input"); /* shouldn't happen */ jsonpath_yyerror(NULL, escontext, "bogus input"); /* shouldn't happen */
jsonpath_scanner_finish(); jsonpath_scanner_finish();
@ -478,27 +530,36 @@ parsejsonpath(const char *str, int len)
} }
/* Turn hex character into integer */ /* Turn hex character into integer */
static int static bool
hexval(char c) hexval(char c, int *result, struct Node *escontext)
{ {
if (c >= '0' && c <= '9') if (c >= '0' && c <= '9')
return c - '0'; {
*result = c - '0';
return true;
}
if (c >= 'a' && c <= 'f') if (c >= 'a' && c <= 'f')
return c - 'a' + 0xA; {
*result = c - 'a' + 0xA;
return true;
}
if (c >= 'A' && c <= 'F') if (c >= 'A' && c <= 'F')
return c - 'A' + 0xA; {
jsonpath_yyerror(NULL, "invalid hexadecimal digit"); *result = c - 'A' + 0xA;
return 0; /* not reached */ return true;
}
jsonpath_yyerror(NULL, escontext, "invalid hexadecimal digit");
return false;
} }
/* Add given unicode character to scanstring */ /* Add given unicode character to scanstring */
static void static bool
addUnicodeChar(int ch) addUnicodeChar(int ch, struct Node *escontext)
{ {
if (ch == 0) if (ch == 0)
{ {
/* We can't allow this, since our TEXT type doesn't */ /* We can't allow this, since our TEXT type doesn't */
ereport(ERROR, ereturn(escontext, false,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"), errmsg("unsupported Unicode escape sequence"),
errdetail("\\u0000 cannot be converted to text."))); errdetail("\\u0000 cannot be converted to text.")));
@ -507,30 +568,42 @@ addUnicodeChar(int ch)
{ {
char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
/*
* If we're trapping the error status, call the noerror form of the
* conversion function. Otherwise call the normal form which provides
* more detailed errors.
*/
if (! escontext || ! IsA(escontext, ErrorSaveContext))
pg_unicode_to_server(ch, (unsigned char *) cbuf); pg_unicode_to_server(ch, (unsigned char *) cbuf);
else if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
ereturn(escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("could not convert unicode to server encoding")));
addstring(false, cbuf, strlen(cbuf)); addstring(false, cbuf, strlen(cbuf));
} }
return true;
} }
/* Add unicode character, processing any surrogate pairs */ /* Add unicode character, processing any surrogate pairs */
static void static bool
addUnicode(int ch, int *hi_surrogate) addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
{ {
if (is_utf16_surrogate_first(ch)) if (is_utf16_surrogate_first(ch))
{ {
if (*hi_surrogate != -1) if (*hi_surrogate != -1)
ereport(ERROR, ereturn(escontext, false,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "jsonpath"), errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode high surrogate must not follow " errdetail("Unicode high surrogate must not follow "
"a high surrogate."))); "a high surrogate.")));
*hi_surrogate = ch; *hi_surrogate = ch;
return; return true;
} }
else if (is_utf16_surrogate_second(ch)) else if (is_utf16_surrogate_second(ch))
{ {
if (*hi_surrogate == -1) if (*hi_surrogate == -1)
ereport(ERROR, ereturn(escontext, false,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "jsonpath"), errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode low surrogate must follow a high " errdetail("Unicode low surrogate must follow a high "
@ -540,22 +613,22 @@ addUnicode(int ch, int *hi_surrogate)
} }
else if (*hi_surrogate != -1) else if (*hi_surrogate != -1)
{ {
ereport(ERROR, ereturn(escontext, false,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "jsonpath"), errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode low surrogate must follow a high " errdetail("Unicode low surrogate must follow a high "
"surrogate."))); "surrogate.")));
} }
addUnicodeChar(ch); return addUnicodeChar(ch, escontext);
} }
/* /*
* parseUnicode was adopted from json_lex_string() in * parseUnicode was adopted from json_lex_string() in
* src/backend/utils/adt/json.c * src/backend/utils/adt/json.c
*/ */
static void static bool
parseUnicode(char *s, int l) parseUnicode(char *s, int l, struct Node *escontext)
{ {
int i = 2; int i = 2;
int hi_surrogate = -1; int hi_surrogate = -1;
@ -563,41 +636,57 @@ parseUnicode(char *s, int l)
for (i = 2; i < l; i += 2) /* skip '\u' */ for (i = 2; i < l; i += 2) /* skip '\u' */
{ {
int ch = 0; int ch = 0;
int j; int j, si;
if (s[i] == '{') /* parse '\u{XX...}' */ if (s[i] == '{') /* parse '\u{XX...}' */
{ {
while (s[++i] != '}' && i < l) while (s[++i] != '}' && i < l)
ch = (ch << 4) | hexval(s[i]); {
if (!hexval(s[i], &si, escontext))
return false;
ch = (ch << 4) | si;
}
i++; /* skip '}' */ i++; /* skip '}' */
} }
else /* parse '\uXXXX' */ else /* parse '\uXXXX' */
{ {
for (j = 0; j < 4 && i < l; j++) for (j = 0; j < 4 && i < l; j++)
ch = (ch << 4) | hexval(s[i++]); {
if (!hexval(s[i++], &si, escontext))
return false;
ch = (ch << 4) | si;
}
} }
addUnicode(ch, &hi_surrogate); if (! addUnicode(ch, &hi_surrogate, escontext))
return false;
} }
if (hi_surrogate != -1) if (hi_surrogate != -1)
{ {
ereport(ERROR, ereturn(escontext, false,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "jsonpath"), errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode low surrogate must follow a high " errdetail("Unicode low surrogate must follow a high "
"surrogate."))); "surrogate.")));
} }
return true;
} }
/* Parse sequence of hex-encoded characters */ /* Parse sequence of hex-encoded characters */
static void static bool
parseHexChar(char *s) parseHexChar(char *s, struct Node *escontext)
{ {
int ch = (hexval(s[2]) << 4) | int s2, s3, ch;
hexval(s[3]); if (!hexval(s[2], &s2, escontext))
return false;
if (!hexval(s[3], &s3, escontext))
return false;
addUnicodeChar(ch); ch = (s2 << 4) | s3;
return addUnicodeChar(ch, escontext);
} }
/* /*

View File: src/include/utils/jsonpath.h

@ -254,8 +254,11 @@ typedef struct JsonPathParseResult
bool lax; bool lax;
} JsonPathParseResult; } JsonPathParseResult;
extern JsonPathParseResult *parsejsonpath(const char *str, int len); extern JsonPathParseResult *parsejsonpath(const char *str, int len,
struct Node *escontext);
extern bool jspConvertRegexFlags(uint32 xflags, int *result,
struct Node *escontext);
extern int jspConvertRegexFlags(uint32 xflags);
#endif #endif

View File: src/test/regress/expected/jsonpath.out

@ -1032,3 +1032,21 @@ select '1?(2>3)'::jsonpath;
(1)?(2 > 3) (1)?(2 > 3)
(1 row) (1 row)
-- test non-error-throwing API
SELECT str as jsonpath,
pg_input_is_valid(str,'jsonpath') as ok,
pg_input_error_message(str,'jsonpath') as errmsg
FROM unnest(ARRAY['$ ? (@ like_regex "pattern" flag "smixq")'::text,
'$ ? (@ like_regex "pattern" flag "a")',
'@ + 1',
'00',
'1a']) str;
jsonpath | ok | errmsg
-------------------------------------------+----+-----------------------------------------------------------------------
$ ? (@ like_regex "pattern" flag "smixq") | t |
$ ? (@ like_regex "pattern" flag "a") | f | invalid input syntax for type jsonpath
@ + 1 | f | @ is not allowed in root expressions
00 | f | trailing junk after numeric literal at or near "00" of jsonpath input
1a | f | trailing junk after numeric literal at or near "1a" of jsonpath input
(5 rows)

View File: src/test/regress/sql/jsonpath.sql

@ -187,3 +187,14 @@ select '1..e3'::jsonpath;
select '(1.).e'::jsonpath; select '(1.).e'::jsonpath;
select '(1.).e3'::jsonpath; select '(1.).e3'::jsonpath;
select '1?(2>3)'::jsonpath; select '1?(2>3)'::jsonpath;
-- test non-error-throwing API
SELECT str as jsonpath,
pg_input_is_valid(str,'jsonpath') as ok,
pg_input_error_message(str,'jsonpath') as errmsg
FROM unnest(ARRAY['$ ? (@ like_regex "pattern" flag "smixq")'::text,
'$ ? (@ like_regex "pattern" flag "a")',
'@ + 1',
'00',
'1a']) str;