Convert tsqueryin and tsvectorin to report errors softly.

This is slightly tedious because the adjustments cascade through
a couple of levels of subroutines, but it's not very hard.
I chose to avoid changing function signatures more than absolutely
necessary, by passing the escontext pointer in existing structs
where possible.

tsquery's nuisance NOTICEs about empty queries are suppressed in
soft-error mode, since they're not errors and we surely don't want
them to be shown to the user anyway.  Maybe that whole behavior
should be reconsidered.

Discussion: https://postgr.es/m/3824377.1672076822@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2022-12-27 12:00:31 -05:00
parent eb8312a22a
commit 78212f2101
8 changed files with 196 additions and 52 deletions

View File

@ -594,7 +594,8 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
pushval_morph, pushval_morph,
PointerGetDatum(&data), PointerGetDatum(&data),
0); 0,
NULL);
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
} }
@ -630,7 +631,8 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
pushval_morph, pushval_morph,
PointerGetDatum(&data), PointerGetDatum(&data),
P_TSQ_PLAIN); P_TSQ_PLAIN,
NULL);
PG_RETURN_POINTER(query); PG_RETURN_POINTER(query);
} }
@ -667,7 +669,8 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
pushval_morph, pushval_morph,
PointerGetDatum(&data), PointerGetDatum(&data),
P_TSQ_PLAIN); P_TSQ_PLAIN,
NULL);
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
} }
@ -704,7 +707,8 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
pushval_morph, pushval_morph,
PointerGetDatum(&data), PointerGetDatum(&data),
P_TSQ_WEB); P_TSQ_WEB,
NULL);
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
} }

View File

@ -16,6 +16,7 @@
#include "libpq/pqformat.h" #include "libpq/pqformat.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "nodes/miscnodes.h"
#include "tsearch/ts_locale.h" #include "tsearch/ts_locale.h"
#include "tsearch/ts_type.h" #include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h" #include "tsearch/ts_utils.h"
@ -58,10 +59,16 @@ typedef enum
/* /*
* get token from query string * get token from query string
* *
* *operator is filled in with OP_* when return values is PT_OPR, * All arguments except "state" are output arguments.
* but *weight could contain a distance value in case of phrase operator.
* *strval, *lenval and *weight are filled in when return value is PT_VAL
* *
* If return value is PT_OPR, then *operator is filled with an OP_* code
* and *weight will contain a distance value in case of phrase operator.
*
* If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
* are filled.
*
* If PT_ERR is returned then a soft error has occurred. If state->escontext
* isn't already filled then this should be reported as a generic parse error.
*/ */
typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator, typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
int *lenval, char **strval, int *lenval, char **strval,
@ -93,6 +100,9 @@ struct TSQueryParserStateData
/* state for value's parser */ /* state for value's parser */
TSVectorParseState valstate; TSVectorParseState valstate;
/* context object for soft errors - must match valstate's escontext */
Node *escontext;
}; };
/* /*
@ -194,7 +204,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
if (ptr == endptr) if (ptr == endptr)
return false; return false;
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS) else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
ereport(ERROR, ereturn(pstate->escontext, false,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("distance in phrase operator must be an integer value between zero and %d inclusive", errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
MAXENTRYPOS))); MAXENTRYPOS)));
@ -301,10 +311,8 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
} }
else if (t_iseq(state->buf, ':')) else if (t_iseq(state->buf, ':'))
{ {
ereport(ERROR, /* generic syntax error message is fine */
(errcode(ERRCODE_SYNTAX_ERROR), return PT_ERR;
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
} }
else if (!t_isspace(state->buf)) else if (!t_isspace(state->buf))
{ {
@ -320,12 +328,17 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
state->state = WAITOPERATOR; state->state = WAITOPERATOR;
return PT_VAL; return PT_VAL;
} }
else if (SOFT_ERROR_OCCURRED(state->escontext))
{
/* gettoken_tsvector reported a soft error */
return PT_ERR;
}
else if (state->state == WAITFIRSTOPERAND) else if (state->state == WAITFIRSTOPERAND)
{ {
return PT_END; return PT_END;
} }
else else
ereport(ERROR, ereturn(state->escontext, PT_ERR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("no operand in tsquery: \"%s\"", errmsg("no operand in tsquery: \"%s\"",
state->buffer))); state->buffer)));
@ -354,6 +367,11 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
*operator = OP_PHRASE; *operator = OP_PHRASE;
return PT_OPR; return PT_OPR;
} }
else if (SOFT_ERROR_OCCURRED(state->escontext))
{
/* parse_phrase_operator reported a soft error */
return PT_ERR;
}
else if (t_iseq(state->buf, ')')) else if (t_iseq(state->buf, ')'))
{ {
state->buf++; state->buf++;
@ -438,6 +456,11 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
state->state = WAITOPERATOR; state->state = WAITOPERATOR;
return PT_VAL; return PT_VAL;
} }
else if (SOFT_ERROR_OCCURRED(state->escontext))
{
/* gettoken_tsvector reported a soft error */
return PT_ERR;
}
else if (state->state == WAITFIRSTOPERAND) else if (state->state == WAITFIRSTOPERAND)
{ {
return PT_END; return PT_END;
@ -529,12 +552,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
QueryOperand *tmp; QueryOperand *tmp;
if (distance >= MAXSTRPOS) if (distance >= MAXSTRPOS)
ereport(ERROR, ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("value is too big in tsquery: \"%s\"", errmsg("value is too big in tsquery: \"%s\"",
state->buffer))); state->buffer)));
if (lenval >= MAXSTRLEN) if (lenval >= MAXSTRLEN)
ereport(ERROR, ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("operand is too long in tsquery: \"%s\"", errmsg("operand is too long in tsquery: \"%s\"",
state->buffer))); state->buffer)));
@ -562,7 +585,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool
pg_crc32 valcrc; pg_crc32 valcrc;
if (lenval >= MAXSTRLEN) if (lenval >= MAXSTRLEN)
ereport(ERROR, ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long in tsquery: \"%s\"", errmsg("word is too long in tsquery: \"%s\"",
state->buffer))); state->buffer)));
@ -686,11 +709,17 @@ makepol(TSQueryParserState state,
return; return;
case PT_ERR: case PT_ERR:
default: default:
ereport(ERROR, /* don't overwrite a soft error saved by gettoken function */
(errcode(ERRCODE_SYNTAX_ERROR), if (!SOFT_ERROR_OCCURRED(state->escontext))
errmsg("syntax error in tsquery: \"%s\"", errsave(state->escontext,
state->buffer))); (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
return;
} }
/* detect soft error in pushval or recursion */
if (SOFT_ERROR_OCCURRED(state->escontext))
return;
} }
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ ); cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
@ -769,6 +798,8 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
/* /*
* Parse the tsquery stored in "buf".
*
* Each value (operand) in the query is passed to pushval. pushval can * Each value (operand) in the query is passed to pushval. pushval can
* transform the simple value to an arbitrarily complex expression using * transform the simple value to an arbitrarily complex expression using
* pushValue and pushOperator. It must push a single value with pushValue, * pushValue and pushOperator. It must push a single value with pushValue,
@ -778,12 +809,19 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
* *
* opaque is passed on to pushval as is, pushval can use it to store its * opaque is passed on to pushval as is, pushval can use it to store its
* private state. * private state.
*
* The pushval function can record soft errors via escontext.
* Callers must check SOFT_ERROR_OCCURRED to detect that.
*
* A bitmask of flags (see ts_utils.h) and an error context object
* can be provided as well. If a soft error occurs, NULL is returned.
*/ */
TSQuery TSQuery
parse_tsquery(char *buf, parse_tsquery(char *buf,
PushFunction pushval, PushFunction pushval,
Datum opaque, Datum opaque,
int flags) int flags,
Node *escontext)
{ {
struct TSQueryParserStateData state; struct TSQueryParserStateData state;
int i; int i;
@ -791,6 +829,7 @@ parse_tsquery(char *buf,
int commonlen; int commonlen;
QueryItem *ptr; QueryItem *ptr;
ListCell *cell; ListCell *cell;
bool noisy;
bool needcleanup; bool needcleanup;
int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY; int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
@ -808,15 +847,19 @@ parse_tsquery(char *buf,
else else
state.gettoken = gettoken_query_standard; state.gettoken = gettoken_query_standard;
/* emit nuisance NOTICEs only if not doing soft errors */
noisy = !(escontext && IsA(escontext, ErrorSaveContext));
/* init state */ /* init state */
state.buffer = buf; state.buffer = buf;
state.buf = buf; state.buf = buf;
state.count = 0; state.count = 0;
state.state = WAITFIRSTOPERAND; state.state = WAITFIRSTOPERAND;
state.polstr = NIL; state.polstr = NIL;
state.escontext = escontext;
/* init value parser's state */ /* init value parser's state */
state.valstate = init_tsvector_parser(state.buffer, tsv_flags); state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
/* init list of operand */ /* init list of operand */
state.sumlen = 0; state.sumlen = 0;
@ -829,11 +872,15 @@ parse_tsquery(char *buf,
close_tsvector_parser(state.valstate); close_tsvector_parser(state.valstate);
if (SOFT_ERROR_OCCURRED(escontext))
return NULL;
if (state.polstr == NIL) if (state.polstr == NIL)
{ {
ereport(NOTICE, if (noisy)
(errmsg("text-search query doesn't contain lexemes: \"%s\"", ereport(NOTICE,
state.buffer))); (errmsg("text-search query doesn't contain lexemes: \"%s\"",
state.buffer)));
query = (TSQuery) palloc(HDRSIZETQ); query = (TSQuery) palloc(HDRSIZETQ);
SET_VARSIZE(query, HDRSIZETQ); SET_VARSIZE(query, HDRSIZETQ);
query->size = 0; query->size = 0;
@ -841,7 +888,7 @@ parse_tsquery(char *buf,
} }
if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen)) if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
ereport(ERROR, ereturn(escontext, NULL,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("tsquery is too large"))); errmsg("tsquery is too large")));
commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen); commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
@ -889,7 +936,7 @@ parse_tsquery(char *buf,
* If there are QI_VALSTOP nodes, delete them and simplify the tree. * If there are QI_VALSTOP nodes, delete them and simplify the tree.
*/ */
if (needcleanup) if (needcleanup)
query = cleanup_tsquery_stopwords(query); query = cleanup_tsquery_stopwords(query, noisy);
return query; return query;
} }
@ -908,8 +955,13 @@ Datum
tsqueryin(PG_FUNCTION_ARGS) tsqueryin(PG_FUNCTION_ARGS)
{ {
char *in = PG_GETARG_CSTRING(0); char *in = PG_GETARG_CSTRING(0);
Node *escontext = fcinfo->context;
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0)); PG_RETURN_TSQUERY(parse_tsquery(in,
pushval_asis,
PointerGetDatum(NULL),
0,
escontext));
} }
/* /*

View File

@ -383,7 +383,7 @@ calcstrlen(NODE *node)
* Remove QI_VALSTOP (stopword) nodes from TSQuery. * Remove QI_VALSTOP (stopword) nodes from TSQuery.
*/ */
TSQuery TSQuery
cleanup_tsquery_stopwords(TSQuery in) cleanup_tsquery_stopwords(TSQuery in, bool noisy)
{ {
int32 len, int32 len,
lenstr, lenstr,
@ -403,8 +403,9 @@ cleanup_tsquery_stopwords(TSQuery in)
root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd); root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
if (root == NULL) if (root == NULL)
{ {
ereport(NOTICE, if (noisy)
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored"))); ereport(NOTICE,
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
out = palloc(HDRSIZETQ); out = palloc(HDRSIZETQ);
out->size = 0; out->size = 0;
SET_VARSIZE(out, HDRSIZETQ); SET_VARSIZE(out, HDRSIZETQ);

View File

@ -15,6 +15,7 @@
#include "postgres.h" #include "postgres.h"
#include "libpq/pqformat.h" #include "libpq/pqformat.h"
#include "nodes/miscnodes.h"
#include "tsearch/ts_locale.h" #include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h" #include "tsearch/ts_utils.h"
#include "utils/builtins.h" #include "utils/builtins.h"
@ -178,6 +179,7 @@ Datum
tsvectorin(PG_FUNCTION_ARGS) tsvectorin(PG_FUNCTION_ARGS)
{ {
char *buf = PG_GETARG_CSTRING(0); char *buf = PG_GETARG_CSTRING(0);
Node *escontext = fcinfo->context;
TSVectorParseState state; TSVectorParseState state;
WordEntryIN *arr; WordEntryIN *arr;
int totallen; int totallen;
@ -201,7 +203,7 @@ tsvectorin(PG_FUNCTION_ARGS)
char *cur; char *cur;
int buflen = 256; /* allocated size of tmpbuf */ int buflen = 256; /* allocated size of tmpbuf */
state = init_tsvector_parser(buf, 0); state = init_tsvector_parser(buf, 0, escontext);
arrlen = 64; arrlen = 64;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen); arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
@ -210,14 +212,14 @@ tsvectorin(PG_FUNCTION_ARGS)
while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL)) while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
{ {
if (toklen >= MAXSTRLEN) if (toklen >= MAXSTRLEN)
ereport(ERROR, ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long (%ld bytes, max %ld bytes)", errmsg("word is too long (%ld bytes, max %ld bytes)",
(long) toklen, (long) toklen,
(long) (MAXSTRLEN - 1)))); (long) (MAXSTRLEN - 1))));
if (cur - tmpbuf > MAXSTRPOS) if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR, ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)", errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
(long) (cur - tmpbuf), (long) MAXSTRPOS))); (long) (cur - tmpbuf), (long) MAXSTRPOS)));
@ -261,13 +263,17 @@ tsvectorin(PG_FUNCTION_ARGS)
close_tsvector_parser(state); close_tsvector_parser(state);
/* Did gettoken_tsvector fail? */
if (SOFT_ERROR_OCCURRED(escontext))
PG_RETURN_NULL();
if (len > 0) if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen); len = uniqueentry(arr, len, tmpbuf, &buflen);
else else
buflen = 0; buflen = 0;
if (buflen > MAXSTRPOS) if (buflen > MAXSTRPOS)
ereport(ERROR, ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS))); errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
@ -285,6 +291,7 @@ tsvectorin(PG_FUNCTION_ARGS)
stroff += arr[i].entry.len; stroff += arr[i].entry.len;
if (arr[i].entry.haspos) if (arr[i].entry.haspos)
{ {
/* This should be unreachable because of MAXNUMPOS restrictions */
if (arr[i].poslen > 0xFFFF) if (arr[i].poslen > 0xFFFF)
elog(ERROR, "positions array too long"); elog(ERROR, "positions array too long");

View File

@ -20,9 +20,19 @@
/* /*
* Private state of tsvector parser. Note that tsquery also uses this code to * Private state of tsvector parser. Note that tsquery also uses this code to
* parse its input, hence the boolean flags. The two flags are both true or * parse its input, hence the boolean flags. The oprisdelim and is_tsquery
* both false in current usage, but we keep them separate for clarity. * flags are both true or both false in current usage, but we keep them
* separate for clarity.
*
* If oprisdelim is set, the following characters are treated as delimiters
* (in addition to whitespace): ! | & ( )
*
* is_tsquery affects *only* the content of error messages. * is_tsquery affects *only* the content of error messages.
*
* is_web can be true to further modify tsquery parsing.
*
* If escontext is an ErrorSaveContext node, then soft errors can be
* captured there rather than being thrown.
*/ */
struct TSVectorParseStateData struct TSVectorParseStateData
{ {
@ -34,16 +44,17 @@ struct TSVectorParseStateData
bool oprisdelim; /* treat ! | & ( ) as delimiters? */ bool oprisdelim; /* treat ! | & ( ) as delimiters? */
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */ bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
bool is_web; /* we're in websearch_to_tsquery() */ bool is_web; /* we're in websearch_to_tsquery() */
Node *escontext; /* for soft error reporting */
}; };
/* /*
* Initializes parser for the input string. If oprisdelim is set, the * Initializes a parser state object for the given input string.
* following characters are treated as delimiters in addition to whitespace: * A bitmask of flags (see ts_utils.h) and an error context object
* ! | & ( ) * can be provided as well.
*/ */
TSVectorParseState TSVectorParseState
init_tsvector_parser(char *input, int flags) init_tsvector_parser(char *input, int flags, Node *escontext)
{ {
TSVectorParseState state; TSVectorParseState state;
@ -56,12 +67,15 @@ init_tsvector_parser(char *input, int flags)
state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0; state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0; state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
state->is_web = (flags & P_TSV_IS_WEB) != 0; state->is_web = (flags & P_TSV_IS_WEB) != 0;
state->escontext = escontext;
return state; return state;
} }
/* /*
* Reinitializes parser to parse 'input', instead of previous input. * Reinitializes parser to parse 'input', instead of previous input.
*
* Note that bufstart (the string reported in errors) is not changed.
*/ */
void void
reset_tsvector_parser(TSVectorParseState state, char *input) reset_tsvector_parser(TSVectorParseState state, char *input)
@ -122,23 +136,26 @@ do { \
#define WAITPOSDELIM 7 #define WAITPOSDELIM 7
#define WAITCHARCMPLX 8 #define WAITCHARCMPLX 8
#define PRSSYNTAXERROR prssyntaxerror(state) #define PRSSYNTAXERROR return prssyntaxerror(state)
static void static bool
prssyntaxerror(TSVectorParseState state) prssyntaxerror(TSVectorParseState state)
{ {
ereport(ERROR, errsave(state->escontext,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
state->is_tsquery ? state->is_tsquery ?
errmsg("syntax error in tsquery: \"%s\"", state->bufstart) : errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
errmsg("syntax error in tsvector: \"%s\"", state->bufstart))); errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
/* In a soft-error situation, return false as a convenience for the caller */
return false;
} }
/* /*
* Get next token from string being parsed. Returns true if successful, * Get next token from string being parsed. Returns true if successful,
* false if end of input string is reached. On success, these output * false if end of input string is reached or a soft error occurs.
* parameters are filled in: *
* On success, these output parameters are filled in:
* *
* *strval pointer to token * *strval pointer to token
* *lenval length of *strval * *lenval length of *strval
@ -149,7 +166,11 @@ prssyntaxerror(TSVectorParseState state)
* *poslen number of elements in *pos_ptr * *poslen number of elements in *pos_ptr
* *endptr scan resumption point * *endptr scan resumption point
* *
* Pass NULL for unwanted output parameters. * Pass NULL for any unwanted output parameters.
*
* If state->escontext is an ErrorSaveContext, then caller must check
* SOFT_ERROR_OCCURRED() to determine whether a "false" result means
* error or normal end-of-string.
*/ */
bool bool
gettoken_tsvector(TSVectorParseState state, gettoken_tsvector(TSVectorParseState state,
@ -195,7 +216,7 @@ gettoken_tsvector(TSVectorParseState state,
else if (statecode == WAITNEXTCHAR) else if (statecode == WAITNEXTCHAR)
{ {
if (*(state->prsbuf) == '\0') if (*(state->prsbuf) == '\0')
ereport(ERROR, ereturn(state->escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character: \"%s\"", errmsg("there is no escaped character: \"%s\"",
state->bufstart))); state->bufstart)));
@ -313,7 +334,7 @@ gettoken_tsvector(TSVectorParseState state,
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf))); WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
/* we cannot get here in tsquery, so no need for 2 errmsgs */ /* we cannot get here in tsquery, so no need for 2 errmsgs */
if (WEP_GETPOS(pos[npos - 1]) == 0) if (WEP_GETPOS(pos[npos - 1]) == 0)
ereport(ERROR, ereturn(state->escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector: \"%s\"", errmsg("wrong position info in tsvector: \"%s\"",
state->bufstart))); state->bufstart)));

View File

@ -25,11 +25,13 @@
struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */ struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
typedef struct TSVectorParseStateData *TSVectorParseState; typedef struct TSVectorParseStateData *TSVectorParseState;
/* flag bits that can be passed to init_tsvector_parser: */
#define P_TSV_OPR_IS_DELIM (1 << 0) #define P_TSV_OPR_IS_DELIM (1 << 0)
#define P_TSV_IS_TSQUERY (1 << 1) #define P_TSV_IS_TSQUERY (1 << 1)
#define P_TSV_IS_WEB (1 << 2) #define P_TSV_IS_WEB (1 << 2)
extern TSVectorParseState init_tsvector_parser(char *input, int flags); extern TSVectorParseState init_tsvector_parser(char *input, int flags,
Node *escontext);
extern void reset_tsvector_parser(TSVectorParseState state, char *input); extern void reset_tsvector_parser(TSVectorParseState state, char *input);
extern bool gettoken_tsvector(TSVectorParseState state, extern bool gettoken_tsvector(TSVectorParseState state,
char **strval, int *lenval, char **strval, int *lenval,
@ -58,13 +60,15 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
* QueryOperand struct */ * QueryOperand struct */
bool prefix); bool prefix);
/* flag bits that can be passed to parse_tsquery: */
#define P_TSQ_PLAIN (1 << 0) #define P_TSQ_PLAIN (1 << 0)
#define P_TSQ_WEB (1 << 1) #define P_TSQ_WEB (1 << 1)
extern TSQuery parse_tsquery(char *buf, extern TSQuery parse_tsquery(char *buf,
PushFunction pushval, PushFunction pushval,
Datum opaque, Datum opaque,
int flags); int flags,
Node *escontext);
/* Functions for use by PushFunction implementations */ /* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state, extern void pushValue(TSQueryParserState state,
@ -222,7 +226,7 @@ extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
* TSQuery Utilities * TSQuery Utilities
*/ */
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len); extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
extern TSQuery cleanup_tsquery_stopwords(TSQuery in); extern TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy);
typedef struct QTNode typedef struct QTNode
{ {

View File

@ -89,6 +89,25 @@ SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
ERROR: syntax error in tsvector: "'' '1' '2'" ERROR: syntax error in tsvector: "'' '1' '2'"
LINE 1: SELECT $$'' '1' '2'$$::tsvector; LINE 1: SELECT $$'' '1' '2'$$::tsvector;
^ ^
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsvector');
pg_input_is_valid
-------------------
t
(1 row)
SELECT pg_input_is_valid($$''$$, 'tsvector');
pg_input_is_valid
-------------------
f
(1 row)
SELECT pg_input_error_message($$''$$, 'tsvector');
pg_input_error_message
--------------------------------
syntax error in tsvector: "''"
(1 row)
--Base tsquery test --Base tsquery test
SELECT '1'::tsquery; SELECT '1'::tsquery;
tsquery tsquery
@ -372,6 +391,31 @@ SELECT '!!a & !!b'::tsquery;
!!'a' & !!'b' !!'a' & !!'b'
(1 row) (1 row)
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsquery');
pg_input_is_valid
-------------------
t
(1 row)
SELECT pg_input_is_valid('foo!', 'tsquery');
pg_input_is_valid
-------------------
f
(1 row)
SELECT pg_input_error_message('foo!', 'tsquery');
pg_input_error_message
---------------------------------
syntax error in tsquery: "foo!"
(1 row)
SELECT pg_input_error_message('a <100000> b', 'tsquery');
pg_input_error_message
---------------------------------------------------------------------------------------
distance in phrase operator must be an integer value between zero and 16384 inclusive
(1 row)
--comparisons --comparisons
SELECT 'a' < 'b & c'::tsquery as "true"; SELECT 'a' < 'b & c'::tsquery as "true";
true true

View File

@ -19,6 +19,11 @@ SELECT '''w'':4A,3B,2C,1D,5 a:8';
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B'; SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsvector');
SELECT pg_input_is_valid($$''$$, 'tsvector');
SELECT pg_input_error_message($$''$$, 'tsvector');
--Base tsquery test --Base tsquery test
SELECT '1'::tsquery; SELECT '1'::tsquery;
SELECT '1 '::tsquery; SELECT '1 '::tsquery;
@ -68,6 +73,12 @@ SELECT 'a & !!b'::tsquery;
SELECT '!!a & b'::tsquery; SELECT '!!a & b'::tsquery;
SELECT '!!a & !!b'::tsquery; SELECT '!!a & !!b'::tsquery;
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsquery');
SELECT pg_input_is_valid('foo!', 'tsquery');
SELECT pg_input_error_message('foo!', 'tsquery');
SELECT pg_input_error_message('a <100000> b', 'tsquery');
--comparisons --comparisons
SELECT 'a' < 'b & c'::tsquery as "true"; SELECT 'a' < 'b & c'::tsquery as "true";
SELECT 'a' > 'b & c'::tsquery as "false"; SELECT 'a' > 'b & c'::tsquery as "false";