mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-06 15:24:56 +08:00
Convert tsqueryin and tsvectorin to report errors softly.
This is slightly tedious because the adjustments cascade through a couple of levels of subroutines, but it's not very hard. I chose to avoid changing function signatures more than absolutely necessary, by passing the escontext pointer in existing structs where possible.

tsquery's nuisance NOTICEs about empty queries are suppressed in soft-error mode, since they're not errors and we surely don't want them to be shown to the user anyway. Maybe that whole behavior should be reconsidered.

Discussion: https://postgr.es/m/3824377.1672076822@sss.pgh.pa.us
This commit is contained in:
parent
eb8312a22a
commit
78212f2101
@ -594,7 +594,8 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
|
|||||||
query = parse_tsquery(text_to_cstring(in),
|
query = parse_tsquery(text_to_cstring(in),
|
||||||
pushval_morph,
|
pushval_morph,
|
||||||
PointerGetDatum(&data),
|
PointerGetDatum(&data),
|
||||||
0);
|
0,
|
||||||
|
NULL);
|
||||||
|
|
||||||
PG_RETURN_TSQUERY(query);
|
PG_RETURN_TSQUERY(query);
|
||||||
}
|
}
|
||||||
@ -630,7 +631,8 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
|
|||||||
query = parse_tsquery(text_to_cstring(in),
|
query = parse_tsquery(text_to_cstring(in),
|
||||||
pushval_morph,
|
pushval_morph,
|
||||||
PointerGetDatum(&data),
|
PointerGetDatum(&data),
|
||||||
P_TSQ_PLAIN);
|
P_TSQ_PLAIN,
|
||||||
|
NULL);
|
||||||
|
|
||||||
PG_RETURN_POINTER(query);
|
PG_RETURN_POINTER(query);
|
||||||
}
|
}
|
||||||
@ -667,7 +669,8 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
|
|||||||
query = parse_tsquery(text_to_cstring(in),
|
query = parse_tsquery(text_to_cstring(in),
|
||||||
pushval_morph,
|
pushval_morph,
|
||||||
PointerGetDatum(&data),
|
PointerGetDatum(&data),
|
||||||
P_TSQ_PLAIN);
|
P_TSQ_PLAIN,
|
||||||
|
NULL);
|
||||||
|
|
||||||
PG_RETURN_TSQUERY(query);
|
PG_RETURN_TSQUERY(query);
|
||||||
}
|
}
|
||||||
@ -704,7 +707,8 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
|
|||||||
query = parse_tsquery(text_to_cstring(in),
|
query = parse_tsquery(text_to_cstring(in),
|
||||||
pushval_morph,
|
pushval_morph,
|
||||||
PointerGetDatum(&data),
|
PointerGetDatum(&data),
|
||||||
P_TSQ_WEB);
|
P_TSQ_WEB,
|
||||||
|
NULL);
|
||||||
|
|
||||||
PG_RETURN_TSQUERY(query);
|
PG_RETURN_TSQUERY(query);
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
#include "libpq/pqformat.h"
|
#include "libpq/pqformat.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
|
#include "nodes/miscnodes.h"
|
||||||
#include "tsearch/ts_locale.h"
|
#include "tsearch/ts_locale.h"
|
||||||
#include "tsearch/ts_type.h"
|
#include "tsearch/ts_type.h"
|
||||||
#include "tsearch/ts_utils.h"
|
#include "tsearch/ts_utils.h"
|
||||||
@ -58,10 +59,16 @@ typedef enum
|
|||||||
/*
|
/*
|
||||||
* get token from query string
|
* get token from query string
|
||||||
*
|
*
|
||||||
* *operator is filled in with OP_* when return values is PT_OPR,
|
* All arguments except "state" are output arguments.
|
||||||
* but *weight could contain a distance value in case of phrase operator.
|
|
||||||
* *strval, *lenval and *weight are filled in when return value is PT_VAL
|
|
||||||
*
|
*
|
||||||
|
* If return value is PT_OPR, then *operator is filled with an OP_* code
|
||||||
|
* and *weight will contain a distance value in case of phrase operator.
|
||||||
|
*
|
||||||
|
* If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
|
||||||
|
* are filled.
|
||||||
|
*
|
||||||
|
* If PT_ERR is returned then a soft error has occurred. If state->escontext
|
||||||
|
* isn't already filled then this should be reported as a generic parse error.
|
||||||
*/
|
*/
|
||||||
typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
|
typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
|
||||||
int *lenval, char **strval,
|
int *lenval, char **strval,
|
||||||
@ -93,6 +100,9 @@ struct TSQueryParserStateData
|
|||||||
|
|
||||||
/* state for value's parser */
|
/* state for value's parser */
|
||||||
TSVectorParseState valstate;
|
TSVectorParseState valstate;
|
||||||
|
|
||||||
|
/* context object for soft errors - must match valstate's escontext */
|
||||||
|
Node *escontext;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -194,7 +204,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
|
|||||||
if (ptr == endptr)
|
if (ptr == endptr)
|
||||||
return false;
|
return false;
|
||||||
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
|
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
|
||||||
ereport(ERROR,
|
ereturn(pstate->escontext, false,
|
||||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
|
errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
|
||||||
MAXENTRYPOS)));
|
MAXENTRYPOS)));
|
||||||
@ -301,10 +311,8 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
|
|||||||
}
|
}
|
||||||
else if (t_iseq(state->buf, ':'))
|
else if (t_iseq(state->buf, ':'))
|
||||||
{
|
{
|
||||||
ereport(ERROR,
|
/* generic syntax error message is fine */
|
||||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
return PT_ERR;
|
||||||
errmsg("syntax error in tsquery: \"%s\"",
|
|
||||||
state->buffer)));
|
|
||||||
}
|
}
|
||||||
else if (!t_isspace(state->buf))
|
else if (!t_isspace(state->buf))
|
||||||
{
|
{
|
||||||
@ -320,12 +328,17 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
|
|||||||
state->state = WAITOPERATOR;
|
state->state = WAITOPERATOR;
|
||||||
return PT_VAL;
|
return PT_VAL;
|
||||||
}
|
}
|
||||||
|
else if (SOFT_ERROR_OCCURRED(state->escontext))
|
||||||
|
{
|
||||||
|
/* gettoken_tsvector reported a soft error */
|
||||||
|
return PT_ERR;
|
||||||
|
}
|
||||||
else if (state->state == WAITFIRSTOPERAND)
|
else if (state->state == WAITFIRSTOPERAND)
|
||||||
{
|
{
|
||||||
return PT_END;
|
return PT_END;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ereport(ERROR,
|
ereturn(state->escontext, PT_ERR,
|
||||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||||
errmsg("no operand in tsquery: \"%s\"",
|
errmsg("no operand in tsquery: \"%s\"",
|
||||||
state->buffer)));
|
state->buffer)));
|
||||||
@ -354,6 +367,11 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
|
|||||||
*operator = OP_PHRASE;
|
*operator = OP_PHRASE;
|
||||||
return PT_OPR;
|
return PT_OPR;
|
||||||
}
|
}
|
||||||
|
else if (SOFT_ERROR_OCCURRED(state->escontext))
|
||||||
|
{
|
||||||
|
/* parse_phrase_operator reported a soft error */
|
||||||
|
return PT_ERR;
|
||||||
|
}
|
||||||
else if (t_iseq(state->buf, ')'))
|
else if (t_iseq(state->buf, ')'))
|
||||||
{
|
{
|
||||||
state->buf++;
|
state->buf++;
|
||||||
@ -438,6 +456,11 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
|
|||||||
state->state = WAITOPERATOR;
|
state->state = WAITOPERATOR;
|
||||||
return PT_VAL;
|
return PT_VAL;
|
||||||
}
|
}
|
||||||
|
else if (SOFT_ERROR_OCCURRED(state->escontext))
|
||||||
|
{
|
||||||
|
/* gettoken_tsvector reported a soft error */
|
||||||
|
return PT_ERR;
|
||||||
|
}
|
||||||
else if (state->state == WAITFIRSTOPERAND)
|
else if (state->state == WAITFIRSTOPERAND)
|
||||||
{
|
{
|
||||||
return PT_END;
|
return PT_END;
|
||||||
@ -529,12 +552,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
|
|||||||
QueryOperand *tmp;
|
QueryOperand *tmp;
|
||||||
|
|
||||||
if (distance >= MAXSTRPOS)
|
if (distance >= MAXSTRPOS)
|
||||||
ereport(ERROR,
|
ereturn(state->escontext,,
|
||||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
errmsg("value is too big in tsquery: \"%s\"",
|
errmsg("value is too big in tsquery: \"%s\"",
|
||||||
state->buffer)));
|
state->buffer)));
|
||||||
if (lenval >= MAXSTRLEN)
|
if (lenval >= MAXSTRLEN)
|
||||||
ereport(ERROR,
|
ereturn(state->escontext,,
|
||||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
errmsg("operand is too long in tsquery: \"%s\"",
|
errmsg("operand is too long in tsquery: \"%s\"",
|
||||||
state->buffer)));
|
state->buffer)));
|
||||||
@ -562,7 +585,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool
|
|||||||
pg_crc32 valcrc;
|
pg_crc32 valcrc;
|
||||||
|
|
||||||
if (lenval >= MAXSTRLEN)
|
if (lenval >= MAXSTRLEN)
|
||||||
ereport(ERROR,
|
ereturn(state->escontext,,
|
||||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
errmsg("word is too long in tsquery: \"%s\"",
|
errmsg("word is too long in tsquery: \"%s\"",
|
||||||
state->buffer)));
|
state->buffer)));
|
||||||
@ -686,11 +709,17 @@ makepol(TSQueryParserState state,
|
|||||||
return;
|
return;
|
||||||
case PT_ERR:
|
case PT_ERR:
|
||||||
default:
|
default:
|
||||||
ereport(ERROR,
|
/* don't overwrite a soft error saved by gettoken function */
|
||||||
|
if (!SOFT_ERROR_OCCURRED(state->escontext))
|
||||||
|
errsave(state->escontext,
|
||||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||||
errmsg("syntax error in tsquery: \"%s\"",
|
errmsg("syntax error in tsquery: \"%s\"",
|
||||||
state->buffer)));
|
state->buffer)));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
/* detect soft error in pushval or recursion */
|
||||||
|
if (SOFT_ERROR_OCCURRED(state->escontext))
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
|
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
|
||||||
@ -769,6 +798,8 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
* Parse the tsquery stored in "buf".
|
||||||
|
*
|
||||||
* Each value (operand) in the query is passed to pushval. pushval can
|
* Each value (operand) in the query is passed to pushval. pushval can
|
||||||
* transform the simple value to an arbitrarily complex expression using
|
* transform the simple value to an arbitrarily complex expression using
|
||||||
* pushValue and pushOperator. It must push a single value with pushValue,
|
* pushValue and pushOperator. It must push a single value with pushValue,
|
||||||
@ -778,12 +809,19 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
|
|||||||
*
|
*
|
||||||
* opaque is passed on to pushval as is, pushval can use it to store its
|
* opaque is passed on to pushval as is, pushval can use it to store its
|
||||||
* private state.
|
* private state.
|
||||||
|
*
|
||||||
|
* The pushval function can record soft errors via escontext.
|
||||||
|
* Callers must check SOFT_ERROR_OCCURRED to detect that.
|
||||||
|
*
|
||||||
|
* A bitmask of flags (see ts_utils.h) and an error context object
|
||||||
|
* can be provided as well. If a soft error occurs, NULL is returned.
|
||||||
*/
|
*/
|
||||||
TSQuery
|
TSQuery
|
||||||
parse_tsquery(char *buf,
|
parse_tsquery(char *buf,
|
||||||
PushFunction pushval,
|
PushFunction pushval,
|
||||||
Datum opaque,
|
Datum opaque,
|
||||||
int flags)
|
int flags,
|
||||||
|
Node *escontext)
|
||||||
{
|
{
|
||||||
struct TSQueryParserStateData state;
|
struct TSQueryParserStateData state;
|
||||||
int i;
|
int i;
|
||||||
@ -791,6 +829,7 @@ parse_tsquery(char *buf,
|
|||||||
int commonlen;
|
int commonlen;
|
||||||
QueryItem *ptr;
|
QueryItem *ptr;
|
||||||
ListCell *cell;
|
ListCell *cell;
|
||||||
|
bool noisy;
|
||||||
bool needcleanup;
|
bool needcleanup;
|
||||||
int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
|
int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
|
||||||
|
|
||||||
@ -808,15 +847,19 @@ parse_tsquery(char *buf,
|
|||||||
else
|
else
|
||||||
state.gettoken = gettoken_query_standard;
|
state.gettoken = gettoken_query_standard;
|
||||||
|
|
||||||
|
/* emit nuisance NOTICEs only if not doing soft errors */
|
||||||
|
noisy = !(escontext && IsA(escontext, ErrorSaveContext));
|
||||||
|
|
||||||
/* init state */
|
/* init state */
|
||||||
state.buffer = buf;
|
state.buffer = buf;
|
||||||
state.buf = buf;
|
state.buf = buf;
|
||||||
state.count = 0;
|
state.count = 0;
|
||||||
state.state = WAITFIRSTOPERAND;
|
state.state = WAITFIRSTOPERAND;
|
||||||
state.polstr = NIL;
|
state.polstr = NIL;
|
||||||
|
state.escontext = escontext;
|
||||||
|
|
||||||
/* init value parser's state */
|
/* init value parser's state */
|
||||||
state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
|
state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
|
||||||
|
|
||||||
/* init list of operand */
|
/* init list of operand */
|
||||||
state.sumlen = 0;
|
state.sumlen = 0;
|
||||||
@ -829,8 +872,12 @@ parse_tsquery(char *buf,
|
|||||||
|
|
||||||
close_tsvector_parser(state.valstate);
|
close_tsvector_parser(state.valstate);
|
||||||
|
|
||||||
|
if (SOFT_ERROR_OCCURRED(escontext))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
if (state.polstr == NIL)
|
if (state.polstr == NIL)
|
||||||
{
|
{
|
||||||
|
if (noisy)
|
||||||
ereport(NOTICE,
|
ereport(NOTICE,
|
||||||
(errmsg("text-search query doesn't contain lexemes: \"%s\"",
|
(errmsg("text-search query doesn't contain lexemes: \"%s\"",
|
||||||
state.buffer)));
|
state.buffer)));
|
||||||
@ -841,7 +888,7 @@ parse_tsquery(char *buf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
|
if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
|
||||||
ereport(ERROR,
|
ereturn(escontext, NULL,
|
||||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
errmsg("tsquery is too large")));
|
errmsg("tsquery is too large")));
|
||||||
commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
|
commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
|
||||||
@ -889,7 +936,7 @@ parse_tsquery(char *buf,
|
|||||||
* If there are QI_VALSTOP nodes, delete them and simplify the tree.
|
* If there are QI_VALSTOP nodes, delete them and simplify the tree.
|
||||||
*/
|
*/
|
||||||
if (needcleanup)
|
if (needcleanup)
|
||||||
query = cleanup_tsquery_stopwords(query);
|
query = cleanup_tsquery_stopwords(query, noisy);
|
||||||
|
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
@ -908,8 +955,13 @@ Datum
|
|||||||
tsqueryin(PG_FUNCTION_ARGS)
|
tsqueryin(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
char *in = PG_GETARG_CSTRING(0);
|
char *in = PG_GETARG_CSTRING(0);
|
||||||
|
Node *escontext = fcinfo->context;
|
||||||
|
|
||||||
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
|
PG_RETURN_TSQUERY(parse_tsquery(in,
|
||||||
|
pushval_asis,
|
||||||
|
PointerGetDatum(NULL),
|
||||||
|
0,
|
||||||
|
escontext));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -383,7 +383,7 @@ calcstrlen(NODE *node)
|
|||||||
* Remove QI_VALSTOP (stopword) nodes from TSQuery.
|
* Remove QI_VALSTOP (stopword) nodes from TSQuery.
|
||||||
*/
|
*/
|
||||||
TSQuery
|
TSQuery
|
||||||
cleanup_tsquery_stopwords(TSQuery in)
|
cleanup_tsquery_stopwords(TSQuery in, bool noisy)
|
||||||
{
|
{
|
||||||
int32 len,
|
int32 len,
|
||||||
lenstr,
|
lenstr,
|
||||||
@ -403,6 +403,7 @@ cleanup_tsquery_stopwords(TSQuery in)
|
|||||||
root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
|
root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
|
||||||
if (root == NULL)
|
if (root == NULL)
|
||||||
{
|
{
|
||||||
|
if (noisy)
|
||||||
ereport(NOTICE,
|
ereport(NOTICE,
|
||||||
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
|
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
|
||||||
out = palloc(HDRSIZETQ);
|
out = palloc(HDRSIZETQ);
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "libpq/pqformat.h"
|
#include "libpq/pqformat.h"
|
||||||
|
#include "nodes/miscnodes.h"
|
||||||
#include "tsearch/ts_locale.h"
|
#include "tsearch/ts_locale.h"
|
||||||
#include "tsearch/ts_utils.h"
|
#include "tsearch/ts_utils.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
@ -178,6 +179,7 @@ Datum
|
|||||||
tsvectorin(PG_FUNCTION_ARGS)
|
tsvectorin(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
char *buf = PG_GETARG_CSTRING(0);
|
char *buf = PG_GETARG_CSTRING(0);
|
||||||
|
Node *escontext = fcinfo->context;
|
||||||
TSVectorParseState state;
|
TSVectorParseState state;
|
||||||
WordEntryIN *arr;
|
WordEntryIN *arr;
|
||||||
int totallen;
|
int totallen;
|
||||||
@ -201,7 +203,7 @@ tsvectorin(PG_FUNCTION_ARGS)
|
|||||||
char *cur;
|
char *cur;
|
||||||
int buflen = 256; /* allocated size of tmpbuf */
|
int buflen = 256; /* allocated size of tmpbuf */
|
||||||
|
|
||||||
state = init_tsvector_parser(buf, 0);
|
state = init_tsvector_parser(buf, 0, escontext);
|
||||||
|
|
||||||
arrlen = 64;
|
arrlen = 64;
|
||||||
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
|
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
|
||||||
@ -210,14 +212,14 @@ tsvectorin(PG_FUNCTION_ARGS)
|
|||||||
while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
|
while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
|
||||||
{
|
{
|
||||||
if (toklen >= MAXSTRLEN)
|
if (toklen >= MAXSTRLEN)
|
||||||
ereport(ERROR,
|
ereturn(escontext, (Datum) 0,
|
||||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
errmsg("word is too long (%ld bytes, max %ld bytes)",
|
errmsg("word is too long (%ld bytes, max %ld bytes)",
|
||||||
(long) toklen,
|
(long) toklen,
|
||||||
(long) (MAXSTRLEN - 1))));
|
(long) (MAXSTRLEN - 1))));
|
||||||
|
|
||||||
if (cur - tmpbuf > MAXSTRPOS)
|
if (cur - tmpbuf > MAXSTRPOS)
|
||||||
ereport(ERROR,
|
ereturn(escontext, (Datum) 0,
|
||||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
|
errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
|
||||||
(long) (cur - tmpbuf), (long) MAXSTRPOS)));
|
(long) (cur - tmpbuf), (long) MAXSTRPOS)));
|
||||||
@ -261,13 +263,17 @@ tsvectorin(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
close_tsvector_parser(state);
|
close_tsvector_parser(state);
|
||||||
|
|
||||||
|
/* Did gettoken_tsvector fail? */
|
||||||
|
if (SOFT_ERROR_OCCURRED(escontext))
|
||||||
|
PG_RETURN_NULL();
|
||||||
|
|
||||||
if (len > 0)
|
if (len > 0)
|
||||||
len = uniqueentry(arr, len, tmpbuf, &buflen);
|
len = uniqueentry(arr, len, tmpbuf, &buflen);
|
||||||
else
|
else
|
||||||
buflen = 0;
|
buflen = 0;
|
||||||
|
|
||||||
if (buflen > MAXSTRPOS)
|
if (buflen > MAXSTRPOS)
|
||||||
ereport(ERROR,
|
ereturn(escontext, (Datum) 0,
|
||||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
|
errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
|
||||||
|
|
||||||
@ -285,6 +291,7 @@ tsvectorin(PG_FUNCTION_ARGS)
|
|||||||
stroff += arr[i].entry.len;
|
stroff += arr[i].entry.len;
|
||||||
if (arr[i].entry.haspos)
|
if (arr[i].entry.haspos)
|
||||||
{
|
{
|
||||||
|
/* This should be unreachable because of MAXNUMPOS restrictions */
|
||||||
if (arr[i].poslen > 0xFFFF)
|
if (arr[i].poslen > 0xFFFF)
|
||||||
elog(ERROR, "positions array too long");
|
elog(ERROR, "positions array too long");
|
||||||
|
|
||||||
|
@ -20,9 +20,19 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Private state of tsvector parser. Note that tsquery also uses this code to
|
* Private state of tsvector parser. Note that tsquery also uses this code to
|
||||||
* parse its input, hence the boolean flags. The two flags are both true or
|
* parse its input, hence the boolean flags. The oprisdelim and is_tsquery
|
||||||
* both false in current usage, but we keep them separate for clarity.
|
* flags are both true or both false in current usage, but we keep them
|
||||||
|
* separate for clarity.
|
||||||
|
*
|
||||||
|
* If oprisdelim is set, the following characters are treated as delimiters
|
||||||
|
* (in addition to whitespace): ! | & ( )
|
||||||
|
*
|
||||||
* is_tsquery affects *only* the content of error messages.
|
* is_tsquery affects *only* the content of error messages.
|
||||||
|
*
|
||||||
|
* is_web can be true to further modify tsquery parsing.
|
||||||
|
*
|
||||||
|
* If escontext is an ErrorSaveContext node, then soft errors can be
|
||||||
|
* captured there rather than being thrown.
|
||||||
*/
|
*/
|
||||||
struct TSVectorParseStateData
|
struct TSVectorParseStateData
|
||||||
{
|
{
|
||||||
@ -34,16 +44,17 @@ struct TSVectorParseStateData
|
|||||||
bool oprisdelim; /* treat ! | & ( ) as delimiters? */
|
bool oprisdelim; /* treat ! | & ( ) as delimiters? */
|
||||||
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
|
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
|
||||||
bool is_web; /* we're in websearch_to_tsquery() */
|
bool is_web; /* we're in websearch_to_tsquery() */
|
||||||
|
Node *escontext; /* for soft error reporting */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initializes parser for the input string. If oprisdelim is set, the
|
* Initializes a parser state object for the given input string.
|
||||||
* following characters are treated as delimiters in addition to whitespace:
|
* A bitmask of flags (see ts_utils.h) and an error context object
|
||||||
* ! | & ( )
|
* can be provided as well.
|
||||||
*/
|
*/
|
||||||
TSVectorParseState
|
TSVectorParseState
|
||||||
init_tsvector_parser(char *input, int flags)
|
init_tsvector_parser(char *input, int flags, Node *escontext)
|
||||||
{
|
{
|
||||||
TSVectorParseState state;
|
TSVectorParseState state;
|
||||||
|
|
||||||
@ -56,12 +67,15 @@ init_tsvector_parser(char *input, int flags)
|
|||||||
state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
|
state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
|
||||||
state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
|
state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
|
||||||
state->is_web = (flags & P_TSV_IS_WEB) != 0;
|
state->is_web = (flags & P_TSV_IS_WEB) != 0;
|
||||||
|
state->escontext = escontext;
|
||||||
|
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reinitializes parser to parse 'input', instead of previous input.
|
* Reinitializes parser to parse 'input', instead of previous input.
|
||||||
|
*
|
||||||
|
* Note that bufstart (the string reported in errors) is not changed.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
reset_tsvector_parser(TSVectorParseState state, char *input)
|
reset_tsvector_parser(TSVectorParseState state, char *input)
|
||||||
@ -122,23 +136,26 @@ do { \
|
|||||||
#define WAITPOSDELIM 7
|
#define WAITPOSDELIM 7
|
||||||
#define WAITCHARCMPLX 8
|
#define WAITCHARCMPLX 8
|
||||||
|
|
||||||
#define PRSSYNTAXERROR prssyntaxerror(state)
|
#define PRSSYNTAXERROR return prssyntaxerror(state)
|
||||||
|
|
||||||
static void
|
static bool
|
||||||
prssyntaxerror(TSVectorParseState state)
|
prssyntaxerror(TSVectorParseState state)
|
||||||
{
|
{
|
||||||
ereport(ERROR,
|
errsave(state->escontext,
|
||||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||||
state->is_tsquery ?
|
state->is_tsquery ?
|
||||||
errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
|
errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
|
||||||
errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
|
errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
|
||||||
|
/* In a soft-error situation, return false as a convenience for the caller */
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get next token from string being parsed. Returns true if successful,
|
* Get next token from string being parsed. Returns true if successful,
|
||||||
* false if end of input string is reached. On success, these output
|
* false if end of input string is reached or a soft error occurs.
|
||||||
* parameters are filled in:
|
*
|
||||||
|
* On success, these output parameters are filled in:
|
||||||
*
|
*
|
||||||
* *strval pointer to token
|
* *strval pointer to token
|
||||||
* *lenval length of *strval
|
* *lenval length of *strval
|
||||||
@ -149,7 +166,11 @@ prssyntaxerror(TSVectorParseState state)
|
|||||||
* *poslen number of elements in *pos_ptr
|
* *poslen number of elements in *pos_ptr
|
||||||
* *endptr scan resumption point
|
* *endptr scan resumption point
|
||||||
*
|
*
|
||||||
* Pass NULL for unwanted output parameters.
|
* Pass NULL for any unwanted output parameters.
|
||||||
|
*
|
||||||
|
* If state->escontext is an ErrorSaveContext, then caller must check
|
||||||
|
* SOFT_ERROR_OCCURRED() to determine whether a "false" result means
|
||||||
|
* error or normal end-of-string.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
gettoken_tsvector(TSVectorParseState state,
|
gettoken_tsvector(TSVectorParseState state,
|
||||||
@ -195,7 +216,7 @@ gettoken_tsvector(TSVectorParseState state,
|
|||||||
else if (statecode == WAITNEXTCHAR)
|
else if (statecode == WAITNEXTCHAR)
|
||||||
{
|
{
|
||||||
if (*(state->prsbuf) == '\0')
|
if (*(state->prsbuf) == '\0')
|
||||||
ereport(ERROR,
|
ereturn(state->escontext, false,
|
||||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||||
errmsg("there is no escaped character: \"%s\"",
|
errmsg("there is no escaped character: \"%s\"",
|
||||||
state->bufstart)));
|
state->bufstart)));
|
||||||
@ -313,7 +334,7 @@ gettoken_tsvector(TSVectorParseState state,
|
|||||||
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
|
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
|
||||||
/* we cannot get here in tsquery, so no need for 2 errmsgs */
|
/* we cannot get here in tsquery, so no need for 2 errmsgs */
|
||||||
if (WEP_GETPOS(pos[npos - 1]) == 0)
|
if (WEP_GETPOS(pos[npos - 1]) == 0)
|
||||||
ereport(ERROR,
|
ereturn(state->escontext, false,
|
||||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||||
errmsg("wrong position info in tsvector: \"%s\"",
|
errmsg("wrong position info in tsvector: \"%s\"",
|
||||||
state->bufstart)));
|
state->bufstart)));
|
||||||
|
@ -25,11 +25,13 @@
|
|||||||
struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
|
struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
|
||||||
typedef struct TSVectorParseStateData *TSVectorParseState;
|
typedef struct TSVectorParseStateData *TSVectorParseState;
|
||||||
|
|
||||||
|
/* flag bits that can be passed to init_tsvector_parser: */
|
||||||
#define P_TSV_OPR_IS_DELIM (1 << 0)
|
#define P_TSV_OPR_IS_DELIM (1 << 0)
|
||||||
#define P_TSV_IS_TSQUERY (1 << 1)
|
#define P_TSV_IS_TSQUERY (1 << 1)
|
||||||
#define P_TSV_IS_WEB (1 << 2)
|
#define P_TSV_IS_WEB (1 << 2)
|
||||||
|
|
||||||
extern TSVectorParseState init_tsvector_parser(char *input, int flags);
|
extern TSVectorParseState init_tsvector_parser(char *input, int flags,
|
||||||
|
Node *escontext);
|
||||||
extern void reset_tsvector_parser(TSVectorParseState state, char *input);
|
extern void reset_tsvector_parser(TSVectorParseState state, char *input);
|
||||||
extern bool gettoken_tsvector(TSVectorParseState state,
|
extern bool gettoken_tsvector(TSVectorParseState state,
|
||||||
char **strval, int *lenval,
|
char **strval, int *lenval,
|
||||||
@ -58,13 +60,15 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
|
|||||||
* QueryOperand struct */
|
* QueryOperand struct */
|
||||||
bool prefix);
|
bool prefix);
|
||||||
|
|
||||||
|
/* flag bits that can be passed to parse_tsquery: */
|
||||||
#define P_TSQ_PLAIN (1 << 0)
|
#define P_TSQ_PLAIN (1 << 0)
|
||||||
#define P_TSQ_WEB (1 << 1)
|
#define P_TSQ_WEB (1 << 1)
|
||||||
|
|
||||||
extern TSQuery parse_tsquery(char *buf,
|
extern TSQuery parse_tsquery(char *buf,
|
||||||
PushFunction pushval,
|
PushFunction pushval,
|
||||||
Datum opaque,
|
Datum opaque,
|
||||||
int flags);
|
int flags,
|
||||||
|
Node *escontext);
|
||||||
|
|
||||||
/* Functions for use by PushFunction implementations */
|
/* Functions for use by PushFunction implementations */
|
||||||
extern void pushValue(TSQueryParserState state,
|
extern void pushValue(TSQueryParserState state,
|
||||||
@ -222,7 +226,7 @@ extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
|
|||||||
* TSQuery Utilities
|
* TSQuery Utilities
|
||||||
*/
|
*/
|
||||||
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
|
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
|
||||||
extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
|
extern TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy);
|
||||||
|
|
||||||
typedef struct QTNode
|
typedef struct QTNode
|
||||||
{
|
{
|
||||||
|
@ -89,6 +89,25 @@ SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
|
|||||||
ERROR: syntax error in tsvector: "'' '1' '2'"
|
ERROR: syntax error in tsvector: "'' '1' '2'"
|
||||||
LINE 1: SELECT $$'' '1' '2'$$::tsvector;
|
LINE 1: SELECT $$'' '1' '2'$$::tsvector;
|
||||||
^
|
^
|
||||||
|
-- Also try it with non-error-throwing API
|
||||||
|
SELECT pg_input_is_valid('foo', 'tsvector');
|
||||||
|
pg_input_is_valid
|
||||||
|
-------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT pg_input_is_valid($$''$$, 'tsvector');
|
||||||
|
pg_input_is_valid
|
||||||
|
-------------------
|
||||||
|
f
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT pg_input_error_message($$''$$, 'tsvector');
|
||||||
|
pg_input_error_message
|
||||||
|
--------------------------------
|
||||||
|
syntax error in tsvector: "''"
|
||||||
|
(1 row)
|
||||||
|
|
||||||
--Base tsquery test
|
--Base tsquery test
|
||||||
SELECT '1'::tsquery;
|
SELECT '1'::tsquery;
|
||||||
tsquery
|
tsquery
|
||||||
@ -372,6 +391,31 @@ SELECT '!!a & !!b'::tsquery;
|
|||||||
!!'a' & !!'b'
|
!!'a' & !!'b'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- Also try it with non-error-throwing API
|
||||||
|
SELECT pg_input_is_valid('foo', 'tsquery');
|
||||||
|
pg_input_is_valid
|
||||||
|
-------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT pg_input_is_valid('foo!', 'tsquery');
|
||||||
|
pg_input_is_valid
|
||||||
|
-------------------
|
||||||
|
f
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT pg_input_error_message('foo!', 'tsquery');
|
||||||
|
pg_input_error_message
|
||||||
|
---------------------------------
|
||||||
|
syntax error in tsquery: "foo!"
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT pg_input_error_message('a <100000> b', 'tsquery');
|
||||||
|
pg_input_error_message
|
||||||
|
---------------------------------------------------------------------------------------
|
||||||
|
distance in phrase operator must be an integer value between zero and 16384 inclusive
|
||||||
|
(1 row)
|
||||||
|
|
||||||
--comparisons
|
--comparisons
|
||||||
SELECT 'a' < 'b & c'::tsquery as "true";
|
SELECT 'a' < 'b & c'::tsquery as "true";
|
||||||
true
|
true
|
||||||
|
@ -19,6 +19,11 @@ SELECT '''w'':4A,3B,2C,1D,5 a:8';
|
|||||||
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
|
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
|
||||||
SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
|
SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
|
||||||
|
|
||||||
|
-- Also try it with non-error-throwing API
|
||||||
|
SELECT pg_input_is_valid('foo', 'tsvector');
|
||||||
|
SELECT pg_input_is_valid($$''$$, 'tsvector');
|
||||||
|
SELECT pg_input_error_message($$''$$, 'tsvector');
|
||||||
|
|
||||||
--Base tsquery test
|
--Base tsquery test
|
||||||
SELECT '1'::tsquery;
|
SELECT '1'::tsquery;
|
||||||
SELECT '1 '::tsquery;
|
SELECT '1 '::tsquery;
|
||||||
@ -68,6 +73,12 @@ SELECT 'a & !!b'::tsquery;
|
|||||||
SELECT '!!a & b'::tsquery;
|
SELECT '!!a & b'::tsquery;
|
||||||
SELECT '!!a & !!b'::tsquery;
|
SELECT '!!a & !!b'::tsquery;
|
||||||
|
|
||||||
|
-- Also try it with non-error-throwing API
|
||||||
|
SELECT pg_input_is_valid('foo', 'tsquery');
|
||||||
|
SELECT pg_input_is_valid('foo!', 'tsquery');
|
||||||
|
SELECT pg_input_error_message('foo!', 'tsquery');
|
||||||
|
SELECT pg_input_error_message('a <100000> b', 'tsquery');
|
||||||
|
|
||||||
--comparisons
|
--comparisons
|
||||||
SELECT 'a' < 'b & c'::tsquery as "true";
|
SELECT 'a' < 'b & c'::tsquery as "true";
|
||||||
SELECT 'a' > 'b & c'::tsquery as "false";
|
SELECT 'a' > 'b & c'::tsquery as "false";
|
||||||
|
Loading…
Reference in New Issue
Block a user