mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-06 15:24:56 +08:00
Change predecence of phrase operator.
<-> operator now have higher predecence than & (AND) operator. This change was motivated by unexpected difference of similar queries: 'a & b <-> c'::tsquery and 'b <-> c & a'. Before first query means (a & b) <-> c and second one - '(b <-> c) & a', now phrase operator evaluates first. Per suggestion from Tom Lane 32260.1465402409@sss.pgh.pa.us
This commit is contained in:
parent
3dbbd0f02a
commit
6734a1cacd
@ -26,10 +26,10 @@
|
||||
/* FTS operator priorities, see ts_type.h */
|
||||
const int tsearch_op_priority[OP_COUNT] =
|
||||
{
|
||||
3, /* OP_NOT */
|
||||
4, /* OP_NOT */
|
||||
2, /* OP_AND */
|
||||
1, /* OP_OR */
|
||||
4 /* OP_PHRASE */
|
||||
3 /* OP_PHRASE */
|
||||
};
|
||||
|
||||
struct TSQueryParserStateData
|
||||
@ -430,6 +430,40 @@ pushStop(TSQueryParserState state)
|
||||
|
||||
#define STACKDEPTH 32
|
||||
|
||||
typedef struct OperatorElement {
|
||||
int8 op;
|
||||
int16 distance;
|
||||
} OperatorElement;
|
||||
|
||||
static void
|
||||
pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
|
||||
{
|
||||
if (*lenstack == STACKDEPTH) /* internal error */
|
||||
elog(ERROR, "tsquery stack too small");
|
||||
|
||||
stack[*lenstack].op = op;
|
||||
stack[*lenstack].distance = distance;
|
||||
|
||||
(*lenstack)++;
|
||||
}
|
||||
|
||||
static void
|
||||
cleanOpStack(TSQueryParserState state,
|
||||
OperatorElement *stack, int *lenstack, int8 op)
|
||||
{
|
||||
int opPriority = OP_PRIORITY(op);
|
||||
|
||||
while(*lenstack)
|
||||
{
|
||||
if (opPriority > OP_PRIORITY(stack[*lenstack - 1].op))
|
||||
break;
|
||||
|
||||
(*lenstack)--;
|
||||
pushOperator(state, stack[*lenstack].op,
|
||||
stack[*lenstack].distance);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Make polish (prefix) notation of query.
|
||||
*
|
||||
@ -444,11 +478,7 @@ makepol(TSQueryParserState state,
|
||||
ts_tokentype type;
|
||||
int lenval = 0;
|
||||
char *strval = NULL;
|
||||
struct
|
||||
{
|
||||
int8 op;
|
||||
int16 distance;
|
||||
} opstack[STACKDEPTH];
|
||||
OperatorElement opstack[STACKDEPTH];
|
||||
int lenstack = 0;
|
||||
int16 weight = 0;
|
||||
bool prefix;
|
||||
@ -462,49 +492,16 @@ makepol(TSQueryParserState state,
|
||||
{
|
||||
case PT_VAL:
|
||||
pushval(opaque, state, strval, lenval, weight, prefix);
|
||||
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
|
||||
opstack[lenstack - 1].op == OP_PHRASE ||
|
||||
opstack[lenstack - 1].op == OP_NOT))
|
||||
{
|
||||
lenstack--;
|
||||
pushOperator(state,
|
||||
opstack[lenstack].op,
|
||||
opstack[lenstack].distance);
|
||||
}
|
||||
break;
|
||||
case PT_OPR:
|
||||
if (lenstack && operator == OP_OR)
|
||||
pushOperator(state, OP_OR, 0);
|
||||
else
|
||||
{
|
||||
if (lenstack == STACKDEPTH) /* internal error */
|
||||
elog(ERROR, "tsquery stack too small");
|
||||
opstack[lenstack].op = operator;
|
||||
opstack[lenstack].distance = weight;
|
||||
lenstack++;
|
||||
}
|
||||
cleanOpStack(state, opstack, &lenstack, operator);
|
||||
pushOpStack(opstack, &lenstack, operator, weight);
|
||||
break;
|
||||
case PT_OPEN:
|
||||
makepol(state, pushval, opaque);
|
||||
|
||||
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
|
||||
opstack[lenstack - 1].op == OP_PHRASE ||
|
||||
opstack[lenstack - 1].op == OP_NOT))
|
||||
{
|
||||
lenstack--;
|
||||
pushOperator(state,
|
||||
opstack[lenstack].op,
|
||||
opstack[lenstack].distance);
|
||||
}
|
||||
break;
|
||||
case PT_CLOSE:
|
||||
while (lenstack)
|
||||
{
|
||||
lenstack--;
|
||||
pushOperator(state,
|
||||
opstack[lenstack].op,
|
||||
opstack[lenstack].distance);
|
||||
};
|
||||
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
|
||||
return;
|
||||
case PT_ERR:
|
||||
default:
|
||||
@ -514,13 +511,8 @@ makepol(TSQueryParserState state,
|
||||
state->buffer)));
|
||||
}
|
||||
}
|
||||
while (lenstack)
|
||||
{
|
||||
lenstack--;
|
||||
pushOperator(state,
|
||||
opstack[lenstack].op,
|
||||
opstack[lenstack].distance);
|
||||
}
|
||||
|
||||
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -750,7 +742,7 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
|
||||
* print it in infix (human-readable) form
|
||||
*/
|
||||
static void
|
||||
infix(INFIX *in, int parentPriority)
|
||||
infix(INFIX *in, int parentPriority, bool rightPhraseOp)
|
||||
{
|
||||
/* since this function recurses, it could be driven to stack overflow. */
|
||||
check_stack_depth();
|
||||
@ -819,7 +811,7 @@ infix(INFIX *in, int parentPriority)
|
||||
}
|
||||
else if (in->curpol->qoperator.oper == OP_NOT)
|
||||
{
|
||||
int priority = PRINT_PRIORITY(in->curpol);
|
||||
int priority = QO_PRIORITY(in->curpol);
|
||||
|
||||
if (priority < parentPriority)
|
||||
{
|
||||
@ -833,7 +825,7 @@ infix(INFIX *in, int parentPriority)
|
||||
*(in->cur) = '\0';
|
||||
in->curpol++;
|
||||
|
||||
infix(in, priority);
|
||||
infix(in, priority, false);
|
||||
if (priority < parentPriority)
|
||||
{
|
||||
RESIZEBUF(in, 2);
|
||||
@ -844,17 +836,15 @@ infix(INFIX *in, int parentPriority)
|
||||
else
|
||||
{
|
||||
int8 op = in->curpol->qoperator.oper;
|
||||
int priority = PRINT_PRIORITY(in->curpol);
|
||||
int priority = QO_PRIORITY(in->curpol);
|
||||
int16 distance = in->curpol->qoperator.distance;
|
||||
INFIX nrm;
|
||||
bool needParenthesis = false;
|
||||
|
||||
in->curpol++;
|
||||
if (priority < parentPriority ||
|
||||
(op == OP_PHRASE &&
|
||||
(priority == parentPriority || /* phrases are not
|
||||
* commutative! */
|
||||
parentPriority == OP_PRIORITY(OP_AND))))
|
||||
/* phrase operator depends on order */
|
||||
(op == OP_PHRASE && rightPhraseOp))
|
||||
{
|
||||
needParenthesis = true;
|
||||
RESIZEBUF(in, 2);
|
||||
@ -868,11 +858,11 @@ infix(INFIX *in, int parentPriority)
|
||||
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
||||
|
||||
/* get right operand */
|
||||
infix(&nrm, priority);
|
||||
infix(&nrm, priority, (op == OP_PHRASE));
|
||||
|
||||
/* get & print left operand */
|
||||
in->curpol = nrm.curpol;
|
||||
infix(in, priority);
|
||||
infix(in, priority, false);
|
||||
|
||||
/* print operator & right operand */
|
||||
RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
|
||||
@ -924,7 +914,7 @@ tsqueryout(PG_FUNCTION_ARGS)
|
||||
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
||||
*(nrm.cur) = '\0';
|
||||
nrm.op = GETOPERAND(query);
|
||||
infix(&nrm, -1 /* lowest priority */ );
|
||||
infix(&nrm, -1 /* lowest priority */, false);
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_RETURN_CSTRING(nrm.buf);
|
||||
@ -1151,7 +1141,7 @@ tsquerytree(PG_FUNCTION_ARGS)
|
||||
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
||||
*(nrm.cur) = '\0';
|
||||
nrm.op = GETOPERAND(query);
|
||||
infix(&nrm, true);
|
||||
infix(&nrm, -1, false);
|
||||
res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
|
||||
pfree(q);
|
||||
}
|
||||
|
@ -25,11 +25,18 @@ typedef struct NODE
|
||||
QueryItem *valnode;
|
||||
} NODE;
|
||||
|
||||
/* Non-operator nodes have fake (but highest) priority */
|
||||
/*
|
||||
* To simplify walking on query tree and pushing down of phrase operator
|
||||
* we define some fake priority here: phrase operator has highest priority
|
||||
* of any other operators (and we believe here that OP_PHRASE is a highest
|
||||
* code of operations) and value node has ever highest priority.
|
||||
* Priority values of other operations don't matter until they are less than
|
||||
* phrase operator and value node.
|
||||
*/
|
||||
#define VALUE_PRIORITY (OP_COUNT + 1)
|
||||
#define NODE_PRIORITY(x) \
|
||||
( ((x)->valnode->qoperator.type == QI_OPR) ? \
|
||||
QO_PRIORITY((x)->valnode) : \
|
||||
TOP_PRIORITY )
|
||||
(x)->valnode->qoperator.oper : VALUE_PRIORITY )
|
||||
|
||||
/*
|
||||
* make query tree from plain view of query
|
||||
@ -416,6 +423,10 @@ normalize_phrase_tree(NODE *node)
|
||||
node->left = normalize_phrase_tree(node->left);
|
||||
node->right = normalize_phrase_tree(node->right);
|
||||
|
||||
/*
|
||||
* if subtree contains only nodes with higher "priority" then
|
||||
* we are done. See comment near NODE_PRIORITY()
|
||||
*/
|
||||
if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
|
||||
NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
|
||||
return node;
|
||||
|
@ -217,33 +217,19 @@ typedef struct
|
||||
|
||||
/*
|
||||
* Legal values for QueryOperator.operator.
|
||||
* They should be ordered by priority! We assume that phrase
|
||||
* has highest priority, but this agreement is only
|
||||
* for query transformation! That's need to simplify
|
||||
* algorithm of query transformation.
|
||||
*/
|
||||
#define OP_NOT 1
|
||||
#define OP_AND 2
|
||||
#define OP_OR 3
|
||||
#define OP_PHRASE 4
|
||||
#define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */
|
||||
#define OP_COUNT 4
|
||||
|
||||
extern const int tsearch_op_priority[OP_COUNT];
|
||||
|
||||
#define NOT_PHRASE_P 5 /* OP_PHRASE negation operations must have
|
||||
* greater priority in order to force infix()
|
||||
* to surround the whole OP_PHRASE expression
|
||||
* with parentheses. */
|
||||
|
||||
#define TOP_PRIORITY 6 /* highest priority for val nodes */
|
||||
|
||||
/* get operation priority by its code*/
|
||||
#define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
|
||||
/* get QueryOperator priority */
|
||||
#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
|
||||
/* special case: get QueryOperator priority for correct printing !(a <-> b>) */
|
||||
#define PRINT_PRIORITY(x) \
|
||||
( (((QueryOperator *) (x))->oper == OP_NOT) ? NOT_PHRASE_P : QO_PRIORITY(x) )
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
@ -470,15 +470,15 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
|
||||
(1 row)
|
||||
|
||||
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
|
||||
to_tsquery
|
||||
-----------------------------------------------------------------------------
|
||||
( 'foot':B <-> 'sky' ) & ( 'ball':B <-> 'sky' ) & ( 'klubber':B <-> 'sky' )
|
||||
to_tsquery
|
||||
-----------------------------------------------------------------
|
||||
'foot':B <-> 'sky' & 'ball':B <-> 'sky' & 'klubber':B <-> 'sky'
|
||||
(1 row)
|
||||
|
||||
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
|
||||
phraseto_tsquery
|
||||
-----------------------------------------------------------------------
|
||||
( 'foot' <-> 'sky' ) & ( 'ball' <-> 'sky' ) & ( 'klubber' <-> 'sky' )
|
||||
phraseto_tsquery
|
||||
-----------------------------------------------------------
|
||||
'foot' <-> 'sky' & 'ball' <-> 'sky' & 'klubber' <-> 'sky'
|
||||
(1 row)
|
||||
|
||||
-- Test ispell dictionary with hunspell affix with FLAG long in configuration
|
||||
|
@ -778,9 +778,9 @@ SELECT to_tsquery('english', 'foo <-> a <-> the <-> bar');
|
||||
(1 row)
|
||||
|
||||
SELECT phraseto_tsquery('english', 'PostgreSQL can be extended by the user in many ways');
|
||||
phraseto_tsquery
|
||||
-----------------------------------------------------------------------
|
||||
( ( ( 'postgresql' <3> 'extend' ) <3> 'user' ) <2> 'mani' ) <-> 'way'
|
||||
phraseto_tsquery
|
||||
-----------------------------------------------------------
|
||||
'postgresql' <3> 'extend' <3> 'user' <2> 'mani' <-> 'way'
|
||||
(1 row)
|
||||
|
||||
SELECT ts_rank_cd(to_tsvector('english', '
|
||||
@ -1215,15 +1215,15 @@ SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::t
|
||||
(1 row)
|
||||
|
||||
SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
|
||||
ts_rewrite
|
||||
-----------------------
|
||||
'1' & ( '2' <2> '3' )
|
||||
ts_rewrite
|
||||
-------------------
|
||||
'1' & '2' <2> '3'
|
||||
(1 row)
|
||||
|
||||
SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
|
||||
ts_rewrite
|
||||
-----------------------------------------------
|
||||
( '5' <-> '1' ) & ( '5' <-> ( '2' <-> '3' ) )
|
||||
ts_rewrite
|
||||
---------------------------------------
|
||||
'5' <-> '1' & '5' <-> ( '2' <-> '3' )
|
||||
(1 row)
|
||||
|
||||
SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
|
||||
|
@ -350,21 +350,21 @@ SELECT '(a|b) <-> (d|c)'::tsquery;
|
||||
(1 row)
|
||||
|
||||
SELECT 'a <-> (b&c)'::tsquery;
|
||||
tsquery
|
||||
-----------------------------------
|
||||
( 'a' <-> 'b' ) & ( 'a' <-> 'c' )
|
||||
tsquery
|
||||
---------------------------
|
||||
'a' <-> 'b' & 'a' <-> 'c'
|
||||
(1 row)
|
||||
|
||||
SELECT '(a&b) <-> c'::tsquery;
|
||||
tsquery
|
||||
-----------------------------------
|
||||
( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
|
||||
tsquery
|
||||
---------------------------
|
||||
'a' <-> 'c' & 'b' <-> 'c'
|
||||
(1 row)
|
||||
|
||||
SELECT '(a&b) <-> (d&c)'::tsquery;
|
||||
tsquery
|
||||
-----------------------------------------------------------------------
|
||||
( 'a' <-> 'd' ) & ( 'b' <-> 'd' ) & ( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
|
||||
tsquery
|
||||
-------------------------------------------------------
|
||||
'a' <-> 'd' & 'b' <-> 'd' & 'a' <-> 'c' & 'b' <-> 'c'
|
||||
(1 row)
|
||||
|
||||
SELECT 'a <-> !b'::tsquery;
|
||||
@ -386,9 +386,9 @@ SELECT '!a <-> !b'::tsquery;
|
||||
(1 row)
|
||||
|
||||
SELECT 'a <-> !(b&c)'::tsquery;
|
||||
tsquery
|
||||
----------------------------------------------
|
||||
'a' & !( ( 'a' <-> 'b' ) & ( 'a' <-> 'c' ) )
|
||||
tsquery
|
||||
--------------------------------------
|
||||
'a' & !( 'a' <-> 'b' & 'a' <-> 'c' )
|
||||
(1 row)
|
||||
|
||||
SELECT 'a <-> !(b|c)'::tsquery;
|
||||
@ -398,9 +398,9 @@ SELECT 'a <-> !(b|c)'::tsquery;
|
||||
(1 row)
|
||||
|
||||
SELECT '!(a&b) <-> c'::tsquery;
|
||||
tsquery
|
||||
----------------------------------------------
|
||||
!( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) ) & 'c'
|
||||
tsquery
|
||||
--------------------------------------
|
||||
!( 'a' <-> 'c' & 'b' <-> 'c' ) & 'c'
|
||||
(1 row)
|
||||
|
||||
SELECT '!(a|b) <-> c'::tsquery;
|
||||
@ -416,9 +416,9 @@ SELECT '(!a|b) <-> c'::tsquery;
|
||||
(1 row)
|
||||
|
||||
SELECT '(!a&b) <-> c'::tsquery;
|
||||
tsquery
|
||||
------------------------------------------
|
||||
!( 'a' <-> 'c' ) & 'c' & ( 'b' <-> 'c' )
|
||||
tsquery
|
||||
--------------------------------------
|
||||
!( 'a' <-> 'c' ) & 'c' & 'b' <-> 'c'
|
||||
(1 row)
|
||||
|
||||
SELECT 'c <-> (!a|b)'::tsquery;
|
||||
@ -428,9 +428,9 @@ SELECT 'c <-> (!a|b)'::tsquery;
|
||||
(1 row)
|
||||
|
||||
SELECT 'c <-> (!a&b)'::tsquery;
|
||||
tsquery
|
||||
------------------------------------------
|
||||
'c' & !( 'c' <-> 'a' ) & ( 'c' <-> 'b' )
|
||||
tsquery
|
||||
--------------------------------------
|
||||
'c' & !( 'c' <-> 'a' ) & 'c' <-> 'b'
|
||||
(1 row)
|
||||
|
||||
SELECT '(a|b) <-> !c'::tsquery;
|
||||
@ -440,9 +440,9 @@ SELECT '(a|b) <-> !c'::tsquery;
|
||||
(1 row)
|
||||
|
||||
SELECT '(a&b) <-> !c'::tsquery;
|
||||
tsquery
|
||||
----------------------------------------------------
|
||||
'a' & 'b' & !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) )
|
||||
tsquery
|
||||
--------------------------------------------
|
||||
'a' & 'b' & !( 'a' <-> 'c' & 'b' <-> 'c' )
|
||||
(1 row)
|
||||
|
||||
SELECT '!c <-> (a|b)'::tsquery;
|
||||
@ -532,33 +532,33 @@ SELECT 'foo & bar'::tsquery && 'asd | fg';
|
||||
(1 row)
|
||||
|
||||
SELECT 'a' <-> 'b & d'::tsquery;
|
||||
?column?
|
||||
-----------------------------------
|
||||
( 'a' <-> 'b' ) & ( 'a' <-> 'd' )
|
||||
?column?
|
||||
---------------------------
|
||||
'a' <-> 'b' & 'a' <-> 'd'
|
||||
(1 row)
|
||||
|
||||
SELECT 'a & g' <-> 'b & d'::tsquery;
|
||||
?column?
|
||||
-----------------------------------------------------------------------
|
||||
( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) & ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
|
||||
?column?
|
||||
-------------------------------------------------------
|
||||
'a' <-> 'b' & 'g' <-> 'b' & 'a' <-> 'd' & 'g' <-> 'd'
|
||||
(1 row)
|
||||
|
||||
SELECT 'a & g' <-> 'b | d'::tsquery;
|
||||
?column?
|
||||
-----------------------------------------------------------------------
|
||||
( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) | ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
|
||||
?column?
|
||||
-------------------------------------------------------
|
||||
'a' <-> 'b' & 'g' <-> 'b' | 'a' <-> 'd' & 'g' <-> 'd'
|
||||
(1 row)
|
||||
|
||||
SELECT 'a & g' <-> 'b <-> d'::tsquery;
|
||||
?column?
|
||||
-----------------------------------------------------------
|
||||
( 'a' <-> ( 'b' <-> 'd' ) ) & ( 'g' <-> ( 'b' <-> 'd' ) )
|
||||
?column?
|
||||
---------------------------------------------------
|
||||
'a' <-> ( 'b' <-> 'd' ) & 'g' <-> ( 'b' <-> 'd' )
|
||||
(1 row)
|
||||
|
||||
SELECT tsquery_phrase('a <3> g', 'b & d', 10);
|
||||
tsquery_phrase
|
||||
-------------------------------------------------------------
|
||||
( ( 'a' <3> 'g' ) <10> 'b' ) & ( ( 'a' <3> 'g' ) <10> 'd' )
|
||||
tsquery_phrase
|
||||
---------------------------------------------
|
||||
'a' <3> 'g' <10> 'b' & 'a' <3> 'g' <10> 'd'
|
||||
(1 row)
|
||||
|
||||
-- tsvector-tsquery operations
|
||||
|
Loading…
Reference in New Issue
Block a user