mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-12 18:34:36 +08:00
Fix nasty performance problem in tsquery_rewrite().
tsquery_rewrite() tries to find matches to subsets of AND/OR conditions; for example, in the query 'a | b | c' the substitution subquery 'a | c' should match and lead to replacement of the first and third items. That's fine, but the matching algorithm apparently takes about O(2^N) for an N-clause query (I say "apparently" because the code is also both unintelligible and uncommented). We could probably do better than that even without any extra assumptions --- but actually, we know that the subclauses are sorted, indeed are depending on that elsewhere in this very same function. So we can just scan the two lists a single time to detect matches, as though we were doing a merge join. Also do a re-flattening call (QTNTernary()) in tsquery_rewrite_query, just to make sure that the tree fits the expectations of the next search cycle. I didn't try to devise a test case for this, but I'm pretty sure that the oversight could have led to failure to match in some cases where a match would be expected. Improve comments, and also stick a CHECK_FOR_INTERRUPTS into dofindsubquery, just in case it's still too slow for somebody. Per report from Andreas Seltenreich. Back-patch to all supported branches. Discussion: <8760oasf2y.fsf@credativ.de>
This commit is contained in:
parent
24ebc444c6
commit
5ec81aceec
@ -21,47 +21,43 @@
|
|||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
|
|
||||||
|
|
||||||
static int
|
|
||||||
addone(int *counters, int last, int total)
|
|
||||||
{
|
|
||||||
/* since this function recurses, it could be driven to stack overflow. */
|
|
||||||
check_stack_depth();
|
|
||||||
|
|
||||||
counters[last]++;
|
|
||||||
if (counters[last] >= total)
|
|
||||||
{
|
|
||||||
if (last == 0)
|
|
||||||
return 0;
|
|
||||||
if (addone(counters, last - 1, total - 1) == 0)
|
|
||||||
return 0;
|
|
||||||
counters[last] = counters[last - 1] + 1;
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If node is equal to ex, replace it with subs. Replacement is actually done
|
* If "node" is equal to "ex", return a copy of "subs" instead.
|
||||||
* by returning either node or a copy of subs.
|
* If "ex" matches a subset of node's children, return a modified version
|
||||||
|
* of "node" in which those children are replaced with a copy of "subs".
|
||||||
|
* Otherwise return "node" unmodified.
|
||||||
|
*
|
||||||
|
* The QTN_NOCHANGE bit is set in successfully modified nodes, so that
|
||||||
|
* we won't uselessly recurse into them.
|
||||||
|
* Also, set *isfind true if we make a replacement.
|
||||||
*/
|
*/
|
||||||
static QTNode *
|
static QTNode *
|
||||||
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
|
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
|
||||||
{
|
{
|
||||||
|
/* Can't match unless signature matches and node type matches. */
|
||||||
if ((node->sign & ex->sign) != ex->sign ||
|
if ((node->sign & ex->sign) != ex->sign ||
|
||||||
node->valnode->type != ex->valnode->type)
|
node->valnode->type != ex->valnode->type)
|
||||||
return node;
|
return node;
|
||||||
|
|
||||||
|
/* Ignore nodes marked NOCHANGE, too. */
|
||||||
if (node->flags & QTN_NOCHANGE)
|
if (node->flags & QTN_NOCHANGE)
|
||||||
return node;
|
return node;
|
||||||
|
|
||||||
if (node->valnode->type == QI_OPR)
|
if (node->valnode->type == QI_OPR)
|
||||||
{
|
{
|
||||||
|
/* Must be same operator. */
|
||||||
if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper)
|
if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper)
|
||||||
return node;
|
return node;
|
||||||
|
|
||||||
if (node->nchild == ex->nchild)
|
if (node->nchild == ex->nchild)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Simple case: when same number of children, match if equal.
|
||||||
|
* (This is reliable when the children were sorted earlier.)
|
||||||
|
*/
|
||||||
if (QTNEq(node, ex))
|
if (QTNEq(node, ex))
|
||||||
{
|
{
|
||||||
|
/* Match; delete node and return a copy of subs instead. */
|
||||||
QTNFree(node);
|
QTNFree(node);
|
||||||
if (subs)
|
if (subs)
|
||||||
{
|
{
|
||||||
@ -73,79 +69,92 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
|
|||||||
*isfind = true;
|
*isfind = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (node->nchild > ex->nchild)
|
else if (node->nchild > ex->nchild && ex->nchild > 0)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* AND and NOT are commutative, so we check if a subset of the
|
* AND and OR are commutative/associative, so we should check if a
|
||||||
* children match. For example, if tnode is A | B | C, and ex is B
|
* subset of the children match. For example, if node is A|B|C,
|
||||||
* | C, we have a match after we convert tnode to A | (B | C).
|
* and ex is B|C, we have a match after we notionally convert node
|
||||||
|
* to A|(B|C). This does not work for NOT or PHRASE nodes, but we
|
||||||
|
* can't get here for those node types because they have a fixed
|
||||||
|
* number of children.
|
||||||
|
*
|
||||||
|
* Because we expect that the children are sorted, it suffices to
|
||||||
|
* make one pass through the two lists to find the matches.
|
||||||
*/
|
*/
|
||||||
int *counters = (int *) palloc(sizeof(int) * node->nchild);
|
bool *matched;
|
||||||
int i;
|
int nmatched;
|
||||||
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
|
int i,
|
||||||
|
j;
|
||||||
|
|
||||||
memset(tnode, 0, sizeof(QTNode));
|
/* Assert that the subset rule is OK */
|
||||||
tnode->child = (QTNode **) palloc(sizeof(QTNode *) * ex->nchild);
|
Assert(node->valnode->qoperator.oper == OP_AND ||
|
||||||
tnode->nchild = ex->nchild;
|
node->valnode->qoperator.oper == OP_OR);
|
||||||
tnode->valnode = (QueryItem *) palloc(sizeof(QueryItem));
|
|
||||||
*(tnode->valnode) = *(ex->valnode);
|
|
||||||
|
|
||||||
for (i = 0; i < ex->nchild; i++)
|
/* matched[] will record which children of node matched */
|
||||||
counters[i] = i;
|
matched = (bool *) palloc0(node->nchild * sizeof(bool));
|
||||||
|
nmatched = 0;
|
||||||
do
|
i = j = 0;
|
||||||
|
while (i < node->nchild && j < ex->nchild)
|
||||||
{
|
{
|
||||||
tnode->sign = 0;
|
int cmp = QTNodeCompare(node->child[i], ex->child[j]);
|
||||||
for (i = 0; i < ex->nchild; i++)
|
|
||||||
|
if (cmp == 0)
|
||||||
{
|
{
|
||||||
tnode->child[i] = node->child[counters[i]];
|
/* match! */
|
||||||
tnode->sign |= tnode->child[i]->sign;
|
matched[i] = true;
|
||||||
|
nmatched++;
|
||||||
|
i++, j++;
|
||||||
}
|
}
|
||||||
|
else if (cmp < 0)
|
||||||
if (QTNEq(tnode, ex))
|
|
||||||
{
|
{
|
||||||
int j = 0;
|
/* node->child[i] has no match, ignore it */
|
||||||
|
i++;
|
||||||
pfree(tnode->valnode);
|
}
|
||||||
pfree(tnode->child);
|
else
|
||||||
pfree(tnode);
|
{
|
||||||
if (subs)
|
/* ex->child[j] has no match; we can give up immediately */
|
||||||
{
|
|
||||||
tnode = QTNCopy(subs);
|
|
||||||
tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
tnode = NULL;
|
|
||||||
|
|
||||||
node->child[counters[0]] = tnode;
|
|
||||||
|
|
||||||
for (i = 1; i < ex->nchild; i++)
|
|
||||||
node->child[counters[i]] = NULL;
|
|
||||||
for (i = 0; i < node->nchild; i++)
|
|
||||||
{
|
|
||||||
if (node->child[i])
|
|
||||||
{
|
|
||||||
node->child[j] = node->child[i];
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
node->nchild = j;
|
|
||||||
|
|
||||||
*isfind = true;
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} while (addone(counters, ex->nchild - 1, node->nchild));
|
|
||||||
if (tnode && (tnode->flags & QTN_NOCHANGE) == 0)
|
|
||||||
{
|
|
||||||
pfree(tnode->valnode);
|
|
||||||
pfree(tnode->child);
|
|
||||||
pfree(tnode);
|
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
if (nmatched == ex->nchild)
|
||||||
|
{
|
||||||
|
/* collapse out the matched children of node */
|
||||||
|
j = 0;
|
||||||
|
for (i = 0; i < node->nchild; i++)
|
||||||
|
{
|
||||||
|
if (matched[i])
|
||||||
|
QTNFree(node->child[i]);
|
||||||
|
else
|
||||||
|
node->child[j++] = node->child[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* and instead insert a copy of subs */
|
||||||
|
if (subs)
|
||||||
|
{
|
||||||
|
subs = QTNCopy(subs);
|
||||||
|
subs->flags |= QTN_NOCHANGE;
|
||||||
|
node->child[j++] = subs;
|
||||||
|
}
|
||||||
|
|
||||||
|
node->nchild = j;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Re-sort the node to put new child in the right place. This
|
||||||
|
* is a bit bogus, because it won't matter for findsubquery's
|
||||||
|
* remaining processing, and it's insufficient to prepare the
|
||||||
|
* tree for another search (we would need to re-flatten as
|
||||||
|
* well, and we don't want to do that because we'd lose the
|
||||||
|
* QTN_NOCHANGE marking on the new child). But it's needed to
|
||||||
|
* keep the results the same as the regression tests expect.
|
||||||
|
*/
|
||||||
QTNSort(node);
|
QTNSort(node);
|
||||||
pfree(counters);
|
|
||||||
|
*isfind = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
pfree(matched);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -173,12 +182,20 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
|
|||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Recursive guts of findsubquery(): attempt to replace "ex" with "subs"
|
||||||
|
* at the root node, and if we failed to do so, recursively match against
|
||||||
|
* child nodes.
|
||||||
|
*/
|
||||||
static QTNode *
|
static QTNode *
|
||||||
dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
|
dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
|
||||||
{
|
{
|
||||||
/* since this function recurses, it could be driven to stack overflow. */
|
/* since this function recurses, it could be driven to stack overflow. */
|
||||||
check_stack_depth();
|
check_stack_depth();
|
||||||
|
|
||||||
|
/* also, since it's a bit expensive, let's check for query cancel. */
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
root = findeq(root, ex, subs, isfind);
|
root = findeq(root, ex, subs, isfind);
|
||||||
|
|
||||||
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
|
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
|
||||||
@ -192,6 +209,10 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
|
|||||||
return root;
|
return root;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Delete any void subtrees that may have been inserted when the replacement
|
||||||
|
* subtree is void.
|
||||||
|
*/
|
||||||
static QTNode *
|
static QTNode *
|
||||||
dropvoidsubtree(QTNode *root)
|
dropvoidsubtree(QTNode *root)
|
||||||
{
|
{
|
||||||
@ -231,6 +252,14 @@ dropvoidsubtree(QTNode *root)
|
|||||||
return root;
|
return root;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Substitute "subs" for "ex" throughout the QTNode tree at root.
|
||||||
|
*
|
||||||
|
* If isfind isn't NULL, set *isfind to show whether we made any substitution.
|
||||||
|
*
|
||||||
|
* Both "root" and "ex" must have been through QTNTernary and QTNSort
|
||||||
|
* to ensure reliable matching.
|
||||||
|
*/
|
||||||
QTNode *
|
QTNode *
|
||||||
findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
|
findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
|
||||||
{
|
{
|
||||||
@ -344,6 +373,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
/* ready the tree for another pass */
|
/* ready the tree for another pass */
|
||||||
QTNClearFlags(tree, QTN_NOCHANGE);
|
QTNClearFlags(tree, QTN_NOCHANGE);
|
||||||
|
QTNTernary(tree);
|
||||||
QTNSort(tree);
|
QTNSort(tree);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user