From 121f49a00e432ee9cfad7270d99504350cd1015f Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Sat, 16 Apr 2011 16:39:50 -0400 Subject: [PATCH] Clean up collation processing in prepunion.c. This area was a few bricks shy of a load, and badly under-commented too. We have to ensure that the generated targetlist entries for a set-operation node expose the correct collation for each entry, since higher-level processing expects the tlist to reflect the true ordering of the plan's output. This hackery wouldn't be necessary if SortGroupClause carried collation info ... but making it do so would inject more pain in the parser than would be saved here. Still, we might want to rethink that sometime. --- src/backend/optimizer/prep/prepjointree.c | 2 +- src/backend/optimizer/prep/prepunion.c | 131 ++++++++++++++++------ src/backend/optimizer/util/tlist.c | 34 ++++++ src/include/optimizer/tlist.h | 1 + 4 files changed, 132 insertions(+), 36 deletions(-) diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index a40f116bf9..a70439cc67 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -1136,7 +1136,7 @@ is_simple_union_all_recurse(Node *setOp, Query *setOpQuery, List *colTypes) Assert(subquery != NULL); /* Leaf nodes are OK if they match the toplevel column types */ - /* We don't have to compare typmods here */ + /* We don't have to compare typmods or collations here */ return tlist_same_datatypes(subquery->targetList, colTypes, true); } else if (IsA(setOp, SetOperationStmt)) diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 0ed9535d94..76adb7cdae 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -54,7 +54,8 @@ static Plan *recurse_set_operations(Node *setOp, PlannerInfo *root, double tuple_fraction, - List *colTypes, bool junkOK, + List *colTypes, List *colCollations, + bool junkOK, int flag, List *refnames_tlist, List **sortClauses, double *pNumGroups); static Plan *generate_recursion_plan(SetOperationStmt *setOp, @@ -81,12 +82,14 @@ static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, double dNumGroups, double dNumOutputRows, double tuple_fraction, const char *construct); -static List *generate_setop_tlist(List *colTypes, int flag, +static List *generate_setop_tlist(List *colTypes, List *colCollations, + int flag, Index varno, bool hack_constants, List *input_tlist, List *refnames_tlist); -static List *generate_append_tlist(List *colTypes, List *colCollations, bool flag, +static List *generate_append_tlist(List *colTypes, List *colCollations, + bool flag, List *input_plans, List *refnames_tlist); static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist); @@ -169,7 +172,8 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction, * on upper-level nodes to deal with that). */ return recurse_set_operations((Node *) topop, root, tuple_fraction, - topop->colTypes, true, -1, + topop->colTypes, topop->colCollations, + true, -1, leftmostQuery->targetList, sortClauses, NULL); } @@ -179,7 +183,8 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction, * Recursively handle one step in a tree of set operations * * tuple_fraction: fraction of tuples we expect to retrieve from node - * colTypes: list of type OIDs of expected output columns + * colTypes: OID list of set-op's result column datatypes + * colCollations: OID list of set-op's result column collations * junkOK: if true, child resjunk columns may be left in the result * flag: if >= 0, add a resjunk output column indicating value of flag * refnames_tlist: targetlist to take column names from @@ -196,7 +201,8 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction, static Plan * recurse_set_operations(Node *setOp, PlannerInfo *root, double tuple_fraction, - List *colTypes, bool junkOK, + List *colTypes, List *colCollations, + bool junkOK, int flag, List *refnames_tlist, List **sortClauses, double *pNumGroups) { @@ -239,7 +245,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, * Add a SubqueryScan with the caller-requested targetlist */ plan = (Plan *) - make_subqueryscan(generate_setop_tlist(colTypes, flag, + make_subqueryscan(generate_setop_tlist(colTypes, colCollations, + flag, rtr->rtindex, true, subplan->targetlist, @@ -287,11 +294,13 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, * generate_setop_tlist() to use varno 0. */ if (flag >= 0 || - !tlist_same_datatypes(plan->targetlist, colTypes, junkOK)) + !tlist_same_datatypes(plan->targetlist, colTypes, junkOK) || + !tlist_same_collations(plan->targetlist, colCollations, junkOK)) { plan = (Plan *) make_result(root, - generate_setop_tlist(colTypes, flag, + generate_setop_tlist(colTypes, colCollations, + flag, 0, false, plan->targetlist, @@ -336,12 +345,14 @@ generate_recursion_plan(SetOperationStmt *setOp, PlannerInfo *root, * separately without any intention of combining them into one Append. */ lplan = recurse_set_operations(setOp->larg, root, tuple_fraction, - setOp->colTypes, false, -1, + setOp->colTypes, setOp->colCollations, + false, -1, refnames_tlist, sortClauses, NULL); /* The right plan will want to look at the left one ... */ root->non_recursive_plan = lplan; rplan = recurse_set_operations(setOp->rarg, root, tuple_fraction, - setOp->colTypes, false, -1, + setOp->colTypes, setOp->colCollations, + false, -1, refnames_tlist, sortClauses, NULL); root->non_recursive_plan = NULL; @@ -499,12 +510,14 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, /* Recurse on children, ensuring their outputs are marked */ lplan = recurse_set_operations(op->larg, root, 0.0 /* all tuples needed */ , - op->colTypes, false, 0, + op->colTypes, op->colCollations, + false, 0, refnames_tlist, &child_sortclauses, &dLeftGroups); rplan = recurse_set_operations(op->rarg, root, 0.0 /* all tuples needed */ , - op->colTypes, false, 1, + op->colTypes, op->colCollations, + false, 1, refnames_tlist, &child_sortclauses, &dRightGroups); @@ -620,6 +633,13 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, * * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct * output rows will be lost anyway. + * + * NOTE: currently, we ignore collations while determining if a child has + * the same properties. This is semantically sound only so long as all + * collations have the same notion of equality. It is valid from an + * implementation standpoint because we don't care about the ordering of + * a UNION child's result: UNION ALL results are always unordered, and + * generate_union_plan will force a fresh sort if the top level is a UNION. */ static List * recurse_union_children(Node *setOp, PlannerInfo *root, @@ -660,8 +680,10 @@ recurse_union_children(Node *setOp, PlannerInfo *root, */ return list_make1(recurse_set_operations(setOp, root, tuple_fraction, - top_union->colTypes, false, - -1, refnames_tlist, + top_union->colTypes, + top_union->colCollations, + false, -1, + refnames_tlist, &child_sortclauses, NULL)); } @@ -830,7 +852,8 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, /* * Generate targetlist for a set-operation plan node * - * colTypes: column datatypes for non-junk columns + * colTypes: OID list of set-op's result column datatypes + * colCollations: OID list of set-op's result column collations * flag: -1 if no flag column needed, 0 or 1 to create a const flag column * varno: varno to use in generated Vars * hack_constants: true to copy up constants (see comments in code) @@ -838,7 +861,8 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, * refnames_tlist: targetlist to take column names from */ static List * -generate_setop_tlist(List *colTypes, int flag, +generate_setop_tlist(List *colTypes, List *colCollations, + int flag, Index varno, bool hack_constants, List *input_tlist, @@ -846,19 +870,23 @@ generate_setop_tlist(List *colTypes, int flag, { List *tlist = NIL; int resno = 1; - ListCell *i, - *j, - *k; + ListCell *ctlc, + *cclc, + *itlc, + *rtlc; TargetEntry *tle; Node *expr; - j = list_head(input_tlist); - k = list_head(refnames_tlist); - foreach(i, colTypes) + /* there's no forfour() so we must chase one list manually */ + rtlc = list_head(refnames_tlist); + forthree(ctlc, colTypes, cclc, colCollations, itlc, input_tlist) { - Oid colType = lfirst_oid(i); - TargetEntry *inputtle = (TargetEntry *) lfirst(j); - TargetEntry *reftle = (TargetEntry *) lfirst(k); + Oid colType = lfirst_oid(ctlc); + Oid colColl = lfirst_oid(cclc); + TargetEntry *inputtle = (TargetEntry *) lfirst(itlc); + TargetEntry *reftle = (TargetEntry *) lfirst(rtlc); + + rtlc = lnext(rtlc); Assert(inputtle->resno == resno); Assert(reftle->resno == resno); @@ -887,21 +915,48 @@ generate_setop_tlist(List *colTypes, int flag, exprTypmod((Node *) inputtle->expr), exprCollation((Node *) inputtle->expr), 0); + if (exprType(expr) != colType) { + /* + * Note: it's not really cool to be applying coerce_to_common_type + * here; one notable point is that assign_expr_collations never + * gets run on any generated nodes. For the moment that's not a + * problem because we force the correct exposed collation below. + * It would likely be best to make the parser generate the correct + * output tlist for every set-op to begin with, though. + */ expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */ expr, colType, "UNION/INTERSECT/EXCEPT"); } + + /* + * Ensure the tlist entry's exposed collation matches the set-op. + * This is necessary because plan_set_operations() reports the result + * ordering as a list of SortGroupClauses, which don't carry collation + * themselves but just refer to tlist entries. If we don't show the + * right collation then planner.c might do the wrong thing in + * higher-level queries. + * + * Note we use RelabelType, not CollateExpr, since this expression + * will reach the executor without any further processing. + */ + if (exprCollation(expr) != colColl) + { + expr = (Node *) makeRelabelType((Expr *) expr, + exprType(expr), + exprTypmod(expr), + colColl, + COERCE_DONTCARE); + } + tle = makeTargetEntry((Expr *) expr, (AttrNumber) resno++, pstrdup(reftle->resname), false); tlist = lappend(tlist, tle); - - j = lnext(j); - k = lnext(k); } if (flag >= 0) @@ -928,17 +983,19 @@ generate_setop_tlist(List *colTypes, int flag, /* * Generate targetlist for a set-operation Append node * - * colTypes: column datatypes for non-junk columns + * colTypes: OID list of set-op's result column datatypes + * colCollations: OID list of set-op's result column collations * flag: true to create a flag column copied up from subplans * input_plans: list of sub-plans of the Append * refnames_tlist: targetlist to take column names from * * The entries in the Append's targetlist should always be simple Vars; - * we just have to make sure they have the right datatypes and typmods. + * we just have to make sure they have the right datatypes/typmods/collations. * The Vars are always generated with varno 0. */ static List * -generate_append_tlist(List *colTypes, List *colCollations, bool flag, +generate_append_tlist(List *colTypes, List *colCollations, + bool flag, List *input_plans, List *refnames_tlist) { @@ -1000,7 +1057,8 @@ generate_append_tlist(List *colTypes, List *colCollations, bool flag, * Now we can build the tlist for the Append. */ colindex = 0; - forthree(curColType, colTypes, curColCollation, colCollations, ref_tl_item, refnames_tlist) + forthree(curColType, colTypes, curColCollation, colCollations, + ref_tl_item, refnames_tlist) { Oid colType = lfirst_oid(curColType); int32 colTypmod = colTypmods[colindex++]; @@ -1331,7 +1389,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) * Build the list of translations from parent Vars to child Vars for * an inheritance child. * - * For paranoia's sake, we match type as well as attribute name. + * For paranoia's sake, we match type/collation as well as attribute name. */ static void make_inh_translation_list(Relation oldrelation, Relation newrelation, @@ -1410,10 +1468,13 @@ make_inh_translation_list(Relation oldrelation, Relation newrelation, attname, RelationGetRelationName(newrelation)); } - /* Found it, check type */ + /* Found it, check type and collation match */ if (atttypid != att->atttypid || atttypmod != att->atttypmod) elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's type", attname, RelationGetRelationName(newrelation)); + if (attcollation != att->attcollation) + elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's collation", + attname, RelationGetRelationName(newrelation)); vars = lappend(vars, makeVar(newvarno, (AttrNumber) (new_attno + 1), diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index d7e3a38e6f..d17424e40f 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -195,6 +195,40 @@ tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK) return true; } +/* + * Does tlist have same exposed collations as listed in colCollations? + * + * Identical logic to the above, but for collations. + */ +bool +tlist_same_collations(List *tlist, List *colCollations, bool junkOK) +{ + ListCell *l; + ListCell *curColColl = list_head(colCollations); + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk) + { + if (!junkOK) + return false; + } + else + { + if (curColColl == NULL) + return false; /* tlist longer than colCollations */ + if (exprCollation((Node *) tle->expr) != lfirst_oid(curColColl)) + return false; + curColColl = lnext(curColColl); + } + } + if (curColColl != NULL) + return false; /* tlist shorter than colCollations */ + return true; +} + /* * get_sortgroupref_tle diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index f7606d79a3..7af59589dc 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -25,6 +25,7 @@ extern List *add_to_flat_tlist(List *tlist, List *exprs); extern List *get_tlist_exprs(List *tlist, bool includeJunk); extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK); +extern bool tlist_same_collations(List *tlist, List *colCollations, bool junkOK); extern TargetEntry *get_sortgroupref_tle(Index sortref, List *targetList);