Modify the definition of window-function PARTITION BY and ORDER BY clauses

so that their elements are always taken as simple expressions over the query's input columns. It originally seemed like a good idea to make them act exactly like GROUP BY and ORDER BY, right down to the SQL92-era behavior of accepting output column names or numbers. However, that was not such a great idea, for two reasons: 1. It permits circular references, as exhibited in bug #5018: the output column could be the one containing the window function itself. (We actually had a regression test case illustrating this, but nobody thought twice about how confusing that would be.) 2. It doesn't seem like a good idea for, eg, "lead(foo) OVER (ORDER BY foo)" to potentially use two completely different meanings for "foo". Accordingly, narrow down the behavior of window clauses to use only the SQL99-compliant interpretation that the expressions are simple expressions.
2025-01-12 18:34:36 +08:00 · 2009-08-27 20:08:03 +00:00 · 2009-08-27 20:08:03 +00:00 · bb16dc49ab
commit bb16dc49ab
parent f065b17d9f
7 changed files with 116 additions and 63 deletions
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.123 2009/08/18 23:40:20 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.124 2009/08/27 20:08:02 tgl Exp $
 PostgreSQL documentation
 -->

@ -595,17 +595,24 @@ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceabl

   <para>
    The elements of the <literal>PARTITION BY</> list are interpreted in
-    the same fashion as elements of a
-    <xref linkend="sql-groupby" endterm="sql-groupby-title">, and
-    the elements of the <literal>ORDER BY</> list are interpreted in the
-    same fashion as elements of an
-    <xref linkend="sql-orderby" endterm="sql-orderby-title">.
-    The only difference is that these expressions can contain aggregate
+    much the same fashion as elements of a
+    <xref linkend="sql-groupby" endterm="sql-groupby-title">, except that
+    they are always simple expressions and never the name or number of an
+    output column.
+    Another difference is that these expressions can contain aggregate
    function calls, which are not allowed in a regular <literal>GROUP BY</>
    clause.  They are allowed here because windowing occurs after grouping
    and aggregation.
   </para>

+   <para>
+    Similarly, the elements of the <literal>ORDER BY</> list are interpreted
+    in much the same fashion as elements of an
+    <xref linkend="sql-orderby" endterm="sql-orderby-title">, except that
+    the expressions are always taken as simple expressions and never the name
+    or number of an output column.
+   </para>
+
   <para>
    The optional <replaceable class="parameter">frame_clause</> defines
    the <firstterm>window frame</> for window functions that depend on the
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.133 2009/06/17 21:58:49 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.134 2009/08/27 20:08:02 tgl Exp $ -->

 <chapter id="sql-syntax">
 <title>SQL Syntax</title>
@ -1619,7 +1619,9 @@ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    expression that does not itself contain window function calls.
    The <literal>PARTITION BY</> and <literal>ORDER BY</> lists have
    essentially the same syntax and semantics as <literal>GROUP BY</>
-    and <literal>ORDER BY</> clauses of the whole query.
+    and <literal>ORDER BY</> clauses of the whole query, except that their
+    expressions are always just expressions and cannot be output-column
+    names or numbers.
    <replaceable>window_name</replaceable> is a reference to a named window
    specification defined in the query's <literal>WINDOW</literal> clause.
    Named window specifications are usually referenced with just
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@ -17,7 +17,7 @@
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- *	$PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.389 2009/06/11 14:48:59 momjian Exp $
+ *	$PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.390 2009/08/27 20:08:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -825,13 +825,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 	qry->sortClause = transformSortClause(pstate,
 										  stmt->sortClause,
 										  &qry->targetList,
-										  true /* fix unknowns */ );
+										  true /* fix unknowns */,
+										  false /* not window function */);

 	qry->groupClause = transformGroupClause(pstate,
 											stmt->groupClause,
 											&qry->targetList,
 											qry->sortClause,
-											false);
+											false /* not window function */);

 	if (stmt->distinctClause == NIL)
 	{
@ -1039,7 +1040,8 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
 	qry->sortClause = transformSortClause(pstate,
 										  stmt->sortClause,
 										  &qry->targetList,
-										  true /* fix unknowns */ );
+										  true /* fix unknowns */,
+										  false /* not window function */);

 	qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset,
 											"OFFSET");
@ -1291,7 +1293,8 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
 	qry->sortClause = transformSortClause(pstate,
 										  sortClause,
 										  &qry->targetList,
-										  false /* no unknowns expected */ );
+										  false /* no unknowns expected */,
+										  false /* not window function */);

 	pstate->p_rtable = list_truncate(pstate->p_rtable, sv_rtable_length);
 	pstate->p_relnamespace = sv_relnamespace;
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.190 2009/07/16 06:33:43 petere Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.191 2009/08/27 20:08:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -37,16 +37,15 @@
 #include "utils/rel.h"


+/* clause types for findTargetlistEntrySQL92 */
 #define ORDER_CLAUSE 0
 #define GROUP_CLAUSE 1
 #define DISTINCT_ON_CLAUSE 2
-#define PARTITION_CLAUSE 3

 static const char *const clauseText[] = {
 	"ORDER BY",
 	"GROUP BY",
-	"DISTINCT ON",
-	"PARTITION BY"
+	"DISTINCT ON"
 };

 static void extractRemainingColumns(List *common_colnames,
@ -73,8 +72,10 @@ static Node *transformFromClauseItem(ParseState *pstate, Node *n,
 						Relids *containedRels);
 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 				   Var *l_colvar, Var *r_colvar);
-static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
-					List **tlist, int clause);
+static TargetEntry *findTargetlistEntrySQL92(ParseState *pstate, Node *node,
+						 List **tlist, int clause);
+static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node,
+						 List **tlist);
 static int get_matching_location(int sortgroupref,
 					  List *sortgrouprefs, List *exprs);
 static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
@ -1216,21 +1217,27 @@ transformLimitClause(ParseState *pstate, Node *clause,


 /*
- *	findTargetlistEntry -
+ *	findTargetlistEntrySQL92 -
 *	  Returns the targetlist entry matching the given (untransformed) node.
 *	  If no matching entry exists, one is created and appended to the target
 *	  list as a "resjunk" node.
 *
+ * This function supports the old SQL92 ORDER BY interpretation, where the
+ * expression is an output column name or number.  If we fail to find a
+ * match of that sort, we fall through to the SQL99 rules.  For historical
+ * reasons, Postgres also allows this interpretation for GROUP BY, though
+ * the standard never did.  However, for GROUP BY we prefer a SQL99 match.
+ * This function is *not* used for WINDOW definitions.
+ *
 * node		the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
 * tlist	the target list (passed by reference so we can append to it)
 * clause	identifies clause type being processed
 */
 static TargetEntry *
-findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
+findTargetlistEntrySQL92(ParseState *pstate, Node *node, List **tlist,
+						 int clause)
 {
-	TargetEntry *target_result = NULL;
 	ListCell   *tl;
-	Node	   *expr;

 	/*----------
 	 * Handle two special cases as mandated by the SQL92 spec:
@ -1258,8 +1265,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
 	 * 2. IntegerConstant
 	 *	  This means to use the n'th item in the existing target list.
 	 *	  Note that it would make no sense to order/group/distinct by an
-	 *	  actual constant, so this does not create a conflict with our
-	 *	  extension to order/group by an expression.
+	 *	  actual constant, so this does not create a conflict with SQL99.
 	 *	  GROUP BY column-number is not allowed by SQL92, but since
 	 *	  the standard has no other behavior defined for this syntax,
 	 *	  we may as well accept this common extension.
@ -1268,7 +1274,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
 	 * since the user didn't write them in his SELECT list.
 	 *
 	 * If neither special case applies, fall through to treat the item as
-	 * an expression.
+	 * an expression per SQL99.
 	 *----------
 	 */
 	if (IsA(node, ColumnRef) &&
@ -1278,15 +1284,15 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
 		char	   *name = strVal(linitial(((ColumnRef *) node)->fields));
 		int			location = ((ColumnRef *) node)->location;

-		if (clause == GROUP_CLAUSE || clause == PARTITION_CLAUSE)
+		if (clause == GROUP_CLAUSE)
 		{
 			/*
 			 * In GROUP BY, we must prefer a match against a FROM-clause
 			 * column to one against the targetlist.  Look to see if there is
-			 * a matching column.  If so, fall through to let transformExpr()
-			 * do the rest.  NOTE: if name could refer ambiguously to more
-			 * than one column name exposed by FROM, colNameToVar will
-			 * ereport(ERROR).	That's just what we want here.
+			 * a matching column.  If so, fall through to use SQL99 rules.
+			 * NOTE: if name could refer ambiguously to more than one column
+			 * name exposed by FROM, colNameToVar will ereport(ERROR). That's
+			 * just what we want here.
 			 *
 			 * Small tweak for 7.4.3: ignore matches in upper query levels.
 			 * This effectively changes the search order for bare names to (1)
@ -1295,8 +1301,6 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
 			 * SQL99 do not allow GROUPing BY an outer reference, so this
 			 * breaks no cases that are legal per spec, and it seems a more
 			 * self-consistent behavior.
-			 *
-			 * Window PARTITION BY clauses should act exactly like GROUP BY.
 			 */
 			if (colNameToVar(pstate, name, true, location) != NULL)
 				name = NULL;
@ -1304,6 +1308,8 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)

 		if (name != NULL)
 		{
+			TargetEntry *target_result = NULL;
+
 			foreach(tl, *tlist)
 			{
 				TargetEntry *tle = (TargetEntry *) lfirst(tl);
@ -1367,12 +1373,36 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
 	}

 	/*
-	 * Otherwise, we have an expression (this is a Postgres extension not
-	 * found in SQL92).  Convert the untransformed node to a transformed
-	 * expression, and search for a match in the tlist. NOTE: it doesn't
-	 * really matter whether there is more than one match.	Also, we are
-	 * willing to match a resjunk target here, though the above cases must
-	 * ignore resjunk targets.
+	 * Otherwise, we have an expression, so process it per SQL99 rules.
+	 */
+	return findTargetlistEntrySQL99(pstate, node, tlist);
+}
+
+/*
+ *	findTargetlistEntrySQL99 -
+ *	  Returns the targetlist entry matching the given (untransformed) node.
+ *	  If no matching entry exists, one is created and appended to the target
+ *	  list as a "resjunk" node.
+ *
+ * This function supports the SQL99 interpretation, wherein the expression
+ * is just an ordinary expression referencing input column names.
+ *
+ * node		the ORDER BY, GROUP BY, etc expression to be matched
+ * tlist	the target list (passed by reference so we can append to it)
+ */
+static TargetEntry *
+findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist)
+{
+	TargetEntry *target_result;
+	ListCell   *tl;
+	Node	   *expr;
+
+	/*
+	 * Convert the untransformed node to a transformed expression, and search
+	 * for a match in the tlist.  NOTE: it doesn't really matter whether there
+	 * is more than one match.  Also, we are willing to match an existing
+	 * resjunk target here, though the SQL92 cases above must ignore resjunk
+	 * targets.
 	 */
 	expr = transformExpr(pstate, node);

@ -1403,16 +1433,15 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
 * GROUP BY items will be added to the targetlist (as resjunk columns)
 * if not already present, so the targetlist must be passed by reference.
 *
- * This is also used for window PARTITION BY clauses (which actually act
- * just the same, except for the clause name used in error messages).
+ * This is also used for window PARTITION BY clauses (which act almost the
+ * same, but are always interpreted per SQL99 rules).
 */
 List *
 transformGroupClause(ParseState *pstate, List *grouplist,
 					 List **targetlist, List *sortClause,
-					 bool isPartition)
+					 bool isWindowFunc)
 {
 	List	   *result = NIL;
-	int			clause = isPartition ? PARTITION_CLAUSE : GROUP_CLAUSE;
 	ListCell   *gl;

 	foreach(gl, grouplist)
@ -1421,7 +1450,11 @@ transformGroupClause(ParseState *pstate, List *grouplist,
 		TargetEntry *tle;
 		bool		found = false;

-		tle = findTargetlistEntry(pstate, gexpr, targetlist, clause);
+		if (isWindowFunc)
+			tle = findTargetlistEntrySQL99(pstate, gexpr, targetlist);
+		else
+			tle = findTargetlistEntrySQL92(pstate, gexpr, targetlist,
+										   GROUP_CLAUSE);

 		/* Eliminate duplicates (GROUP BY x, x) */
 		if (targetIsInSortList(tle, InvalidOid, result))
@ -1475,12 +1508,16 @@ transformGroupClause(ParseState *pstate, List *grouplist,
 *
 * ORDER BY items will be added to the targetlist (as resjunk columns)
 * if not already present, so the targetlist must be passed by reference.
+ *
+ * This is also used for window ORDER BY clauses (which act almost the
+ * same, but are always interpreted per SQL99 rules).
 */
 List *
 transformSortClause(ParseState *pstate,
 					List *orderlist,
 					List **targetlist,
-					bool resolveUnknown)
+					bool resolveUnknown,
+					bool isWindowFunc)
 {
 	List	   *sortlist = NIL;
 	ListCell   *olitem;
@ -1490,8 +1527,11 @@ transformSortClause(ParseState *pstate,
 		SortBy	   *sortby = (SortBy *) lfirst(olitem);
 		TargetEntry *tle;

-		tle = findTargetlistEntry(pstate, sortby->node,
-								  targetlist, ORDER_CLAUSE);
+		if (isWindowFunc)
+			tle = findTargetlistEntrySQL99(pstate, sortby->node, targetlist);
+		else
+			tle = findTargetlistEntrySQL92(pstate, sortby->node, targetlist,
+										   ORDER_CLAUSE);

 		sortlist = addTargetToSortList(pstate, tle,
 									   sortlist, *targetlist, sortby,
@ -1550,18 +1590,19 @@ transformWindowDefinitions(ParseState *pstate,

 		/*
 		 * Transform PARTITION and ORDER specs, if any.  These are treated
-		 * exactly like top-level GROUP BY and ORDER BY clauses, including the
-		 * special handling of nondefault operator semantics.
+		 * almost exactly like top-level GROUP BY and ORDER BY clauses,
+		 * including the special handling of nondefault operator semantics.
 		 */
 		orderClause = transformSortClause(pstate,
 										  windef->orderClause,
 										  targetlist,
-										  true);
+										  true /* fix unknowns */,
+										  true /* window function */);
 		partitionClause = transformGroupClause(pstate,
 											   windef->partitionClause,
 											   targetlist,
 											   orderClause,
-											   true);
+											   true /* window function */);

 		/*
 		 * And prepare the new WindowClause.
@ -1736,8 +1777,8 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
 		int			sortgroupref;
 		TargetEntry *tle;

-		tle = findTargetlistEntry(pstate, dexpr,
-								  targetlist, DISTINCT_ON_CLAUSE);
+		tle = findTargetlistEntrySQL92(pstate, dexpr, targetlist,
+									   DISTINCT_ON_CLAUSE);
 		sortgroupref = assignSortGroupRef(tle, *targetlist);
 		sortgrouprefs = lappend_int(sortgrouprefs, sortgroupref);
 	}
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.55 2009/06/11 14:49:11 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.56 2009/08/27 20:08:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -28,9 +28,9 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
 					 const char *constructName);
 extern List *transformGroupClause(ParseState *pstate, List *grouplist,
 					 List **targetlist, List *sortClause,
-					 bool isPartition);
+					 bool isWindowFunc);
 extern List *transformSortClause(ParseState *pstate, List *orderlist,
-					List **targetlist, bool resolveUnknown);
+					List **targetlist, bool resolveUnknown, bool isWindowFunc);

 extern List *transformWindowDefinitions(ParseState *pstate,
 						   List *windowdefs,
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@ -741,11 +741,11 @@ SELECT rank() OVER (ORDER BY length('abc'));
    1
 (1 row)

-- but this draws an error: "ORDER BY 1" means order by first SELECT column
-SELECT rank() OVER (ORDER BY 1);
+-- can't order by another window function
+SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random()));
 ERROR:  window functions not allowed in window definition
-LINE 1: SELECT rank() OVER (ORDER BY 1);
-               ^
+LINE 1: SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random())...
+                                     ^
 -- some other errors
 SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10;
 ERROR:  window functions not allowed in WHERE clause
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@ -167,8 +167,8 @@ SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SEL
 -- ordering by a non-integer constant is allowed
 SELECT rank() OVER (ORDER BY length('abc'));

-- but this draws an error: "ORDER BY 1" means order by first SELECT column
-SELECT rank() OVER (ORDER BY 1);
+-- can't order by another window function
+SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random()));

 -- some other errors
 SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10;