diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 38277cf8d8..9dbc495812 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -1,5 +1,5 @@ @@ -19,25 +19,36 @@ Postgres documentation - 1999-07-20 + 2000-12-11 SELECT [ ALL | DISTINCT [ ON ( expression [, ...] ) ] ] - expression [ AS name ] [, ...] + * | expression [ AS output_name ] [, ...] [ INTO [ TEMPORARY | TEMP ] [ TABLE ] new_table ] - [ FROM [ ONLY ]table [ alias ] [, ...] ] + [ FROM from_item [, ...] ] [ WHERE condition ] - [ GROUP BY column [, ...] ] + [ GROUP BY expression [, ...] ] [ HAVING condition [, ...] ] - [ { UNION [ ALL ] | INTERSECT | EXCEPT } select ] - [ ORDER BY column [ ASC | DESC | USING operator ] [, ...] ] + [ { UNION | INTERSECT | EXCEPT } [ ALL ] select ] + [ ORDER BY expression [ ASC | DESC | USING operator ] [, ...] ] [ FOR UPDATE [ OF class_name [, ...] ] ] [ LIMIT { count | ALL } [ { OFFSET | , } start ]] + +where from_item can be: + +[ ONLY ] table_name [ * ] + [ [ AS ] alias [ ( column_alias_list ) ] ] +| +( select ) + [ AS ] alias [ ( column_alias_list ) ] +| +from_item [ NATURAL ] join_type from_item + [ ON join_condition | USING ( join_column_list ) ] - 2000-03-15 + 2000-12-11 Inputs @@ -55,14 +66,14 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac </varlistentry> <varlistentry> - <term><replaceable class="PARAMETER">name</replaceable></term> + <term><replaceable class="PARAMETER">output_name</replaceable></term> <listitem> <para> - Specifies another name for a column or an expression using + Specifies another name for an output column using the AS clause. This name is primarily used to label the column for display. It can also be used to refer to the column's value in ORDER BY and GROUP BY clauses. 
But the - <replaceable class="PARAMETER">name</replaceable> + <replaceable class="PARAMETER">output_name</replaceable> cannot be used in the WHERE or HAVING clauses; write out the expression instead. </para> @@ -75,7 +86,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac <listitem> <para> If TEMPORARY or TEMP is specified, - the table is created unique to this session, and is + the output table is created unique to this session, and is automatically dropped on session exit. </para> </listitem> @@ -87,7 +98,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac <para> If the INTO TABLE clause is specified, the result of the query will be stored in a new table with the indicated - name. + name, rather than being returned to the client. The target table (<replaceable class="PARAMETER">new_table</replaceable>) will be created automatically and must not exist before this command. Refer to <command>SELECT INTO</command> for more information. @@ -95,7 +106,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac <note> <para> The <command>CREATE TABLE AS</command> statement will also - create a new table from a select query. + create a new table from a SELECT query. </para> </note> </para> @@ -103,22 +114,10 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac </varlistentry> <varlistentry> - <term><replaceable class="PARAMETER">table</replaceable></term> + <term><replaceable class="PARAMETER">from_item</replaceable></term> <listitem> <para> - The name of an existing table referenced by the FROM clause. - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term><replaceable class="PARAMETER">alias</replaceable></term> - <listitem> - <para> - An alternate name for the preceding - <replaceable class="PARAMETER">table</replaceable>. - It is used for brevity or to eliminate ambiguity for joins - within a single table. 
+ A table reference, sub-SELECT, or JOIN clause. See below for details. </para> </listitem> </varlistentry> @@ -128,16 +127,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac <listitem> <para> A boolean expression giving a result of true or false. - See the WHERE clause. - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term><replaceable class="PARAMETER">column</replaceable></term> - <listitem> - <para> - The name of a table's column. + See the WHERE and HAVING clause descriptions below. </para> </listitem> </varlistentry> @@ -146,14 +136,102 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac <term><replaceable class="PARAMETER">select</replaceable></term> <listitem> <para> - A select statement with all features except the ORDER BY and - LIMIT clauses. + A select statement with all features except the ORDER BY, FOR UPDATE, + and LIMIT clauses (even those can be used when the select is + parenthesized). </para> </listitem> </varlistentry> </variablelist> </para> + + <para> + FROM items can contain: + <variablelist> + + <varlistentry> + <term><replaceable class="PARAMETER">table_name</replaceable></term> + <listitem> + <para> + The name of an existing table or view. If ONLY is specified, only that + table is scanned. If ONLY is not specified, the table and all its + descendant tables (if any) are scanned. * can be appended to the + table name to indicate that descendant tables are to be scanned, + but as of <Productname>Postgres</Productname> 7.1 this is the default + behavior. (In releases before 7.1, ONLY was the default behavior.) + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="PARAMETER">alias</replaceable></term> + <listitem> + <para> + A substitute name for the preceding + <replaceable class="PARAMETER">table_name</replaceable>. 
+ An alias is used for brevity or to eliminate ambiguity for self-joins + (where the same table is scanned multiple times). If an alias is + written, a column alias list can also be written to provide + substitute names for one or more columns of the table. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="PARAMETER">select</replaceable></term> + <listitem> + <para> + A sub-SELECT can appear in the FROM clause. This acts as though + its output were created as a temporary table for the duration of + this single SELECT command. Note that the sub-SELECT must be + surrounded by parentheses, and an alias <emphasis>must</emphasis> + be provided for it. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="PARAMETER">join_type</replaceable></term> + <listitem> + <para> + One of + <command>[ INNER ] JOIN</command>, + <command>LEFT [ OUTER ] JOIN</command>, + <command>RIGHT [ OUTER ] JOIN</command>, + <command>FULL [ OUTER ] JOIN</command>, or + <command>CROSS JOIN</command>. + For INNER and OUTER join types, exactly one of NATURAL, + ON <replaceable class="PARAMETER">join_condition</replaceable>, or + USING ( <replaceable class="PARAMETER">join_column_list</replaceable> ) + must appear. For CROSS JOIN, none of these items may appear. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="PARAMETER">join_condition</replaceable></term> + <listitem> + <para> + A qualification condition. This is similar to the WHERE condition + except that it only applies to the two from_items being joined in + this JOIN clause. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="PARAMETER">join_column_list</replaceable></term> + <listitem> + <para> + A USING column list ( a, b, ... ) is shorthand for the ON condition + left_table.a = right_table.a AND left_table.b = right_table.b ... 
+ </para> + </listitem> + </varlistentry> + + </variablelist> + </para> </refsect2> <refsect2 id="R2-SQL-SELECT-2"> @@ -192,7 +270,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac <refsect1 id="R1-SQL-SELECT-1"> <refsect1info> - <date>2000-03-15</date> + <date>2000-12-11</date> </refsect1info> <title> Description @@ -203,12 +281,15 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac if WHERE is omitted, all rows are candidates. (See <xref linkend="sql-where" endterm="sql-where-title">.) </para> + <para> - <command>ONLY</command> will eliminate rows from subclasses of the table. - This was previously the default result, and getting subclasses was - obtained by appending <command>*</command> to the table name. - The old behaviour is available via the command - <command>SET SQL_Inheritance TO OFF;</command> + Actually, the returned rows are not directly the rows produced by the + FROM/WHERE/GROUP BY/HAVING clauses; rather, the output rows are formed + by computing the SELECT output expressions for each selected row. + <command>*</command> can be written in the output list as a shorthand + for all the columns of the selected rows. Also, one can write + <replaceable class="PARAMETER">table_name</replaceable><command>.*</command> + as a shorthand for the columns coming from just that table. </para> <para> @@ -257,19 +338,28 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac </para> <para> - The UNION operator allows the result to be the collection of rows + SELECT queries can be combined using UNION, INTERSECT, and EXCEPT + operators. Use parentheses if necessary to determine the ordering + of these operators. + </para> + + <para> + The UNION operator computes the collection of rows returned by the queries involved. + Duplicate rows are eliminated unless ALL is specified. (See <xref linkend="sql-union" endterm="sql-union-title">.) 
</para> <para> - The INTERSECT operator gives you the rows that are common to both queries. + The INTERSECT operator computes the rows that are common to both queries. + Duplicate rows are eliminated unless ALL is specified. (See <xref linkend="sql-intersect" endterm="sql-intersect-title">.) </para> <para> - The EXCEPT operator gives you the rows returned by the first query but + The EXCEPT operator computes the rows returned by the first query but not the second query. + Duplicate rows are eliminated unless ALL is specified. (See <xref linkend="sql-except" endterm="sql-except-title">.) </para> @@ -289,6 +379,95 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac (See the <command>GRANT</command>/<command>REVOKE</command> statements). </para> + <refsect2 id="SQL-FROM"> + <refsect2info> + <date>2000-12-11</date> + </refsect2info> + <title id="sql-from-title"> + FROM Clause + + + + The FROM clause specifies one or more source tables for the SELECT. + If multiple sources are specified, the result is conceptually the + Cartesian product of all the rows in all the sources --- but usually + qualification conditions are added to restrict the returned rows to + a small subset of the Cartesian product. + + + + When a FROM item is a simple table name, it implicitly includes rows + from subclasses (inheritance children) of the table. + ONLY will + suppress rows from subclasses of the table. Before + Postgres 7.1, + this was the default result, and adding subclasses was done + by appending * to the table name. + This old behaviour is available via the command + SET SQL_Inheritance TO OFF; + + + + A FROM item can also be a parenthesized sub-SELECT (note that an + alias clause is required for a sub-SELECT!). This is an extremely + handy feature since it's the only way to get multiple levels of + grouping, aggregation, or sorting in a single query. + + + + Finally, a FROM item can be a JOIN clause, which combines two simpler + FROM items. 
(Use parentheses if necessary to determine the order + of nesting.) + + + + A CROSS JOIN or INNER JOIN is a simple Cartesian product, + the same as you get from listing the two items at the top level of FROM, but restricted by the join condition (if any). + CROSS JOIN is equivalent to INNER JOIN ON (TRUE), that is, no rows are + removed by qualification. These join types are just a notational + convenience, since they do nothing you couldn't do with plain FROM and + WHERE. + + + + LEFT OUTER JOIN returns all rows in the qualified Cartesian product + (i.e., all combined rows that pass its ON condition), plus one copy of each + row in the left-hand table for which there was no right-hand row that + passed the ON condition. This left-hand row is extended to the full + width of the joined table by inserting NULLs for the right-hand columns. + Note that only the JOIN's own ON or USING condition is considered while + deciding which rows have matches. Outer ON or WHERE conditions are + applied afterwards. + + + + Conversely, RIGHT OUTER JOIN returns all the joined rows, plus one row + for each unmatched right-hand row (extended with nulls on the left). + This is just a notational + convenience, since you could convert it to a LEFT OUTER JOIN by switching + the left and right inputs. + + + + FULL OUTER JOIN returns all the joined rows, plus one row for each + unmatched left-hand row (extended with nulls on the right), plus one row + for each unmatched right-hand row (extended with nulls on the left). + + + + For all the JOIN types except CROSS JOIN, you must write exactly one of + ON join_condition, + USING ( join_column_list ), + or NATURAL. ON is the most general case: you can write any qualification + expression involving the two tables to be joined. + A USING column list ( a, b, ... ) is shorthand for the ON condition + left_table.a = right_table.a AND left_table.b = right_table.b ... + Also, USING implies that only one of each pair of equivalent columns will + be included in the JOIN output, not both. 
NATURAL is shorthand for + a USING list that mentions all similarly-named columns in the tables. + + + 2000-03-15 @@ -340,7 +519,7 @@ WHERE boolean_expr GROUP BY specifies a grouped table derived by the application of this clause: -GROUP BY column [, ...] +GROUP BY expression [, ...] @@ -357,9 +536,10 @@ GROUP BY column [, ...] - An item in GROUP BY can also be the name or ordinal number of an output - column (SELECT expression), or it can be an arbitrary expression formed - from input-column values. In case of ambiguity, a GROUP BY name will + A GROUP BY item can be an input column name, or the name or ordinal + number of an output column (SELECT expression), or it can be an arbitrary + expression formed from input-column values. In case of ambiguity, a GROUP + BY name will be interpreted as an input-column name rather than an output column name. @@ -375,17 +555,17 @@ GROUP BY column [, ...] The optional HAVING condition has the general form: -HAVING cond_expr +HAVING boolean_expr - where cond_expr is the same + where boolean_expr is the same as specified for the WHERE clause. HAVING specifies a grouped table derived by the elimination of group rows that do not satisfy the - cond_expr. + boolean_expr. HAVING is different from WHERE: WHERE filters individual rows before application of GROUP BY, while HAVING filters group rows created by GROUP BY. @@ -393,7 +573,7 @@ HAVING cond_expr Each column referenced in - cond_expr shall unambiguously + boolean_expr shall unambiguously reference a grouping column, unless the reference appears within an aggregate function. @@ -408,15 +588,17 @@ HAVING cond_expr -ORDER BY column [ ASC | DESC ] [, ...] +ORDER BY expression [ ASC | DESC | USING operator ] [, ...] - column can be either a - result column name or an ordinal number. + An ORDER BY item can be the name or ordinal + number of an output column (SELECT expression), or it can be an arbitrary + expression formed from input-column values. 
In case of ambiguity, an + ORDER BY name will be interpreted as an output-column name. - The ordinal numbers refers to the ordinal (left-to-right) position + The ordinal number refers to the ordinal (left-to-right) position of the result column. This feature makes it possible to define an ordering on the basis of a column that does not have a proper name. This is never absolutely necessary because it is always possible @@ -447,13 +629,13 @@ SELECT name FROM distributors ORDER BY code; or ASC (ascending) after each column name in the ORDER BY clause. If not specified, ASC is assumed by default. Alternatively, a specific ordering operator name may be specified. ASC is equivalent - to USING '<' and DESC is equivalent to USING '>'. + to USING < and DESC is equivalent to USING >. - 1998-09-24 + 2000-12-11 UNION Clause @@ -461,16 +643,21 @@ SELECT name FROM distributors ORDER BY code; <para> <synopsis> <replaceable class="PARAMETER">table_query</replaceable> UNION [ ALL ] <replaceable class="PARAMETER">table_query</replaceable> - [ ORDER BY <replaceable class="PARAMETER">column</replaceable> [ ASC | DESC ] [, ...] ] + [ ORDER BY <replaceable class="PARAMETER">expression</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...] ] + [ LIMIT { <replaceable class="PARAMETER">count</replaceable> | ALL } [ { OFFSET | , } <replaceable class="PARAMETER">start</replaceable> ]] </synopsis> where <replaceable class="PARAMETER">table_query</replaceable> - specifies any select expression without an ORDER BY or LIMIT clause. + specifies any select expression without an ORDER BY, FOR UPDATE, + or LIMIT clause. (ORDER BY and LIMIT can be attached to a sub-expression + if it is enclosed in parentheses. Without parentheses, these clauses + will be taken to apply to the result of the UNION, not to its right-hand + input expression.) 
</para> <para> - The UNION operator allows the result to be the collection of rows + The UNION operator computes the collection (set union) of the rows returned by the queries involved. The two SELECTs that represent the direct operands of the UNION must produce the same number of columns, and corresponding columns must be @@ -478,79 +665,98 @@ SELECT name FROM distributors ORDER BY code; </para> <para> - By default, the result of UNION does not contain any duplicate rows - unless the ALL clause is specified. + The result of UNION does not contain any duplicate rows + unless the ALL option is specified. ALL prevents elimination of + duplicates. </para> <para> Multiple UNION operators in the same SELECT statement are - evaluated left to right. - Note that the ALL keyword is not global in nature, being - applied only for the current pair of table results. + evaluated left to right, unless otherwise indicated by parentheses. + </para> + + <para> + Currently, FOR UPDATE may not be specified either for a UNION result + or for the inputs of a UNION. </para> </refsect2> <refsect2 id="SQL-INTERSECT"> <refsect2info> - <date>1998-09-24</date> + <date>2000-12-11</date> </refsect2info> <title id="sql-intersect-title"> INTERSECT Clause -table_query INTERSECT table_query - [ ORDER BY column [ ASC | DESC ] [, ...] ] +table_query INTERSECT [ ALL ] table_query + [ ORDER BY expression [ ASC | DESC | USING operator ] [, ...] ] + [ LIMIT { count | ALL } [ { OFFSET | , } start ]] where table_query - specifies any select expression without an ORDER BY or LIMIT clause. + specifies any select expression without an ORDER BY, FOR UPDATE, + or LIMIT clause. - The INTERSECT operator gives you the rows that are common to both queries. - The two SELECTs that represent the direct operands of the INTERSECT must - produce the same number of columns, and corresponding columns must be - of compatible data types. 
+ INTERSECT is similar to UNION, except that it produces only rows that + appear in both query outputs, rather than rows that appear in either. + + + + The result of INTERSECT does not contain any duplicate rows + unless the ALL option is specified. With ALL, a row that has + m duplicates in the left query's output (L) and n duplicates in the right query's output (R) will appear min(m,n) times. Multiple INTERSECT operators in the same SELECT statement are evaluated left to right, unless parentheses dictate otherwise. + INTERSECT binds more tightly than UNION --- that is, + A UNION B INTERSECT C will be read as + A UNION (B INTERSECT C) unless otherwise specified by parentheses. - 1998-09-24 + 2000-12-11 EXCEPT Clause -table_query EXCEPT table_query - [ ORDER BY column [ ASC | DESC ] [, ...] ] +table_query EXCEPT [ ALL ] table_query + [ ORDER BY expression [ ASC | DESC | USING operator ] [, ...] ] + [ LIMIT { count | ALL } [ { OFFSET | , } start ]] where table_query - specifies any select expression without an ORDER BY or LIMIT clause. + specifies any select expression without an ORDER BY, FOR UPDATE, + or LIMIT clause. - The EXCEPT operator gives you the rows returned by the first query but - not the second query. - The two SELECTs that represent the direct operands of the EXCEPT must - produce the same number of columns, and corresponding columns must be - of compatible data types. + EXCEPT is similar to UNION, except that it produces only rows that + appear in the left query's output but not in the right query's output. - + + + The result of EXCEPT does not contain any duplicate rows + unless the ALL option is specified. With ALL, a row that has + m duplicates in the left query's output (L) and n duplicates in the right query's output (R) will appear max(m-n,0) times. + + Multiple EXCEPT operators in the same SELECT statement are evaluated left to right, unless parentheses dictate otherwise. + EXCEPT binds at the same level as UNION. @@ -758,7 +964,22 @@ SELECT actors.name Postgres allows one to omit the FROM clause from a query. 
This feature -was retained from the original PostQuel query language: +was retained from the original PostQuel query language. It has +a straightforward use to compute the results of simple constant +expressions: + + +SELECT 2+2; + + ?column? +---------- + 4 + + +Some other DBMSes cannot do this except by introducing a dummy one-row +table to do the select from. A less obvious use is to abbreviate a +normal select from one or more tables: + SELECT distributors.* WHERE name = 'Westward'; @@ -766,6 +987,26 @@ SELECT distributors.* WHERE name = 'Westward'; -----+---------- 108 | Westward + +This works because an implicit FROM item is added for each table that is +referenced in the query but not mentioned in FROM. While this is a convenient +shorthand, it's easy to misuse. For example, the query + +SELECT distributors.* FROM distributors d; + +is probably a mistake; most likely the user meant + +SELECT d.* FROM distributors d; + +rather than the unconstrained join + +SELECT distributors.* FROM distributors d, distributors distributors; + +that he will actually get. To help detect this sort of mistake, +Postgres 7.1 +and later will warn if the implicit-FROM feature is used in a query that also +contains an explicit FROM clause. + @@ -791,9 +1032,9 @@ SELECT distributors.* WHERE name = 'Westward'; is just noise and can be omitted without affecting the meaning. The Postgres parser requires this keyword when - renaming columns because the type extensibility features lead to + renaming output columns because the type extensibility features lead to parsing ambiguities - in this context. + in this context. "AS" is optional in FROM items, however. The DISTINCT ON phrase is not part of SQL92. 
@@ -818,11 +1059,11 @@ SELECT distributors.* WHERE name = 'Westward'; 1998-09-24 - UNION Clause + UNION/INTERSECT/EXCEPT Clause - The SQL92 syntax for UNION allows an - additional CORRESPONDING BY clause: + The SQL92 syntax for UNION/INTERSECT/EXCEPT allows an + additional CORRESPONDING BY option: table_query UNION [ALL] [CORRESPONDING [BY (column [,...])]] diff --git a/doc/src/sgml/ref/select_into.sgml b/doc/src/sgml/ref/select_into.sgml index f3751ca0fe..f1a483bfb1 100644 --- a/doc/src/sgml/ref/select_into.sgml +++ b/doc/src/sgml/ref/select_into.sgml @@ -1,5 +1,5 @@ @@ -19,20 +19,31 @@ Postgres documentation - 1999-07-20 + 2000-12-11 SELECT [ ALL | DISTINCT [ ON ( expression [, ...] ) ] ] - expression [ AS name ] [, ...] - [ INTO [ TEMPORARY | TEMP ] [ TABLE ] new_table ] - [ FROM table [ alias ] [, ...] ] + * | expression [ AS output_name ] [, ...] + INTO [ TEMPORARY | TEMP ] [ TABLE ] new_table + [ FROM from_item [, ...] ] [ WHERE condition ] - [ GROUP BY column [, ...] ] + [ GROUP BY expression [, ...] ] [ HAVING condition [, ...] ] - [ { UNION [ ALL ] | INTERSECT | EXCEPT } select ] - [ ORDER BY column [ ASC | DESC | USING operator ] [, ...] ] + [ { UNION | INTERSECT | EXCEPT } [ ALL ] select ] + [ ORDER BY expression [ ASC | DESC | USING operator ] [, ...] ] [ FOR UPDATE [ OF class_name [, ...] ] ] - [ LIMIT { count | ALL } [ { OFFSET | , } start ] ] + [ LIMIT { count | ALL } [ { OFFSET | , } start ]] + +where from_item can be: + +[ ONLY ] table_name [ * ] + [ [ AS ] alias [ ( column_alias_list ) ] ] +| +( select ) + [ AS ] alias [ ( column_alias_list ) ] +| +from_item [ NATURAL ] join_type from_item + [ ON join_condition | USING ( join_column_list ) ] diff --git a/doc/src/sgml/rules.sgml b/doc/src/sgml/rules.sgml index c84f1333ca..0e119f7d66 100644 --- a/doc/src/sgml/rules.sgml +++ b/doc/src/sgml/rules.sgml @@ -1,6 +1,13 @@ The <ProductName>Postgres</ProductName> Rule System + + Author + + Written by Jan Wieck. Updates for 7.1 by Tom Lane. 
+ + + Production rule systems are conceptually simple, but there are many subtle points involved in actually using @@ -21,7 +28,7 @@ is totally different from stored procedures and triggers. It modifies queries to take rules into consideration, and then passes the modified - query to the query optimizer for execution. It + query to the query planner for planning and execution. It is very powerful, and can be used for many things such as query language procedures, views, and versions. The power of this rule system is discussed in @@ -34,15 +41,15 @@ To understand how the rule system works it is necessary to know - when it is invoked and what it's input and results are. + when it is invoked and what its input and results are. - The rule system is located between the query parser and the optimizer. + The rule system is located between the query parser and the planner. It takes the output of the parser, one querytree, and the rewrite rules from the pg_rewrite catalog, which are querytrees too with some extra information, and creates zero or many - querytrees as result. So it's input and output are always things + querytrees as result. So its input and output are always things the parser itself could have produced and thus, anything it sees is basically representable as an SQL statement. @@ -101,8 +108,8 @@ - The rangtable is a list of relations that are used in the query. - In a SELECT statement that are the relations given after + The rangetable is a list of relations that are used in the query. + In a SELECT statement these are the relations given after the FROM keyword. @@ -153,13 +160,15 @@ The targetlist is a list of expressions that define the result of the query. In the case of a SELECT, the expressions are what builds the final output of the query. They are the expressions - between the SELECT and the FROM keywords (* is just an - abbreviation for all the attribute names of a relation). + between the SELECT and the FROM keywords. 
(* is just an + abbreviation for all the attribute names of a relation. It is + expanded by the parser into the individual attributes, so the + rule system never sees it.) DELETE queries don't need a targetlist because they don't - produce any result. In fact the optimizer will add a special + produce any result. In fact the planner will add a special CTID entry to the empty targetlist. But this is after the rule system and will be discussed later. For the rule system the targetlist is empty. @@ -167,25 +176,25 @@ In INSERT queries the targetlist describes the new rows that - should go into the resultrelation. Missing columns of the - resultrelation will be added by the optimizer with a constant - NULL expression. It is the expressions in the VALUES clause - or the ones from the SELECT clause on INSERT ... SELECT. + should go into the resultrelation. It is the expressions in the VALUES + clause or the ones from the SELECT clause in INSERT ... SELECT. + Missing columns of the resultrelation will be filled in by the + planner with a constant NULL expression. - On UPDATE queries, it describes the new rows that should - replace the old ones. Here now the optimizer will add missing - columns by inserting expressions that put the values from the - old rows into the new one. And it will add the special entry - like for DELETE too. It is the expressions from the - SET attribute = expression part of the query. + In UPDATE queries, the targetlist describes the new rows that should + replace the old ones. In the rule system, it contains just the + expressions from the SET attribute = expression part of the query. + The planner will add missing columns by inserting expressions that + copy the values from the old row into the new one. And it will add + the special CTID entry just as for DELETE too. 
Every entry in the targetlist contains an expression that can be a constant value, a variable pointing to an attribute of one - of the relations in the rangetable, a parameter or an expression + of the relations in the rangetable, a parameter, or an expression tree made of function calls, constants, variables, operators etc. @@ -197,7 +206,7 @@ - The queries qualification is an expression much like one of those + The query's qualification is an expression much like one of those contained in the targetlist entries. The result value of this expression is a boolean that tells if the operation (INSERT, UPDATE, DELETE or SELECT) for the final result row should be @@ -207,6 +216,28 @@ + + + the join tree + + + + The query's join tree shows the structure of the FROM clause. + For a simple query like SELECT ... FROM a, b, c, the join tree is just + a list of the FROM items, because we are allowed to join them in + any order. But when JOIN expressions --- particularly outer joins + --- are used, we have to join in the order shown by the JOINs. + The join tree shows the structure of the JOIN expressions. The + restrictions associated with particular JOIN clauses (from ON or + USING expressions) are stored as qualification expressions attached + to those join tree nodes. It turns out to be convenient to store + the top-level WHERE expression as a qualification attached to the + top-level join tree item, too. So really the join tree represents + both the FROM and WHERE clauses of a SELECT. + + + + the others @@ -214,11 +245,10 @@ The other parts of the querytree like the ORDER BY - clause arent of interest here. The rule system + clause aren't of interest here. The rule system substitutes entries there while applying rules, but that doesn't have much to do with the fundamentals of the rule - system. GROUP BY is a special thing when it appears in - a view definition and still needs to be documented. @@ -255,7 +285,7 @@ This has some side effects. 
One of them is that the information about a view in the Postgres system catalogs is exactly the same as it is for a table. So for the - query parsers, there is absolutely no difference between + query parser, there is absolutely no difference between a table and a view. They are the same thing - relations. That is the important one for now. @@ -274,14 +304,14 @@ - Currently, there could be only one action and it must be a - SELECT action that is INSTEAD. This restriction was required - to make rules safe enough to open them for ordinary users and + Currently, there can be only one action in an ON SELECT rule, and it must + be an unconditional SELECT action that is INSTEAD. This restriction was + required to make rules safe enough to open them for ordinary users and it restricts rules ON SELECT to real view rules. - The example for this document are two join views that do some calculations + The examples for this document are two join views that do some calculations and some more views using them in turn. One of the two first views is customized later by adding rules for INSERT, UPDATE and DELETE operations so that the final result will @@ -293,7 +323,7 @@ - The database needed to play on the examples is named al_bundy. + The database needed to play with the examples is named al_bundy. You'll see soon why this is the database name. And it needs the procedural language PL/pgSQL installed, because we need a little min() function returning the lower of 2 @@ -312,7 +342,7 @@ - The real tables we need in the first two rule system descripitons + The real tables we need in the first two rule system descriptions are these: @@ -388,15 +418,15 @@ will create a relation shoelace and an entry in pg_rewrite that tells that there is a rewrite rule that must be applied - whenever the relation shoelace is referenced in a queries rangetable. 
- The rule has no rule qualification (discussed in the - non SELECT rules since SELECT rules currently cannot have them) and + whenever the relation shoelace is referenced in a query's rangetable. + The rule has no rule qualification (discussed later, with the + non SELECT rules, since SELECT rules currently cannot have them) and it is INSTEAD. Note that rule qualifications are not the same as - query qualifications! The rules action has a qualification. + query qualifications! The rule's action has a query qualification. - The rules action is one querytree that is an exact copy of the + The rule's action is one querytree that is a copy of the SELECT statement in the view creation command. @@ -489,73 +519,48 @@ Note that the parser changed the calculation and qualification into calls to the appropriate functions. But in fact this changes nothing. - The first step in rewriting is merging the two rangetables. The resulting - parsetree then reads + + + + To expand the view, the rewriter simply creates a subselect rangetable + entry containing the rule's action parsetree, and substitutes this + rangetable entry for the original one that referenced the view. The + resulting rewritten parsetree is almost the same as if Al had typed SELECT shoelace.sl_name, shoelace.sl_avail, shoelace.sl_color, shoelace.sl_len, shoelace.sl_unit, shoelace.sl_len_cm - FROM shoelace shoelace, shoelace *OLD*, - shoelace *NEW*, shoelace_data s, - unit u; + FROM (SELECT s.sl_name, + s.sl_avail, + s.sl_color, + s.sl_len, + s.sl_unit, + s.sl_len * u.un_fact AS sl_len_cm + FROM shoelace_data s, unit u + WHERE s.sl_unit = u.un_name) shoelace; - In step 2 it adds the qualification from the rule action to the - parsetree resulting in + There is one difference however: the sub-query's rangetable has two + extra entries shoelace *OLD*, shoelace *NEW*. These entries don't + participate directly in the query, since they aren't referenced by + the sub-query's join tree or targetlist. 
The rewriter uses them + to store the access permission check info that was originally present + in the rangetable entry that referenced the view. In this way, the + executor will still check that the user has proper permissions to access + the view, even though there's no direct use of the view in the rewritten + query. + - - SELECT shoelace.sl_name, shoelace.sl_avail, - shoelace.sl_color, shoelace.sl_len, - shoelace.sl_unit, shoelace.sl_len_cm - FROM shoelace shoelace, shoelace *OLD*, - shoelace *NEW*, shoelace_data s, - unit u - WHERE bpchareq(s.sl_unit, u.un_name); - - - And in step 3 it replaces all the variables in the parsetree, that - reference the rangetable entry (the one for - shoelace that is currently processed) - by the corresponding targetlist expressions - from the rule action. This results in the final query - - - SELECT s.sl_name, s.sl_avail, - s.sl_color, s.sl_len, - s.sl_unit, float8mul(s.sl_len, u.un_fact) AS sl_len_cm - FROM shoelace shoelace, shoelace *OLD*, - shoelace *NEW*, shoelace_data s, - unit u - WHERE bpchareq(s.sl_unit, u.un_name); - - - Turning this back into a real SQL statement a human - user would type reads - - - SELECT s.sl_name, s.sl_avail, - s.sl_color, s.sl_len, - s.sl_unit, s.sl_len * u.un_fact AS sl_len_cm - FROM shoelace_data s, unit u - WHERE s.sl_unit = u.un_name; - - - That was the first rule applied. While this was done, the rangetable has - grown. So the rule system continues checking the range table entries. - The next one is number 2 (shoelace *OLD*). - Relation shoelace - has a rule, but this rangetable entry isn't referenced - in any of the variables of the parsetree, so it is ignored. Since all the - remaining rangetable entries either have no rules in - pg_rewrite or aren't referenced, - it reaches the end of the rangetable. - Rewriting is complete and the above is the final result given into - the optimizer. 
- The optimizer ignores the extra rangetable entries that aren't - referenced by variables in the parsetree and the plan produced - by the planner/optimizer would be exactly the same as if Al had typed - the above SELECT query instead of the view selection. + + That was the first rule applied. The rule system will continue checking + the remaining rangetable entries in the top query (in this example there + are no more), and it will recursively check the rangetable entries in + the added sub-query to see if any of them reference views. (But it + won't expand *OLD* or *NEW* --- otherwise we'd have infinite recursion!) + In this example, there are no rewrite rules for shoelace_data or unit, + so rewriting is complete and the above is the final result given to + the planner. @@ -570,7 +575,7 @@ Al needs to know for which shoes currently in the store he has the matching shoelaces (color and size) and where the total number of exactly matching pairs is greater or equal to two. - We theach him how to do and he asks his database: + We teach him what to do and he asks his database: al_bundy=> SELECT * FROM shoe_ready WHERE total_avail >= 2; @@ -597,106 +602,77 @@ WHERE int4ge(shoe_ready.total_avail, 2); - The first rule applied will be that one for the - shoe_ready relation and it results in the + The first rule applied will be the one for the + shoe_ready view and it results in the parsetree - SELECT rsh.shoename, rsh.sh_avail, - rsl.sl_name, rsl.sl_avail, - min(rsh.sh_avail, rsl.sl_avail) AS total_avail - FROM shoe_ready shoe_ready, shoe_ready *OLD*, - shoe_ready *NEW*, shoe rsh, - shoelace rsl - WHERE int4ge(min(rsh.sh_avail, rsl.sl_avail), 2) - AND (bpchareq(rsl.sl_color, rsh.slcolor) - AND float8ge(rsl.sl_len_cm, rsh.slminlen_cm) - AND float8le(rsl.sl_len_cm, rsh.slmaxlen_cm) - ); + SELECT shoe_ready.shoename, shoe_ready.sh_avail, + shoe_ready.sl_name, shoe_ready.sl_avail, + shoe_ready.total_avail + FROM (SELECT rsh.shoename, + rsh.sh_avail, + rsl.sl_name, + 
rsl.sl_avail, + min(rsh.sh_avail, rsl.sl_avail) AS total_avail + FROM shoe rsh, shoelace rsl + WHERE rsl.sl_color = rsh.slcolor + AND rsl.sl_len_cm >= rsh.slminlen_cm + AND rsl.sl_len_cm <= rsh.slmaxlen_cm) shoe_ready + WHERE int4ge(shoe_ready.total_avail, 2); - In reality the AND clauses in the qualification will be - operator nodes of type AND with a left and right expression. But - that makes it lesser readable as it already is, and there are more - rules to apply. So I only put them into some parantheses to group - them into logical units in the order they where added and we continue - with the rule for relation - shoe as it is the next rangetable entry - that is referenced and has a rule. The result of applying it is + Similarly, the rules for shoe and + shoelace are substituted into the rangetable of + the sub-query, leading to a three-level final querytree: - SELECT sh.shoename, sh.sh_avail, - rsl.sl_name, rsl.sl_avail, - min(sh.sh_avail, rsl.sl_avail) AS total_avail, - FROM shoe_ready shoe_ready, shoe_ready *OLD*, - shoe_ready *NEW*, shoe rsh, - shoelace rsl, shoe *OLD*, - shoe *NEW*, shoe_data sh, - unit un - WHERE (int4ge(min(sh.sh_avail, rsl.sl_avail), 2) - AND (bpchareq(rsl.sl_color, sh.slcolor) - AND float8ge(rsl.sl_len_cm, - float8mul(sh.slminlen, un.un_fact)) - AND float8le(rsl.sl_len_cm, - float8mul(sh.slmaxlen, un.un_fact)) - ) - ) - AND bpchareq(sh.slunit, un.un_name); + SELECT shoe_ready.shoename, shoe_ready.sh_avail, + shoe_ready.sl_name, shoe_ready.sl_avail, + shoe_ready.total_avail + FROM (SELECT rsh.shoename, + rsh.sh_avail, + rsl.sl_name, + rsl.sl_avail, + min(rsh.sh_avail, rsl.sl_avail) AS total_avail + FROM (SELECT sh.shoename, + sh.sh_avail, + sh.slcolor, + sh.slminlen, + sh.slminlen * un.un_fact AS slminlen_cm, + sh.slmaxlen, + sh.slmaxlen * un.un_fact AS slmaxlen_cm, + sh.slunit + FROM shoe_data sh, unit un + WHERE sh.slunit = un.un_name) rsh, + (SELECT s.sl_name, + s.sl_avail, + s.sl_color, + s.sl_len, + s.sl_unit, + s.sl_len * 
u.un_fact AS sl_len_cm + FROM shoelace_data s, unit u + WHERE s.sl_unit = u.un_name) rsl + WHERE rsl.sl_color = rsh.slcolor + AND rsl.sl_len_cm >= rsh.slminlen_cm + AND rsl.sl_len_cm <= rsh.slmaxlen_cm) shoe_ready + WHERE int4ge(shoe_ready.total_avail, 2); - And finally we apply the already well known rule for - shoelace (this time on a parsetree that is - a little more complex) and get - - - SELECT sh.shoename, sh.sh_avail, - s.sl_name, s.sl_avail, - min(sh.sh_avail, s.sl_avail) AS total_avail - FROM shoe_ready shoe_ready, shoe_ready *OLD*, - shoe_ready *NEW*, shoe rsh, - shoelace rsl, shoe *OLD*, - shoe *NEW*, shoe_data sh, - unit un, shoelace *OLD*, - shoelace *NEW*, shoelace_data s, - unit u - WHERE ( (int4ge(min(sh.sh_avail, s.sl_avail), 2) - AND (bpchareq(s.sl_color, sh.slcolor) - AND float8ge(float8mul(s.sl_len, u.un_fact), - float8mul(sh.slminlen, un.un_fact)) - AND float8le(float8mul(s.sl_len, u.un_fact), - float8mul(sh.slmaxlen, un.un_fact)) - ) - ) - AND bpchareq(sh.slunit, un.un_name) - ) - AND bpchareq(s.sl_unit, u.un_name); - - - Again we reduce it to a real SQL statement - that is equivalent to the final output of the rule system: - - - SELECT sh.shoename, sh.sh_avail, - s.sl_name, s.sl_avail, - min(sh.sh_avail, s.sl_avail) AS total_avail - FROM shoe_data sh, shoelace_data s, unit u, unit un - WHERE min(sh.sh_avail, s.sl_avail) >= 2 - AND s.sl_color = sh.slcolor - AND s.sl_len * u.un_fact >= sh.slminlen * un.un_fact - AND s.sl_len * u.un_fact <= sh.slmaxlen * un.un_fact - AND sh.sl_unit = un.un_name - AND s.sl_unit = u.un_name; - - - Recursive processing of rules rewrote one SELECT from a view - into a parsetree, that is equivalent to exactly that what Al - had to type if there would be no views at all. + It turns out that the planner will collapse this tree into a two-level + querytree: the bottommost selects will be "pulled up" into the middle + select since there's no need to process them separately. 
But the + middle select will remain separate from the top, because it contains + aggregate functions. If we pulled those up it would change the behavior + of the topmost select, which we don't want. However, collapsing the + query tree is an optimization that the rewrite system doesn't + have to concern itself with. Note There is currently no recursion stopping mechanism for view - rules in the rule system (only for the other rules). + rules in the rule system (only for the other kinds of rules). This doesn't hurt much, because the only way to push this into an endless loop (blowing up the backend until it reaches the memory limit) @@ -704,7 +680,7 @@ view rules by hand with CREATE RULE in such a way, that one selects from the other that selects from the one. This could never happen if CREATE VIEW is used because - on the first CREATE VIEW, the second relation does not exist + for the first CREATE VIEW, the second relation does not exist and thus the first view cannot select from the second. @@ -718,14 +694,14 @@ Two details of the parsetree aren't touched in the description of view rules above. These are the commandtype and the resultrelation. - In fact, view rules don't need these informations. + In fact, view rules don't need this information. There are only a few differences between a parsetree for a SELECT and one for any other command. Obviously they have another commandtype and this time the resultrelation points to the rangetable entry where - the result should go. Anything else is absolutely the same. + the result should go. Everything else is absolutely the same. So having two tables t1 and t2 with attributes a and b, the parsetrees for the two statements @@ -757,11 +733,17 @@ ranges for equality. + + + + The jointrees show a simple join between t1 and t2. + + The consequence is, that both parsetrees result in similar execution plans. They are both joins over the two tables. 
For the UPDATE - the missing columns from t1 are added to the targetlist by the optimizer + the missing columns from t1 are added to the targetlist by the planner and the final parsetree will read as @@ -781,21 +763,17 @@ is a SELECT command and the other is an UPDATE is handled in the caller of the executor. The caller still knows (looking at the parsetree) that this is an UPDATE, and he knows that this result - should go into table t1. But which of the 666 rows that are there - has to be replaced by the new row? The plan executed is a join - with a qualification that potentially could produce any number of - rows between 0 and 666 in unknown order. + should go into table t1. But which of the rows that are there + has to be replaced by the new row? To resolve this problem, another entry is added to the targetlist - in UPDATE and DELETE statements. The current tuple ID (ctid). This - is a system attribute with a special feature. It contains the - block and position in the block for the row. Knowing the table, - the ctid can be used to find one specific row in a 1.5GB sized table - containing millions of rows by fetching one single data block. - After adding the ctid to the targetlist, the final result set - could be defined as + in UPDATE (and also in DELETE) statements: the current tuple ID (ctid). + This is a system attribute containing the file + block number and position in the block for the row. Knowing the table, + the ctid can be used to retrieve the original t1 row to be updated. + After adding the ctid to the targetlist, the query actually looks like SELECT t1.a, t2.b, t1.ctid FROM t1, t2 WHERE t1.a = t2.a; @@ -812,7 +790,7 @@ - Knowing that all, we can simply apply view rules in absolutely + Knowing all that, we can simply apply view rules in absolutely the same way to any command. There is no difference. 
@@ -832,137 +810,32 @@ The benefit of implementing views with the rule system is, - that the optimizer has all + that the planner has all the information about which tables have to be scanned plus the relationships between these tables plus the restrictive qualifications from the views plus the qualifications from the original query in one single parsetree. And this is still the situation when the original query is already a join over views. - Now the optimizer has to decide which is + Now the planner has to decide which is the best path to execute the query. The more information - the optimizer has, the better this decision can be. And + the planner has, the better this decision can be. And the rule system as implemented in Postgres ensures, that this is all information available about the query up to now. - - -Concerns - - - There was a long time where the Postgres - rule system was considered broken. The use of rules was not - recommended and the only part working was view rules. And also - these view rules gave problems because the rule system wasn't able - to apply them properly on statements other than a SELECT (for - example an UPDATE - that used data from a view didn't work). - - - - During that time, development moved on and many features were - added to the parser and optimizer. The rule system got more and more - out of sync with their capabilities and it became harder and harder - to start fixing it. Thus, no one did. - - - - For 6.4, someone locked the door, took a deep breath and shuffled - that damned thing up. What came out was a rule system with the - capabilities described in this document. But there are still some - constructs not handled and some where it fails due to - things that are currently not - supported by the Postgres query - optimizer. - - - - - Views with aggregate columns have bad problems. Aggregate - expressions in qualifications must be used in subselects. 
- Currently it is not possible to do a join of two views, - each having an aggregate column, and compare the two aggregate values - in the qualification. In the meantime it is possible to - put these aggregate expressions into functions with - the appropriate arguments and use - them in the view definition. - - - - - - Views of unions are currently not supported. Well it's easy - to rewrite a simple SELECT into a union. But it is a little - difficult if the view is part of a join doing an update. - - - - - - ORDER BY clauses in view definitions aren't supported. - - - - - - DISTINCT isn't supported in view definitions. - - - - - There is no good reason why the optimizer should not - handle parsetree constructs that the parser could never produce - due to limitations in the SQL syntax. - The author hopes that these items disappear in the future. - - -Implementation Side Effects +What about updating a view? - Using the described rule system to implement views has a funny - side effect. The following does not seem to work: - - - al_bundy=> INSERT INTO shoe (shoename, sh_avail, slcolor) - al_bundy-> VALUES ('sh5', 0, 'black'); - INSERT 20128 1 - al_bundy=> SELECT shoename, sh_avail, slcolor FROM shoe_data; - shoename |sh_avail|slcolor - ----------+--------+---------- - sh1 | 2|black - sh3 | 4|brown - sh2 | 0|black - sh4 | 3|brown - (4 rows) - - - The interesting thing is that the return code for INSERT gave - us an object ID and told that 1 row has been inserted. - But it doesn't appear in shoe_data. - Looking into the database - directory we can see, that the database file for the - view relation shoe seems now to have - a data block. And that is definitely the case. - - - - We can also issue a DELETE and if it does not have - a qualification, it tells us that rows have been deleted - and the next vacuum run will reset the file to zero size. 
- - - - The reason for that behaviour is, that the parsetree for the - INSERT does not reference the shoe relation - in any variable. The targetlist contains only constant values. - So there is no rule to apply and it goes - down unchanged into execution and the row is inserted. And - so for the DELETE. + What happens if a view is named as the target relation for an INSERT, + UPDATE, or DELETE? After doing the substitutions described above, + we will have a querytree in which the resultrelation points at a + subquery rangetable entry. This will not work, so the rewriter throws + an error if it sees it has produced such a thing. @@ -977,7 +850,7 @@ Rules on INSERT, UPDATE and DELETE -Differences to View Rules +Differences from View Rules Rules that are defined ON INSERT, UPDATE and DELETE are @@ -1071,7 +944,7 @@ The parsetree from the rule action where the - original parsetrees qualification has been added. + original parsetree's qualification has been added. @@ -1085,7 +958,7 @@ The parsetree from the rule action where the - original parsetrees qualification has been added. + original parsetree's qualification has been added. @@ -1099,7 +972,7 @@ The parsetree from the rule action where the rule - qualification and the original parsetrees + qualification and the original parsetree's qualification have been added. @@ -1114,7 +987,7 @@ The parsetree from the rule action where the rule - qualification and the original parsetrees + qualification and the original parsetree's qualification have been added. @@ -1133,7 +1006,7 @@ Finally, if the rule is not INSTEAD, the unchanged original parsetree is added to the list. Since only qualified INSTEAD rules already add the - original parsetree, we end up with a total maximum of two parsetrees + original parsetree, we end up with either one or two output parsetrees for a rule with one action. @@ -1155,19 +1028,26 @@ rangetable entries for NEW and OLD, some substitutions have to be made before they can be used. 
For any reference to NEW, the targetlist of the original query is searched for a corresponding entry. If found, - that entries expression is placed into the reference. Otherwise - NEW means the same as OLD. Any reference to OLD is replaced by a + that entry's expression replaces the reference. Otherwise + NEW means the same as OLD (for an UPDATE) or is replaced by NULL + (for an INSERT). Any reference to OLD is replaced by a reference to the rangetable entry which is the resultrelation. + + After we are done applying update rules, we apply view rules to the + produced parsetree(s). Views cannot insert new update actions so + there is no need to apply update rules to the output of view rewriting. + + A First Rule Step by Step We want to trace changes to the sl_avail column in the shoelace_data relation. So we setup a - log table and a rule that writes us entries every time - and UPDATE is performed on shoelace_data. + log table and a rule that conditionally writes a log entry when + an UPDATE is performed on shoelace_data. CREATE TABLE shoelace_log ( @@ -1193,9 +1073,10 @@ is a datetime and tries to make a constant from it - with success. So a constant datetime value would be stored in the rule action and all log entries would have the time of the CREATE RULE statement. - Not exactly what we want. The casting causes that the parser - constructs a datetime('now'::text) from it and this will be - evaluated when the rule is executed. + Not exactly what we want. The casting causes the parser to + construct a datetime('now'::text) expression and this will be + evaluated when the rule is executed. (Another way to do this is to + use the function now() instead of a literal constant.) 
@@ -1237,64 +1118,60 @@ and one action - INSERT INTO shoelace_log SELECT + INSERT INTO shoelace_log VALUES( *NEW*.sl_name, *NEW*.sl_avail, - getpgusername(), datetime('now'::text) - FROM shoelace_data *NEW*, shoelace_data *OLD*, - shoelace_log shoelace_log; + getpgusername(), datetime('now'::text)) + FROM shoelace_data *NEW*, shoelace_data *OLD*; - Don't trust the output of the pg_rules system view. It specially - handles the situation that there are only references to NEW - and OLD in the INSERT and outputs the VALUES format of INSERT. - In fact there is no difference between an INSERT ... VALUES - and an INSERT ... SELECT on parsetree level. They both have - rangetables, targetlists and maybe qualifications etc. The - optimizer later decides, if to create an execution plan of - type result, seqscan, indexscan, join or whatever for that - parsetree. If there are no references to - rangetable entries leftin the parsetree , it becomes - a result execution plan - (the INSERT ... VALUES version). The rule action above can - truely result in both variants. + This is a little strange-looking since you can't normally write + INSERT ... VALUES ... FROM. The FROM clause here is just to indicate + that there are rangetable entries in the parsetree for *NEW* and *OLD*. + These are needed so that they can be referenced by variables in the + INSERT command's querytree. The rule is a qualified non-INSTEAD rule, so the rule system - has to return two parsetrees. The modified rule action and the original + has to return two parsetrees: the modified rule action and the original parsetree. In the first step the rangetable of the original query is - incorporated into the rules action parsetree. This results in + incorporated into the rule's action parsetree. 
This results in - INSERT INTO shoelace_log SELECT - *NEW*.sl_name, *NEW*.sl_avai, - getpgusername(), datetime('now'::text) - FROM shoelace_data shoelace_data, shoelace_data *NEW*, - shoelace_data *OLD*, shoelace_log shoelace_log; + INSERT INTO shoelace_log VALUES( + *NEW*.sl_name, *NEW*.sl_avail, + getpgusername(), datetime('now'::text)) + FROM shoelace_data *NEW*, shoelace_data *OLD*, + shoelace_data shoelace_data; In step 2 the rule qualification is added to it, so the result set is restricted to rows where sl_avail changes. - INSERT INTO shoelace_log SELECT - *NEW*.sl_name, *NEW*.sl_avai, - getpgusername(), datetime('now'::text) - FROM shoelace_data shoelace_data, shoelace_data *NEW*, - shoelace_data *OLD*, shoelace_log shoelace_log + INSERT INTO shoelace_log VALUES( + *NEW*.sl_name, *NEW*.sl_avail, + getpgusername(), datetime('now'::text)) + FROM shoelace_data *NEW*, shoelace_data *OLD*, + shoelace_data shoelace_data WHERE int4ne(*NEW*.sl_avail, *OLD*.sl_avail); - In step 3 the original parsetrees qualification is added, + This is even stranger-looking, since INSERT ... VALUES doesn't have + a WHERE clause either, but the planner and executor will have no + difficulty with it. They need to support this same functionality + anyway for INSERT ... SELECT. + + In step 3 the original parsetree's qualification is added, restricting the resultset further to only the rows touched by the original parsetree. - INSERT INTO shoelace_log SELECT - *NEW*.sl_name, *NEW*.sl_avai, - getpgusername(), datetime('now'::text) - FROM shoelace_data shoelace_data, shoelace_data *NEW*, - shoelace_data *OLD*, shoelace_log shoelace_log + INSERT INTO shoelace_log VALUES( + *NEW*.sl_name, *NEW*.sl_avail, + getpgusername(), datetime('now'::text)) + FROM shoelace_data *NEW*, shoelace_data *OLD*, + shoelace_data shoelace_data WHERE int4ne(*NEW*.sl_avail, *OLD*.sl_avail) AND bpchareq(shoelace_data.sl_name, 'sl7'); @@ -1304,34 +1181,35 @@ from the result relation. 
- INSERT INTO shoelace_log SELECT + INSERT INTO shoelace_log VALUES( shoelace_data.sl_name, 6, - getpgusername(), datetime('now'::text) - FROM shoelace_data shoelace_data, shoelace_data *NEW*, - shoelace_data *OLD*, shoelace_log shoelace_log + getpgusername(), datetime('now'::text)) + FROM shoelace_data *NEW*, shoelace_data *OLD*, + shoelace_data shoelace_data WHERE int4ne(6, *OLD*.sl_avail) AND bpchareq(shoelace_data.sl_name, 'sl7'); - Step 5 replaces OLD references into resultrelation references. + Step 5 changes OLD references into resultrelation references. - INSERT INTO shoelace_log SELECT + INSERT INTO shoelace_log VALUES( shoelace_data.sl_name, 6, - getpgusername(), datetime('now'::text) - FROM shoelace_data shoelace_data, shoelace_data *NEW*, - shoelace_data *OLD*, shoelace_log shoelace_log + getpgusername(), datetime('now'::text)) + FROM shoelace_data *NEW*, shoelace_data *OLD*, + shoelace_data shoelace_data WHERE int4ne(6, shoelace_data.sl_avail) AND bpchareq(shoelace_data.sl_name, 'sl7'); - That's it. So reduced to the max the return from the rule system + That's it. Since the rule is not INSTEAD, we also output the + original parsetree. In short, the output from the rule system is a list of two parsetrees that are the same as the statements: - INSERT INTO shoelace_log SELECT + INSERT INTO shoelace_log VALUES( shoelace_data.sl_name, 6, - getpgusername(), 'now' + getpgusername(), datetime('now'::text)) FROM shoelace_data WHERE 6 != shoelace_data.sl_avail AND shoelace_data.sl_name = 'sl7'; @@ -1341,31 +1219,29 @@ These are executed in this order and that is exactly what - the rule defines. The subtitutions and the qualifications - added ensure, that if the original query would be an + the rule defines. 
The substitutions and the qualifications + added ensure that if the original query would be, say, UPDATE shoelace_data SET sl_color = 'green' WHERE sl_name = 'sl7'; - No log entry would get written because due to the fact that this + no log entry would get written. This time the original parsetree does not contain a targetlist - entry for sl_avail, NEW.sl_avail will get replaced by + entry for sl_avail, so NEW.sl_avail will get replaced by shoelace_data.sl_avail resulting in the extra query - INSERT INTO shoelace_log SELECT + INSERT INTO shoelace_log VALUES( shoelace_data.sl_name, shoelace_data.sl_avail, - getpgusername(), 'now' + getpgusername(), 'now') FROM shoelace_data WHERE shoelace_data.sl_avail != shoelace_data.sl_avail AND shoelace_data.sl_name = 'sl7'; - and that qualification will never be true. Since the is no - difference on parsetree level between an INSERT ... SELECT, - and an INSERT ... VALUES, it will also + and that qualification will never be true. It will also work if the original query modifies multiple rows. So if Al would issue the command @@ -1410,8 +1286,8 @@ A simple way to protect view relations from the mentioned - possibility that someone can INSERT, UPDATE and DELETE - invisible data on them is to let those parsetrees get + possibility that someone can try to INSERT, UPDATE and DELETE + on them is to let those parsetrees get thrown away. We create the rules @@ -1434,10 +1310,10 @@ Note - This fact might irritate frontend applications because + This approach might irritate frontend applications because absolutely nothing happened on the database and thus, the backend will not return anything for the query. Not - even a PGRES_EMPTY_QUERY or so will be available in libpq. + even a PGRES_EMPTY_QUERY will be available in libpq. In psql, nothing happens. This might change in the future. @@ -1482,7 +1358,7 @@ we don't want him to manually update the shoelace view.
Instead we setup two little tables, one where he can insert the items from the partlist and one with a special - trick. The create commands for anything are: + trick. The create commands for these are: CREATE TABLE shoelace_arrive ( @@ -1514,7 +1390,7 @@ (3 rows) - is exactly that what's on the part list. We take a quick look + is exactly what's on the part list. We take a quick look at the current data, @@ -1565,7 +1441,7 @@ It's a long way from the one INSERT ... SELECT to these - results. And it's description will be the last in this + results. And its description will be the last in this document (but not the last example :-). First there was the parsers output @@ -1606,7 +1482,7 @@ Again it's an INSTEAD rule and the previous parsetree is trashed. - Note that this query sill uses the view shoelace + Note that this query still uses the view shoelace. But the rule system isn't finished with this loop so it continues and applies the rule '_RETshoelace' on it and we get @@ -1688,7 +1564,7 @@ There is a little detail that's a bit ugly. Looking at the two queries turns out, that the shoelace_data relation appears twice in the rangetable where it could definitely - be reduced to one. The optimizer does not handle it and so the + be reduced to one. The planner does not handle it and so the execution plan for the rule systems output of the INSERT will be @@ -1725,7 +1601,7 @@ Merge Join A final demonstration of the Postgres - rule system and it's power. There is a cute blonde that + rule system and its power. There is a cute blonde that sells shoelaces. And what Al could never realize, she's not only cute, she's smart too - a little too smart. Thus, it happens from time to time that Al orders shoelaces that @@ -1752,7 +1628,7 @@ Merge Join (SELECT shoename FROM shoe WHERE slcolor = sl_color); - It's output is + Its output is al_bundy=> SELECT * FROM shoelace_obsolete; @@ -1840,8 +1716,8 @@ Merge Join rewrite rules that are defined for it. 
The Postgres rule system changes the behaviour of the default access control system. Relations that - are used due to rules get checked during the rewrite against the - permissions of the relation owner, the rule is defined on. + are used due to rules get checked against the + permissions of the rule owner, not the user invoking the rule. This means, that a user does only need the required permissions for the tables/views he names in his queries. @@ -1865,8 +1741,9 @@ Merge Join that only entries where private is false are wanted. Since the user is the owner of phone_number, the read access to phone_data is now checked against his permissions and the query is considered - granted. The check for accessing phone_number is still performed, - so nobody than the secretary can use it. + granted. The check for accessing phone_number is also performed, + but this is done against the invoking user, so nobody but the user and the + secretary can use it. @@ -1897,7 +1774,7 @@ Merge Join of the previous section, the owner of the tables in Al's database could GRANT SELECT, INSERT, UPDATE and DELETE on the shoelace view to al. But only SELECT on shoelace_log. The rule action to write log entries - will still be executed successfull. And Al could see the log entries. + will still be executed successfully. And Al could see the log entries. But he cannot create fake entries, nor could he manipulate or remove existing ones. 
@@ -1999,7 +1876,7 @@ Merge Join AND software.hostname = computer.hostname; - Since there are appropriate indices setup, the optimizer + Since there are appropriate indices set up, the planner will create a plan of @@ -2041,7 +1918,7 @@ Merge Join -> Index Scan using soft_hostidx on software - This shows, that the optimizer does not realize that the + This shows that the planner does not realize that the qualification for the hostname on computer could also be used for an index scan on software when there are multiple qualification expressions combined with AND, what @@ -2092,7 +1969,7 @@ Merge Join change of an attribute if an action should be performed or not. In Postgres version 6.4, the attribute specification for rule events is disabled (it will have - it's comeback latest in 6.5, maybe earlier + its comeback latest in 6.5, maybe earlier - stay tuned). So for now the only way to create a rule as in the shoelace_log example is to do it with a rule qualification. That results in an extra query that is @@ -2101,10 +1978,10 @@ Merge Join of the initial query. When this is enabled again, it will be one more advantage of rules over triggers. Optimization of a trigger must fail by definition in this case, because the - fact that it's actions will only be done when a specific attribute - is updated is hidden in it's functionality. The definition of + fact that its actions will only be done when a specific attribute + is updated is hidden in its functionality. The definition of a trigger only allows to specify it on row level, so whenever a - row is touched, the trigger must be called to make it's + row is touched, the trigger must be called to make its decision. The rule system will know it by looking up the targetlist and will suppress the additional query completely if the attribute isn't touched.
So the rule, qualified or not, @@ -2112,9 +1989,9 @@ Merge Join - Rules will only be significant slower than triggers if + Rules will only be significantly slower than triggers if their actions result in large and bad qualified joins, a situation - where the optimizer fails. They are a big hammer. + where the planner fails. They are a big hammer. Using a big hammer without caution can cause big damage. But used with the right touch, they can hit any nail on the head. diff --git a/doc/src/sgml/sql.sgml b/doc/src/sgml/sql.sgml index 3e7c1b9647..0b9660211f 100644 --- a/doc/src/sgml/sql.sgml +++ b/doc/src/sgml/sql.sgml @@ -1,5 +1,5 @@ @@ -854,17 +854,17 @@ A < B + 3. used to retrieve data. The syntax is: -SELECT [ALL|DISTINCT] - { * | expr_1 [AS c_alias_1] [, ... - [, expr_k [AS c_alias_k]]]} - FROM table_name_1 [t_alias_1] - [, ... [, table_name_n [t_alias_n]]] - [WHERE condition] - [GROUP BY name_of_attr_i - [,... [, name_of_attr_j]] [HAVING condition]] - [{UNION [ALL] | INTERSECT | EXCEPT} SELECT ...] - [ORDER BY name_of_attr_i [ASC|DESC] - [, ... [, name_of_attr_j [ASC|DESC]]]]; +SELECT [ ALL | DISTINCT [ ON ( expression [, ...] ) ] ] + * | expression [ AS output_name ] [, ...] + [ INTO [ TEMPORARY | TEMP ] [ TABLE ] new_table ] + [ FROM from_item [, ...] ] + [ WHERE condition ] + [ GROUP BY expression [, ...] ] + [ HAVING condition [, ...] ] + [ { UNION | INTERSECT | EXCEPT [ ALL ] } select ] + [ ORDER BY expression [ ASC | DESC | USING operator ] [, ...] ] + [ FOR UPDATE [ OF class_name [, ...] ] ] + [ LIMIT { count | ALL } [ { OFFSET | , } start ]] @@ -1037,11 +1037,13 @@ SELECT S.SNAME, P.PNAME SQL provides aggregate operators (e.g. AVG, COUNT, SUM, MIN, MAX) that - take the name of an attribute as an argument. The value of the - aggregate operator is calculated over all values of the specified - attribute (column) of the whole table. If groups are specified in the - query the calculation is done only over the values of a group (see next - section). 
+ take an expression as argument. The expression is evaluated at + each row that satisfies the WHERE clause, and the aggregate operator + is calculated over this set of input values. Normally, an aggregate + delivers a single result for a whole SELECT statement. But if + grouping is specified in the query, then a separate calculation is done + over the rows of each group, and an aggregate result is delivered per + group (see next section). Aggregates @@ -1094,11 +1096,11 @@ SELECT COUNT(PNO) SQL allows one to partition the tuples of a table into groups. Then the - aggregate operators described above can be applied to the groups - (i.e. the value of the aggregate operator is no longer calculated over + aggregate operators described above can be applied to the groups --- + i.e. the value of the aggregate operator is no longer calculated over all the values of the specified column but over all values of a group. Thus the aggregate operator is evaluated individually for every - group.) + group. @@ -1179,19 +1181,26 @@ SELECT S.SNO, S.SNAME, COUNT(SE.PNO) In our example we got four groups and now we can apply the aggregate - operator COUNT to every group leading to the total result of the query + operator COUNT to every group leading to the final result of the query given above. - Note that for the result of a query using GROUP BY and aggregate - operators to make sense the attributes grouped by must also appear in - the target list. All further attributes not appearing in the GROUP - BY clause can only be selected by using an aggregate function. On - the other hand you can not use aggregate functions on attributes - appearing in the GROUP BY clause. + Note that for a query using GROUP BY and aggregate + operators to make sense the target list can only refer directly to + the attributes being grouped by. Other attributes may only be used + inside the argument of an aggregate function. Otherwise there would + not be a unique value to associate with the other attributes. 
+ + + + Also observe that it makes no sense to ask for an aggregate of an + aggregate, e.g., AVG(MAX(sno)), because a SELECT only does one pass + of grouping and aggregation. You can get a result of this kind by + using a temporary table or a sub-SELECT in the FROM clause to + do the first level of aggregation. @@ -1201,11 +1210,14 @@ SELECT S.SNO, S.SNAME, COUNT(SE.PNO) The HAVING clause works much like the WHERE clause and is used to consider only those groups satisfying the qualification given in the - HAVING clause. The expressions allowed in the HAVING clause must - involve aggregate functions. Every expression using only plain - attributes belongs to the WHERE clause. On the other hand every - expression involving an aggregate function must be put to the HAVING - clause. + HAVING clause. Essentially, WHERE filters out unwanted input rows + before grouping and aggregation are done, whereas HAVING filters out + unwanted group rows after grouping. Therefore, WHERE cannot refer to the + results of aggregate functions. On the other hand, there's no point + in writing a HAVING condition that doesn't involve an aggregate + function! If your condition doesn't involve aggregates, you might + as well write it in WHERE, and thereby avoid the computation of + aggregates for groups that you're just going to throw away anyway. Having @@ -1280,7 +1292,9 @@ SELECT * SELECT. For every tuple of the outer SELECT the inner SELECT has to be evaluated. After every evaluation we know the price of the tuple named 'Screw' and we can check if the price of the actual tuple is - greater. + greater. (Actually, in this example the inner query need only be + evaluated once, since it does not depend on the state of the outer + query.) @@ -1299,9 +1313,43 @@ SELECT * In our example the result will be empty because every supplier sells at least one part. Note that we use S.SNO from the outer SELECT within - the WHERE clause of the inner SELECT.
As described above the subquery - is evaluated for every tuple from the outer query i.e. the value for - S.SNO is always taken from the actual tuple of the outer SELECT. + the WHERE clause of the inner SELECT. Here the subquery must be + evaluated for every tuple from the outer query, i.e. the value for + S.SNO is always taken from the current tuple of the outer SELECT. + + + + + + + Subqueries in FROM + + + A somewhat different way of using subqueries is to put them in the + FROM clause. This is a useful feature because a subquery of this + kind can output multiple columns and rows, whereas a subquery used + in an expression must deliver just a single result. It also lets + us get more than one round of grouping/aggregation without resorting + to a temporary table. + + + Subselect in FROM + + + If we want to know the highest average part price among all our + suppliers, we can't write MAX(AVG(PRICE)), but we can write: + + +SELECT MAX(subtable.avgprice) + FROM (SELECT AVG(P.PRICE) AS avgprice + FROM SUPPLIER S, PART P, SELLS SE + WHERE S.SNO = SE.SNO AND + P.PNO = SE.PNO + GROUP BY S.SNO) subtable; + + + The subquery returns one row per supplier (because of its GROUP BY) + and then we aggregate over those rows in the outer query. @@ -1311,7 +1359,7 @@ SELECT * Union, Intersect, Except - These operations calculate the union, intersect and set theoretic + These operations calculate the union, intersection and set theoretic difference of the tuples derived by two subqueries. @@ -1341,7 +1389,7 @@ gives the result: - Here an example for INTERSECT: + Here is an example for INTERSECT: SELECT S.SNO, S.SNAME, S.CITY @@ -1361,7 +1409,7 @@ SELECT S.SNO, S.SNAME, S.CITY 2 | Jones | Paris - The only tuple returned by both parts of the query is the one having $SNO=2$. + The only tuple returned by both parts of the query is the one having SNO=2. 
@@ -1469,14 +1517,11 @@ CREATE TABLE SELLS DECIMAL (p[,q]): - signed packed decimal number of + signed packed decimal number of up to p - digits precision with assumed + digits, with q - of them right to the decimal point. - -(15 ≥ pq ≥ 0). - + digits to the right of the decimal point. If q is omitted it is assumed to be 0. @@ -1547,8 +1592,8 @@ CREATE INDEX I ON SUPPLIER (SNAME); The created index is maintained automatically, i.e. whenever a new tuple is inserted into the relation SUPPLIER the index I is adapted. Note - that the only changes a user can percept when an index is present - are an increased speed. + that the only changes a user can perceive when an index is present + are increased speed for SELECT and decreased speed for updates.