From c30446b9c901b357f9a7b859c51bee5740ac313f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 17 Jun 2009 21:58:49 +0000 Subject: [PATCH] Proofreading for Bruce's recent round of documentation proofreading. Most of those changes were good, but some not so good ... --- doc/src/sgml/advanced.sgml | 6 +- doc/src/sgml/array.sgml | 37 ++-- doc/src/sgml/config.sgml | 6 +- doc/src/sgml/datatype.sgml | 109 ++++++------ doc/src/sgml/ddl.sgml | 22 +-- doc/src/sgml/dml.sgml | 7 +- doc/src/sgml/docguide.sgml | 6 +- doc/src/sgml/func.sgml | 315 ++++++++++++++++++--------------- doc/src/sgml/indices.sgml | 36 ++-- doc/src/sgml/installation.sgml | 15 +- doc/src/sgml/monitoring.sgml | 4 +- doc/src/sgml/mvcc.sgml | 76 ++++---- doc/src/sgml/perform.sgml | 47 ++--- doc/src/sgml/postgres.sgml | 4 +- doc/src/sgml/queries.sgml | 63 ++++--- doc/src/sgml/query.sgml | 39 ++-- doc/src/sgml/rowtypes.sgml | 12 +- doc/src/sgml/start.sgml | 11 +- doc/src/sgml/storage.sgml | 39 +++- doc/src/sgml/syntax.sgml | 18 +- doc/src/sgml/textsearch.sgml | 41 ++--- doc/src/sgml/typeconv.sgml | 41 +++-- 22 files changed, 514 insertions(+), 440 deletions(-) diff --git a/doc/src/sgml/advanced.sgml b/doc/src/sgml/advanced.sgml index 305ce9cc57..5ae2f59bae 100644 --- a/doc/src/sgml/advanced.sgml +++ b/doc/src/sgml/advanced.sgml @@ -1,4 +1,4 @@ - + Advanced Features @@ -19,7 +19,7 @@ This chapter will on occasion refer to examples found in to change or improve them, so it will be - good if you have read that chapter. Some examples from + useful to have read that chapter. Some examples from this chapter can also be found in advanced.sql in the tutorial directory. 
This file also contains some sample data to load, which is not @@ -173,7 +173,7 @@ UPDATE branches SET balance = balance + 100.00 - The details of these commands are not important; the important + The details of these commands are not important here; the important point is that there are several separate updates involved to accomplish this rather simple operation. Our bank's officers will want to be assured that either all these updates happen, or none of them happen. diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index 6e731e1448..bfc373ac05 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -1,4 +1,4 @@ - + Arrays @@ -60,18 +60,17 @@ CREATE TABLE tictactoe ( - In addition, the current implementation does not enforce the declared + The current implementation does not enforce the declared number of dimensions either. Arrays of a particular element type are all considered to be of the same type, regardless of size or number - of dimensions. So, declaring the number of dimensions or sizes in - CREATE TABLE is simply documentation, it does not + of dimensions. So, declaring the array size or number of dimensions in + CREATE TABLE is simply documentation; it does not affect run-time behavior. An alternative syntax, which conforms to the SQL standard by using - they keyword ARRAY, can - be used for one-dimensional arrays; + the keyword ARRAY, can be used for one-dimensional arrays. pay_by_quarter could have been defined as: @@ -109,7 +108,7 @@ CREATE TABLE tictactoe ( for the type, as recorded in its pg_type entry. Among the standard data types provided in the PostgreSQL distribution, all use a comma - (,), except for the type box which uses a semicolon + (,), except for type box which uses a semicolon (;). Each val is either a constant of the array element type, or a subarray. 
An example of an array constant is: @@ -121,7 +120,7 @@ CREATE TABLE tictactoe ( - To set an element of an array to NULL, write NULL + To set an element of an array constant to NULL, write NULL for the element value. (Any upper- or lower-case variant of NULL will do.) If you want an actual string value NULL, you must put double quotes around it. @@ -211,7 +210,7 @@ INSERT INTO sal_emp First, we show how to access a single element of an array. This query retrieves the names of the employees whose pay changed in the second quarter: - + SELECT name FROM sal_emp WHERE pay_by_quarter[1] <> pay_by_quarter[2]; @@ -230,7 +229,7 @@ SELECT name FROM sal_emp WHERE pay_by_quarter[1] <> pay_by_quarter[2]; This query retrieves the third quarter pay of all employees: - + SELECT pay_by_quarter[3] FROM sal_emp; @@ -248,7 +247,7 @@ SELECT pay_by_quarter[3] FROM sal_emp; lower-bound:upper-bound for one or more array dimensions. For example, this query retrieves the first item on Bill's schedule for the first two days of the week: - + SELECT schedule[1:2][1:1] FROM sal_emp WHERE name = 'Bill'; @@ -417,14 +416,14 @@ SELECT ARRAY[5,6] || ARRAY[[1,2],[3,4]]; - The concatenation operator allows a single element to be pushed to the + The concatenation operator allows a single element to be pushed onto the beginning or end of a one-dimensional array. It also accepts two N-dimensional arrays, or an N-dimensional and an N+1-dimensional array. - When a single element is pushed to either the beginning or end of a + When a single element is pushed onto either the beginning or end of a one-dimensional array, the result is an array with the same lower bound subscript as the array operand. For example: @@ -463,7 +462,7 @@ SELECT array_dims(ARRAY[[1,2],[3,4]] || ARRAY[[5,6],[7,8],[9,0]]); - When an N-dimensional array is pushed to the beginning + When an N-dimensional array is pushed onto the beginning or end of an N+1-dimensional array, the result is analogous to the element-array case above. 
Each N-dimensional sub-array is essentially an element of the N+1-dimensional @@ -601,9 +600,9 @@ SELECT * FROM around the array value plus delimiter characters between adjacent items. The delimiter character is usually a comma (,) but can be something else: it is determined by the typdelim setting - for the array's element type. (Among the standard data types provided - in the PostgreSQL distribution, all - use a comma, except for box, which uses a semicolon (;).) + for the array's element type. Among the standard data types provided + in the PostgreSQL distribution, all use a comma, + except for type box, which uses a semicolon (;). In a multidimensional array, each dimension (row, plane, cube, etc.) gets its own level of curly braces, and delimiters must be written between adjacent curly-braced entities of the same level. @@ -657,7 +656,7 @@ SELECT f1[1][-2][3] AS e1, f1[1][-1][5] AS e2 As shown previously, when writing an array value you can use double quotes around any individual array element. You must do so if the element value would otherwise confuse the array-value parser. - For example, elements containing curly braces, commas (or the matching + For example, elements containing curly braces, commas (or the data type's delimiter character), double quotes, backslashes, or leading or trailing whitespace must be double-quoted. Empty strings and strings matching the word NULL must be quoted, too. To put a double quote or @@ -668,7 +667,7 @@ SELECT f1[1][-2][3] AS e1, f1[1][-1][5] AS e2 - You can use whitespace before a left brace or after a right + You can add whitespace before a left brace or after a right brace. You can also add whitespace before or after any individual item string. In all of these cases the whitespace will be ignored. 
However, whitespace within double-quoted elements, or surrounded on both sides by diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 89f7ab35db..dbdd0a0dbc 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ - + Server Configuration @@ -1252,8 +1252,8 @@ SET ENABLE_SEQSCAN TO OFF; Asynchronous I/O depends on an effective posix_fadvise function, which some operating systems lack. If the function is not present then setting this parameter to anything but zero will result - in an error. On some operating systems the function is present but - does not actually do anything (e.g., Solaris). + in an error. On some operating systems (e.g., Solaris), the function + is present but does not actually do anything. diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 4844fd63b5..763a114e7f 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,4 +1,4 @@ - + Data Types @@ -290,7 +290,7 @@ to PostgreSQL, such as geometric paths, or have several possible formats, such as the date and time types. - Some of the input and output functions are not invertible, i.e. + Some of the input and output functions are not invertible, i.e., the result of an output function might lose accuracy when compared to the original input. @@ -441,11 +441,11 @@ On very minimal operating systems the bigint type - might not function correctly because it relies on compiler support + might not function correctly, because it relies on compiler support for eight-byte integers. On such machines, bigint - acts the same as integer (but still takes up eight - bytes of storage). (We are not aware of any - platform where this is true.) + acts the same as integer, but still takes up eight + bytes of storage. (We are not aware of any modern + platform where this is the case.) @@ -453,7 +453,7 @@ integer (or int), smallint, and bigint. 
The type names int2, int4, and - int8 are extensions, which are also used by + int8 are extensions, which are also used by some other SQL database systems. @@ -481,7 +481,7 @@ especially recommended for storing monetary amounts and other quantities where exactness is required. However, arithmetic on numeric values is very slow compared to the integer - and floating-point types described in the next section. + types, or to the floating-point types described in the next section. @@ -681,7 +681,7 @@ NUMERIC not-a-number, respectively. (On a machine whose floating-point arithmetic does not follow IEEE 754, these values will probably not work as expected.) When writing these values - as constants in a SQL command, you must put quotes around them, + as constants in an SQL command, you must put quotes around them, for example UPDATE table SET x = 'Infinity'. On input, these strings are recognized in a case-insensitive manner. @@ -785,7 +785,7 @@ ALTER SEQUENCE tablename_NOT NULL - constraint is applied to ensure that a null value cannot be explicitly + constraint is applied to ensure that a null value cannot be inserted. (In most cases you would also want to attach a UNIQUE or PRIMARY KEY constraint to prevent duplicate values from being inserted by accident, but this is @@ -798,7 +798,7 @@ ALTER SEQUENCE tablename_PostgreSQL 7.3, serial implied UNIQUE. This is no longer automatic. If you wish a serial column to have a unique constraint or be a - primary key, it must now be specified just like + primary key, it must now be specified, just like any other data type. @@ -837,15 +837,15 @@ ALTER SEQUENCE tablename_ The money type stores a currency amount with a fixed fractional precision; see . The fractional precision - is controlled by the database locale. + linkend="datatype-money-table">. The fractional precision is + determined by the database's setting. 
Input is accepted in a variety of formats, including integer and floating-point literals, as well as typical currency formatting, such as '$1,000.00'. Output is generally in the latter form but depends on the locale. Non-quoted numeric values can be converted to money by casting the numeric value to text and then - money: + money, for example: SELECT 1234::text::money; @@ -961,7 +961,7 @@ SELECT regexp_replace('52093.89'::money::text, '[$,]', '', 'g')::numeric; character varying(n) and character(n), where n is a positive integer. Both of these types can store strings up to - n characters in length (not bytes). An attempt to store a + n characters (not bytes) in length. An attempt to store a longer string into a column of these types will result in an error, unless the excess characters are all spaces, in which case the string will be truncated to the maximum length. (This somewhat @@ -1033,13 +1033,15 @@ SELECT regexp_replace('52093.89'::money::text, '[$,]', '', 'g')::numeric; - There is no performance difference between these three types, + There is no performance difference among these three types, apart from increased storage space when using the blank-padded type, and a few extra CPU cycles to check the length when storing into a length-constrained column. While character(n) has performance advantages in some other database systems, there is no such advantage in - PostgreSQL. In most situations + PostgreSQL; in fact + character(n) is usually the slowest of + the three because of its additional storage costs. In most situations text or character varying should be used instead. @@ -1583,7 +1585,8 @@ SELECT b, char_length(b) FROM test2; type [ (p) ] 'value' - where p is an optional precision corresponding to the number of + where p is an optional precision + specification giving the number of fractional digits in the seconds field. Precision can be specified for time, timestamp, and interval types. 
The allowed values are mentioned @@ -1705,7 +1708,7 @@ SELECT b, char_length(b) FROM test2; The time-of-day types are time [ (p) ] without time zone and time [ (p) ] with time - zone; time is equivalent to + zone. time alone is equivalent to time without time zone. @@ -1752,7 +1755,7 @@ SELECT b, char_length(b) FROM test2; 04:05 AM - same as 04:05 (AM ignored) + same as 04:05; AM does not affect value 04:05 PM @@ -1878,14 +1881,15 @@ January 8 04:05:06 1999 PST - The SQL standard differentiates timestamp without time zone + The SQL standard differentiates + timestamp without time zone and timestamp with time zone literals by the presence of a - + or - symbol after the time - indicating the time zone offset. Hence, according to the standard: + + or - symbol and time zone offset after + the time. Hence, according to the standard, TIMESTAMP '2004-10-19 10:23:54' - is a timestamp without time zone, while: + is a timestamp without time zone, while TIMESTAMP '2004-10-19 10:23:54+02' @@ -2048,15 +2052,15 @@ January 8 04:05:06 1999 PST - The output format of the date/time types can one of the four - styles: ISO 8601, + The output format of the date/time types can be set to one of the four + styles ISO 8601, SQL (Ingres), traditional POSTGRES - (Unix date format), and - German. It can be set using the SET datestyle command. The default + (Unix date format), or + German. The default is the ISO format. (The SQL standard requires the use of the ISO 8601 - format. The name of the SQL output format poorly - chosen and an historical accident.) SQL output format is a + historical accident.) shows examples of each output style. 
The output of the date and time types is of course only the date or time part @@ -2273,7 +2277,7 @@ January 8 04:05:06 1999 PST - In summary, there is a difference between abbreviations + In short, this is the difference between abbreviations and full names: abbreviations always represent a fixed offset from UTC, whereas most of the full names imply a local daylight-savings time rule, and so have two possible UTC offsets. @@ -2358,7 +2362,7 @@ January 8 04:05:06 1999 PST - interval values can be written using the following: + interval values can be written using the following verbose syntax: @@ -2708,9 +2712,10 @@ P years-months-days < 'off' '0' - Leading and trailing whitespace and case are ignored. The key words - TRUE and FALSE is the preferred - usage (and SQL-compliant). + Leading or trailing whitespace is ignored, and case does not matter. + The key words + TRUE and FALSE are the preferred + (SQL-compliant) usage. @@ -3072,8 +3077,9 @@ SELECT person.name, holidays.num_weeks FROM person, holidays - Boxes are output using the first syntax. Any two opposite corners - can be supplied; the corners are reordered on input to store the + Boxes are output using the first syntax. + Any two opposite corners can be supplied on input, but the values + will be reordered as needed to store the upper right and lower left corners. @@ -3111,7 +3117,7 @@ SELECT person.name, holidays.num_weeks FROM person, holidays - Paths are output using the first appropriate syntax. + Paths are output using the first or second syntax, as appropriate. @@ -3190,7 +3196,7 @@ SELECT person.name, holidays.num_weeks FROM person, holidays PostgreSQL offers data types to store IPv4, IPv6, and MAC addresses, as shown in . It is better to use these types instead of plain text types to store - network addresses because + network addresses, because these types offer input error checking and specialized operators and functions (see ). 
@@ -3266,7 +3272,7 @@ SELECT person.name, holidays.num_weeks FROM person, holidays y is the number of bits in the netmask. If the /y - is missing, the + portion is missing, the netmask is 32 for IPv4 and 128 for IPv6, so the value represents just a single host. On display, the /y @@ -3560,8 +3566,8 @@ SELECT * FROM test; are designed to support full text search, which is the activity of searching through a collection of natural-language documents to locate those that best match a query. - The tsvector type represents a document stored in a form optimized - for text search; tsquery type similarly represents + The tsvector type represents a document in a form optimized + for text search; the tsquery type similarly represents a text query. provides a detailed explanation of this facility, and summarizes the @@ -3577,7 +3583,7 @@ SELECT * FROM test; A tsvector value is a sorted list of distinct - lexemes, which are words which have been + lexemes, which are words that have been normalized to merge different variants of the same word (see for details). Sorting and duplicate-elimination are done automatically during input, as shown in @@ -3687,7 +3693,7 @@ SELECT to_tsvector('english', 'The Fat Rats'); A tsquery value stores lexemes that are to be - searched for, and combines them by honoring the boolean operators + searched for, and combines them honoring the boolean operators & (AND), | (OR), and ! (NOT). Parentheses can be used to enforce grouping of the operators: @@ -3825,8 +3831,8 @@ a0ee-bc99-9c0b-4ef8-bb6d-6bb9-bd38-0a11 The xml data type can be used to store XML data. Its advantage over storing XML data in a text field is that it - checks the input values for well-formedness, and support - functions can perform type-safe operations on it; see . Use of this data type requires the installation to have been built with configure --with-libxml. 
@@ -3870,8 +3876,9 @@ xml 'bar' The xml type does not validate input values - against an optionally-supplied document type declaration - (DTD).DTD + against a document type declaration + (DTD),DTD + even when the input value specifies a DTD. @@ -3883,7 +3890,7 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } value AS type can be character, character varying, or - text (or an alias name for those). Again, according + text (or an alias for one of those). Again, according to the SQL standard, this is the only way to convert between type xml and character types, but PostgreSQL also allows you to simply cast the value. @@ -3923,7 +3930,7 @@ SET xmloption TO { DOCUMENT | CONTENT }; representations of XML values, such as in the above examples. This would ordinarily mean that encoding declarations contained in XML data can become invalid as the character data is converted - to other encodings while travelling between client and server + to other encodings while travelling between client and server, because the embedded encoding declaration is not changed. To cope with this behavior, encoding declarations contained in character strings presented for input to the xml type @@ -3932,7 +3939,7 @@ SET xmloption TO { DOCUMENT | CONTENT }; processing, character strings of XML data must be sent from the client in the current client encoding. It is the responsibility of the client to either convert documents to the - current client encoding before sending them to the server or to + current client encoding before sending them to the server, or to adjust the client encoding appropriately. On output, values of type xml will not have an encoding declaration, and clients should assume all data is in the current client diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index caf4cfd025..406a521ff0 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -1,4 +1,4 @@ - + Data Definition @@ -557,8 +557,8 @@ CREATE TABLE products ( comparison. 
That means even in the presence of a unique constraint it is possible to store duplicate rows that contain a null value in at least one of the constrained - columns. This behavior conforms to the SQL standard, but there - might be other SQL databases might not follow this rule. So be + columns. This behavior conforms to the SQL standard, but we have + heard that other SQL databases might not follow this rule. So be careful when developing applications that are intended to be portable. @@ -1802,7 +1802,7 @@ REVOKE CREATE ON SCHEMA public FROM PUBLIC; such names, to ensure that you won't suffer a conflict if some future version defines a system table named the same as your table. (With the default search path, an unqualified reference to - your table name would be resolved as a system table instead.) + your table name would then be resolved as the system table instead.) System tables will continue to follow the convention of having names beginning with pg_, so that they will not conflict with unqualified user-table names so long as users avoid @@ -2571,14 +2571,14 @@ CREATE TRIGGER insert_measurement_trigger CREATE OR REPLACE FUNCTION measurement_insert_trigger() RETURNS TRIGGER AS $$ BEGIN - IF ( NEW.logdate >= DATE '2006-02-01' AND + IF ( NEW.logdate >= DATE '2006-02-01' AND NEW.logdate < DATE '2006-03-01' ) THEN INSERT INTO measurement_y2006m02 VALUES (NEW.*); - ELSIF ( NEW.logdate >= DATE '2006-03-01' AND + ELSIF ( NEW.logdate >= DATE '2006-03-01' AND NEW.logdate < DATE '2006-04-01' ) THEN INSERT INTO measurement_y2006m03 VALUES (NEW.*); ... - ELSIF ( NEW.logdate >= DATE '2008-01-01' AND + ELSIF ( NEW.logdate >= DATE '2008-01-01' AND NEW.logdate < DATE '2008-02-01' ) THEN INSERT INTO measurement_y2008m01 VALUES (NEW.*); ELSE @@ -2709,9 +2709,9 @@ SELECT count(*) FROM measurement WHERE logdate >= DATE '2008-01-01'; Without constraint exclusion, the above query would scan each of the partitions of the measurement table. 
With constraint exclusion enabled, the planner will examine the constraints of each - partition and try to determine which partitions need not - be scanned because they cannot not contain any rows meeting the query's - WHERE clause. When the planner can determine this, it + partition and try to prove that the partition need not + be scanned because it could not contain any rows meeting the query's + WHERE clause. When the planner can prove this, it excludes the partition from the query plan. @@ -2906,7 +2906,7 @@ ANALYZE measurement; - Keep the partitioning constraints simple or else the planner may not be + Keep the partitioning constraints simple, else the planner may not be able to prove that partitions don't need to be visited. Use simple equality conditions for list partitioning, or simple range tests for range partitioning, as illustrated in the preceding diff --git a/doc/src/sgml/dml.sgml b/doc/src/sgml/dml.sgml index 08fd5b7630..eb114263a5 100644 --- a/doc/src/sgml/dml.sgml +++ b/doc/src/sgml/dml.sgml @@ -1,4 +1,4 @@ - + Data Manipulation @@ -248,10 +248,7 @@ DELETE FROM products WHERE price = 10; DELETE FROM products; - then all rows in the table will be deleted! ( can also be used - to delete all rows.) - Caveat programmer. + then all rows in the table will be deleted! Caveat programmer. diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml index e37eac587e..4aafc4e2a0 100644 --- a/doc/src/sgml/docguide.sgml +++ b/doc/src/sgml/docguide.sgml @@ -1,4 +1,4 @@ - + Documentation @@ -358,7 +358,7 @@ CATALOG "dsssl/catalog" Create the directory /usr/local/share/sgml/docbook-4.2 and change to it. (The exact location is irrelevant, but this one is - reasonable within the layout we are following here.): + reasonable within the layout we are following here.) 
$ mkdir /usr/local/share/sgml/docbook-4.2 $ cd /usr/local/share/sgml/docbook-4.2 @@ -421,7 +421,7 @@ perl -pi -e 's/iso-(.*).gml/ISO\1/g' docbook.cat To install the style sheets, unzip and untar the distribution and move it to a suitable place, for example /usr/local/share/sgml. (The archive will - automatically create a subdirectory.): + automatically create a subdirectory.) $ gunzip docbook-dsssl-1.xx.tar.gz $ tar -C /usr/local/share/sgml -xf docbook-dsssl-1.xx.tar diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index a594a12ed7..cbadf0f99a 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ - + Functions and Operators @@ -268,8 +268,9 @@ a >= x AND a <= y - Note BETWEEN is inclusive in comparing the endpoint - values. NOT BETWEEN does the opposite comparison: + Notice that BETWEEN treats the endpoint values as included + in the range. + NOT BETWEEN does the opposite comparison: a NOT BETWEEN x AND y @@ -280,9 +281,11 @@ BETWEEN SYMMETRIC - BETWEEN SYMMETRIC is the same as BETWEEN - except there is no requirement that the argument to the left of AND be less than - or equal to the argument on the right; the proper range is automatically determined. + BETWEEN SYMMETRIC is the same as BETWEEN + except there is no requirement that the argument to the left of + AND be less than or equal to the argument on the right. + If it is not, those two arguments are automatically swapped, so that + a nonempty range is always implied. @@ -322,7 +325,7 @@ - Some applications might expect + Some applications might expect that expression = NULL returns true if expression evaluates to the null value. It is highly recommended that these applications @@ -358,11 +361,11 @@ IS NOT DISTINCT FROM - Ordinary comparison operators yield null (signifying unknown) - when either input is null, not true or false, e.g., 7 = - NULL yields null. 
- Another way to do comparisons is with the - IS NOT DISTINCT FROM construct: + Ordinary comparison operators yield null (signifying unknown), + not true or false, when either input is null. For example, + 7 = NULL yields null. When this behavior is not suitable, + use the + IS NOT DISTINCT FROM constructs: expression IS DISTINCT FROM expression expression IS NOT DISTINCT FROM expression @@ -440,8 +443,8 @@ Mathematical operators are provided for many - PostgreSQL types. For types that support - only limited mathematical operations + PostgreSQL types. For types without + standard mathematical conventions (e.g., date/time types) we describe the actual behavior in subsequent sections. @@ -1010,11 +1013,13 @@ - SQL defines some string functions with a special syntax - wherein certain key words rather than commas are used to separate the - arguments. Details are in . - These functions are also implemented using the regular syntax for - function invocation. (See .) + SQL defines some string functions that use + key words, rather than commas, to separate + arguments. Details are in + . + PostgreSQL also provides versions of these functions + that use the regular function invocation syntax + (see ). @@ -1795,8 +1800,8 @@ The conversion names follow a standard naming scheme: The official name of the source encoding with all - non-alphanumeric characters replaced by underscores followed - by _to_ followed by similarly + non-alphanumeric characters replaced by underscores, followed + by _to_, followed by the similarly processed destination encoding name. Therefore, the names might deviate from the customary encoding names. @@ -2598,12 +2603,12 @@ SQL defines some string functions that use - a key word syntax, rather than commas to separate + key words, rather than commas, to separate arguments. Details are in . - Such functions are also implemented using the regular syntax for - function invocation. - (See .) 
+ PostgreSQL also provides versions of these functions + that use the regular function invocation syntax + (see ). @@ -2999,7 +3004,7 @@ cast(-44 as bit(12)) 111111010100 - The LIKE expression returns true if + The LIKE expression returns true if the string matches the supplied pattern. (As expected, the NOT LIKE expression returns @@ -3011,11 +3016,11 @@ cast(-44 as bit(12)) 111111010100 If pattern does not contain percent - signs or underscore, then the pattern only represents the string + signs or underscores, then the pattern only represents the string itself; in that case LIKE acts like the equals operator. An underscore (_) in pattern stands for (matches) any single - character; a percent sign (%) matches any string + character; a percent sign (%) matches any sequence of zero or more characters. @@ -3028,7 +3033,7 @@ cast(-44 as bit(12)) 111111010100 'abc' LIKE 'c' false - + LIKE pattern matching always covers the entire string. Therefore, to match a sequence anywhere within a string, the @@ -3036,9 +3041,9 @@ cast(-44 as bit(12)) 111111010100 - To match only a literal underscore or percent sign without matching + To match a literal underscore or percent sign without matching other characters, the respective character in - pattern must be + pattern must be preceded by the escape character. The default escape character is the backslash but a different one can be selected by using the ESCAPE clause. To match the escape @@ -3053,8 +3058,8 @@ cast(-44 as bit(12)) 111111010100 actually matches a literal backslash means writing four backslashes in the statement. You can avoid this by selecting a different escape character with ESCAPE; then a backslash is not special to - LIKE anymore. (But backslash is still special to the string - literal parser, so you still need two of them.) + LIKE anymore. (But backslash is still special to the + string literal parser, so you still need two of them to match a backslash.) 
@@ -3163,9 +3168,9 @@ cast(-44 as bit(12)) 111111010100 - Notice that bounded repetition (? and {...}) - is not provided, though they exist in POSIX. Also, the period (.) - is not a metacharacter. + Notice that bounded repetition operators (? and + {...}) are not provided, though they exist in POSIX. + Also, the period (.) is not a metacharacter. @@ -3295,7 +3300,7 @@ substring('foobar' from '#"o_b#"%' for '#') NULLLIKE, pattern characters match string characters exactly unless they are special characters in the regular expression language — but regular expressions use - different special characters than LIKE. + different special characters than LIKE does. Unlike LIKE patterns, a regular expression is allowed to match anywhere within a string, unless the regular expression is explicitly anchored to the beginning or @@ -3562,7 +3567,7 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; A branch is zero or more quantified atoms or constraints, concatenated. - It tries a match of the first, followed by a match for the second, etc; + It matches a match for the first, followed by a match for the second, etc; an empty branch matches the empty string. @@ -3579,7 +3584,8 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; A constraint matches an empty string, but matches only when - specific conditions are met. A constraint cannot be followed by a quantifier. + specific conditions are met. A constraint can be used where an atom + could be used, except it cannot be followed by a quantifier. The simple constraints are shown in ; some more constraints are described later. 
@@ -3788,12 +3794,12 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; ^ - matches the beginning of the string + matches at the beginning of the string $ - matches the end of the string + matches at the end of the string @@ -3842,12 +3848,12 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; To include a literal ] in the list, make it the - first character (possibly following a ^). To + first character (after ^, if that is used). To include a literal -, make it the first or last character, or the second endpoint of a range. To use a literal - - as the start of a range, enclose it + - as the first endpoint of a range, enclose it in [. and .] to make it a - collating element (see below). With the exception of these characters and + collating element (see below). With the exception of these characters, some combinations using [ (see next paragraphs), and escapes (AREs only), all other special characters lose their special significance within a bracket expression. @@ -3945,7 +3951,7 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; Character-entry escapes exist to make it easier to specify - non-printing and inconvenient characters in REs. They are + non-printing and other inconvenient characters in REs. They are shown in . 
@@ -4050,7 +4056,7 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; \uwxyz (where wxyz is exactly four hexadecimal digits) the UTF16 (Unicode, 16-bit) character U+wxyz - in the local byte encoding + in the local byte ordering @@ -4058,7 +4064,7 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; (where stuvwxyz is exactly eight hexadecimal digits) reserved for a hypothetical Unicode extension to 32 bits - + @@ -4067,11 +4073,11 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; - \x### - (where ### is any sequence of hexadecimal + \xhhh + (where hhh is any sequence of hexadecimal digits) the character whose hexadecimal value is - 0x### + 0xhhh (a single character no matter how many hexadecimal digits are used) @@ -4082,19 +4088,19 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; - \## - (where ## is exactly two octal digits, + \xy + (where xy is exactly two octal digits, and is not a back reference) the character whose octal value is - 0## + 0xy - \### - (where ### is exactly three octal digits, + \xyz + (where xyz is exactly three octal digits, and is not a back reference) the character whose octal value is - 0### + 0xyz @@ -4258,12 +4264,12 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; There is an inherent ambiguity between octal character-entry - escapes and back references, which is resolved by heuristics, + escapes and back references, which is resolved by the following heuristics, as hinted at above. A leading zero always indicates an octal escape. A single non-zero digit, not followed by another digit, is always taken as a back reference. 
- A multidigit sequence not starting with a zero is taken as a back + A multi-digit sequence not starting with a zero is taken as a back reference if it comes after a suitable subexpression (i.e., the number is in the legal range for a back reference), and otherwise is taken as octal. @@ -4749,7 +4755,7 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); double precision argument and converts from Unix epoch (seconds since 1970-01-01 00:00:00+00) to timestamp with time zone. - (Integer Unix epochs are implicitly cast to + (Integer Unix epochs are implicitly cast to double precision.) @@ -4817,7 +4823,7 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); to_timestamp(double precision) timestamp with time zone - convert UNIX epoch to time stamp + convert Unix epoch to time stamp to_timestamp(1284352323) @@ -4825,11 +4831,12 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})');
- In a to_char output template string, there are certain patterns that are - recognized and replaced with appropriately-formatted data based on the value. - Any text that is not a template pattern is simply - copied verbatim. Similarly, in an input template string (anything but to_char), template patterns - identify the values to be supplied by the input data string. + In a to_char output template string, there are certain + patterns that are recognized and replaced with appropriately-formatted + data based on the given value. Any text that is not a template pattern is + simply copied verbatim. Similarly, in an input template string (for the + other functions), template patterns identify the values to be supplied by + the input data string. @@ -5033,11 +5040,11 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); RM - uppercase month in Roman numerals (I-XII; I=January) + month in uppercase Roman numerals (I-XII; I=January) rm - lowercase month in Roman numerals (i-xii; i=January) + month in lowercase Roman numerals (i-xii; i=January) TZ @@ -5073,7 +5080,7 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); FM prefix - fill mode (suppress padding of blanks and zeroes) + fill mode (suppress padding blanks and zeroes) FMMonth @@ -5099,7 +5106,7 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); SP suffix - spell mode (not supported) + spell mode (not implemented) DDSP @@ -5127,8 +5134,8 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); to_timestamp and to_date - skip multiple blank spaces in the input string unless the FX option - is used. For example, + skip multiple blank spaces in the input string unless the + FX option is used. For example, to_timestamp('2000    JUN', 'YYYY MON') works, but to_timestamp('2000    JUN', 'FXYYYY MON') returns an error because to_timestamp expects one space only. 
@@ -5177,8 +5184,8 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); In conversions from string to timestamp or - date, the CC field (century) is ignored if there - is a YYY, YYYY or + date, the CC (century) field is ignored + if there is a YYY, YYYY or Y,YYY field. If CC is used with YY or Y then the year is computed as (CC-1)*100+YY. @@ -5220,7 +5227,7 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); In a conversion from string to timestamp, millisecond - (MS) and microsecond (US) + (MS) or microsecond (US) values are used as the seconds digits after the decimal point. For example to_timestamp('12:3', 'SS:MS') is not 3 milliseconds, @@ -5251,7 +5258,8 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); - to_char(interval) formats HH and + + to_char(interval) formats HH and HH12 as hours in a single day, while HH24 can output hours exceeding a single day, e.g., >24. @@ -5390,14 +5398,14 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); multiplies the input values by 10^n, where n is the number of digits following - V. + V. to_char does not support the use of - V with non-integer values. - (e.g., 99.9V99 is not allowed.) + V combined with a decimal point + (e.g., 99.9V99 is not allowed). - + Certain modifiers can be applied to any template pattern to alter its @@ -6129,7 +6137,7 @@ EXTRACT(field FROM source) century - The century: + The century @@ -6225,7 +6233,7 @@ SELECT EXTRACT(DOY FROM TIMESTAMP '2001-02-16 20:38:40'); For date and timestamp values, the - number of seconds since 1970-01-01 00:00:00-00 GMT (can be negative); + number of seconds since 1970-01-01 00:00:00 UTC (can be negative); for interval values, the total number of seconds in the interval @@ -6778,6 +6786,9 @@ now() + transaction_timestamp() is equivalent to + CURRENT_TIMESTAMP, but is named to clearly reflect + what it returns. statement_timestamp() returns the start time of the current statement (more specifically, the time of receipt of the latest command message from the client). 
@@ -6792,10 +6803,7 @@ now() but as a formatted text string rather than a timestamp with time zone value. now() is a traditional PostgreSQL - equivalent to CURRENT_TIMESTAMP. - transaction_timestamp() is likewise equivalent to - CURRENT_TIMESTAMP, but is named to clearly reflect - what it returns. + equivalent to transaction_timestamp(). @@ -7428,7 +7436,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple It is possible to access the two component numbers of a point - as though they were an array with indices 0 and 1. For example, if + as though the point were an array with indexes 0 and 1. For example, if t.p is a point column then SELECT p[0] FROM t retrieves the X coordinate and UPDATE t SET p[1] = ... changes the Y coordinate. @@ -8235,7 +8243,7 @@ SELECT xmlelement(name test, xmlattributes(func(a, b))) FROM test; Element content, if specified, will be formatted according to - the data type. If the content is itself of type xml, + its data type. If the content is itself of type xml, complex XML documents can be constructed. For example: The xmlroot expression alters the properties of the root node of an XML value. If a version is specified, - this replaces the value in the version declaration; if a - standalone value is specified, this replaces the value in the - standalone declaration. + it replaces the value in the root node's version declaration; if a + standalone setting is specified, it replaces the value in the + root node's standalone declaration. @@ -8967,7 +8975,7 @@ SELECT setval('foo', 42, false); Next nextval wi If a sequence object has been created with default parameters, - nextval will return successive values + successive nextval calls will return successive values beginning with 1. Other behaviors can be obtained by using special parameters in the command; see its command reference page for more information. @@ -9028,16 +9036,17 @@ END CASE clauses can be used wherever - an expression is valid. 
condition is an - expression that returns a boolean result. If the result is true - the value of the CASE expression is the - result that follows the condition. If the result is false - subsequent WHEN clauses are searched in the same - manner. If no WHEN - condition is true then the value of the - case expression is the result of the + an expression is valid. Each condition is an + expression that returns a boolean result. If the condition's + result is true, the value of the CASE expression is the + result that follows the condition, and the + remainder of the CASE expression is not processed. If the + condition's result is not true, any subsequent WHEN clauses + are examined in the same manner. If no WHEN + condition yields true, the value of the + CASE expression is the result of the ELSE clause. If the ELSE clause is - omitted and no condition matches, the result is null. + omitted and no condition is true, the result is null. @@ -9074,8 +9083,8 @@ SELECT a, - The following CASE expression is a - variant of the general form above: + There is a simple form of CASE expression + that is a variant of the general form above: CASE expression @@ -9085,10 +9094,10 @@ CASE expression END - The - expression is computed and compared to - all the values in the - WHEN clauses until one is found that is equal. If + The first + expression is computed, then compared to + each of the value expressions in the + WHEN clauses until one is found that is equal to it. If no match is found, the result of the ELSE clause (or a null value) is returned. This is similar to the switch statement in C. @@ -9114,8 +9123,8 @@ SELECT a, - A CASE expression evaluates any subexpressions - that are needed to determine the result. For example, this is a + A CASE expression does not evaluate any subexpressions + that are not needed to determine the result. For example, this is a possible way of avoiding a division-by-zero failure: SELECT ... 
WHERE CASE WHEN x <> 0 THEN y/x > 1.5 ELSE false END; @@ -9154,7 +9163,7 @@ SELECT COALESCE(description, short_description, '(none)') ... Like a CASE expression, COALESCE only - evaluates arguments that are needed to determine the result; + evaluates the arguments that are needed to determine the result; that is, arguments to the right of the first non-null argument are not evaluated. This SQL-standard function provides capabilities similar to NVL and IFNULL, which are used in some other @@ -9804,17 +9813,18 @@ SELECT NULLIF(value, '(none)') ... SOME - Boolean aggregates bool_and and + Boolean aggregates bool_and and bool_or correspond to standard SQL aggregates every and any or - some. - As for any and some, + some. + As for any and some, it seems that there is an ambiguity built into the standard syntax: SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...; - Here ANY can be considered as leading either - to a subquery or to an aggregate, if the select expression returns one row. + Here ANY can be considered either as introducing + a subquery, or as being an aggregate function, if the sub-select + returns one row with a boolean value. Thus the standard name cannot be given to these aggregates.
@@ -9829,7 +9839,7 @@ SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...; SELECT count(*) FROM sometable; will be executed by PostgreSQL using a - sequential scan of an entire table. + sequential scan of the entire table.
@@ -10533,7 +10543,7 @@ EXISTS (subquery) or subquery. The subquery is evaluated to determine whether it returns any rows. If it returns at least one row, the result of EXISTS is - true; if the subquery returns no rows, the result of EXISTS + true; if the subquery returns no rows, the result of EXISTS is false. @@ -10882,7 +10892,7 @@ WHERE EXISTS (SELECT 1 FROM tab2 WHERE col2 = tab1.col2); The forms involving array subexpressions are PostgreSQL extensions; the rest are SQL-compliant. - All of the expressions documented in this section return + All of the expression forms documented in this section return Boolean (true/false) results. @@ -11594,8 +11604,8 @@ SET search_path TO schema , schema, .. pg_my_temp_schema returns the OID of the current - session's temporary schema, or 0 if it has none (because no - temporary tables have been created). + session's temporary schema, or zero if it has none (because it has not + created any temporary tables). pg_is_other_temp_schema returns true if the given OID is the OID of another session's temporary schema. (This can be useful, for example, to exclude other sessions' temporary @@ -11891,7 +11901,8 @@ SELECT has_table_privilege('joe', 'mytable', 'INSERT, SELECT WITH GRANT OPTION') has_any_column_privilege checks whether a user can - access any column of a table in a particular way; its argument possibilities + access any column of a table in a particular way. + Its argument possibilities are analogous to has_table_privilege, except that the desired access privilege type must evaluate to some combination of @@ -11908,7 +11919,8 @@ SELECT has_table_privilege('joe', 'mytable', 'INSERT, SELECT WITH GRANT OPTION') has_column_privilege checks whether a user - can access a column in a particular way; its argument possibilities + can access a column in a particular way. + Its argument possibilities are analogous to has_table_privilege, with the addition that the column can be specified either by name or attribute number. 
@@ -11922,7 +11934,8 @@ SELECT has_table_privilege('joe', 'mytable', 'INSERT, SELECT WITH GRANT OPTION') has_database_privilege checks whether a user - can access a database in a particular way; its argument possibilities + can access a database in a particular way. + Its argument possibilities are analogous to has_table_privilege. The desired access privilege type must evaluate to some combination of CREATE, @@ -11934,7 +11947,8 @@ SELECT has_table_privilege('joe', 'mytable', 'INSERT, SELECT WITH GRANT OPTION') has_function_privilege checks whether a user - can access a function in a particular way; its argument possibilities + can access a function in a particular way. + Its argument possibilities are analogous to has_table_privilege. When specifying a function by a text string rather than by OID, the allowed input is the same as for the regprocedure data type @@ -11949,7 +11963,8 @@ SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute'); has_foreign_data_wrapper_privilege checks whether a user - can access a foreign-data wrapper in a particular way; its argument possibilities + can access a foreign-data wrapper in a particular way. + Its argument possibilities are analogous to has_table_privilege. The desired access privilege type must evaluate to USAGE. @@ -11957,7 +11972,8 @@ SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute'); has_language_privilege checks whether a user - can access a procedural language in a particular way; its argument possibilities + can access a procedural language in a particular way. + Its argument possibilities are analogous to has_table_privilege. The desired access privilege type must evaluate to USAGE. @@ -11965,7 +11981,8 @@ SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute'); has_schema_privilege checks whether a user - can access a schema in a particular way; its argument possibilities + can access a schema in a particular way. 
+ Its argument possibilities are analogous to has_table_privilege. The desired access privilege type must evaluate to some combination of CREATE or @@ -11974,7 +11991,8 @@ SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute'); has_server_privilege checks whether a user - can access a foreign server in a particular way; its argument possibilities + can access a foreign server in a particular way. + Its argument possibilities are analogous to has_table_privilege. The desired access privilege type must evaluate to USAGE. @@ -11982,7 +12000,8 @@ SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute'); has_tablespace_privilege checks whether a user - can access a tablespace in a particular way; its argument possibilities + can access a tablespace in a particular way. + Its argument possibilities are analogous to has_table_privilege. The desired access privilege type must evaluate to CREATE. @@ -11990,7 +12009,8 @@ SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute'); pg_has_role checks whether a user - can access a role in a particular way; its argument possibilities + can access a role in a particular way. + Its argument possibilities are analogous to has_table_privilege. The desired access privilege type must evaluate to some combination of MEMBER or @@ -12305,7 +12325,7 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); get CREATE [ CONSTRAINT ] TRIGGER command for trigger - pg_get_userbyid(roleid) + pg_get_userbyid(role_oid) name get role name with given OID @@ -12559,7 +12579,7 @@ SELECT typlen FROM pg_type WHERE oid = pg_typeof(33); The functions shown in - export server transaction information. The main + provide server transaction information in an exportable form. The main use of these functions is to determine which transactions were committed between two snapshots. @@ -12641,8 +12661,8 @@ SELECT typlen FROM pg_type WHERE oid = pg_typeof(33); xmax - First as-yet-unassigned txid. 
All txids later than this are - not yet started as of the time of the snapshot, and thus invisible. + First as-yet-unassigned txid. All txids greater than or equal to this + are not yet started as of the time of the snapshot, and thus invisible. @@ -12652,7 +12672,7 @@ SELECT typlen FROM pg_type WHERE oid = pg_typeof(33); Active txids at the time of the snapshot. The list includes only those active txids between xmin and xmax; there might be active txids higher - than xmax. A txid that is xmin <= txid < + than xmax. A txid that is xmin <= txid < xmax and not in this list was already completed at the time of the snapshot, and thus either visible or dead according to its commit status. The list does not @@ -12834,9 +12854,9 @@ SELECT set_config('log_statement_stats', 'off', false); The process ID of an active backend can be found from the procpid column of the pg_stat_activity view, or by listing the - postgres processes on the server using + postgres processes on the server (using ps on Unix or the Task - Manager on Windows. + Manager on Windows). @@ -12904,7 +12924,7 @@ SELECT set_config('log_statement_stats', 'off', false); pg_stop_backup() text - Finalize after performing on-line backup + Finish performing on-line backup @@ -12991,7 +13011,7 @@ postgres=# select pg_start_backup('label_goes_here'); pg_current_xlog_location displays the current transaction log write - location in the format used by the above functions. Similarly, + location in the same format used by the above functions. Similarly, pg_current_xlog_insert_location displays the current transaction log insertion point. The insertion point is the logical end of the transaction log @@ -13086,9 +13106,9 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); bigint - Disk space used by the specified fork, 'main' or - 'fsm', of a table or index with the specified OID - or name; the table name can be schema-qualified. 
+ Disk space used by the specified fork ('main', + 'fsm' or 'vm') + of the table or index with the specified OID or name @@ -13128,8 +13148,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); bigint Total disk space used by the table with the specified OID or name, - including indexes and TOAST data; the table name can be - schema-qualified. + including indexes and TOAST data @@ -13154,6 +13173,8 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); size of the main data fork of the relation. Specifying 'fsm' returns the size of the Free Space Map (see ) associated with the + relation. Specifying 'vm' returns the size of the + Visibility Map (see ) associated with the relation. @@ -13240,7 +13261,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); size, last accessed time stamp, last modified time stamp, last file status change time stamp (Unix platforms only), file creation time stamp (Windows only), and a boolean - indicating if it is a directory. Typical usage include: + indicating if it is a directory. Typical usages include: SELECT * FROM pg_stat_file('filename'); SELECT (pg_stat_file('filename')).modification; @@ -13425,8 +13446,8 @@ SELECT (pg_stat_file('filename')).modification; pg_advisory_unlock_shared works the same as - pg_advisory_unlock, - except is releases a shared advisory lock. + pg_advisory_unlock, + except it releases a shared advisory lock. @@ -13435,7 +13456,7 @@ SELECT (pg_stat_file('filename')).modification; pg_advisory_unlock_all will release all advisory locks held by the current session. (This function is implicitly invoked - at session end, even if the client disconnects abruptly.) + at session end, even if the client disconnects ungracefully.) 
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index e40724df17..974e1415f6 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -1,4 +1,4 @@ - + Indexes @@ -36,7 +36,7 @@ SELECT content FROM test1 WHERE id = constant; matching entries. If there are many rows in test1 and only a few rows (perhaps zero or one) that would be returned by such a query, this is clearly an - inefficient method. But if the system maintains an + inefficient method. But if the system has been instructed to maintain an index on the id column, it can use a more efficient method for locating matching rows. For instance, it might only have to walk a few levels deep into a search tree. @@ -73,7 +73,7 @@ CREATE INDEX test1_id_index ON test1 (id); Once an index is created, no further intervention is required: the system will update the index when the table is modified, and it will - use the index in queries when it thinks it would be more efficient + use the index in queries when it thinks doing so would be more efficient than a sequential table scan. But you might have to run the ANALYZE command regularly to update statistics to allow the query planner to make educated decisions. @@ -294,7 +294,7 @@ CREATE TABLE test2 ( SELECT name FROM test2 WHERE major = constant AND minor = constant; - then it might be appropriate to define an index on columns + then it might be appropriate to define an index on the columns major and minor together, e.g.: @@ -384,16 +384,16 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor); The planner will consider satisfying an ORDER BY specification - by either scanning an available index that matches the specification, + either by scanning an available index that matches the specification, or by scanning the table in physical order and doing an explicit sort. 
For a query that requires scanning a large fraction of the - table, the explicit sort is likely to be faster than using an index + table, an explicit sort is likely to be faster than using an index because it requires - less disk I/O due to a sequential access pattern. Indexes are + less disk I/O due to following a sequential access pattern. Indexes are more useful when only a few rows need be fetched. An important special case is ORDER BY in combination with LIMIT n: an explicit sort will have to process - all data to identify the first n rows, but if there is + all the data to identify the first n rows, but if there is an index matching the ORDER BY, the first n rows can be retrieved directly, without scanning the remainder at all. @@ -433,14 +433,14 @@ CREATE INDEX test3_desc_index ON test3 (id DESC NULLS LAST); ORDER BY x DESC, y DESC if we scan backward. But it might be that the application frequently needs to use ORDER BY x ASC, y DESC. There is no way to get that - ordering from a simpler index, but it is possible if the index is defined + ordering from a plain index, but it is possible if the index is defined as (x ASC, y DESC) or (x DESC, y ASC). Obviously, indexes with non-default sort orderings are a fairly specialized feature, but sometimes they can produce tremendous - speedups for certain queries. Whether it's worth creating such an + speedups for certain queries. Whether it's worth maintaining such an index depends on how often you use queries that require a special sort ordering. @@ -584,9 +584,9 @@ CREATE UNIQUE INDEX name ON table - An index column need not be just a column of an underlying table, + An index column need not be just a column of the underlying table, but can be a function or scalar expression computed from one or - more columns of a table. This feature is useful to obtain fast + more columns of the table. This feature is useful to obtain fast access to tables based on the results of computations. 
@@ -666,8 +666,8 @@ CREATE INDEX people_names ON people ((first_name || ' ' || last_name)); values. Since a query searching for a common value (one that accounts for more than a few percent of all the table rows) will not use the index anyway, there is no point in keeping those rows in the - index. A partial index reduces the size of the index, which speeds - up queries that use the index. It will also speed up many table + index at all. This reduces the size of the index, which will speed + up those queries that do use the index. It will also speed up many table update operations because the index does not need to be updated in all cases. shows a possible application of this idea. @@ -701,7 +701,7 @@ CREATE TABLE access_log ( such as this: CREATE INDEX access_log_client_ip_ix ON access_log (client_ip) -WHERE NOT (client_ip > inet '192.168.100.0' AND +WHERE NOT (client_ip > inet '192.168.100.0' AND client_ip < inet '192.168.100.255'); @@ -724,14 +724,14 @@ WHERE client_ip = inet '192.168.100.23'; Observe that this kind of partial index requires that the common values be predetermined, so such partial indexes are best used for - data distribution that do not change. The indexes can be recreated + data distributions that do not change. The indexes can be recreated occasionally to adjust for new data distributions, but this adds - maintenance overhead. + maintenance effort. - Another possible use for partial indexes is to exclude values from the + Another possible use for a partial index is to exclude values from the index that the typical query workload is not interested in; this is shown in . 
This results in the same diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index c214374fca..f6f5166ada 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1,4 +1,4 @@ - + <![%standalone-include[<productname>PostgreSQL</>]]> @@ -85,7 +85,7 @@ su - postgres <listitem> <para> - You need an <acronym>ISO</>/<acronym>ANSI</> C compiler (minimum + You need an <acronym>ISO</>/<acronym>ANSI</> C compiler (at least C89-compliant). Recent versions of <productname>GCC</> are recommendable, but <productname>PostgreSQL</> is known to build using a wide variety @@ -118,7 +118,7 @@ su - postgres command you type, and allows you to use arrow keys to recall and edit previous commands. This is very helpful and is strongly recommended. If you don't want to use it then you must specify - the <option>--without-readline</option> option of + the <option>--without-readline</option> option to <filename>configure</>. As an alternative, you can often use the BSD-licensed <filename>libedit</filename> library, originally developed on <productname>NetBSD</productname>. The @@ -422,11 +422,10 @@ su - postgres On systems that have <productname>PostgreSQL</> started at boot time, there is probably a start-up file that will accomplish the same thing. For example, on a <systemitem class="osname">Red Hat Linux</> system one - might find that: + might find that this works: <screen> <userinput>/etc/rc.d/init.d/postgresql stop</userinput> </screen> - works. </para> </step> @@ -471,7 +470,7 @@ su - postgres <step> <para> - Start the database server, again the special database user + Start the database server, again using the special database user account: <programlisting> <userinput>/usr/local/pgsql/bin/postgres -D /usr/local/pgsql/data</> @@ -1648,7 +1647,7 @@ All of PostgreSQL is successfully made. Ready to install. later on. To reset the source tree to the state in which it was distributed, use <command>gmake distclean</>. 
If you are going to build for several platforms within the same source tree you must do - this and rebuild for each platform. (Alternatively, use + this and re-configure for each platform. (Alternatively, use a separate build tree for each platform, so that the source tree remains unmodified.) </para> @@ -1675,7 +1674,7 @@ All of PostgreSQL is successfully made. Ready to install. </indexterm> <para> - On several systems with shared libraries + On some systems with shared libraries you need to tell the system how to find the newly installed shared libraries. The systems on which this is <emphasis>not</emphasis> necessary include <systemitem diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index ae36d07832..f5dce009ff 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/monitoring.sgml,v 1.69 2009/04/27 16:27:36 momjian Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/monitoring.sgml,v 1.70 2009/06/17 21:58:49 tgl Exp $ --> <chapter id="monitoring"> <title>Monitoring Database Activity @@ -929,7 +929,7 @@ postgres: user database host read() calls issued for the table, index, or database; the number of actual physical reads is usually lower due to kernel-level buffering. The *_blks_read - statistics columns uses this subtraction, i.e., fetched minus hit. + statistics columns use this subtraction, i.e., fetched minus hit. diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index 4637f0ae28..4213216cc4 100644 --- a/doc/src/sgml/mvcc.sgml +++ b/doc/src/sgml/mvcc.sgml @@ -1,4 +1,4 @@ - + Concurrency Control @@ -246,7 +246,7 @@ committed before the query began; it never sees either uncommitted data or changes committed during query execution by concurrent transactions. In effect, a SELECT query sees - a snapshot of the database at the instant the query begins to + a snapshot of the database as of the instant the query begins to run. 
However, SELECT does see the effects of previous updates executed within its own transaction, even though they are not yet committed. Also note that two successive @@ -260,7 +260,7 @@ FOR UPDATE, and SELECT FOR SHARE commands behave the same as SELECT in terms of searching for target rows: they will only find target rows - that were committed before the command start time. However, such a target + that were committed as of the command start time. However, such a target row might have already been updated (or deleted or locked) by another concurrent transaction by the time it is found. In this case, the would-be updater will wait for the first updating transaction to commit or @@ -367,16 +367,17 @@ COMMIT; transaction began; it never sees either uncommitted data or changes committed during transaction execution by concurrent transactions. (However, - SELECT does see the effects of previous updates + the query does see the effects of previous updates executed within its own transaction, even though they are not yet committed.) This is different from Read Committed in that - SELECT in a serializable transaction - sees a snapshot as of the start of the transaction, not as of the start + a query in a serializable transaction + sees a snapshot as of the start of the transaction, + not as of the start of the current query within the transaction. Thus, successive SELECT commands within a single - transaction see the same data, i.e. they never see changes made by - transactions that committed after its own transaction started. (This - behavior can be ideal for reporting applications.) + transaction see the same data, i.e., they do not see changes made by + other transactions that committed after their own transaction started. + (This behavior can be ideal for reporting applications.) 
@@ -384,7 +385,7 @@ COMMIT; FOR UPDATE, and SELECT FOR SHARE commands behave the same as SELECT in terms of searching for target rows: they will only find target rows - that were committed before the transaction start time. However, such a + that were committed as of the transaction start time. However, such a target row might have already been updated (or deleted or locked) by another concurrent transaction by the time it is found. In this case, the @@ -666,9 +667,10 @@ SELECT SUM(value) FROM mytab WHERE class = 2; - Conflicts all lock modes except ACCESS SHARE, - ROW SHARE, and SHARE (it - does not conflict with itself). + Conflicts with the ROW EXCLUSIVE, + SHARE UPDATE EXCLUSIVE, SHARE ROW + EXCLUSIVE, EXCLUSIVE, and + ACCESS EXCLUSIVE lock modes. This mode protects a table against concurrent data changes. @@ -685,8 +687,11 @@ SELECT SUM(value) FROM mytab WHERE class = 2; - Conflicts all lock modes except ACCESS SHARE - and ROW SHARE. + Conflicts with the ROW EXCLUSIVE, + SHARE UPDATE EXCLUSIVE, + SHARE, SHARE ROW + EXCLUSIVE, EXCLUSIVE, and + ACCESS EXCLUSIVE lock modes. @@ -702,7 +707,11 @@ SELECT SUM(value) FROM mytab WHERE class = 2; - Conflicts all lock modes except ACCESS SHARE. + Conflicts with the ROW SHARE, ROW + EXCLUSIVE, SHARE UPDATE + EXCLUSIVE, SHARE, SHARE + ROW EXCLUSIVE, EXCLUSIVE, and + ACCESS EXCLUSIVE lock modes. This mode allows only concurrent ACCESS SHARE locks, i.e., only reads from the table can proceed in parallel with a transaction holding this lock mode. @@ -711,7 +720,7 @@ SELECT SUM(value) FROM mytab WHERE class = 2; This lock mode is not automatically acquired on user tables by any PostgreSQL command. However it is - acquired during certain internal system catalogs operations. + acquired on certain system catalogs in some operations. @@ -722,7 +731,12 @@ SELECT SUM(value) FROM mytab WHERE class = 2; - Conflicts with all lock modes. 
+ Conflicts with locks of all modes (ACCESS + SHARE, ROW SHARE, ROW + EXCLUSIVE, SHARE UPDATE + EXCLUSIVE, SHARE, SHARE + ROW EXCLUSIVE, EXCLUSIVE, and + ACCESS EXCLUSIVE). This mode guarantees that the holder is the only transaction accessing the table in any way. @@ -749,7 +763,7 @@ SELECT SUM(value) FROM mytab WHERE class = 2; Once acquired, a lock is normally held till end of transaction. But if a lock is acquired after establishing a savepoint, the lock is released - immediately if the savepoint is rolled back. This is consistent with + immediately if the savepoint is rolled back to. This is consistent with the principle that ROLLBACK cancels all effects of the commands since the savepoint. The same holds for locks acquired within a PL/pgSQL exception block: an error escape from the block @@ -882,8 +896,8 @@ SELECT SUM(value) FROM mytab WHERE class = 2; can be exclusive or shared locks. An exclusive row-level lock on a specific row is automatically acquired when the row is updated or deleted. The lock is held until the transaction commits or rolls - back, like table-level locks. Row-level locks do - not affect data querying; they only block writers to the same + back, just like table-level locks. Row-level locks do + not affect data querying; they block only writers to the same row. @@ -918,7 +932,7 @@ SELECT SUM(value) FROM mytab WHERE class = 2; used to control read/write access to table pages in the shared buffer pool. These locks are released immediately after a row is fetched or updated. Application developers normally need not be concerned with - page-level locks, but they are mentioned for completeness. + page-level locks, but they are mentioned here for completeness. @@ -1100,7 +1114,7 @@ SELECT pg_advisory_lock(q.id) FROM after the current query began). The row might have been modified or deleted by an already-committed transaction that committed after the SELECT started. 
- Even if the row is still valid now, it could be changed or + Even if the row is still valid now, it could be changed or deleted before the current transaction does a commit or rollback. @@ -1121,7 +1135,7 @@ SELECT pg_advisory_lock(q.id) FROM concurrent updates one must use SELECT FOR UPDATE, SELECT FOR SHARE, or an appropriate LOCK TABLE statement. (SELECT FOR UPDATE - or SELECT FOR SHARE lock just the + and SELECT FOR SHARE lock just the returned rows against concurrent updates, while LOCK TABLE locks the whole table.) This should be taken into account when porting applications to @@ -1151,9 +1165,9 @@ SELECT pg_advisory_lock(q.id) FROM - Note also that if one is - relying on explicit locking to prevent concurrent changes, one should use - either Read Committed mode, or in Serializable mode be careful to obtain + Note also that if one is relying on explicit locking to prevent concurrent + changes, one should either use Read Committed mode, or in Serializable + mode be careful to obtain locks before performing queries. A lock obtained by a serializable transaction guarantees that no other transactions modifying the table are still running, but if the snapshot seen by the @@ -1162,7 +1176,7 @@ SELECT pg_advisory_lock(q.id) FROM frozen at the start of its first query or data-modification command (SELECT, INSERT, UPDATE, or DELETE), so - it is often desirable to obtain locks explicitly before the snapshot is + it is possible to obtain locks explicitly before the snapshot is frozen. @@ -1178,7 +1192,7 @@ SELECT pg_advisory_lock(q.id) FROM Though PostgreSQL provides nonblocking read/write access to table - data, nonblocking read/write access is currently not offered for every + data, nonblocking read/write access is not currently offered for every index access method implemented in PostgreSQL. 
The various index types are handled as follows: @@ -1221,8 +1235,8 @@ SELECT pg_advisory_lock(q.id) FROM Short-term share/exclusive page-level locks are used for read/write access. Locks are released immediately after each - index row is fetched or inserted. But note insertion of a GIN-indexed - value usually produces several index key insertions + index row is fetched or inserted. But note that insertion of a + GIN-indexed value usually produces several index key insertions per row, so GIN might do substantial work for a single value's insertion. diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index 8744a5cb31..1a631d3d91 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1,4 +1,4 @@ - + Performance Tips @@ -45,8 +45,9 @@ table access methods: sequential scans, index scans, and bitmap index scans. If the query requires joining, aggregation, sorting, or other operations on the raw rows, then there will be additional nodes - above the scan nodes to perform these operations. Other nodes types - are also supported. The output + above the scan nodes to perform these operations. Again, + there is usually more than one possible way to do these operations, + so different node types can appear here too. The output of EXPLAIN has one line for each node in the plan tree, showing the basic node type plus the cost estimates that the planner made for the execution of that plan node. 
The first line (topmost node) @@ -83,24 +84,24 @@ EXPLAIN SELECT * FROM tenk1; - Estimated start-up cost, e.g., time expended before the output scan can start, - time to do the sorting in a sort node + Estimated start-up cost (time expended before the output scan can start, + e.g., time to do the sorting in a sort node) - Estimated total cost if all rows were to be retrieved (though they might - not be, e.g., a query with a LIMIT clause will stop - short of paying the total cost of the Limit node's + Estimated total cost (if all rows are retrieved, though they might + not be; e.g., a query with a LIMIT clause will stop + short of paying the total cost of the Limit plan node's input node) - Estimated number of rows output by this plan node (Again, only if - executed to completion.) + Estimated number of rows output by this plan node (again, only if + executed to completion) @@ -129,18 +130,18 @@ EXPLAIN SELECT * FROM tenk1; the cost only reflects things that the planner cares about. In particular, the cost does not consider the time spent transmitting result rows to the client, which could be an important - factor in the total elapsed time; but the planner ignores it because + factor in the real elapsed time; but the planner ignores it because it cannot change it by altering the plan. (Every correct plan will output the same row set, we trust.) - The EXPLAIN rows= value is a little tricky + The rows value is a little tricky because it is not the number of rows processed or scanned by the plan node. It is usually less, reflecting the estimated selectivity of any WHERE-clause conditions that are being - applied to the node. Ideally the top-level rows estimate will + applied at the node. Ideally the top-level rows estimate will approximate the number of rows actually returned, updated, or deleted by the query. 
@@ -197,7 +198,7 @@ EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 7000; - The actual number of rows this query would select is 7000, but the rows= + The actual number of rows this query would select is 7000, but the rows estimate is only approximate. If you try to duplicate this experiment, you will probably get a slightly different estimate; moreover, it will change after each ANALYZE command, because the @@ -234,7 +235,7 @@ EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 100; If the WHERE condition is selective enough, the planner might - switch to a simple index scan plan: + switch to a simple index scan plan: EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 3; @@ -248,8 +249,8 @@ EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 3; In this case the table rows are fetched in index order, which makes them even more expensive to read, but there are so few that the extra cost of sorting the row locations is not worth it. You'll most often see - this plan type in queries that fetch just a single row, and for queries - with an ORDER BY condition that matches the index + this plan type for queries that fetch just a single row, and for queries + that have an ORDER BY condition that matches the index order. @@ -320,7 +321,7 @@ WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2; - In this nested-loop join, the outer scan (upper) is the same bitmap index scan we + In this nested-loop join, the outer (upper) scan is the same bitmap index scan we saw earlier, and so its cost and row count are the same because we are applying the WHERE clause unique1 < 100 at that node. @@ -409,7 +410,7 @@ WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2;
Note that the actual time values are in milliseconds of - real time, whereas the cost= estimates are expressed in + real time, whereas the cost estimates are expressed in arbitrary units; so they are unlikely to match up. The thing to pay attention to is whether the ratios of actual time and estimated costs are consistent. @@ -419,11 +420,11 @@ WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2; In some query plans, it is possible for a subplan node to be executed more than once. For example, the inner index scan is executed once per outer row in the above nested-loop plan. In such cases, the - loops= value reports the + loops value reports the total number of executions of the node, and the actual time and rows values shown are averages per-execution. This is done to make the numbers comparable with the way that the cost estimates are shown. Multiply by - the loops= value to get the total time actually spent in + the loops value to get the total time actually spent in the node. @@ -780,7 +781,7 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; - When doing INSERTs, turn off autocommit and just do + When using multiple INSERTs, turn off autocommit and just do one commit at the end. (In plain SQL, this means issuing BEGIN at the start and COMMIT at the end. Some client libraries might @@ -824,7 +825,7 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; Note that loading a large number of rows using COPY is almost always faster than using - INSERT, even if the PREPARE ... INSERT is used and + INSERT, even if PREPARE is used and multiple insertions are batched into a single transaction. diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index 296ad5bb94..19c76b8ec7 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -1,4 +1,4 @@ - + PostgreSQL database system. 
Readers are encouraged to look at and + Queries @@ -133,8 +133,8 @@ FROM table_reference , table_r When a table reference names a table that is the parent of a - table inheritance hierarchy, the table reference produces rows - not only of that table but all of its descendant tables, unless the + table inheritance hierarchy, the table reference produces rows of + not only that table but all of its descendant tables, unless the key word ONLY precedes the table name. However, the reference produces only the columns that appear in the named table — any columns added in subtables are ignored. @@ -174,12 +174,12 @@ FROM table_reference , table_r - Produce every possible combination of rows from + For every possible combination of rows from T1 and T2 (i.e., a Cartesian product), - with output columns consisting of - all T1 columns - followed by all T2 columns. If + the joined table will contain a + row consisting of all columns in T1 + followed by all columns in T2. If the tables have N and M rows respectively, the joined table will have N * M rows. @@ -245,7 +245,7 @@ FROM table_reference , table_r equality of each of these pairs of columns. Furthermore, the output of JOIN USING has one column for each of the equated pairs of input columns, followed by the - other columns from each table. Thus, USING (a, b, + remaining columns from each table. Thus, USING (a, b, c) is equivalent to ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) with the exception that if ON is used there will be two columns @@ -300,7 +300,7 @@ FROM table_reference , table_r First, an inner join is performed. Then, for each row in T1 that does not satisfy the join condition with any row in - T2, a row is added with null values in columns of + T2, a joined row is added with null values in columns of T2. Thus, the joined table always has at least one row for each row in T1. @@ -323,7 +323,7 @@ FROM table_reference , table_r First, an inner join is performed. 
Then, for each row in T2 that does not satisfy the join condition with any row in - T1, a row is added with null values in columns of + T1, a joined row is added with null values in columns of T1. This is the converse of a left join: the result table will always have a row for each row in T2. @@ -337,9 +337,9 @@ FROM table_reference , table_r First, an inner join is performed. Then, for each row in T1 that does not satisfy the join condition with any row in - T2, a row is added with null values in columns of + T2, a joined row is added with null values in columns of T2. Also, for each row of T2 that does not satisfy the - join condition with any row in T1, a row with null + join condition with any row in T1, a joined row with null values in the columns of T1 is added. @@ -575,7 +575,7 @@ FROM table_reference AS When an alias is applied to the output of a JOIN clause, the alias hides the original - name referenced in the JOIN. For example: + name(s) within the JOIN. For example: SELECT a.* FROM my_table AS a JOIN your_table AS b ON ... @@ -686,8 +686,7 @@ SELECT * FROM vw_getfoo; In some cases it is useful to define table functions that can return different column sets depending on how they are invoked. To support this, the table function can be declared as returning - the pseudotype record, rather than SET OF. - When such a function is used in + the pseudotype record. When such a function is used in a query, the expected row structure must be specified in the query itself, so that the system can know how to parse and plan the query. Consider this example: @@ -757,11 +756,11 @@ FROM a NATURAL JOIN b WHERE b.val > 5 probably not as portable to other SQL database management systems, even though it is in the SQL standard. For outer joins there is no choice: they must be done in - the FROM clause. The ON/USING + the FROM clause. 
The ON or USING clause of an outer join is not equivalent to a - WHERE condition, because it affects the addition + WHERE condition, because it results in the addition of rows (for unmatched input rows) as well as the removal of rows - from the final result. + in the final result. @@ -780,7 +779,7 @@ SELECT ... FROM fdt WHERE c1 BETWEEN (SELECT c3 FROM t2 WHERE c2 = fdt.c1 + 10) SELECT ... FROM fdt WHERE EXISTS (SELECT c1 FROM t2 WHERE c2 > fdt.c1) - fdt is the table used in the + fdt is the table derived in the FROM clause. Rows that do not meet the search condition of the WHERE clause are eliminated from fdt. Notice the use of scalar subqueries as @@ -860,7 +859,7 @@ SELECT select_list In general, if a table is grouped, columns that are not - the same in the group cannot be referenced except in aggregate + listed in GROUP BY cannot be referenced except in aggregate expressions. An example with aggregate expressions is: => SELECT x, sum(y) FROM test1 GROUP BY x; @@ -880,7 +879,7 @@ SELECT select_list Grouping without aggregate expressions effectively calculates the - set of distinct values in a column. This can more clearly be achieved + set of distinct values in a column. This can also be achieved using the DISTINCT clause (see ). @@ -1088,7 +1087,7 @@ SELECT tbl1.*, tbl2.a FROM ... the row's values substituted for any column references. But the expressions in the select list do not have to reference any columns in the table expression of the FROM clause; - they can be constant arithmetic expressions as well. + they can be constant arithmetic expressions, for instance. @@ -1101,8 +1100,8 @@ SELECT tbl1.*, tbl2.a FROM ... - The entries in the select list can be assigned names for further - processing, perhaps for reference in an ORDER BY clause + The entries in the select list can be assigned names for subsequent + processing, such as for use in an ORDER BY clause or for display by the client application. For example: SELECT a AS value, b + c AS sum FROM ... 
@@ -1141,7 +1140,7 @@ SELECT a "value", b + c AS sum FROM ... The naming of output columns here is different from that done in the FROM clause (see ). It is possible - to rename the same column twice, but the name used in + to rename the same column twice, but the name assigned in the select list is the one that will be passed on. @@ -1346,9 +1345,9 @@ SELECT a, b FROM table1 ORDER BY a + b, c; The NULLS FIRST and NULLS LAST options can be used to determine whether nulls appear before or after non-null values - in the sort ordering. The default behavior is for null values sort as - if larger than all non-null values (NULLS FIRST), except - in DESC ordering, where NULLS LAST is the default. + in the sort ordering. By default, null values sort as if larger than any + non-null value; that is, NULLS FIRST is the default for + DESC order, and NULLS LAST otherwise. @@ -1366,7 +1365,7 @@ SELECT a + b AS sum, c FROM table1 ORDER BY sum; SELECT a, max(b) FROM table1 GROUP BY a ORDER BY 1; both of which sort by the first output column. Note that an output - column name has to stand alone, e.g., it cannot be used in an expression + column name has to stand alone, that is, it cannot be used in an expression — for example, this is not correct: SELECT a + b AS sum, c FROM table1 ORDER BY sum + c; -- wrong @@ -1429,10 +1428,10 @@ SELECT select_list When using LIMIT, it is important to use an - ORDER BY clause that constrains the result rows in a + ORDER BY clause that constrains the result rows into a unique order. Otherwise you will get an unpredictable subset of the query's rows. You might be asking for the tenth through - twentieth rows, but tenth through twentieth using what ordering? The + twentieth rows, but tenth through twentieth in what ordering? The ordering is unknown, unless you specified ORDER BY. @@ -1472,7 +1471,7 @@ SELECT select_list VALUES ( expression [, ...] ) [, ...] - Each parenthesized list of expressions generates a row in the table expression. 
+ Each parenthesized list of expressions generates a row in the table. The lists must all have the same number of elements (i.e., the number of columns in the table), and corresponding entries in each list must have compatible data types. The actual data type assigned to each column diff --git a/doc/src/sgml/query.sgml b/doc/src/sgml/query.sgml index c81c321134..49a1221ea0 100644 --- a/doc/src/sgml/query.sgml +++ b/doc/src/sgml/query.sgml @@ -1,4 +1,4 @@ - + The <acronym>SQL</acronym> Language @@ -53,7 +53,7 @@
The \i command reads in commands from the - specified file. The psql -s option puts you in + specified file. psql's -s option puts you in single step mode which pauses before sending each statement to the server. The commands used in this section are in the file basics.sql. @@ -150,7 +150,7 @@ CREATE TABLE weather ( int is the normal integer type. real is a type for storing single precision floating-point numbers. date should be self-explanatory. (Yes, the column of - type date is also named date. + type date is also named date. This might be convenient or confusing — you choose.) @@ -165,7 +165,7 @@ CREATE TABLE weather ( and a rich set of geometric types. PostgreSQL can be customized with an arbitrary number of user-defined data types. Consequently, type - names are not special key words in the syntax except where required to + names are not key words in the syntax, except where required to support special cases in the SQL standard. @@ -291,7 +291,7 @@ COPY weather FROM '/home/user/weather.txt'; tables from which to retrieve the data), and an optional qualification (the part that specifies any restrictions). For example, to retrieve all the rows of table - weather, type: + weather, type: SELECT * FROM weather; @@ -450,9 +450,10 @@ SELECT DISTINCT city of the same or different tables at one time is called a join query. As an example, say you wish to list all the weather records together with the location of the - associated city. To do that, we need to compare the city column of - each row of the weather table with the name column of all rows in - the cities table, and select the pairs of rows where these values match. + associated city. To do that, we need to compare the city + column of each row of the weather table with the + name column of all rows in the cities + table, and select the pairs of rows where these values match. This is only a conceptual model. 
The join is usually performed @@ -485,8 +486,8 @@ SELECT * There is no result row for the city of Hayward. This is because there is no matching entry in the - cities table for Hayward, so the join - ignores the unmatched rows in the weather table. We will see + cities table for Hayward, so the join + ignores the unmatched rows in the weather table. We will see shortly how this can be fixed. @@ -494,9 +495,9 @@ SELECT * There are two columns containing the city name. This is - correct because the columns from the - weather and the - cities tables are concatenated. In + correct because the lists of columns from the + weather and + cities tables are concatenated. In practice this is undesirable, though, so you will probably want to list the output columns explicitly rather than using *: @@ -556,10 +557,10 @@ SELECT * Now we will figure out how we can get the Hayward records back in. What we want the query to do is to scan the - weather table and for each row to find the - matching cities row(s). If no matching row is + weather table and for each row to find the + matching cities row(s). If no matching row is found we want some empty values to be substituted - for the cities table's columns. This kind + for the cities table's columns. This kind of query is called an outer join. (The joins we have seen so far are inner joins.) The command looks like this: @@ -603,10 +604,10 @@ SELECT * to find all the weather records that are in the temperature range of other weather records. So we need to compare the temp_lo and temp_hi columns of - each weather row to the + each weather row to the temp_lo and temp_hi columns of all other - weather rows. We can do this with the + weather rows. We can do this with the following query: @@ -756,7 +757,7 @@ SELECT city, max(temp_lo) which gives us the same results for only the cities that have all - temp_lo values below 40. Finally, if we only care about + temp_lo values below 40. 
Finally, if we only care about cities whose names begin with S, we might do: diff --git a/doc/src/sgml/rowtypes.sgml b/doc/src/sgml/rowtypes.sgml index d699c39f4a..a95f4c583d 100644 --- a/doc/src/sgml/rowtypes.sgml +++ b/doc/src/sgml/rowtypes.sgml @@ -1,4 +1,4 @@ - + Composite Types @@ -41,7 +41,7 @@ CREATE TYPE inventory_item AS ( NULL) can presently be included. Note that the AS keyword is essential; without it, the system will think a different kind of CREATE TYPE command is meant, and you will get odd syntax - error. + errors. @@ -68,8 +68,8 @@ SELECT price_extension(item, 10) FROM on_hand; - Whenever you create a table, a composite type is automatically - created also, with the same name as the table, to represent the table's + Whenever you create a table, a composite type is also automatically + created, with the same name as the table, to represent the table's row type. For example, had we said: CREATE TABLE inventory_item ( @@ -250,7 +250,7 @@ INSERT INTO mytab (complex_col.r, complex_col.i) VALUES(1.1, 2.2); The external text representation of a composite value consists of items that are interpreted according to the I/O conversion rules for the individual field types, plus decoration that indicates the composite structure. - The decoration consists of parentheses + The decoration consists of parentheses (( and )) around the whole value, plus commas (,) between adjacent items. Whitespace outside the parentheses is ignored, but within the parentheses it is considered part of the field value, and might or might not be @@ -264,7 +264,7 @@ INSERT INTO mytab (complex_col.r, complex_col.i) VALUES(1.1, 2.2); - As shown previously, when writing a composite value you can use double + As shown previously, when writing a composite value you can write double quotes around any individual field value. You must do so if the field value would otherwise confuse the composite-value parser. 
In particular, fields containing diff --git a/doc/src/sgml/start.sgml b/doc/src/sgml/start.sgml index 11bd7895d1..863011a9dd 100644 --- a/doc/src/sgml/start.sgml +++ b/doc/src/sgml/start.sgml @@ -1,4 +1,4 @@ - + Getting Started @@ -74,7 +74,7 @@ A server process, which manages the database files, accepts connections to the database from client applications, and - performs database actions on the behalf of the clients. The + performs database actions on behalf of the clients. The database server program is called postgres. postgres @@ -164,8 +164,8 @@ createdb: command not found then PostgreSQL was not installed properly. Either it was not - installed at all or your shell's search path was not set correctly. Try - calling the command with an absolute path instead: + installed at all or your shell's search path was not set to include it. + Try calling the command with an absolute path instead: $ /usr/local/pgsql/bin/createdb mydb @@ -177,8 +177,7 @@ createdb: command not found Another response could be this: -createdb: could not connect to database postgres: could not connect -to server: No such file or directory +createdb: could not connect to database postgres: could not connect to server: No such file or directory Is the server running locally and accepting connections on Unix domain socket "/tmp/.s.PGSQL.5432"? diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index 8e4df1bdff..ee7f814ea8 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -1,4 +1,4 @@ - + @@ -135,8 +135,9 @@ main file (a/k/a main fork), each table and index has a free space map (see ), which stores information about free space available in the relation. The free space map is stored in a file named with the filenode number plus the suffix _fsm. Tables also have a -visibility map fork, with the suffix _vm, to track which pages are -known to have no dead tuples and therefore need no vacuuming. 
+visibility map, stored in a fork with the suffix +_vm, to track which pages are known to have no dead tuples. +The visibility map is described further in . @@ -417,6 +418,38 @@ information stored in free space maps (see ). + + +Visibility Map + + + Visibility Map + +VMVisibility Map + + +Each heap relation has a Visibility Map +(VM) to keep track of which pages contain only tuples that are known to be +visible to all active transactions. It's stored +alongside the main relation data in a separate relation fork, named after the +filenode number of the relation, plus a _vm suffix. For example, +if the filenode of a relation is 12345, the VM is stored in a file called +12345_vm, in the same directory as the main relation file. +Note that indexes do not have VMs. + + + +The visibility map simply stores one bit per heap page. A set bit means +that all tuples on the page are known to be visible to all transactions. +This means that the page does not contain any tuples that need to be vacuumed; +in future it might also be used to avoid visiting the page for visibility +checks. The map is conservative in the sense that we +make sure that whenever a bit is set, we know the condition is true, but if +a bit is not set, it might or might not be true. + + + + Database Page Layout diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index cf929f0b72..bc562e1f97 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -1,4 +1,4 @@ - + SQL Syntax @@ -442,7 +442,7 @@ SELECT 'foo' 'bar'; - The zero-byte (null byte) character cannot be in a string constant. + The character with the code zero cannot be in a string constant. @@ -929,8 +929,8 @@ CAST ( 'string' AS type ) - Comment are removed from the input stream before further syntax - analysis and are effectively replaced by whitespace. + A comment is removed from the input stream before further syntax + analysis and is effectively replaced by whitespace. 
@@ -1244,9 +1244,9 @@ SELECT 3 OPERATOR(pg_catalog.+) 4; - Another value expression in parentheses, useful to group + Another value expression in parentheses (used to group subexpressions and override - precedence.parenthesis + precedenceparenthesis) @@ -1725,7 +1725,7 @@ CAST ( expression AS type casts that are marked OK to apply implicitly in the system catalogs. Other casts must be invoked with explicit casting syntax. This restriction is intended to prevent - surprising conversions from being silently applied. + surprising conversions from being applied silently. @@ -1805,7 +1805,7 @@ SELECT name, (SELECT max(pop) FROM cities WHERE cities.state = states.name) An array constructor is an expression that builds an - array using values for its member elements. A simple array + array value using values for its member elements. A simple array constructor consists of the key word ARRAY, a left square bracket [, a list of expressions (separated by commas) for the @@ -1936,7 +1936,7 @@ SELECT ARRAY(SELECT oid FROM pg_proc WHERE proname LIKE 'bytea%'); - A row constructor is an expression that builds a row (also + A row constructor is an expression that builds a row value (also called a composite value) using values for its member fields. A row constructor consists of the key word ROW, a left parenthesis, zero or more diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index d3e7a148ea..547c0153ac 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -1,4 +1,4 @@ - + Full Text Search @@ -389,7 +389,7 @@ text @@ text Text search parsers and templates are built from low-level C functions; - therefore C programming ability is required to develop new ones, and + therefore it requires C programming ability to develop new ones, and superuser privileges to install one into a database. (There are examples of add-on parsers and templates in the contrib/ area of the PostgreSQL distribution.) 
Since dictionaries and @@ -519,7 +519,7 @@ CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector(config_name, body)); recording which configuration was used for each index entry. This would be useful, for example, if the document collection contained documents in different languages. Again, - queries that wish to use the index must be phrased to match, e.g., + queries that are meant to use the index must be phrased to match, e.g., WHERE to_tsvector(config_name, body) @@ 'a & b'. @@ -860,7 +860,8 @@ SELECT plainto_tsquery('english', 'The Fat & Rats:C'); - ts_rank( weights float4[], vector tsvector, query tsquery , normalization integer ) returns float4 + ts_rank( weights float4[], vector tsvector, + query tsquery , normalization integer ) returns float4 @@ -1042,7 +1043,7 @@ LIMIT 10; Ranking can be expensive since it requires consulting the tsvector of each matching document, which can be I/O bound and therefore slow. Unfortunately, it is almost impossible to avoid since - practical queries often result in a large number of matches. + practical queries often result in large numbers of matches. @@ -1068,7 +1069,7 @@ LIMIT 10; ts_headline accepts a document along - with a query, and returns an excerpt of + with a query, and returns an excerpt from the document in which terms from the query are highlighted. The configuration to be used to parse the document can be specified by config; if config @@ -1085,8 +1086,8 @@ LIMIT 10; - StartSel, StopSel: the strings to delimit - query words appearing in the document, to distinguish + StartSel, StopSel: the strings with + which to delimit query words appearing in the document, to distinguish them from other excerpted words. You must double-quote these strings if they contain spaces or commas. 
@@ -1188,7 +1189,7 @@ SELECT id, ts_headline(body, q), rank FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank FROM apod, to_tsquery('stars') q WHERE ti @@ q - ORDER BY rank DESC + ORDER BY rank DESC LIMIT 10) AS foo; @@ -1678,9 +1679,9 @@ SELECT title, body FROM messages WHERE tsv @@ to_tsquery('title & body'); - A limitation of built-in triggers is that they treat all the + A limitation of these built-in triggers is that they treat all the input columns alike. To process columns differently — for - example, to weigh title differently from body — it is necessary + example, to weight title differently from body — it is necessary to write a custom trigger. Here is an example using PL/pgSQL as the trigger language: @@ -1722,8 +1723,8 @@ ON messages FOR EACH ROW EXECUTE PROCEDURE messages_trigger(); - ts_stat(sqlquery text, weights text, - OUT word text, OUT ndoc integer, + ts_stat(sqlquery text, weights text, + OUT word text, OUT ndoc integer, OUT nentry integer) returns setof record @@ -2087,7 +2088,7 @@ SELECT alias, description, token FROM ts_debug('http://example.com/stuff/index.h by the parser, each dictionary in the list is consulted in turn, until some dictionary recognizes it as a known word. If it is identified as a stop word, or if no dictionary recognizes the token, it will be - discarded and not indexed or searched. + discarded and not indexed or searched for. 
The general rule for configuring a list of dictionaries is to place first the most narrow, most specific dictionary, then the more general dictionaries, finishing with a very general dictionary, like @@ -2439,7 +2440,7 @@ CREATE TEXT SEARCH DICTIONARY thesaurus_simple ( ALTER TEXT SEARCH CONFIGURATION russian - ALTER MAPPING FOR asciiword, asciihword, hword_asciipart + ALTER MAPPING FOR asciiword, asciihword, hword_asciipart WITH thesaurus_simple; @@ -2679,9 +2680,9 @@ CREATE TEXT SEARCH DICTIONARY english_stem ( - As an example, we will create a configuration - pg by duplicating the built-in - english configuration. + As an example we will create a configuration + pg, starting by duplicating the built-in + english configuration: CREATE TEXT SEARCH CONFIGURATION public.pg ( COPY = pg_catalog.english ); @@ -3137,7 +3138,7 @@ SELECT plainto_tsquery('supernovae stars'); - There are two kinds of indexes which can be used to speed up full text + There are two kinds of indexes that can be used to speed up full text searches. Note that indexes are not mandatory for full text searching, but in cases where a column is searched on a regular basis, an index is @@ -3204,7 +3205,7 @@ SELECT plainto_tsquery('supernovae stars'); to check the actual table row to eliminate such false matches. (PostgreSQL does this automatically when needed.) GiST indexes are lossy because each document is represented in the - index using a fixed-length signature. The signature is generated by hashing + index by a fixed-length signature. The signature is generated by hashing each word into a random bit in an n-bit string, with all these bits OR-ed together to produce an n-bit document signature. When two words hash to the same bit position there will be a false match. 
If all words in diff --git a/doc/src/sgml/typeconv.sgml b/doc/src/sgml/typeconv.sgml index beb74f9a57..a1b04ce417 100644 --- a/doc/src/sgml/typeconv.sgml +++ b/doc/src/sgml/typeconv.sgml @@ -1,4 +1,4 @@ - + Type Conversion @@ -161,7 +161,7 @@ categories, including boolean, numeric, user-defined. (For a list see ; but note it is also possible to create custom type categories.) Within each category there can be one or more preferred types, which -are selected when there is ambiguity. With careful selection +are preferred when there is a choice of possible types. With careful selection of preferred types and available implicit casts, it is possible to ensure that ambiguous expressions (those with multiple candidate parsing solutions) can be resolved in a useful way. @@ -189,7 +189,7 @@ calls in the query. Additionally, if a query usually requires an implicit conversion for a function, and if the user then defines a new function with the correct argument types, the parser -should use this new function and no longer do implicit conversion to use the old function. @@ -206,10 +206,12 @@ should use this new function and no longer do implicit conversion using the old - The specific operator invoked is determined by the following - steps. Note that this procedure is affected - by the precedence of the involved operators. See for more information. + The specific operator that is referenced by an operator expression + is determined using the following procedure. + Note that this procedure is indirectly affected + by the precedence of the involved operators, since that will determine + which sub-expressions are taken to be the inputs of which operators. + See for more information. @@ -220,7 +222,7 @@ should use this new function and no longer do implicit conversion using the old Select the operators to be considered from the pg_operator system catalog.
If a non-schema-qualified operator name was used (the usual case), the operators -considered are those with a matching name and argument count that are +considered are those with the matching name and argument count that are visible in the current search path (see ). If a qualified operator name was given, only operators in the specified schema are considered. @@ -250,8 +252,8 @@ operators considered), use it. If one argument of a binary operator invocation is of the unknown type, then assume it is the same type as the other argument for this check. -Cases involving two unknown types will never find a match at -this step. +Invocations involving two unknown inputs, or a unary operator +with an unknown input, will never find a match at this step. @@ -390,9 +392,9 @@ In this case there is no initial hint for which type to use, since no types are specified in the query. So, the parser looks for all candidate operators and finds that there are candidates accepting both string-category and bit-string-category inputs. Since string category is preferred when available, -that category is selected, and the +that category is selected, and then the preferred type for strings, text, is used as the specific -type to resolve the unknown literals. +type to resolve the unknown literals as. @@ -459,8 +461,8 @@ SELECT ~ CAST('20' AS int8) AS "negation"; - The specific function to be invoked is determined - according to the following steps. + The specific function that is referenced by a function call + is determined using the following procedure. @@ -471,7 +473,7 @@ SELECT ~ CAST('20' AS int8) AS "negation"; Select the functions to be considered from the pg_proc system catalog. If a non-schema-qualified function name was used, the functions -considered are those with a matching name and argument count that are +considered are those with the matching name and argument count that are visible in the current search path (see ). 
If a qualified function name was given, only functions in the specified schema are considered. @@ -554,7 +556,7 @@ Look for the best match. -Discard candidate functions in which the input types do not match +Discard candidate functions for which the input types do not match and cannot be converted (using an implicit conversion) to match. unknown literals are assumed to be convertible to anything for this purpose. If only one @@ -615,9 +617,10 @@ Some examples follow. Rounding Function Argument Type Resolution -There is only one round function which takes two -arguments; it takes a first argument of numeric and -a second argument of integer. So the following query automatically converts +There is only one round function that takes two +arguments; it takes a first argument of type numeric and +a second argument of type integer. +So the following query automatically converts the first argument of type integer to numeric: