From a2367f89ffc8de48fe681d2187eb108e3f469adc Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 5 Sep 2009 23:58:01 +0000 Subject: [PATCH] Add a note warning that COPY BINARY is very datatype-specific. Per a complaint from Gordon Shannon. --- doc/src/sgml/ref/copy.sgml | 66 ++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 2ea68de912..8ab9ff7eca 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -1,5 +1,5 @@ @@ -24,30 +24,30 @@ PostgreSQL documentation COPY tablename [ ( column [, ...] ) ] FROM { 'filename' | STDIN } - [ [ WITH ] + [ [ WITH ] [ BINARY ] [ OIDS ] [ DELIMITER [ AS ] 'delimiter' ] [ NULL [ AS ] 'null string' ] [ CSV [ HEADER ] - [ QUOTE [ AS ] 'quote' ] + [ QUOTE [ AS ] 'quote' ] [ ESCAPE [ AS ] 'escape' ] [ FORCE NOT NULL column [, ...] ] COPY { tablename [ ( column [, ...] ) ] | ( query ) } TO { 'filename' | STDOUT } - [ [ WITH ] + [ [ WITH ] [ BINARY ] [ OIDS ] [ DELIMITER [ AS ] 'delimiter' ] [ NULL [ AS ] 'null string' ] [ CSV [ HEADER ] - [ QUOTE [ AS ] 'quote' ] + [ QUOTE [ AS ] 'quote' ] [ ESCAPE [ AS ] 'escape' ] [ FORCE QUOTE { column [, ...] | * } ] - + Description @@ -79,7 +79,7 @@ COPY { tablename [ ( - + Parameters @@ -215,7 +215,7 @@ COPY { tablename [ ( Specifies that the file contains a header line with the names of each - column in the file. On output, the first line contains the column + column in the file. On output, the first line contains the column names from the table, and on input, the first line is ignored. @@ -249,7 +249,7 @@ COPY { tablename [ ( CSV COPY TO mode, forces quoting to be used for all non-NULL values in each specified column. NULL output is never quoted. If * is specified, - non-NULL values for all columns of the table will be + non-NULL values for all columns of the table will be quoted. @@ -300,6 +300,10 @@ COPY count somewhat faster than the normal text mode, but a binary-format file is less portable across machine architectures and PostgreSQL versions. + Also, the binary format is very data type specific; for example + it will not work to output binary data from a smallint column + and read it into an integer column, even though that would work + fine in text format. @@ -379,7 +383,7 @@ COPY count - + File Formats @@ -479,7 +483,7 @@ COPY count - Presently, COPY TO will never emit an octal or + Presently, COPY TO will never emit an octal or hex-digits backslash sequence, but it does use the other sequences listed above for those control characters. @@ -498,7 +502,7 @@ COPY count data newlines and carriage returns to the \n and \r sequences respectively. At present it is possible to represent a data carriage return by a backslash and carriage - return, and to represent a data newline by a backslash and newline. + return, and to represent a data newline by a backslash and newline. However, these representations might not be accepted in future releases. They are also highly vulnerable to corruption if the COPY file is transferred across different machines (for example, from Unix to Windows @@ -506,7 +510,7 @@ COPY count - COPY TO will terminate each row with a Unix-style + COPY TO will terminate each row with a Unix-style newline (\n). Servers running on Microsoft Windows instead output carriage return/newline (\r\n), but only for COPY to a server file; for consistency across platforms, @@ -543,7 +547,7 @@ COPY count non-NULL values in specific columns. - + The CSV format has no standard way to distinguish a NULL value from an empty string. PostgreSQL's COPY handles this by @@ -557,28 +561,28 @@ COPY count comparisons for specific columns. - + Because backslash is not a special character in the CSV format, \., the end-of-data marker, could also appear as a data value. To avoid any misinterpretation, a \. - data value appearing as a lone entry on a line is automatically - quoted on output, and on input, if quoted, is not interpreted as the + data value appearing as a lone entry on a line is automatically + quoted on output, and on input, if quoted, is not interpreted as the end-of-data marker. If you are loading a file created by another - application that has a single unquoted column and might have a - value of \., you might need to quote that value in the + application that has a single unquoted column and might have a + value of \., you might need to quote that value in the input file. - In CSV mode, all characters are significant. A quoted value - surrounded by white space, or any characters other than - DELIMITER, will include those characters. This can cause - errors if you import data from a system that pads CSV - lines with white space out to some fixed width. If such a situation - arises you might need to preprocess the CSV file to remove - the trailing white space, before importing the data into - PostgreSQL. + In CSV mode, all characters are significant. A quoted value + surrounded by white space, or any characters other than + DELIMITER, will include those characters. This can cause + errors if you import data from a system that pads CSV + lines with white space out to some fixed width. If such a situation + arises you might need to preprocess the CSV file to remove + the trailing white space, before importing the data into + PostgreSQL. @@ -600,7 +604,7 @@ COPY count programs cannot process. - + @@ -747,7 +751,7 @@ OIDs to be shown as null if that ever proves desirable. - + Examples @@ -806,10 +810,10 @@ ZW ZIMBABWE - + Compatibility - + There is no COPY statement in the SQL standard.