diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 850af1f0775..b4a226876a9 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -1,5 +1,5 @@
@@ -74,7 +74,7 @@ COPY [ BINARY ] table [ WITH OIDS ]
filename
- The absolute Unix file name of the input or output file.
+ The absolute Unix path name of the input or output file.
@@ -225,7 +225,7 @@ ERROR: reason
By default, a text copy uses a tab ("\t") character as a delimiter
between fields. The field delimiter may be changed to any other single
character with the keyword phrase USING DELIMITERS. Characters
- in data fields which happen to match the delimiter character will
+ in data fields that happen to match the delimiter character will
be backslash quoted.
@@ -265,8 +265,8 @@ ERROR: reason
by the PostgreSQL user (the user ID the
server runs as), not the client.
COPY naming a file is only allowed to database
- superusers, since it allows writing on any file that the backend has
- privileges to write on.
+ superusers, since it allows reading or writing any file that the backend
+ has privileges to access.
@@ -297,57 +297,109 @@ ERROR: reason
File Formats
- 2001-01-02
+ 2002-02-12
Text Format
- When COPY TO is used without the BINARY option,
- the file generated will have each row (instance) on a single line, with each
- column (attribute) separated by the delimiter character. Embedded
- delimiter characters will be preceded by a backslash character
- ("\"). The attribute values themselves are strings generated by the
- output function associated with each attribute type. The output
- function for a type should not try to generate the backslash
- character; this will be handled by COPY itself.
+ When COPY is used without the BINARY option,
+ the file read or written is a text file with one line per table row.
+ Columns (attributes) in a row are separated by the delimiter character.
+ The attribute values themselves are strings generated by the
+ output function, or acceptable to the input function, of each
+ attribute's data type. The specified null-value string is used in
+ place of attributes that are NULL.
+
+
+ If WITH OIDS is specified, the OID is read or written as the first column,
+ preceding the user data columns. (An error is raised if WITH OIDS is
+ specified for a table that does not have OIDs.)
+
+
+ End of data can be represented by a single line containing just
+ backslash-period (\.). An end-of-data marker is
+ not necessary when reading from a Unix file, since the end of file
+ serves perfectly well; but an end marker must be provided when copying
+ data to or from a client application.
+
+
+ Backslash characters (\) may be used in the
+ COPY data to quote data characters that might otherwise
+ be taken as row or column delimiters. In particular, the following
+ characters must be preceded by a backslash if they appear
+ as part of an attribute value: backslash itself, newline, and the current
+ delimiter character.
+
+
+ The following special backslash sequences are recognized by
+ COPY FROM:
+
+
+
+
+
+ Sequence
+ Represents
+
+
+
+
+
+ \b
+ Backspace (ASCII 8)
+
+
+ \f
+ Form feed (ASCII 12)
+
+
+ \n
+ Newline (ASCII 10)
+
+
+ \r
+ Carriage return (ASCII 13)
+
+
+ \t
+ Tab (ASCII 9)
+
+
+ \v
+ Vertical tab (ASCII 11)
+
+
+ \digits
+ Backslash followed by one to three octal digits specifies
+ the character with that numeric code
+
+
+
+
+
+ Presently, COPY TO will never emit an octal-digits
+ backslash sequence, but it does use the other sequences listed above
+ for those control characters.
+
+
+ Never put a backslash before a data character N or period
+ (.). Such pairs will be mistaken for the default null string
+ or the end-of-data marker, respectively. Any other backslashed character
+ that is not mentioned in the above table will be taken to represent itself.
+
+
+ It is strongly recommended that applications generating COPY data convert
+ data newlines and carriage returns to the \n and
+ \r sequences respectively. At present
+ (PostgreSQL 7.2 and older versions) it is
+ possible to represent a data carriage return without any special quoting,
+ and to represent a data newline by a backslash and newline. However,
+ these representations will not be accepted by default in future releases.
- The actual format for each instance is
-
-<attr1><separator><attr2><separator>...<separator><attrn><newline>
-
Note that the end of each row is marked by a Unix-style newline
- ("\n"). COPY FROM will not behave as desired
- if given a file containing DOS- or Mac-style newlines.
-
-
- The OID is emitted as the first column if WITH OIDS is specified.
- (An error is raised if WITH OIDS is specified for a table that does not
- have OIDs.)
-
-
- If COPY TO is sending its output to standard
- output instead of a file, after the last row it will send a backslash ("\")
- and a period (".") followed by a newline.
- Similarly, if COPY FROM is reading
- from standard input, it will expect a backslash ("\") and a period
- (".") followed by a newline, as the first three characters on a
- line to denote end-of-file. However, COPY FROM
- will terminate correctly (followed by the backend itself) if the
- input connection is closed before this special end-of-file pattern is
- found.
-
-
- The backslash character has other special meanings. A literal backslash
- character is represented as two
- consecutive backslashes ("\\"). A literal tab character is represented
- as a backslash and a tab. (If you are using something other than tab
- as the column delimiter, backslash that delimiter character to include
- it in data.) A literal newline character is
- represented as a backslash and a newline. When loading text data
- not generated by PostgreSQL,
- you will need to convert backslash
- characters ("\") to double-backslashes ("\\") to ensure that they
- are loaded properly.
+ ("\n"). Presently, COPY FROM will not behave as
+ desired if given a file containing DOS- or Mac-style newlines.
+ This is expected to change in future releases.
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index b944279d7e1..f42b865687c 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.144 2001/12/04 21:19:57 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.145 2002/02/12 21:25:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -41,7 +41,7 @@
#endif
#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
-#define VALUE(c) ((c) - '0')
+#define OCTVALUE(c) ((c) - '0')
/* non-export function prototypes */
@@ -83,13 +83,13 @@ static int server_encoding;
* Internal communications functions
*/
static void CopySendData(void *databuf, int datasize, FILE *fp);
-static void CopySendString(char *str, FILE *fp);
+static void CopySendString(const char *str, FILE *fp);
static void CopySendChar(char c, FILE *fp);
static void CopyGetData(void *databuf, int datasize, FILE *fp);
static int CopyGetChar(FILE *fp);
static int CopyGetEof(FILE *fp);
static int CopyPeekChar(FILE *fp);
-static void CopyDonePeek(FILE *fp, int c, int pickup);
+static void CopyDonePeek(FILE *fp, int c, bool pickup);
/*
* CopySendData sends output data either to the file
@@ -118,9 +118,9 @@ CopySendData(void *databuf, int datasize, FILE *fp)
}
static void
-CopySendString(char *str, FILE *fp)
+CopySendString(const char *str, FILE *fp)
{
- CopySendData(str, strlen(str), fp);
+ CopySendData((void *) str, strlen(str), fp);
}
static void
@@ -178,10 +178,12 @@ CopyGetEof(FILE *fp)
/*
* CopyPeekChar reads a byte in "peekable" mode.
+ *
* after each call to CopyPeekChar, a call to CopyDonePeek _must_
* follow, unless EOF was returned.
- * CopyDonePeek will either take the peeked char off the steam
- * (if pickup is != 0) or leave it on the stream (if pickup == 0)
+ *
+ * CopyDonePeek will either take the peeked char off the stream
+ * (if pickup is true) or leave it on the stream (if pickup is false).
*/
static int
CopyPeekChar(FILE *fp)
@@ -199,15 +201,13 @@ CopyPeekChar(FILE *fp)
}
static void
-CopyDonePeek(FILE *fp, int c, int pickup)
+CopyDonePeek(FILE *fp, int c, bool pickup)
{
if (!fp)
{
if (pickup)
{
- /*
- * We want to pick it up
- */
+ /* We want to pick it up */
(void) pq_getbyte();
}
/* If we didn't want to pick it up, just leave it where it sits */
@@ -219,7 +219,7 @@ CopyDonePeek(FILE *fp, int c, int pickup)
/* We don't want to pick it up - so put it back in there */
ungetc(c, fp);
}
- /* If we wanted to pick it up, it's already there */
+ /* If we wanted to pick it up, it's already done */
}
}
@@ -1078,31 +1078,30 @@ CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_
{
int val;
- val = VALUE(c);
+ val = OCTVALUE(c);
c = CopyPeekChar(fp);
if (ISOCTAL(c))
{
- val = (val << 3) + VALUE(c);
- CopyDonePeek(fp, c, 1); /* Pick up the
- * character! */
+ val = (val << 3) + OCTVALUE(c);
+ CopyDonePeek(fp, c, true /*pick up*/);
c = CopyPeekChar(fp);
if (ISOCTAL(c))
{
- CopyDonePeek(fp, c, 1); /* pick up! */
- val = (val << 3) + VALUE(c);
+ val = (val << 3) + OCTVALUE(c);
+ CopyDonePeek(fp, c, true /*pick up*/);
}
else
{
if (c == EOF)
goto endOfFile;
- CopyDonePeek(fp, c, 0); /* Return to stream! */
+ CopyDonePeek(fp, c, false /*put back*/);
}
}
else
{
if (c == EOF)
goto endOfFile;
- CopyDonePeek(fp, c, 0); /* Return to stream! */
+ CopyDonePeek(fp, c, false /*put back*/);
}
c = val & 0377;
}
@@ -1144,6 +1143,7 @@ CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_
}
appendStringInfoCharMacro(&attribute_buf, c);
#ifdef MULTIBYTE
+ /* XXX shouldn't this be done even when encoding is the same? */
if (client_encoding != server_encoding)
{
/* get additional bytes of the char, if any */
@@ -1190,15 +1190,18 @@ CopyAttributeOut(FILE *fp, char *server_string, char *delim)
{
char *string;
char c;
+ char delimc = delim[0];
#ifdef MULTIBYTE
+ bool same_encoding;
char *string_start;
int mblen;
int i;
#endif
#ifdef MULTIBYTE
- if (client_encoding != server_encoding)
+ same_encoding = (server_encoding == client_encoding);
+ if (!same_encoding)
{
string = (char *) pg_server_to_client((unsigned char *) server_string,
strlen(server_string));
@@ -1207,31 +1210,64 @@ CopyAttributeOut(FILE *fp, char *server_string, char *delim)
else
{
string = server_string;
- string_start = NULL; /* unused, but keep compiler quiet */
+ string_start = NULL;
}
#else
string = server_string;
#endif
#ifdef MULTIBYTE
- for (; (mblen = (server_encoding == client_encoding ? 1 : pg_encoding_mblen(client_encoding, string))) &&
- ((c = *string) != '\0'); string += mblen)
+ for (; (c = *string) != '\0'; string += mblen)
#else
for (; (c = *string) != '\0'; string++)
#endif
{
- if (c == delim[0] || c == '\n' || c == '\\')
- CopySendChar('\\', fp);
#ifdef MULTIBYTE
- for (i = 0; i < mblen; i++)
- CopySendChar(*(string + i), fp);
-#else
- CopySendChar(c, fp);
+ mblen = 1;
#endif
+ switch (c)
+ {
+ case '\b':
+ CopySendString("\\b", fp);
+ break;
+ case '\f':
+ CopySendString("\\f", fp);
+ break;
+ case '\n':
+ CopySendString("\\n", fp);
+ break;
+ case '\r':
+ CopySendString("\\r", fp);
+ break;
+ case '\t':
+ CopySendString("\\t", fp);
+ break;
+ case '\v':
+ CopySendString("\\v", fp);
+ break;
+ case '\\':
+ CopySendString("\\\\", fp);
+ break;
+ default:
+ if (c == delimc)
+ CopySendChar('\\', fp);
+ CopySendChar(c, fp);
+#ifdef MULTIBYTE
+ /* XXX shouldn't this be done even when encoding is same? */
+ if (!same_encoding)
+ {
+ /* send additional bytes of the char, if any */
+ mblen = pg_encoding_mblen(client_encoding, string);
+ for (i = 1; i < mblen; i++)
+ CopySendChar(string[i], fp);
+ }
+#endif
+ break;
+ }
}
#ifdef MULTIBYTE
- if (client_encoding != server_encoding)
+ if (string_start)
pfree(string_start); /* pfree pg_server_to_client result */
#endif
}