From 87289ff35ca372f8c88d26cc9bffa942dd8d34a8 Mon Sep 17 00:00:00 2001
From: Bruce Momjian <bruce@momjian.us>
Date: Wed, 28 Dec 2005 03:25:32 +0000
Subject: [PATCH] Add regression tests for CSV and \., and add automatic
 quoting of a single column dump that has a \. value, so the load works
 properly.  I also added documentation describing this issue.

---
 doc/src/sgml/ref/copy.sgml          | 33 ++++++++++++++++--------
 src/backend/commands/copy.c         | 40 +++++++++++++++++++----------
 src/test/regress/expected/copy2.out |  3 +++
 src/test/regress/sql/copy2.sql      | 10 ++++++++
 4 files changed, 61 insertions(+), 25 deletions(-)
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 2d8dc945716..becaecf5b82 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.70 2005/10/15 20:12:33 neilc Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.71 2005/12/28 03:25:32 momjian Exp $
 PostgreSQL documentation
 -->
 
@@ -511,17 +511,28 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
     comparisons for specific columns.
    </para>
 
+   <para>
+    Because backslash is not a special character in the <literal>CSV</>
+    format, <literal>\.</>, the end-of-data marker, could also appear
+    as a data value.  To avoid any misinterpretation, a <literal>\.</>
+    data value appearing as a lone entry on a line is automatically
+    quoted on output; on input, a quoted <literal>\.</> is not interpreted
+    as the end-of-data marker.  If you are loading a single-column table that
+    might have a column value of <literal>\.</>, you might need to quote
+    that value in the input file.
+   </para>
+
    <note>
-	<para>
-	 In <literal>CSV</> mode, all characters are significant. A quoted value 
-	 surrounded by white space, or any characters other than 
-	 <literal>DELIMITER</>, will include those characters. This can cause 
-	 errors if  you import data from a system that pads <literal>CSV</> 
-	 lines with white space out to some fixed width. If such a situation 
-	 arises you might need to preprocess the <literal>CSV</> file to remove 
-	 the trailing white space, before importing the data into 
-	 <productname>PostgreSQL</>. 
-	</para>
+    <para>
+     In <literal>CSV</> mode, all characters are significant. A quoted value 
+     surrounded by white space, or any characters other than 
+     <literal>DELIMITER</>, will include those characters. This can cause 
+     errors if  you import data from a system that pads <literal>CSV</> 
+     lines with white space out to some fixed width. If such a situation 
+     arises you might need to preprocess the <literal>CSV</> file to remove 
+     the trailing white space, before importing the data into 
+     <productname>PostgreSQL</>. 
+    </para>
    </note>
 
    <note>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index ae1d40e2ef3..f97aafc2034 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.256 2005/12/27 18:10:48 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.257 2005/12/28 03:25:32 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -244,7 +244,7 @@ static Datum CopyReadBinaryAttribute(CopyState cstate,
 						bool *isnull);
 static void CopyAttributeOutText(CopyState cstate, char *server_string);
 static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
-					bool use_quote);
+					bool use_quote, bool single_attr);
 static List *CopyGetAttnums(Relation rel, List *attnamelist);
 static char *limit_printout_length(const char *str);
 
@@ -1284,7 +1284,8 @@ CopyTo(CopyState cstate)
 
 				colname = NameStr(attr[attnum - 1]->attname);
 
-				CopyAttributeOutCSV(cstate, colname, false);
+				CopyAttributeOutCSV(cstate, colname, false,
+									list_length(cstate->attnumlist) == 1);
 			}
 
 			CopySendEndOfRow(cstate);
@@ -1359,7 +1360,8 @@ CopyTo(CopyState cstate)
 														   value));
 					if (cstate->csv_mode)
 						CopyAttributeOutCSV(cstate, string,
-											force_quote[attnum - 1]);
+											force_quote[attnum - 1],
+											list_length(cstate->attnumlist) == 1);
 					else
 						CopyAttributeOutText(cstate, string);
 				}
@@ -2968,7 +2970,7 @@ CopyAttributeOutText(CopyState cstate, char *server_string)
  */
 static void
 CopyAttributeOutCSV(CopyState cstate, char *server_string,
-					bool use_quote)
+					bool use_quote, bool single_attr)
 {
 	char	   *string;
 	char		c;
@@ -2993,17 +2995,27 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string,
 	 */
 	if (!use_quote)
 	{
-		for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
-		{
-			if (c == delimc || c == quotec || c == '\n' || c == '\r')
+		/*
+		 *	Because '\.' can be a data value, quote it if it appears
+		 *	alone on a line so it is not interpreted as the end-of-data
+		 *	marker.
+		 */
+		if (single_attr && strcmp(string, "\\.") == 0)
+ 			use_quote = true;
+ 		else
+ 		{
+			for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
 			{
-				use_quote = true;
-				break;
+				if (c == delimc || c == quotec || c == '\n' || c == '\r')
+				{
+					use_quote = true;
+					break;
+				}
+				if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
+					mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
+				else
+					mblen = 1;
 			}
-			if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
-				mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
-			else
-				mblen = 1;
 		}
 	}
 
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 78f20605702..524e88cbae6 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -194,6 +194,9 @@ COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
 --test that we read consecutive LFs properly
 CREATE TEMP TABLE testnl (a int, b text, c int);
 COPY testnl FROM stdin CSV;
+-- test end of copy marker
+CREATE TEMP TABLE testeoc (a text);
+COPY testeoc FROM stdin CSV;
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index add8214d19d..d962d2e048e 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -139,6 +139,16 @@ COPY testnl FROM stdin CSV;
 inside",2
 \.
 
+-- test end of copy marker
+CREATE TEMP TABLE testeoc (a text);
+
+COPY testeoc FROM stdin CSV;
+a\.
+\.b
+c\.d
+"\."
+\.
+
 
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();