mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-02-05 19:09:58 +08:00
Add regression tests for CSV and \., and automatically quote a \. value
in a single-column dump so that reloading it works properly. Also add documentation describing this issue.
This commit is contained in:
parent
1b184c990f
commit
87289ff35c
@ -1,5 +1,5 @@
|
||||
<!--
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.70 2005/10/15 20:12:33 neilc Exp $
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.71 2005/12/28 03:25:32 momjian Exp $
|
||||
PostgreSQL documentation
|
||||
-->
|
||||
|
||||
@ -511,17 +511,28 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
|
||||
comparisons for specific columns.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Because backslash is not a special character in the <literal>CSV</>
|
||||
format, <literal>\.</>, the end-of-data marker, could also appear
|
||||
as a data value. To avoid any misinterpretation, a <literal>\.</>
|
||||
data value appearing as a lone entry on a line is automatically
|
||||
quoted on output, and on input, if quoted, is not interpreted as the
|
||||
end-of-data marker. If you are loading a file created by another
|
||||
application that has a single unquoted column and might have a value of
|
||||
<literal>\.</>, you might need to quote that value in the input file.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
In <literal>CSV</> mode, all characters are significant. A quoted value
|
||||
surrounded by white space, or any characters other than
|
||||
<literal>DELIMITER</>, will include those characters. This can cause
|
||||
errors if you import data from a system that pads <literal>CSV</>
|
||||
lines with white space out to some fixed width. If such a situation
|
||||
arises you might need to preprocess the <literal>CSV</> file to remove
|
||||
the trailing white space, before importing the data into
|
||||
<productname>PostgreSQL</>.
|
||||
</para>
|
||||
<para>
|
||||
In <literal>CSV</> mode, all characters are significant. A quoted value
|
||||
surrounded by white space, or any characters other than
|
||||
<literal>DELIMITER</>, will include those characters. This can cause
|
||||
errors if you import data from a system that pads <literal>CSV</>
|
||||
lines with white space out to some fixed width. If such a situation
|
||||
arises you might need to preprocess the <literal>CSV</> file to remove
|
||||
the trailing white space, before importing the data into
|
||||
<productname>PostgreSQL</>.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<note>
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.256 2005/12/27 18:10:48 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.257 2005/12/28 03:25:32 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -244,7 +244,7 @@ static Datum CopyReadBinaryAttribute(CopyState cstate,
|
||||
bool *isnull);
|
||||
static void CopyAttributeOutText(CopyState cstate, char *server_string);
|
||||
static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
|
||||
bool use_quote);
|
||||
bool use_quote, bool single_attr);
|
||||
static List *CopyGetAttnums(Relation rel, List *attnamelist);
|
||||
static char *limit_printout_length(const char *str);
|
||||
|
||||
@ -1284,7 +1284,8 @@ CopyTo(CopyState cstate)
|
||||
|
||||
colname = NameStr(attr[attnum - 1]->attname);
|
||||
|
||||
CopyAttributeOutCSV(cstate, colname, false);
|
||||
CopyAttributeOutCSV(cstate, colname, false,
|
||||
list_length(cstate->attnumlist) == 1);
|
||||
}
|
||||
|
||||
CopySendEndOfRow(cstate);
|
||||
@ -1359,7 +1360,8 @@ CopyTo(CopyState cstate)
|
||||
value));
|
||||
if (cstate->csv_mode)
|
||||
CopyAttributeOutCSV(cstate, string,
|
||||
force_quote[attnum - 1]);
|
||||
force_quote[attnum - 1],
|
||||
list_length(cstate->attnumlist) == 1);
|
||||
else
|
||||
CopyAttributeOutText(cstate, string);
|
||||
}
|
||||
@ -2968,7 +2970,7 @@ CopyAttributeOutText(CopyState cstate, char *server_string)
|
||||
*/
|
||||
static void
|
||||
CopyAttributeOutCSV(CopyState cstate, char *server_string,
|
||||
bool use_quote)
|
||||
bool use_quote, bool single_attr)
|
||||
{
|
||||
char *string;
|
||||
char c;
|
||||
@ -2993,17 +2995,27 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string,
|
||||
*/
|
||||
if (!use_quote)
|
||||
{
|
||||
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
|
||||
{
|
||||
if (c == delimc || c == quotec || c == '\n' || c == '\r')
|
||||
/*
|
||||
* Because '\.' can be a data value, quote it if it appears
|
||||
* alone on a line so it is not interpreted as the end-of-data
|
||||
* marker.
|
||||
*/
|
||||
if (single_attr && strcmp(string, "\\.") == 0)
|
||||
use_quote = true;
|
||||
else
|
||||
{
|
||||
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
|
||||
{
|
||||
use_quote = true;
|
||||
break;
|
||||
if (c == delimc || c == quotec || c == '\n' || c == '\r')
|
||||
{
|
||||
use_quote = true;
|
||||
break;
|
||||
}
|
||||
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
|
||||
mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
|
||||
else
|
||||
mblen = 1;
|
||||
}
|
||||
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
|
||||
mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
|
||||
else
|
||||
mblen = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -194,6 +194,9 @@ COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
|
||||
--test that we read consecutive LFs properly
|
||||
CREATE TEMP TABLE testnl (a int, b text, c int);
|
||||
COPY testnl FROM stdin CSV;
|
||||
-- test end of copy marker
|
||||
CREATE TEMP TABLE testeoc (a text);
|
||||
COPY testeoc FROM stdin CSV;
|
||||
DROP TABLE x, y;
|
||||
DROP FUNCTION fn_x_before();
|
||||
DROP FUNCTION fn_x_after();
|
||||
|
@ -139,6 +139,16 @@ COPY testnl FROM stdin CSV;
|
||||
inside",2
|
||||
\.
|
||||
|
||||
-- test end of copy marker
|
||||
CREATE TEMP TABLE testeoc (a text);
|
||||
|
||||
COPY testeoc FROM stdin CSV;
|
||||
a\.
|
||||
\.b
|
||||
c\.d
|
||||
"\."
|
||||
\.
|
||||
|
||||
|
||||
DROP TABLE x, y;
|
||||
DROP FUNCTION fn_x_before();
|
||||
|
Loading…
Reference in New Issue
Block a user