diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 1e12cbf3516..7d53d5d3aad 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -1,5 +1,5 @@ @@ -26,7 +26,10 @@ COPY tablename [ ( delimiter' ] - [ NULL [ AS ] 'null string' ] ] + [ NULL [ AS ] 'null string' ] + [ CSV [ QUOTE [ AS ] 'quote' ] + [ ESCAPE [ AS ] 'escape' ] + [ LITERAL column [, ...] ] COPY tablename [ ( column [, ...] ) ] TO { 'filename' | STDOUT } @@ -34,7 +37,10 @@ COPY tablename [ ( delimiter' ] - [ NULL [ AS ] 'null string' ] ] + [ NULL [ AS ] 'null string' ] + [ CSV [ QUOTE [ AS ] 'quote' ] + [ ESCAPE [ AS ] 'escape' ] + [ FORCE column [, ...] ] @@ -146,7 +152,8 @@ COPY tablename [ ( The single character that separates columns within each row - (line) of the file. The default is a tab character. + (line) of the file. The default is a tab character in text mode, + a comma in CSV mode. @@ -156,20 +163,86 @@ COPY tablename [ ( The string that represents a null value. The default is - \N (backslash-N). You might prefer an empty - string, for example. + \N (backslash-N) in text mode, and an empty + value with no quotes in CSV mode. You might prefer an + empty string even in text mode for cases where you don't want to + distinguish nulls from empty strings. - On a COPY FROM, any data item that matches + When using COPY FROM, any data item that matches this string will be stored as a null value, so you should make sure that you use the same string as you used with COPY TO. + + + + CSV + + + Enables Comma Separated Variable (CSV) mode. (Also called + Comma Separated Value). It sets the default DELIMITER to + comma, and QUOTE and ESCAPE values to + double-quote. + + + + + + quote + + + Specifies the quotation character in CSV mode. + The default is double-quote. + + + + + + escape + + + Specifies the character that should appear before a QUOTE + data character value in CSV mode. The default is the + QUOTE value (usually double-quote). 
+ + + + + + FORCE + + + In CSV COPY TO mode, forces quoting + to be used for all non-NULL values in each specified + column. NULL output is never quoted. + + + + + + LITERAL + + + In CSV COPY FROM mode, for each column specified, + do not do a null string comparison; instead load the value + literally. QUOTE and ESCAPE processing are still + performed. + + + If the null string is '' (the default + in CSV mode), a missing input value (delimiter, + delimiter) will load as a zero-length string. Delimiter, quote, + quote, delimiter is always treated as a zero-length string on input. + + + + @@ -233,6 +306,17 @@ COPY tablename [ ( + + COPY input and output are affected by + DateStyle. For portability with other + PostgreSQL installations which might use + non-default DateStyle settings, + DateStyle should be set to ISO before + using COPY. In CSV mode, use ISO + or a DateStyle setting appropriate for the + external application. + + COPY stops operation at the first error. This should not lead to problems in the event of a COPY @@ -253,7 +337,8 @@ COPY tablename [ ( When COPY is used without the BINARY option, - the data read or written is a text file with one line per table row. + the data read or written is a text file with one line per table row, + unless CSV mode is used. Columns in a row are separated by the delimiter character. The column values themselves are strings generated by the output function, or acceptable to the input function, of each @@ -379,6 +464,63 @@ COPY tablename [ ( + + CSV Format + + + This format is used for importing and exporting the Comma + Separated Variable (CSV) file format used by many other + programs, such as spreadsheets. Instead of the escaping used by + PostgreSQL's standard text mode, it + produces and recognizes the common CSV escaping mechanism. + + + + The values in each record are separated by the DELIMITER + character. 
If the value contains the delimiter character, the + QUOTE character, the NULL string, a carriage + return, or line feed character, then the whole value is prefixed and + suffixed by the QUOTE character, and any occurrence + within the value of a QUOTE character or the + ESCAPE character is preceded by the escape character. + You can also use FORCE to force quotes when outputting + non-NULL values in specific columns. + + + + In general, the CSV format has no way to distinguish a + NULL from an empty string. + PostgreSQL's COPY handles this by + quoting. A NULL is output as the NULL string + and is not quoted, while a data value matching the NULL string + is quoted. Therefore, using the default settings, a NULL is + written as an unquoted empty string, while an empty string is + written with double quotes (""). Reading values follows + similar rules. You can use LITERAL to prevent NULL + input comparisons for specific columns. + + + + + CSV mode will both recognize and produce CSV files with quoted + values containing embedded carriage returns and line feeds. Thus + the files are not strictly one line per table row like text-mode + files. + + + + + + Many programs produce strange and occasionally perverse CSV files, + so the file format is more a convention than a standard. Thus you + might encounter some files that cannot be imported using this + mechanism, and COPY might produce files that other + programs cannot process. 
+ + + + + Binary Format diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 206fe2719bb..c41080e1bac 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -1,5 +1,5 @@ @@ -711,6 +711,10 @@ testdb=> [ oids ] [ delimiter [as] 'character' ] [ null [as] 'string' ] + [ csv [ quote [as] 'character' ] + [ escape [as] 'character' ] + [ force column_list ] + [ literal column_list ] ] diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 07c9ff4d531..abb153616aa 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.220 2004/04/15 22:36:03 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.221 2004/04/19 17:22:30 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -70,7 +70,8 @@ typedef enum CopyDest typedef enum CopyReadResult { NORMAL_ATTR, - END_OF_LINE + END_OF_LINE, + UNTERMINATED_FIELD } CopyReadResult; /* @@ -130,15 +131,22 @@ static bool line_buf_converted; /* non-export function prototypes */ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print); + char *delim, char *null_print, bool csv_mode, char *quote, char *escape, + List *force_atts); static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print); + char *delim, char *null_print, bool csv_mode, char *quote, char *escape, + List *literal_atts); static bool CopyReadLine(void); static char *CopyReadAttribute(const char *delim, const char *null_print, CopyReadResult *result, bool *isnull); +static char *CopyReadAttributeCSV(const char *delim, const char *null_print, + char *quote, char *escape, + CopyReadResult *result, bool *isnull); static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo, Oid typelem, bool *isnull); static void 
CopyAttributeOut(char *string, char *delim); +static void CopyAttributeOutCSV(char *string, char *delim, char *quote, + char *escape, bool force_quote); static List *CopyGetAttnums(Relation rel, List *attnamelist); static void limit_printout_length(StringInfo buf); @@ -682,8 +690,15 @@ DoCopy(const CopyStmt *stmt) List *attnumlist; bool binary = false; bool oids = false; + bool csv_mode = false; char *delim = NULL; + char *quote = NULL; + char *escape = NULL; char *null_print = NULL; + List *force = NIL; + List *literal = NIL; + List *force_atts = NIL; + List *literal_atts = NIL; Relation rel; AclMode required_access = (is_from ? ACL_INSERT : ACL_SELECT); AclResult aclresult; @@ -725,6 +740,46 @@ DoCopy(const CopyStmt *stmt) errmsg("conflicting or redundant options"))); null_print = strVal(defel->arg); } + else if (strcmp(defel->defname, "csv") == 0) + { + if (csv_mode) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + csv_mode = intVal(defel->arg); + } + else if (strcmp(defel->defname, "quote") == 0) + { + if (quote) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + quote = strVal(defel->arg); + } + else if (strcmp(defel->defname, "escape") == 0) + { + if (escape) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + escape = strVal(defel->arg); + } + else if (strcmp(defel->defname, "force") == 0) + { + if (force) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + force = (List *)defel->arg; + } + else if (strcmp(defel->defname, "literal") == 0) + { + if (literal) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + literal = (List *)defel->arg; + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); @@ -735,6 +790,11 @@ DoCopy(const CopyStmt *stmt) (errcode(ERRCODE_SYNTAX_ERROR), errmsg("cannot specify 
DELIMITER in BINARY mode"))); + if (binary && csv_mode) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify CSV in BINARY mode"))); + if (binary && null_print) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -742,10 +802,92 @@ DoCopy(const CopyStmt *stmt) /* Set defaults */ if (!delim) - delim = "\t"; - + delim = csv_mode ? "," : "\t"; + if (!null_print) - null_print = "\\N"; + null_print = csv_mode ? "" : "\\N"; + + if (csv_mode) + { + if (!quote) + quote = "\""; + if (!escape) + escape = quote; + } + + /* + * Only single-character delimiter strings are supported. + */ + if (strlen(delim) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY delimiter must be a single character"))); + + /* + * Check quote + */ + if (!csv_mode && quote != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY quote available only in CSV mode"))); + + if (csv_mode && strlen(quote) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY quote must be a single character"))); + + /* + * Check escape + */ + if (!csv_mode && escape != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY escape available only in CSV mode"))); + + if (csv_mode && strlen(escape) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY escape must be a single character"))); + + /* + * Check force + */ + if (!csv_mode && force != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force available only in CSV mode"))); + if (force != NIL && is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force only available using COPY TO"))); + + /* + * Check literal + */ + if (!csv_mode && literal != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY literal available only in CSV mode"))); + if (literal != NIL && !is_from) + ereport(ERROR, + 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY literal only available using COPY FROM"))); + + /* + * Don't allow the delimiter to appear in the null string. + */ + if (strchr(null_print, delim[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY delimiter must not appear in the NULL specification"))); + + /* + * Don't allow the csv quote char to appear in the null string. + */ + if (csv_mode && strchr(null_print, quote[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("CSV quote character must not appear in the NULL specification"))); /* * Open and lock the relation, using the appropriate lock type. @@ -771,22 +913,6 @@ DoCopy(const CopyStmt *stmt) errhint("Anyone can COPY to stdout or from stdin. " "psql's \\copy command also works for anyone."))); - /* - * Presently, only single-character delimiter strings are supported. - */ - if (strlen(delim) != 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("COPY delimiter must be a single character"))); - - /* - * Don't allow the delimiter to appear in the null string. 
- */ - if (strchr(null_print, delim[0]) != NULL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("COPY delimiter must not appear in the NULL specification"))); - /* * Don't allow COPY w/ OIDs to or from a table without them */ @@ -801,6 +927,52 @@ DoCopy(const CopyStmt *stmt) */ attnumlist = CopyGetAttnums(rel, attnamelist); + /* + * Check that FORCE references valid COPY columns + */ + if (force) + { + TupleDesc tupDesc = RelationGetDescr(rel); + Form_pg_attribute *attr = tupDesc->attrs; + List *cur; + + force_atts = CopyGetAttnums(rel, force); + + foreach(cur, force_atts) + { + int attnum = lfirsti(cur); + + if (!intMember(attnum, attnumlist)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE column \"%s\" not referenced by COPY", + NameStr(attr[attnum - 1]->attname)))); + } + } + + /* + * Check that LITERAL references valid COPY columns + */ + if (literal) + { + List *cur; + TupleDesc tupDesc = RelationGetDescr(rel); + Form_pg_attribute *attr = tupDesc->attrs; + + literal_atts = CopyGetAttnums(rel, literal); + + foreach(cur, literal_atts) + { + int attnum = lfirsti(cur); + + if (!intMember(attnum, attnumlist)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("LITERAL column \"%s\" not referenced by COPY", + NameStr(attr[attnum - 1]->attname)))); + } + } + /* * Set up variables to avoid per-attribute overhead. 
*/ @@ -864,7 +1036,8 @@ DoCopy(const CopyStmt *stmt) errmsg("\"%s\" is a directory", filename))); } } - CopyFrom(rel, attnumlist, binary, oids, delim, null_print); + CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode, + quote, escape, literal_atts); } else { /* copy from database to file */ @@ -926,7 +1099,8 @@ DoCopy(const CopyStmt *stmt) errmsg("\"%s\" is a directory", filename))); } } - CopyTo(rel, attnumlist, binary, oids, delim, null_print); + CopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode, + quote, escape, force_atts); } if (!pipe) @@ -958,7 +1132,8 @@ DoCopy(const CopyStmt *stmt) */ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print) + char *delim, char *null_print, bool csv_mode, char *quote, + char *escape, List *force_atts) { HeapTuple tuple; TupleDesc tupDesc; @@ -967,6 +1142,7 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, int attr_count; Form_pg_attribute *attr; FmgrInfo *out_functions; + bool *force_quote; Oid *elements; bool *isvarlena; char *string; @@ -988,11 +1164,12 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, out_functions = (FmgrInfo *) palloc((num_phys_attrs + 1) * sizeof(FmgrInfo)); elements = (Oid *) palloc((num_phys_attrs + 1) * sizeof(Oid)); isvarlena = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool)); + force_quote = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool)); foreach(cur, attnumlist) { int attnum = lfirsti(cur); Oid out_func_oid; - + if (binary) getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid, &out_func_oid, &elements[attnum - 1], @@ -1002,6 +1179,11 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, &out_func_oid, &elements[attnum - 1], &isvarlena[attnum - 1]); fmgr_info(out_func_oid, &out_functions[attnum - 1]); + + if (intMember(attnum, force_atts)) + force_quote[attnum - 1] = true; + else + force_quote[attnum - 1] = false; } /* @@ -1051,7 +1233,6 @@ CopyTo(Relation rel, 
List *attnumlist, bool binary, bool oids, while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL) { bool need_delim = false; - CHECK_FOR_INTERRUPTS(); MemoryContextReset(mycontext); @@ -1113,7 +1294,15 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, value, ObjectIdGetDatum(elements[attnum - 1]), Int32GetDatum(attr[attnum - 1]->atttypmod))); - CopyAttributeOut(string, delim); + if (csv_mode) + { + CopyAttributeOutCSV(string, delim, quote, escape, + (strcmp(string, null_print) == 0 || + force_quote[attnum - 1])); + } + else + CopyAttributeOut(string, delim); + } else { @@ -1148,6 +1337,7 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, pfree(out_functions); pfree(elements); pfree(isvarlena); + pfree(force_quote); } @@ -1243,7 +1433,8 @@ limit_printout_length(StringInfo buf) */ static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print) + char *delim, char *null_print, bool csv_mode, char *quote, + char *escape, List *literal_atts) { HeapTuple tuple; TupleDesc tupDesc; @@ -1256,9 +1447,10 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, Oid *elements; Oid oid_in_element; ExprState **constraintexprs; + bool *literal_nullstr; bool hasConstraints = false; - int i; int attnum; + int i; List *cur; Oid in_func_oid; Datum *values; @@ -1317,6 +1509,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, defmap = (int *) palloc((num_phys_attrs + 1) * sizeof(int)); defexprs = (ExprState **) palloc((num_phys_attrs + 1) * sizeof(ExprState *)); constraintexprs = (ExprState **) palloc0((num_phys_attrs + 1) * sizeof(ExprState *)); + literal_nullstr = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool)); for (attnum = 1; attnum <= num_phys_attrs; attnum++) { @@ -1333,6 +1526,11 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, &in_func_oid, &elements[attnum - 1]); fmgr_info(in_func_oid, &in_functions[attnum - 1]); + if 
(intMember(attnum, literal_atts)) + literal_nullstr[attnum - 1] = true; + else + literal_nullstr[attnum - 1] = false; + /* Get default info if needed */ if (!intMember(attnum, attnumlist)) { @@ -1389,9 +1587,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, ExecBSInsertTriggers(estate, resultRelInfo); if (!binary) - { file_has_oids = oids; /* must rely on user to tell us this... */ - } else { /* Read and verify binary header */ @@ -1500,6 +1696,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, if (file_has_oids) { + /* can't be in CSV mode here */ string = CopyReadAttribute(delim, null_print, &result, &isnull); @@ -1538,14 +1735,27 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, errmsg("missing data for column \"%s\"", NameStr(attr[m]->attname)))); - string = CopyReadAttribute(delim, null_print, - &result, &isnull); - - if (isnull) + if (csv_mode) { - /* we read an SQL NULL, no need to do anything */ + string = CopyReadAttributeCSV(delim, null_print, quote, + escape, &result, &isnull); + if (result == UNTERMINATED_FIELD) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("unterminated CSV quoted field"))); } else + string = CopyReadAttribute(delim, null_print, + &result, &isnull); + + if (csv_mode && isnull && literal_nullstr[m]) + { + string = null_print; /* set to NULL string */ + isnull = false; + } + + /* we read an SQL NULL, no need to do anything */ + if (!isnull) { copy_attname = NameStr(attr[m]->attname); values[m] = FunctionCall3(&in_functions[m], @@ -1732,11 +1942,12 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, pfree(values); pfree(nulls); - if (!binary) - { - pfree(in_functions); - pfree(elements); - } + pfree(in_functions); + pfree(elements); + pfree(defmap); + pfree(defexprs); + pfree(constraintexprs); + pfree(literal_nullstr); ExecDropTupleTable(tupleTable, true); @@ -2070,6 +2281,152 @@ CopyReadAttribute(const char *delim, const char *null_print, 
return attribute_buf.data; } + +/* + * Read the value of a single attribute in CSV mode, + * performing de-escaping as needed. Escaping does not follow the normal + * PostgreSQL text mode, but instead "standard" (i.e. common) CSV usage. + * + * Quoted fields can span lines, in which case the line end is embedded + * in the returned string. + * + * null_print is the null marker string. Note that this is compared to + * the pre-de-escaped input string (thus if it is quoted it is not a NULL). + * + * *result is set to indicate what terminated the read: + * NORMAL_ATTR: column delimiter + * END_OF_LINE: end of line + * UNTERMINATED_FIELD no quote detected at end of a quoted field + * + * In any case, the string read up to the terminator (or end of file) + * is returned. + * + * *isnull is set true or false depending on whether the input matched + * the null marker. Note that the caller cannot check this since the + * returned string will be the post-de-escaping equivalent, which may + * look the same as some valid data string. 
+ *----------
+ */
+
+static char *
+CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote,
+					 char *escape, CopyReadResult *result, bool *isnull)
+{
+	char		delimc = delim[0];
+	char		quotec = quote[0];
+	char		escapec = escape[0];
+	char		c;
+	int			start_cursor = line_buf.cursor;
+	int			end_cursor = start_cursor;
+	int			input_len;
+	bool		in_quote = false;
+	bool		saw_quote = false;
+
+	/* reset attribute_buf to empty */
+	attribute_buf.len = 0;
+	attribute_buf.data[0] = '\0';
+
+	/* assume end of line unless an unquoted delimiter is found below */
+	*result = END_OF_LINE;
+
+	for (;;)
+	{
+		/* multiline quoted field: re-add the line ending, fetch next line */
+		if (in_quote && line_buf.cursor >= line_buf.len)
+		{
+			bool		done;
+
+			switch(eol_type)
+			{
+				case EOL_NL:
+					appendStringInfoString(&attribute_buf,"\n");
+					break;
+				case EOL_CR:
+					appendStringInfoString(&attribute_buf,"\r");
+					break;
+				case EOL_CRNL:
+					appendStringInfoString(&attribute_buf,"\r\n");
+					break;
+				case EOL_UNKNOWN:
+					/* shouldn't happen - just keep going */
+					break;
+			}
+
+			copy_lineno++;
+			done = CopyReadLine();
+			if (done && line_buf.len == 0)
+				break;
+			start_cursor = line_buf.cursor;
+		}
+
+		end_cursor = line_buf.cursor;
+		if (line_buf.cursor >= line_buf.len)
+			break;
+		c = line_buf.data[line_buf.cursor++];
+		/*
+		 * unquoted field delimiter
+		 */
+		if (!in_quote && c == delimc)
+		{
+			*result = NORMAL_ATTR;
+			break;
+		}
+		/*
+		 * start of quoted field (or part of field)
+		 */
+		if (!in_quote && c == quotec)
+		{
+			saw_quote = true;
+			in_quote = true;
+			continue;
+		}
+		/*
+		 * escape within a quoted field
+		 */
+		if (in_quote && c == escapec)
+		{
+			/*
+			 * peek at the next char only if one remains on this line, and
+			 * escape it if it is an escape char or a quote char
+			 */
+			if (line_buf.cursor < line_buf.len)
+			{
+				char		nextc = line_buf.data[line_buf.cursor];
+				if (nextc == escapec || nextc == quotec)
+				{
+					appendStringInfoCharMacro(&attribute_buf, nextc);
+					line_buf.cursor++;
+					continue;
+				}
+			}
+		}
+		/*
+		 * end of quoted field.
+		 * Must do this test after testing for escape in case quote char
+		 * and escape char are the same (which is the common case).
+		 */
+		if (in_quote && c == quotec)
+		{
+			in_quote = false;
+			continue;
+		}
+		appendStringInfoCharMacro(&attribute_buf, c);
+	}
+
+	if (in_quote)
+		*result = UNTERMINATED_FIELD;
+
+	/* check whether raw input matched null marker */
+	input_len = end_cursor - start_cursor;
+	if (!saw_quote && input_len == strlen(null_print) &&
+		strncmp(&line_buf.data[start_cursor], null_print, input_len) == 0)
+		*isnull = true;
+	else
+		*isnull = false;
+
+	return attribute_buf.data;
+}
+
 /*
  * Read a binary attribute
  */
@@ -2195,6 +2552,73 @@ CopyAttributeOut(char *server_string, char *delim)
 	}
 }
 
+/*
+ * Send CSV representation of one attribute, with encoding conversion.
+ * Quote and escape characters are escaped only inside a quoted value.
+ */
+static void
+CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
+					char *escape, bool force_quote)
+{
+	char	   *string;
+	char		c;
+	char		delimc = delim[0];
+	char		quotec = quote[0];
+	char		escapec = escape[0];
+	bool		need_quote = force_quote;
+	char	   *test_string;
+	bool		same_encoding;
+	int			mblen;
+	int			i;
+
+	same_encoding = (server_encoding == client_encoding);
+	if (!same_encoding)
+		string = (char *) pg_server_to_client((unsigned char *) server_string,
+											  strlen(server_string));
+	else
+		string = server_string;
+
+	/* have to run through the string twice,
+	 * first time to see if it needs quoting, second to actually send it
+	 */
+
+	for(test_string = string;
+		!need_quote && (c = *test_string) != '\0';
+		test_string += mblen)
+	{
+		if (c == delimc || c == quotec || c == '\n' || c == '\r')
+			need_quote = true;
+		if (!same_encoding)
+			mblen = pg_encoding_mblen(client_encoding, test_string);
+		else
+			mblen = 1;
+	}
+
+	if (need_quote)
+		CopySendChar(quotec);
+
+	for (; (c = *string) != '\0'; string += mblen)
+	{
+		if (need_quote && (c == quotec || c == escapec))
+			CopySendChar(escapec);	/* reader only de-escapes inside quotes */
+
+		CopySendChar(c);
+
+		if (!same_encoding)
+		{
+			/* send additional bytes of the
char, if any */ + mblen = pg_encoding_mblen(client_encoding, string); + for (i = 1; i < mblen; i++) + CopySendChar(string[i]); + } + else + mblen = 1; + } + + if (need_quote) + CopySendChar(quotec); +} + /* * CopyGetAttnums - build an integer list of attnums to be copied * diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 84efc2875ee..461a39dbb70 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.450 2004/04/05 03:07:26 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.451 2004/04/19 17:22:30 momjian Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -343,7 +343,7 @@ static void doNegateFloat(Value *v); CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT COMMITTED CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB - CREATEUSER CROSS CURRENT_DATE CURRENT_TIME + CREATEUSER CROSS CSV CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS @@ -370,7 +370,7 @@ static void doNegateFloat(Value *v); KEY LANCOMPILER LANGUAGE LARGE_P LAST_P LEADING LEFT LEVEL LIKE LIMIT - LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION + LISTEN LITERAL LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE @@ -386,6 +386,8 @@ static void doNegateFloat(Value *v); PRECISION PRESERVE PREPARE PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE + QUOTE + READ REAL RECHECK REFERENCES REINDEX RELATIVE_P RENAME REPEATABLE REPLACE RESET RESTART RESTRICT RETURNS REVOKE RIGHT ROLLBACK ROW ROWS RULE @@ -1360,6 +1362,26 @@ copy_opt_item: { $$ = makeDefElem("null", (Node *)makeString($3)); } + | CSV + { + $$ = makeDefElem("csv", (Node *)makeInteger(TRUE)); + } + | QUOTE opt_as Sconst + { + $$ = makeDefElem("quote", (Node *)makeString($3)); + } + | ESCAPE opt_as Sconst + { + 
$$ = makeDefElem("escape", (Node *)makeString($3)); + } + | FORCE columnList + { + $$ = makeDefElem("force", (Node *)$2); + } + | LITERAL columnList + { + $$ = makeDefElem("literal", (Node *)$2); + } ; /* The following exist for backward compatibility */ @@ -7420,6 +7442,7 @@ unreserved_keyword: | COPY | CREATEDB | CREATEUSER + | CSV | CURSOR | CYCLE | DATABASE @@ -7473,6 +7496,7 @@ unreserved_keyword: | LAST_P | LEVEL | LISTEN + | LITERAL | LOAD | LOCAL | LOCATION @@ -7507,6 +7531,7 @@ unreserved_keyword: | PRIVILEGES | PROCEDURAL | PROCEDURE + | QUOTE | READ | RECHECK | REINDEX diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index 54ac767126f..613d22ac30c 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.147 2004/03/11 01:47:40 ishii Exp $ + * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.148 2004/04/19 17:22:31 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -90,6 +90,7 @@ static const ScanKeyword ScanKeywords[] = { {"createdb", CREATEDB}, {"createuser", CREATEUSER}, {"cross", CROSS}, + {"csv", CSV}, {"current_date", CURRENT_DATE}, {"current_time", CURRENT_TIME}, {"current_timestamp", CURRENT_TIMESTAMP}, @@ -186,6 +187,7 @@ static const ScanKeyword ScanKeywords[] = { {"like", LIKE}, {"limit", LIMIT}, {"listen", LISTEN}, + {"literal", LITERAL}, {"load", LOAD}, {"local", LOCAL}, {"localtime", LOCALTIME}, @@ -248,6 +250,7 @@ static const ScanKeyword ScanKeywords[] = { {"privileges", PRIVILEGES}, {"procedural", PROCEDURAL}, {"procedure", PROCEDURE}, + {"quote", QUOTE}, {"read", READ}, {"real", REAL}, {"recheck", RECHECK}, diff --git a/src/backend/tcop/fastpath.c b/src/backend/tcop/fastpath.c index 856a9e8589e..a0eda360c4e 100644 --- a/src/backend/tcop/fastpath.c +++ b/src/backend/tcop/fastpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: 
pgsql/src/backend/tcop/fastpath.c,v 1.71 2004/01/07 18:56:27 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/tcop/fastpath.c,v 1.72 2004/04/19 17:22:31 momjian Exp $ * * NOTES * This cruft is the server side of PQfn. @@ -154,8 +154,7 @@ SendFunctionResult(Datum retval, bool isnull, Oid rettype, int16 format) bool typisvarlena; char *outputstr; - getTypeOutputInfo(rettype, - &typoutput, &typelem, &typisvarlena); + getTypeOutputInfo(rettype, &typoutput, &typelem, &typisvarlena); outputstr = DatumGetCString(OidFunctionCall3(typoutput, retval, ObjectIdGetDatum(typelem), diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c index 49b8b8a064d..e01afb82142 100644 --- a/src/bin/psql/copy.c +++ b/src/bin/psql/copy.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.43 2004/04/12 15:58:52 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.44 2004/04/19 17:22:31 momjian Exp $ */ #include "postgres_fe.h" #include "copy.h" @@ -66,8 +66,13 @@ struct copy_options bool from; bool binary; bool oids; + bool csv_mode; char *delim; char *null; + char *quote; + char *escape; + char *force_list; + char *literal_list; }; @@ -81,6 +86,10 @@ free_copy_options(struct copy_options * ptr) free(ptr->file); free(ptr->delim); free(ptr->null); + free(ptr->quote); + free(ptr->escape); + free(ptr->force_list); + free(ptr->literal_list); free(ptr); } @@ -272,11 +281,19 @@ parse_slash_copy(const char *args) while (token) { + bool fetch_next; + + fetch_next = true; + /* someday allow BINARY here */ if (strcasecmp(token, "oids") == 0) { result->oids = true; } + else if (strcasecmp(token, "csv") == 0) + { + result->csv_mode = true; + } else if (strcasecmp(token, "delimiter") == 0) { token = strtokx(NULL, whitespace, NULL, "'", @@ -301,11 +318,78 @@ parse_slash_copy(const char *args) else goto error; } + else if (strcasecmp(token, "quote") == 0) + { + token = strtokx(NULL, whitespace, NULL, "'", + '\\', false, 
pset.encoding); + if (token && strcasecmp(token, "as") == 0) + token = strtokx(NULL, whitespace, NULL, "'", + '\\', false, pset.encoding); + if (token) + result->quote = pg_strdup(token); + else + goto error; + } + else if (strcasecmp(token, "escape") == 0) + { + token = strtokx(NULL, whitespace, NULL, "'", + '\\', false, pset.encoding); + if (token && strcasecmp(token, "as") == 0) + token = strtokx(NULL, whitespace, NULL, "'", + '\\', false, pset.encoding); + if (token) + result->escape = pg_strdup(token); + else + goto error; + } + else if (strcasecmp(token, "force") == 0) + { + /* handle column list */ + fetch_next = false; + for (;;) + { + token = strtokx(NULL, whitespace, ",", "\"", + 0, false, pset.encoding); + if (!token || strchr(",", token[0])) + goto error; + if (!result->force_list) + result->force_list = pg_strdup(token); + else + xstrcat(&result->force_list, token); + token = strtokx(NULL, whitespace, ",", "\"", + 0, false, pset.encoding); + if (!token || token[0] != ',') + break; + xstrcat(&result->force_list, token); + } + } + else if (strcasecmp(token, "literal") == 0) + { + /* handle column list */ + fetch_next = false; + for (;;) + { + token = strtokx(NULL, whitespace, ",", "\"", + 0, false, pset.encoding); + if (!token || strchr(",", token[0])) + goto error; + if (!result->literal_list) + result->literal_list = pg_strdup(token); + else + xstrcat(&result->literal_list, token); + token = strtokx(NULL, whitespace, ",", "\"", + 0, false, pset.encoding); + if (!token || token[0] != ',') + break; + xstrcat(&result->literal_list, token); + } + } else goto error; - token = strtokx(NULL, whitespace, NULL, NULL, - 0, false, pset.encoding); + if (fetch_next) + token = strtokx(NULL, whitespace, NULL, NULL, + 0, false, pset.encoding); } } @@ -340,7 +424,7 @@ do_copy(const char *args) PGresult *result; bool success; struct stat st; - + /* parse options */ options = parse_slash_copy(args); @@ -379,6 +463,7 @@ do_copy(const char *args) options->delim); } + /* 
There is no backward-compatible CSV syntax */ if (options->null) { if (options->null[0] == '\'') @@ -387,6 +472,37 @@ do_copy(const char *args) appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null); } + if (options->csv_mode) + { + appendPQExpBuffer(&query, " CSV"); + } + + if (options->quote) + { + if (options->quote[0] == '\'') + appendPQExpBuffer(&query, " QUOTE AS %s", options->quote); + else + appendPQExpBuffer(&query, " QUOTE AS '%s'", options->quote); + } + + if (options->escape) + { + if (options->escape[0] == '\'') + appendPQExpBuffer(&query, " ESCAPE AS %s", options->escape); + else + appendPQExpBuffer(&query, " ESCAPE AS '%s'", options->escape); + } + + if (options->force_list) + { + appendPQExpBuffer(&query, " FORCE %s", options->force_list); + } + + if (options->literal_list) + { + appendPQExpBuffer(&query, " LITERAL %s", options->literal_list); + } + if (options->from) { if (options->file)