file_fdw: Add REJECT_LIMIT option to file_fdw.

Commit 4ac2a9bece introduced the REJECT_LIMIT option for the COPY
command. This commit extends the support for this option to file_fdw.

Like the REJECT_LIMIT option for COPY, this option limits
the maximum number of erroneous rows that can be skipped.
If the number of data type conversion errors exceeds this limit,
accessing the file_fdw foreign table will fail with an error,
even when on_error = 'ignore' is specified.

Since the CREATE/ALTER FOREIGN TABLE commands require foreign
table options to be single-quoted, this commit updates
defGetCopyRejectLimitOption() to also handle string values,
in addition to the int64 values used by the COPY command option.

Author: Atsushi Torikoshi
Reviewed-by: Fujii Masao, Yugo Nagata, Kirill Reshke
Discussion: https://postgr.es/m/bab68a9fc502b12693f0755b6f35f327@oss.nttdata.com
This commit is contained in:
Fujii Masao 2024-11-20 23:53:19 +09:00
parent 15afb7d61c
commit 6c8f670323
6 changed files with 58 additions and 4 deletions

View File

@ -2,3 +2,4 @@
100;@99.097@
0;@aaa@
42;@324.78@
1;@bbb@

View File

@ -90,6 +90,8 @@ ERROR: COPY delimiter cannot be newline or carriage return
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null '
'); -- ERROR
ERROR: COPY null representation cannot use newline or carriage return
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); -- ERROR
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR
ERROR: either filename or program is required for file_fdw foreign tables
\set filename :abs_srcdir '/data/agg.data'
@ -206,10 +208,10 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
SELECT * FROM agg_bad; -- ERROR
ERROR: invalid input syntax for type real: "aaa"
CONTEXT: COPY agg_bad, line 3, column b: "aaa"
-- on_error and log_verbosity tests
-- on_error, log_verbosity and reject_limit tests
ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
SELECT * FROM agg_bad;
NOTICE: 1 row was skipped due to data type incompatibility
NOTICE: 2 rows were skipped due to data type incompatibility
a | b
-----+--------
100 | 99.097
@ -224,6 +226,18 @@ SELECT * FROM agg_bad;
42 | 324.78
(2 rows)
ALTER FOREIGN TABLE agg_bad OPTIONS (ADD reject_limit '1'); -- ERROR
SELECT * FROM agg_bad;
ERROR: skipped more than REJECT_LIMIT (1) rows due to data type incompatibility
CONTEXT: COPY agg_bad, line 5, column b: "bbb"
ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2');
SELECT * FROM agg_bad;
a | b
-----+--------
100 | 99.097
42 | 324.78
(2 rows)
ANALYZE agg_bad;
-- misc query tests
\t on

View File

@ -77,6 +77,7 @@ static const struct FileFdwOption valid_options[] = {
{"encoding", ForeignTableRelationId},
{"on_error", ForeignTableRelationId},
{"log_verbosity", ForeignTableRelationId},
{"reject_limit", ForeignTableRelationId},
{"force_not_null", AttributeRelationId},
{"force_null", AttributeRelationId},
@ -788,6 +789,13 @@ retry:
*/
ResetPerTupleExprContext(estate);
if (cstate->opts.reject_limit > 0 &&
cstate->num_errors > cstate->opts.reject_limit)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
(long long) cstate->opts.reject_limit)));
/* Repeat NextCopyFrom() until no soft error occurs */
goto retry;
}

View File

@ -77,6 +77,7 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter
'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null '
'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR
\set filename :abs_srcdir '/data/agg.data'
@ -150,11 +151,15 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
-- error context report tests
SELECT * FROM agg_bad; -- ERROR
-- on_error and log_verbosity tests
-- on_error, log_verbosity and reject_limit tests
ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
SELECT * FROM agg_bad;
ALTER FOREIGN TABLE agg_bad OPTIONS (ADD log_verbosity 'silent');
SELECT * FROM agg_bad;
ALTER FOREIGN TABLE agg_bad OPTIONS (ADD reject_limit '1'); -- ERROR
SELECT * FROM agg_bad;
ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2');
SELECT * FROM agg_bad;
ANALYZE agg_bad;
-- misc query tests

View File

@ -138,6 +138,18 @@
</listitem>
</varlistentry>
<varlistentry>
<term><literal>reject_limit</literal></term>
<listitem>
<para>
Specifies the maximum number of errors tolerated while converting a column's
input value to its data type, the same as <command>COPY</command>'s
<literal>REJECT_LIMIT</literal> option.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>log_verbosity</literal></term>

View File

@ -420,11 +420,25 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
/*
* Extract REJECT_LIMIT value from a DefElem.
*
* REJECT_LIMIT can be specified in two ways: as an int64 for the COPY command
* option or as a single-quoted string for the foreign table option using
* file_fdw. Therefore this function needs to handle both formats.
*/
static int64
defGetCopyRejectLimitOption(DefElem *def)
{
int64 reject_limit = defGetInt64(def);
int64 reject_limit;
if (def->arg == NULL)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("%s requires a numeric value",
def->defname)));
else if (nodeTag(def->arg) == T_String)
reject_limit = pg_strtoint64(strVal(def->arg));
else
reject_limit = defGetInt64(def);
if (reject_limit <= 0)
ereport(ERROR,