Add pg_read_binary_file() and whole-file-at-once versions of pg_read_file().

One of the usages of the binary version is to read files in a different
encoding from the server encoding.

Dimitri Fontaine and Itagaki Takahiro.
This commit is contained in:
Itagaki Takahiro 2010-12-16 06:56:28 +09:00
parent 16b5e08dec
commit 03db44eae3
5 changed files with 139 additions and 25 deletions

View File

@ -14449,11 +14449,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
</row>
<row>
<entry>
<literal><function>pg_read_file(<parameter>filename</> <type>text</>, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>)</function></literal>
<literal><function>pg_read_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
</entry>
<entry><type>text</type></entry>
<entry>Return the contents of a text file</entry>
</row>
<row>
<entry>
<literal><function>pg_read_binary_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
</entry>
<entry><type>bytea</type></entry>
<entry>Return the contents of a file</entry>
</row>
<row>
<entry>
<literal><function>pg_stat_file(<parameter>filename</> <type>text</>)</function></literal>
@ -14482,6 +14489,22 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
at the given <parameter>offset</>, returning at most <parameter>length</>
bytes (less if the end of file is reached first). If <parameter>offset</>
is negative, it is relative to the end of the file.
When <parameter>offset</> and <parameter>length</> parameters are omitted,
it returns the whole of the file.
The part of a file must be a valid text in the server encoding.
</para>
<indexterm>
<primary>pg_read_binary_file</primary>
</indexterm>
<para>
<function>pg_read_binary_file</> returns part of a file as like as
<function>pg_read_file</>, but the result is a bytea value.
One of the usages is to read a file in the specified encoding combined with
<function>convert_from</> function:
<programlisting>
SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
</programlisting>
</para>
<indexterm>

View File

@ -80,15 +80,14 @@ convert_and_check_filename(text *arg)
/*
* Read a section of a file, returning it as text
* Read a section of a file, returning it as bytea
*
* We read the whole of the file when bytes_to_read is nagative.
*/
Datum
pg_read_file(PG_FUNCTION_ARGS)
static bytea *
read_binary_file(text *filename_t, int64 seek_offset, int64 bytes_to_read)
{
text *filename_t = PG_GETARG_TEXT_P(0);
int64 seek_offset = PG_GETARG_INT64(1);
int64 bytes_to_read = PG_GETARG_INT64(2);
char *buf;
bytea *buf;
size_t nbytes;
FILE *file;
char *filename;
@ -100,6 +99,29 @@ pg_read_file(PG_FUNCTION_ARGS)
filename = convert_and_check_filename(filename_t);
if (bytes_to_read < 0)
{
if (seek_offset < 0)
bytes_to_read = -seek_offset;
else
{
struct stat fst;
if (stat(filename, &fst) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", filename)));
bytes_to_read = fst.st_size - seek_offset;
}
}
/* not sure why anyone thought that int64 length was a good idea */
if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length too large")));
if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
ereport(ERROR,
(errcode_for_file_access(),
@ -112,18 +134,7 @@ pg_read_file(PG_FUNCTION_ARGS)
(errcode_for_file_access(),
errmsg("could not seek in file \"%s\": %m", filename)));
if (bytes_to_read < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length cannot be negative")));
/* not sure why anyone thought that int64 length was a good idea */
if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length too large")));
buf = palloc((Size) bytes_to_read + VARHDRSZ);
buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
@ -132,15 +143,86 @@ pg_read_file(PG_FUNCTION_ARGS)
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m", filename)));
/* Make sure the input is valid */
pg_verifymbstr(VARDATA(buf), nbytes, false);
SET_VARSIZE(buf, nbytes + VARHDRSZ);
FreeFile(file);
pfree(filename);
PG_RETURN_TEXT_P(buf);
return buf;
}
/*
* In addition to read_binary_file, verify whether the contents are encoded
* in the database encoding.
*/
static text *
read_text_file(text *filename, int64 seek_offset, int64 bytes_to_read)
{
bytea *buf = read_binary_file(filename, seek_offset, bytes_to_read);
/* Make sure the input is valid */
pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
/* OK, we can cast it as text safely */
return (text *) buf;
}
/*
* Read a section of a file, returning it as text
*/
Datum
pg_read_file(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_P(0);
int64 seek_offset = PG_GETARG_INT64(1);
int64 bytes_to_read = PG_GETARG_INT64(2);
if (bytes_to_read < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length cannot be negative")));
PG_RETURN_TEXT_P(read_text_file(filename_t, seek_offset, bytes_to_read));
}
/*
* Read the whole of a file, returning it as text
*/
Datum
pg_read_file_all(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_P(0);
PG_RETURN_TEXT_P(read_text_file(filename_t, 0, -1));
}
/*
* Read a section of a file, returning it as bytea
*/
Datum
pg_read_binary_file(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_P(0);
int64 seek_offset = PG_GETARG_INT64(1);
int64 bytes_to_read = PG_GETARG_INT64(2);
if (bytes_to_read < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length cannot be negative")));
PG_RETURN_BYTEA_P(read_binary_file(filename_t, seek_offset, bytes_to_read));
}
/*
* Read the whole of a file, returning it as bytea
*/
Datum
pg_read_binary_file_all(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_P(0);
PG_RETURN_BYTEA_P(read_binary_file(filename_t, 0, -1));
}
/*

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201012131
#define CATALOG_VERSION_NO 201012161
#endif

View File

@ -3403,6 +3403,12 @@ DATA(insert OID = 2623 ( pg_stat_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2249
DESCR("return file information");
DATA(insert OID = 2624 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 25 "25 20 20" _null_ _null_ _null_ _null_ pg_read_file _null_ _null_ _null_ ));
DESCR("read text from a file");
DATA(insert OID = 3826 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_read_file_all _null_ _null_ _null_ ));
DESCR("read text from a file");
DATA(insert OID = 3827 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 17 "25 20 20" _null_ _null_ _null_ _null_ pg_read_binary_file _null_ _null_ _null_ ));
DESCR("read bytea from a file");
DATA(insert OID = 3828 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 17 "25" _null_ _null_ _null_ _null_ pg_read_binary_file_all _null_ _null_ _null_ ));
DESCR("read bytea from a file");
DATA(insert OID = 2625 ( pg_ls_dir PGNSP PGUID 12 1 1000 0 f f f t t v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_ls_dir _null_ _null_ _null_ ));
DESCR("list all files in a directory");
DATA(insert OID = 2626 ( pg_sleep PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "701" _null_ _null_ _null_ _null_ pg_sleep _null_ _null_ _null_ ));

View File

@ -442,6 +442,9 @@ extern Datum pg_relation_filepath(PG_FUNCTION_ARGS);
/* genfile.c */
extern Datum pg_stat_file(PG_FUNCTION_ARGS);
extern Datum pg_read_file(PG_FUNCTION_ARGS);
extern Datum pg_read_file_all(PG_FUNCTION_ARGS);
extern Datum pg_read_binary_file(PG_FUNCTION_ARGS);
extern Datum pg_read_binary_file_all(PG_FUNCTION_ARGS);
extern Datum pg_ls_dir(PG_FUNCTION_ARGS);
/* misc.c */