mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-15 08:20:16 +08:00
Add pg_read_binary_file() and whole-file-at-once versions of pg_read_file().
One of the usages of the binary version is to read files in a different encoding from the server encoding. Dimitri Fontaine and Itagaki Takahiro.
This commit is contained in:
parent
16b5e08dec
commit
03db44eae3
@ -14449,11 +14449,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
|
||||
</row>
|
||||
<row>
|
||||
<entry>
|
||||
<literal><function>pg_read_file(<parameter>filename</> <type>text</>, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>)</function></literal>
|
||||
<literal><function>pg_read_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
|
||||
</entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>Return the contents of a text file</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>
|
||||
<literal><function>pg_read_binary_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
|
||||
</entry>
|
||||
<entry><type>bytea</type></entry>
|
||||
<entry>Return the contents of a file</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>
|
||||
<literal><function>pg_stat_file(<parameter>filename</> <type>text</>)</function></literal>
|
||||
@ -14482,6 +14489,22 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
|
||||
at the given <parameter>offset</>, returning at most <parameter>length</>
|
||||
bytes (less if the end of file is reached first). If <parameter>offset</>
|
||||
is negative, it is relative to the end of the file.
|
||||
When the <parameter>offset</> and <parameter>length</> parameters are omitted,
|
||||
the whole file is returned.
|
||||
The portion of the file that is read must be valid text in the server encoding.
|
||||
</para>
|
||||
|
||||
<indexterm>
|
||||
<primary>pg_read_binary_file</primary>
|
||||
</indexterm>
|
||||
<para>
|
||||
<function>pg_read_binary_file</> returns part of a file in the same way as
|
||||
<function>pg_read_file</>, but the result is a bytea value.
|
||||
One use of this is to read a file in a specified encoding, in combination with the
|
||||
<function>convert_from</> function:
|
||||
<programlisting>
|
||||
SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<indexterm>
|
||||
|
@ -80,15 +80,14 @@ convert_and_check_filename(text *arg)
|
||||
|
||||
|
||||
/*
|
||||
* Read a section of a file, returning it as text
|
||||
* Read a section of a file, returning it as bytea
|
||||
*
|
||||
* We read the whole of the file when bytes_to_read is negative.
|
||||
*/
|
||||
Datum
|
||||
pg_read_file(PG_FUNCTION_ARGS)
|
||||
static bytea *
|
||||
read_binary_file(text *filename_t, int64 seek_offset, int64 bytes_to_read)
|
||||
{
|
||||
text *filename_t = PG_GETARG_TEXT_P(0);
|
||||
int64 seek_offset = PG_GETARG_INT64(1);
|
||||
int64 bytes_to_read = PG_GETARG_INT64(2);
|
||||
char *buf;
|
||||
bytea *buf;
|
||||
size_t nbytes;
|
||||
FILE *file;
|
||||
char *filename;
|
||||
@ -100,6 +99,29 @@ pg_read_file(PG_FUNCTION_ARGS)
|
||||
|
||||
filename = convert_and_check_filename(filename_t);
|
||||
|
||||
if (bytes_to_read < 0)
|
||||
{
|
||||
if (seek_offset < 0)
|
||||
bytes_to_read = -seek_offset;
|
||||
else
|
||||
{
|
||||
struct stat fst;
|
||||
|
||||
if (stat(filename, &fst) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not stat file \"%s\": %m", filename)));
|
||||
|
||||
bytes_to_read = fst.st_size - seek_offset;
|
||||
}
|
||||
}
|
||||
|
||||
/* not sure why anyone thought that int64 length was a good idea */
|
||||
if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("requested length too large")));
|
||||
|
||||
if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
@ -112,18 +134,7 @@ pg_read_file(PG_FUNCTION_ARGS)
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek in file \"%s\": %m", filename)));
|
||||
|
||||
if (bytes_to_read < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("requested length cannot be negative")));
|
||||
|
||||
/* not sure why anyone thought that int64 length was a good idea */
|
||||
if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("requested length too large")));
|
||||
|
||||
buf = palloc((Size) bytes_to_read + VARHDRSZ);
|
||||
buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
|
||||
|
||||
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
|
||||
|
||||
@ -132,15 +143,86 @@ pg_read_file(PG_FUNCTION_ARGS)
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not read file \"%s\": %m", filename)));
|
||||
|
||||
/* Make sure the input is valid */
|
||||
pg_verifymbstr(VARDATA(buf), nbytes, false);
|
||||
|
||||
SET_VARSIZE(buf, nbytes + VARHDRSZ);
|
||||
|
||||
FreeFile(file);
|
||||
pfree(filename);
|
||||
|
||||
PG_RETURN_TEXT_P(buf);
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* In addition to read_binary_file, verify whether the contents are encoded
|
||||
* in the database encoding.
|
||||
*/
|
||||
static text *
|
||||
read_text_file(text *filename, int64 seek_offset, int64 bytes_to_read)
|
||||
{
|
||||
bytea *buf = read_binary_file(filename, seek_offset, bytes_to_read);
|
||||
|
||||
/* Make sure the input is valid */
|
||||
pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
|
||||
|
||||
/* OK, we can cast it as text safely */
|
||||
return (text *) buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read a section of a file, returning it as text
|
||||
*/
|
||||
Datum
|
||||
pg_read_file(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *filename_t = PG_GETARG_TEXT_P(0);
|
||||
int64 seek_offset = PG_GETARG_INT64(1);
|
||||
int64 bytes_to_read = PG_GETARG_INT64(2);
|
||||
|
||||
if (bytes_to_read < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("requested length cannot be negative")));
|
||||
|
||||
PG_RETURN_TEXT_P(read_text_file(filename_t, seek_offset, bytes_to_read));
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the whole of a file, returning it as text
|
||||
*/
|
||||
Datum
|
||||
pg_read_file_all(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *filename_t = PG_GETARG_TEXT_P(0);
|
||||
|
||||
PG_RETURN_TEXT_P(read_text_file(filename_t, 0, -1));
|
||||
}
|
||||
|
||||
/*
|
||||
* Read a section of a file, returning it as bytea
|
||||
*/
|
||||
Datum
|
||||
pg_read_binary_file(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *filename_t = PG_GETARG_TEXT_P(0);
|
||||
int64 seek_offset = PG_GETARG_INT64(1);
|
||||
int64 bytes_to_read = PG_GETARG_INT64(2);
|
||||
|
||||
if (bytes_to_read < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("requested length cannot be negative")));
|
||||
|
||||
PG_RETURN_BYTEA_P(read_binary_file(filename_t, seek_offset, bytes_to_read));
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the whole of a file, returning it as bytea
|
||||
*/
|
||||
Datum
|
||||
pg_read_binary_file_all(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *filename_t = PG_GETARG_TEXT_P(0);
|
||||
|
||||
PG_RETURN_BYTEA_P(read_binary_file(filename_t, 0, -1));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -53,6 +53,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 201012131
|
||||
#define CATALOG_VERSION_NO 201012161
|
||||
|
||||
#endif
|
||||
|
@ -3403,6 +3403,12 @@ DATA(insert OID = 2623 ( pg_stat_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2249
|
||||
DESCR("return file information");
|
||||
DATA(insert OID = 2624 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 25 "25 20 20" _null_ _null_ _null_ _null_ pg_read_file _null_ _null_ _null_ ));
|
||||
DESCR("read text from a file");
|
||||
DATA(insert OID = 3826 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_read_file_all _null_ _null_ _null_ ));
|
||||
DESCR("read text from a file");
|
||||
DATA(insert OID = 3827 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 17 "25 20 20" _null_ _null_ _null_ _null_ pg_read_binary_file _null_ _null_ _null_ ));
|
||||
DESCR("read bytea from a file");
|
||||
DATA(insert OID = 3828 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 17 "25" _null_ _null_ _null_ _null_ pg_read_binary_file_all _null_ _null_ _null_ ));
|
||||
DESCR("read bytea from a file");
|
||||
DATA(insert OID = 2625 ( pg_ls_dir PGNSP PGUID 12 1 1000 0 f f f t t v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_ls_dir _null_ _null_ _null_ ));
|
||||
DESCR("list all files in a directory");
|
||||
DATA(insert OID = 2626 ( pg_sleep PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "701" _null_ _null_ _null_ _null_ pg_sleep _null_ _null_ _null_ ));
|
||||
|
@ -442,6 +442,9 @@ extern Datum pg_relation_filepath(PG_FUNCTION_ARGS);
|
||||
/* genfile.c */
|
||||
extern Datum pg_stat_file(PG_FUNCTION_ARGS);
|
||||
extern Datum pg_read_file(PG_FUNCTION_ARGS);
|
||||
extern Datum pg_read_file_all(PG_FUNCTION_ARGS);
|
||||
extern Datum pg_read_binary_file(PG_FUNCTION_ARGS);
|
||||
extern Datum pg_read_binary_file_all(PG_FUNCTION_ARGS);
|
||||
extern Datum pg_ls_dir(PG_FUNCTION_ARGS);
|
||||
|
||||
/* misc.c */
|
||||
|
Loading…
Reference in New Issue
Block a user