mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-06 15:24:56 +08:00
Improve behavior of tsearch_readline(), and remove t_readline().
Commitfbeb9da22
, which added the tsearch_readline APIs, left t_readline() in place as a compatibility measure. But that function has been unused and deprecated for twelve years now, so that seems like enough time to remove it. Doing so, and merging t_readline's code into tsearch_readline, aids in making several useful improvements: * The hard-wired 4K limit on line length in tsearch data files is removed, by using a StringInfo buffer instead of a fixed-size buffer. * We can buy back the per-line palloc/pfree added by3ea7e9550
in the common case where encoding conversion is not required. * We no longer need a separate pg_verify_mbstr call, as that functionality was folded into encoding conversion some time ago. (We could have done some of this stuff while keeping t_readline as a separate API, but there seems little point, since there's no reason for anyone to still be using t_readline directly.) Discussion: https://postgr.es/m/48A4FA71-524E-41B9-953A-FD04EF36E2E7@yesql.se
This commit is contained in:
parent
aca74843e4
commit
83b61319a1
@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d)
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("unexpected end of line")));
|
||||
|
||||
/*
|
||||
* Note: currently, tsearch_readline can't return lines exceeding 4KB,
|
||||
* so overflow of the word counts is impossible. But that may not
|
||||
* always be true, so let's check.
|
||||
*/
|
||||
if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "catalog/pg_collation.h"
|
||||
#include "common/string.h"
|
||||
#include "storage/fd.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "tsearch/ts_public.h"
|
||||
@ -128,6 +129,7 @@ tsearch_readline_begin(tsearch_readline_state *stp,
|
||||
return false;
|
||||
stp->filename = filename;
|
||||
stp->lineno = 0;
|
||||
initStringInfo(&stp->buf);
|
||||
stp->curline = NULL;
|
||||
/* Setup error traceback support for ereport() */
|
||||
stp->cb.callback = tsearch_readline_callback;
|
||||
@ -145,7 +147,7 @@ tsearch_readline_begin(tsearch_readline_state *stp,
|
||||
char *
|
||||
tsearch_readline(tsearch_readline_state *stp)
|
||||
{
|
||||
char *result;
|
||||
char *recoded;
|
||||
|
||||
/* Advance line number to use in error reports */
|
||||
stp->lineno++;
|
||||
@ -153,23 +155,35 @@ tsearch_readline(tsearch_readline_state *stp)
|
||||
/* Clear curline, it's no longer relevant */
|
||||
if (stp->curline)
|
||||
{
|
||||
pfree(stp->curline);
|
||||
if (stp->curline != stp->buf.data)
|
||||
pfree(stp->curline);
|
||||
stp->curline = NULL;
|
||||
}
|
||||
|
||||
/* Collect next line, if there is one */
|
||||
result = t_readline(stp->fp);
|
||||
if (!result)
|
||||
if (!pg_get_line_buf(stp->fp, &stp->buf))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Save a copy of the line for possible use in error reports. (We cannot
|
||||
* just save "result", since it's likely to get pfree'd at some point by
|
||||
* the caller; an error after that would try to access freed data.)
|
||||
*/
|
||||
stp->curline = pstrdup(result);
|
||||
/* Validate the input as UTF-8, then convert to DB encoding if needed */
|
||||
recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
|
||||
|
||||
return result;
|
||||
/* Save the correctly-encoded string for possible error reports */
|
||||
stp->curline = recoded; /* might be equal to buf.data */
|
||||
|
||||
/*
|
||||
* We always return a freshly pstrdup'd string. This is clearly necessary
|
||||
* if pg_any_to_server() returned buf.data, and we need a second copy even
|
||||
* if encoding conversion did occur. The caller is entitled to pfree the
|
||||
* returned string at any time, which would leave curline pointing to
|
||||
* recycled storage, causing problems if an error occurs after that point.
|
||||
* (It's preferable to return the result of pstrdup instead of the output
|
||||
* of pg_any_to_server, because the conversion result tends to be
|
||||
* over-allocated. Since callers might save the result string directly
|
||||
* into a long-lived dictionary structure, we don't want it to be a larger
|
||||
* palloc chunk than necessary. We'll reclaim the conversion result on
|
||||
* the next call.)
|
||||
*/
|
||||
return pstrdup(recoded);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -181,11 +195,13 @@ tsearch_readline_end(tsearch_readline_state *stp)
|
||||
/* Suppress use of curline in any error reported below */
|
||||
if (stp->curline)
|
||||
{
|
||||
pfree(stp->curline);
|
||||
if (stp->curline != stp->buf.data)
|
||||
pfree(stp->curline);
|
||||
stp->curline = NULL;
|
||||
}
|
||||
|
||||
/* Release other resources */
|
||||
pfree(stp->buf.data);
|
||||
FreeFile(stp->fp);
|
||||
|
||||
/* Pop the error context stack */
|
||||
@ -203,8 +219,7 @@ tsearch_readline_callback(void *arg)
|
||||
|
||||
/*
|
||||
* We can't include the text of the config line for errors that occur
|
||||
* during t_readline() itself. This is only partly a consequence of our
|
||||
* arms-length use of that routine: the major cause of such errors is
|
||||
* during tsearch_readline() itself. The major cause of such errors is
|
||||
* encoding violations, and we daren't try to print error messages
|
||||
* containing badly-encoded data.
|
||||
*/
|
||||
@ -220,43 +235,6 @@ tsearch_readline_callback(void *arg)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Read the next line from a tsearch data file (expected to be in UTF-8), and
|
||||
* convert it to database encoding if needed. The returned string is palloc'd.
|
||||
* NULL return means EOF.
|
||||
*
|
||||
* Note: direct use of this function is now deprecated. Go through
|
||||
* tsearch_readline() to provide better error reporting.
|
||||
*/
|
||||
char *
|
||||
t_readline(FILE *fp)
|
||||
{
|
||||
int len;
|
||||
char *recoded;
|
||||
char buf[4096]; /* lines must not be longer than this */
|
||||
|
||||
if (fgets(buf, sizeof(buf), fp) == NULL)
|
||||
return NULL;
|
||||
|
||||
len = strlen(buf);
|
||||
|
||||
/* Make sure the input is valid UTF-8 */
|
||||
(void) pg_verify_mbstr(PG_UTF8, buf, len, false);
|
||||
|
||||
/* And convert */
|
||||
recoded = pg_any_to_server(buf, len, PG_UTF8);
|
||||
if (recoded == buf)
|
||||
{
|
||||
/*
|
||||
* conversion didn't pstrdup, so we must. We can use the length of the
|
||||
* original string, because no conversion was done.
|
||||
*/
|
||||
recoded = pnstrdup(recoded, len);
|
||||
}
|
||||
|
||||
return recoded;
|
||||
}
|
||||
|
||||
/*
|
||||
* lowerstr --- fold null-terminated string to lower case
|
||||
*
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "lib/stringinfo.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
#include "utils/pg_locale.h"
|
||||
|
||||
@ -33,7 +34,9 @@ typedef struct
|
||||
FILE *fp;
|
||||
const char *filename;
|
||||
int lineno;
|
||||
char *curline;
|
||||
StringInfoData buf; /* current input line, in UTF-8 */
|
||||
char *curline; /* current input line, in DB's encoding */
|
||||
/* curline may be NULL, or equal to buf.data, or a palloc'd string */
|
||||
ErrorContextCallback cb;
|
||||
} tsearch_readline_state;
|
||||
|
||||
@ -57,6 +60,4 @@ extern bool tsearch_readline_begin(tsearch_readline_state *stp,
|
||||
extern char *tsearch_readline(tsearch_readline_state *stp);
|
||||
extern void tsearch_readline_end(tsearch_readline_state *stp);
|
||||
|
||||
extern char *t_readline(FILE *fp);
|
||||
|
||||
#endif /* __TSLOCALE_H__ */
|
||||
|
Loading…
Reference in New Issue
Block a user