diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c index cb0835982d..64c979086d 100644 --- a/src/backend/tsearch/dict_thesaurus.c +++ b/src/backend/tsearch/dict_thesaurus.c @@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d) (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("unexpected end of line"))); - /* - * Note: currently, tsearch_readline can't return lines exceeding 4KB, - * so overflow of the word counts is impossible. But that may not - * always be true, so let's check. - */ if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index 9b199d0ac1..d362e86d61 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -14,6 +14,7 @@ #include "postgres.h" #include "catalog/pg_collation.h" +#include "common/string.h" #include "storage/fd.h" #include "tsearch/ts_locale.h" #include "tsearch/ts_public.h" @@ -128,6 +129,7 @@ tsearch_readline_begin(tsearch_readline_state *stp, return false; stp->filename = filename; stp->lineno = 0; + initStringInfo(&stp->buf); stp->curline = NULL; /* Setup error traceback support for ereport() */ stp->cb.callback = tsearch_readline_callback; @@ -145,7 +147,7 @@ tsearch_readline_begin(tsearch_readline_state *stp, char * tsearch_readline(tsearch_readline_state *stp) { - char *result; + char *recoded; /* Advance line number to use in error reports */ stp->lineno++; @@ -153,23 +155,35 @@ tsearch_readline(tsearch_readline_state *stp) /* Clear curline, it's no longer relevant */ if (stp->curline) { - pfree(stp->curline); + if (stp->curline != stp->buf.data) + pfree(stp->curline); stp->curline = NULL; } /* Collect next line, if there is one */ - result = t_readline(stp->fp); - if (!result) + if (!pg_get_line_buf(stp->fp, &stp->buf)) return NULL; - /* - * Save a copy of the line for possible use in error reports. (We cannot - * just save "result", since it's likely to get pfree'd at some point by - * the caller; an error after that would try to access freed data.) - */ - stp->curline = pstrdup(result); + /* Validate the input as UTF-8, then convert to DB encoding if needed */ + recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8); - return result; + /* Save the correctly-encoded string for possible error reports */ + stp->curline = recoded; /* might be equal to buf.data */ + + /* + * We always return a freshly pstrdup'd string. This is clearly necessary + * if pg_any_to_server() returned buf.data, and we need a second copy even + * if encoding conversion did occur. The caller is entitled to pfree the + * returned string at any time, which would leave curline pointing to + * recycled storage, causing problems if an error occurs after that point. + * (It's preferable to return the result of pstrdup instead of the output + * of pg_any_to_server, because the conversion result tends to be + * over-allocated. Since callers might save the result string directly + * into a long-lived dictionary structure, we don't want it to be a larger + * palloc chunk than necessary. We'll reclaim the conversion result on + * the next call.) + */ + return pstrdup(recoded); } /* @@ -181,11 +195,13 @@ tsearch_readline_end(tsearch_readline_state *stp) /* Suppress use of curline in any error reported below */ if (stp->curline) { - pfree(stp->curline); + if (stp->curline != stp->buf.data) + pfree(stp->curline); stp->curline = NULL; } /* Release other resources */ + pfree(stp->buf.data); FreeFile(stp->fp); /* Pop the error context stack */ @@ -203,8 +219,7 @@ tsearch_readline_callback(void *arg) /* * We can't include the text of the config line for errors that occur - * during t_readline() itself. This is only partly a consequence of our - * arms-length use of that routine: the major cause of such errors is + * during tsearch_readline() itself. The major cause of such errors is * encoding violations, and we daren't try to print error messages * containing badly-encoded data. */ @@ -220,43 +235,6 @@ tsearch_readline_callback(void *arg) } -/* - * Read the next line from a tsearch data file (expected to be in UTF-8), and - * convert it to database encoding if needed. The returned string is palloc'd. - * NULL return means EOF. - * - * Note: direct use of this function is now deprecated. Go through - * tsearch_readline() to provide better error reporting. - */ -char * -t_readline(FILE *fp) -{ - int len; - char *recoded; - char buf[4096]; /* lines must not be longer than this */ - - if (fgets(buf, sizeof(buf), fp) == NULL) - return NULL; - - len = strlen(buf); - - /* Make sure the input is valid UTF-8 */ - (void) pg_verify_mbstr(PG_UTF8, buf, len, false); - - /* And convert */ - recoded = pg_any_to_server(buf, len, PG_UTF8); - if (recoded == buf) - { - /* - * conversion didn't pstrdup, so we must. We can use the length of the - * original string, because no conversion was done. - */ - recoded = pnstrdup(recoded, len); - } - - return recoded; -} - /* * lowerstr --- fold null-terminated string to lower case * diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h index cc4bd9ab20..f1669fda21 100644 --- a/src/include/tsearch/ts_locale.h +++ b/src/include/tsearch/ts_locale.h @@ -15,6 +15,7 @@ #include #include +#include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "utils/pg_locale.h" @@ -33,7 +34,9 @@ typedef struct FILE *fp; const char *filename; int lineno; - char *curline; + StringInfoData buf; /* current input line, in UTF-8 */ + char *curline; /* current input line, in DB's encoding */ + /* curline may be NULL, or equal to buf.data, or a palloc'd string */ ErrorContextCallback cb; } tsearch_readline_state; @@ -57,6 +60,4 @@ extern bool tsearch_readline_begin(tsearch_readline_state *stp, extern char *tsearch_readline(tsearch_readline_state *stp); extern void tsearch_readline_end(tsearch_readline_state *stp); -extern char *t_readline(FILE *fp); - #endif /* __TSLOCALE_H__ */