mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-09 08:10:09 +08:00
Add comments and a missing CHECK_FOR_INTERRUPTS in ts_headline.
I just spent an annoying amount of time reverse-engineering the
100%-undocumented API between ts_headline and the text search
parser's prsheadline function. Add some commentary about that
while it's fresh in mind. Also remove some unused macros in
wparser_def.c.
While at it, I noticed that when commit 78e73e875 added a
CHECK_FOR_INTERRUPTS call in TS_execute_recurse, it missed
doing so in the parallel function TS_phrase_execute, which
surely needs one just as much.
Back-patch because of the missing CHECK_FOR_INTERRUPTS.
Might as well back-patch the rest of this too.
This commit is contained in:
parent
f686ae82f2
commit
5644d6f909
@ -433,6 +433,8 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
|
||||
/*
|
||||
* Headline framework
|
||||
*/
|
||||
|
||||
/* Add a word to prs->words[] */
|
||||
static void
|
||||
hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
|
||||
{
|
||||
@ -449,6 +451,14 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
|
||||
prs->curwords++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add pos and matching-query-item data to the just-added word.
|
||||
* Here, buf/buflen represent a processed lexeme, not raw token text.
|
||||
*
|
||||
* If the query contains more than one matching item, we replicate
|
||||
* the last-added word so that each item can be pointed to. The
|
||||
* duplicate entries are marked with repeated = 1.
|
||||
*/
|
||||
static void
|
||||
hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
|
||||
{
|
||||
@ -589,6 +599,9 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu
|
||||
FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate the headline, as a text object, from HeadlineParsedText.
|
||||
*/
|
||||
text *
|
||||
generateHeadline(HeadlineParsedText *prs)
|
||||
{
|
||||
|
@ -1914,10 +1914,6 @@ prsd_end(PG_FUNCTION_ARGS)
|
||||
*/
|
||||
|
||||
/* token type classification macros */
|
||||
#define LEAVETOKEN(x) ( (x)==SPACE )
|
||||
#define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
|
||||
#define ENDPUNCTOKEN(x) ( (x)==SPACE )
|
||||
|
||||
#define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
|
||||
#define HLIDREPLACE(x) ( (x)==TAG_T )
|
||||
#define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
|
||||
|
@ -1617,6 +1617,9 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
|
||||
/* since this function recurses, it could be driven to stack overflow */
|
||||
check_stack_depth();
|
||||
|
||||
/* ... and let's check for query cancel while we're at it */
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
if (curitem->type == QI_VAL)
|
||||
return chkcond(arg, (QueryOperand *) curitem, data);
|
||||
|
||||
|
@ -30,33 +30,60 @@ typedef struct
|
||||
} LexDescr;
|
||||
|
||||
/*
|
||||
* Interface to headline generator
|
||||
* Interface to headline generator (tsparser's prsheadline function)
|
||||
*
|
||||
* HeadlineParsedText describes the text that is to be highlighted.
|
||||
* Some fields are passed from the core code to the prsheadline function,
|
||||
* while others are output from the prsheadline function.
|
||||
*
|
||||
* The principal data is words[], an array of HeadlineWordEntry,
|
||||
* one entry per token, of length curwords.
|
||||
* The fields of HeadlineWordEntry are:
|
||||
*
|
||||
* in, selected, replace, skip: these flags are initially zero
|
||||
* and may be set by the prsheadline function. A consecutive group
|
||||
* of tokens marked "in" forms a "fragment" to be output.
|
||||
* Such tokens may additionally be marked selected, replace, or skip
|
||||
* to modify how they are shown. (If you set more than one of those
|
||||
* bits, you get an unspecified one of those behaviors.)
|
||||
*
|
||||
* type, len, pos, word: filled by core code to describe the token.
|
||||
*
|
||||
* item: if the token matches any operand of the tsquery of interest,
|
||||
* a pointer to such an operand. (If there are multiple matching
|
||||
* operands, we generate extra copies of the HeadlineWordEntry to hold
|
||||
* all the pointers. The extras are marked with repeated = 1 and should
|
||||
* be ignored except for checking the item pointer.)
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint32 selected:1,
|
||||
in:1,
|
||||
replace:1,
|
||||
repeated:1,
|
||||
skip:1,
|
||||
unused:3,
|
||||
type:8,
|
||||
len:16;
|
||||
WordEntryPos pos;
|
||||
char *word;
|
||||
QueryOperand *item;
|
||||
uint32 selected:1, /* token is to be highlighted */
|
||||
in:1, /* token is part of headline */
|
||||
replace:1, /* token is to be replaced with a space */
|
||||
repeated:1, /* duplicate entry to hold item pointer */
|
||||
skip:1, /* token is to be skipped (not output) */
|
||||
unused:3, /* available bits */
|
||||
type:8, /* parser's token category */
|
||||
len:16; /* length of token */
|
||||
WordEntryPos pos; /* position of token */
|
||||
char *word; /* text of token (not null-terminated) */
|
||||
QueryOperand *item; /* a matching query operand, or NULL if none */
|
||||
} HeadlineWordEntry;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* Fields filled by core code before calling prsheadline function: */
|
||||
HeadlineWordEntry *words;
|
||||
int32 lenwords;
|
||||
int32 curwords;
|
||||
int32 vectorpos; /* positions a-la tsvector */
|
||||
char *startsel;
|
||||
int32 lenwords; /* allocated length of words[] */
|
||||
int32 curwords; /* current number of valid entries */
|
||||
int32 vectorpos; /* used by ts_parse.c in filling pos fields */
|
||||
|
||||
/* The prsheadline function must fill these fields: */
|
||||
/* Strings for marking selected tokens and separating fragments: */
|
||||
char *startsel; /* palloc'd strings */
|
||||
char *stopsel;
|
||||
char *fragdelim;
|
||||
int16 startsellen;
|
||||
int16 startsellen; /* lengths of strings */
|
||||
int16 stopsellen;
|
||||
int16 fragdelimlen;
|
||||
} HeadlineParsedText;
|
||||
|
Loading…
Reference in New Issue
Block a user