2003-07-21 18:27:44 +08:00
|
|
|
/*
|
|
|
|
* IO definitions for tsquery and mtsquery. This type
|
|
|
|
* are identical, but for parsing mtsquery used parser for text
|
|
|
|
* and also morphology is used.
|
|
|
|
* Internal structure:
|
|
|
|
* query tree, then string with original value.
|
|
|
|
* Query tree with plain view. It's means that in array of nodes
|
|
|
|
* right child is always next and left position = item+item->left
|
|
|
|
* Teodor Sigaev <teodor@sigaev.ru>
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include <float.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
|
|
|
|
#include "access/gist.h"
|
|
|
|
#include "access/itup.h"
|
|
|
|
#include "access/rtree.h"
|
|
|
|
#include "utils/array.h"
|
|
|
|
#include "utils/builtins.h"
|
|
|
|
#include "storage/bufpage.h"
|
|
|
|
|
|
|
|
#include "ts_cfg.h"
|
|
|
|
#include "tsvector.h"
|
|
|
|
#include "crc32.h"
|
|
|
|
#include "query.h"
|
|
|
|
#include "rewrite.h"
|
|
|
|
#include "common.h"
|
|
|
|
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(tsquery_in);
|
|
|
|
Datum tsquery_in(PG_FUNCTION_ARGS);
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(tsquery_out);
|
|
|
|
Datum tsquery_out(PG_FUNCTION_ARGS);
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(exectsq);
|
|
|
|
Datum exectsq(PG_FUNCTION_ARGS);
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(rexectsq);
|
|
|
|
Datum rexectsq(PG_FUNCTION_ARGS);
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(tsquerytree);
|
|
|
|
Datum tsquerytree(PG_FUNCTION_ARGS);
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(to_tsquery);
|
|
|
|
Datum to_tsquery(PG_FUNCTION_ARGS);
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(to_tsquery_name);
|
|
|
|
Datum to_tsquery_name(PG_FUNCTION_ARGS);
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(to_tsquery_current);
|
|
|
|
Datum to_tsquery_current(PG_FUNCTION_ARGS);
|
|
|
|
|
|
|
|
#define END 0
|
|
|
|
#define ERR 1
|
|
|
|
#define VAL 2
|
|
|
|
#define OPR 3
|
|
|
|
#define OPEN 4
|
|
|
|
#define CLOSE 5
|
|
|
|
#define VALTRUE 6 /* for stop words */
|
|
|
|
#define VALFALSE 7
|
|
|
|
|
|
|
|
/* parser's states */
|
|
|
|
#define WAITOPERAND 1
|
|
|
|
#define WAITOPERATOR 2
|
|
|
|
|
|
|
|
/*
|
|
|
|
* node of query tree, also used
|
|
|
|
* for storing polish notation in parser
|
|
|
|
*/
|
|
|
|
typedef struct NODE
|
|
|
|
{
|
|
|
|
int2 weight;
|
|
|
|
int2 type;
|
|
|
|
int4 val;
|
|
|
|
int2 distance;
|
|
|
|
int2 length;
|
|
|
|
struct NODE *next;
|
|
|
|
} NODE;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
char *buf;
|
|
|
|
int4 state;
|
|
|
|
int4 count;
|
|
|
|
/* reverse polish notation in list (for temprorary usage) */
|
|
|
|
NODE *str;
|
|
|
|
/* number in str */
|
|
|
|
int4 num;
|
|
|
|
|
|
|
|
/* user-friendly operand */
|
|
|
|
int4 lenop;
|
|
|
|
int4 sumlen;
|
|
|
|
char *op;
|
|
|
|
char *curop;
|
|
|
|
|
|
|
|
/* state for value's parser */
|
|
|
|
TI_IN_STATE valstate;
|
|
|
|
|
|
|
|
/* tscfg */
|
2003-08-04 08:43:34 +08:00
|
|
|
int cfg_id;
|
2003-07-21 18:27:44 +08:00
|
|
|
} QPRS_STATE;
|
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
static char *
|
|
|
|
get_weight(char *buf, int2 *weight)
|
|
|
|
{
|
2003-07-21 18:27:44 +08:00
|
|
|
*weight = 0;
|
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
if (*buf != ':')
|
2003-07-21 18:27:44 +08:00
|
|
|
return buf;
|
|
|
|
|
|
|
|
buf++;
|
2003-08-04 08:43:34 +08:00
|
|
|
while (*buf)
|
|
|
|
{
|
|
|
|
switch (tolower(*buf))
|
|
|
|
{
|
|
|
|
case 'a':
|
|
|
|
*weight |= 1 << 3;
|
|
|
|
break;
|
|
|
|
case 'b':
|
|
|
|
*weight |= 1 << 2;
|
|
|
|
break;
|
|
|
|
case 'c':
|
|
|
|
*weight |= 1 << 1;
|
|
|
|
break;
|
|
|
|
case 'd':
|
|
|
|
*weight |= 1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return buf;
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
|
|
|
buf++;
|
|
|
|
}
|
2003-08-04 08:43:34 +08:00
|
|
|
|
2003-07-21 18:27:44 +08:00
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get token from query string
|
|
|
|
*/
|
|
|
|
static int4
|
|
|
|
gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
|
|
|
|
{
|
|
|
|
while (1)
|
|
|
|
{
|
|
|
|
switch (state->state)
|
|
|
|
{
|
|
|
|
case WAITOPERAND:
|
|
|
|
if (*(state->buf) == '!')
|
|
|
|
{
|
|
|
|
(state->buf)++;
|
|
|
|
*val = (int4) '!';
|
|
|
|
return OPR;
|
|
|
|
}
|
|
|
|
else if (*(state->buf) == '(')
|
|
|
|
{
|
|
|
|
state->count++;
|
|
|
|
(state->buf)++;
|
|
|
|
return OPEN;
|
2003-08-04 08:43:34 +08:00
|
|
|
}
|
|
|
|
else if (*(state->buf) == ':')
|
|
|
|
{
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("error at start of operand")));
|
2003-08-04 08:43:34 +08:00
|
|
|
}
|
|
|
|
else if (*(state->buf) != ' ')
|
|
|
|
{
|
2003-07-21 18:27:44 +08:00
|
|
|
state->valstate.prsbuf = state->buf;
|
|
|
|
state->state = WAITOPERATOR;
|
|
|
|
if (gettoken_tsvector(&(state->valstate)))
|
|
|
|
{
|
|
|
|
*strval = state->valstate.word;
|
|
|
|
*lenval = state->valstate.curpos - state->valstate.word;
|
|
|
|
state->buf = get_weight(state->valstate.prsbuf, weight);
|
|
|
|
return VAL;
|
|
|
|
}
|
|
|
|
else
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("no operand")));
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case WAITOPERATOR:
|
|
|
|
if (*(state->buf) == '&' || *(state->buf) == '|')
|
|
|
|
{
|
|
|
|
state->state = WAITOPERAND;
|
|
|
|
*val = (int4) *(state->buf);
|
|
|
|
(state->buf)++;
|
|
|
|
return OPR;
|
|
|
|
}
|
|
|
|
else if (*(state->buf) == ')')
|
|
|
|
{
|
|
|
|
(state->buf)++;
|
|
|
|
state->count--;
|
|
|
|
return (state->count < 0) ? ERR : CLOSE;
|
|
|
|
}
|
|
|
|
else if (*(state->buf) == '\0')
|
|
|
|
return (state->count) ? ERR : END;
|
|
|
|
else if (*(state->buf) != ' ')
|
|
|
|
return ERR;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return ERR;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
(state->buf)++;
|
|
|
|
}
|
|
|
|
return END;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* push new one in polish notation reverse view
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
|
|
|
|
{
|
|
|
|
NODE *tmp = (NODE *) palloc(sizeof(NODE));
|
|
|
|
|
|
|
|
tmp->weight = weight;
|
|
|
|
tmp->type = type;
|
|
|
|
tmp->val = val;
|
|
|
|
if (distance >= MAXSTRPOS)
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("value is too big")));
|
2003-07-21 18:27:44 +08:00
|
|
|
if (lenval >= MAXSTRLEN)
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("operand is too long")));
|
2003-07-21 18:27:44 +08:00
|
|
|
tmp->distance = distance;
|
|
|
|
tmp->length = lenval;
|
|
|
|
tmp->next = state->str;
|
|
|
|
state->str = tmp;
|
|
|
|
state->num++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This function is used for tsquery parsing
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
|
|
|
|
{
|
|
|
|
if (lenval >= MAXSTRLEN)
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("word is too long")));
|
2003-07-21 18:27:44 +08:00
|
|
|
|
|
|
|
pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
|
|
|
|
state->curop - state->op, lenval, weight);
|
|
|
|
|
|
|
|
while (state->curop - state->op + lenval + 1 >= state->lenop)
|
|
|
|
{
|
|
|
|
int4 tmp = state->curop - state->op;
|
|
|
|
|
|
|
|
state->lenop *= 2;
|
|
|
|
state->op = (char *) repalloc((void *) state->op, state->lenop);
|
|
|
|
state->curop = state->op + tmp;
|
|
|
|
}
|
|
|
|
memcpy((void *) state->curop, (void *) strval, lenval);
|
|
|
|
state->curop += lenval;
|
|
|
|
*(state->curop) = '\0';
|
|
|
|
state->curop++;
|
|
|
|
state->sumlen += lenval + 1;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This function is used for morph parsing
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
|
|
|
|
{
|
|
|
|
int4 count = 0;
|
2003-08-04 08:43:34 +08:00
|
|
|
PRSTEXT prs;
|
2003-07-21 18:27:44 +08:00
|
|
|
|
|
|
|
prs.lenwords = 32;
|
|
|
|
prs.curwords = 0;
|
|
|
|
prs.pos = 0;
|
|
|
|
prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
|
|
|
|
|
|
|
|
parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
|
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
for (count = 0; count < prs.curwords; count++)
|
|
|
|
{
|
2003-07-21 18:27:44 +08:00
|
|
|
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
|
2003-08-04 08:43:34 +08:00
|
|
|
pfree(prs.words[count].word);
|
2003-07-21 18:27:44 +08:00
|
|
|
if (count)
|
2003-08-04 08:43:34 +08:00
|
|
|
pushquery(state, OPR, (int4) '&', 0, 0, 0);
|
|
|
|
}
|
2003-07-21 18:27:44 +08:00
|
|
|
pfree(prs.words);
|
|
|
|
|
|
|
|
/* XXX */
|
2003-08-04 08:43:34 +08:00
|
|
|
if (prs.curwords == 0)
|
2003-07-21 18:27:44 +08:00
|
|
|
pushval_asis(state, VALTRUE, 0, 0, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
#define STACKDEPTH 32
|
|
|
|
/*
|
|
|
|
* make polish notaion of query
|
|
|
|
*/
|
|
|
|
static int4
|
|
|
|
makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
|
|
|
|
{
|
|
|
|
int4 val,
|
|
|
|
type;
|
|
|
|
int4 lenval;
|
|
|
|
char *strval;
|
|
|
|
int4 stack[STACKDEPTH];
|
|
|
|
int4 lenstack = 0;
|
|
|
|
int2 weight;
|
|
|
|
|
|
|
|
while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
|
|
|
|
{
|
|
|
|
switch (type)
|
|
|
|
{
|
|
|
|
case VAL:
|
|
|
|
(*pushval) (state, VAL, strval, lenval, weight);
|
|
|
|
while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
|
|
|
|
stack[lenstack - 1] == (int4) '!'))
|
|
|
|
{
|
|
|
|
lenstack--;
|
|
|
|
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OPR:
|
|
|
|
if (lenstack && val == (int4) '|')
|
|
|
|
pushquery(state, OPR, val, 0, 0, 0);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (lenstack == STACKDEPTH)
|
2003-07-25 01:52:50 +08:00
|
|
|
/* internal error */
|
|
|
|
elog(ERROR, "stack too short");
|
2003-07-21 18:27:44 +08:00
|
|
|
stack[lenstack] = val;
|
|
|
|
lenstack++;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OPEN:
|
|
|
|
if (makepol(state, pushval) == ERR)
|
|
|
|
return ERR;
|
|
|
|
if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
|
|
|
|
stack[lenstack - 1] == (int4) '!'))
|
|
|
|
{
|
|
|
|
lenstack--;
|
|
|
|
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case CLOSE:
|
|
|
|
while (lenstack)
|
|
|
|
{
|
|
|
|
lenstack--;
|
|
|
|
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
|
|
|
|
};
|
|
|
|
return END;
|
|
|
|
break;
|
|
|
|
case ERR:
|
|
|
|
default:
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("syntax error")));
|
2003-07-21 18:27:44 +08:00
|
|
|
return ERR;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
while (lenstack)
|
|
|
|
{
|
|
|
|
lenstack--;
|
|
|
|
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
|
|
|
|
};
|
|
|
|
return END;
|
|
|
|
}
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
WordEntry *arrb;
|
|
|
|
WordEntry *arre;
|
|
|
|
char *values;
|
|
|
|
char *operand;
|
|
|
|
} CHKVAL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* compare 2 string values
|
|
|
|
*/
|
|
|
|
static int4
|
|
|
|
ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
|
|
|
|
{
|
|
|
|
if (ptr->len == item->length)
|
|
|
|
return strncmp(
|
|
|
|
&(chkval->values[ptr->pos]),
|
|
|
|
&(chkval->operand[item->distance]),
|
|
|
|
item->length);
|
|
|
|
|
|
|
|
return (ptr->len > item->length) ? 1 : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* check weight info
|
|
|
|
*/
|
|
|
|
static bool
|
2003-08-04 08:43:34 +08:00
|
|
|
checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item)
|
|
|
|
{
|
|
|
|
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
|
|
|
|
uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
|
|
|
|
|
|
|
|
while (len--)
|
|
|
|
{
|
|
|
|
if (item->weight & (1 << ptr->weight))
|
2003-07-21 18:27:44 +08:00
|
|
|
return true;
|
|
|
|
ptr++;
|
|
|
|
}
|
2003-08-04 08:43:34 +08:00
|
|
|
return false;
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* is there value 'val' in array or not ?
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
checkcondition_str(void *checkval, ITEM * val)
|
|
|
|
{
|
|
|
|
WordEntry *StopLow = ((CHKVAL *) checkval)->arrb;
|
|
|
|
WordEntry *StopHigh = ((CHKVAL *) checkval)->arre;
|
|
|
|
WordEntry *StopMiddle;
|
|
|
|
int difference;
|
|
|
|
|
|
|
|
/* Loop invariant: StopLow <= val < StopHigh */
|
|
|
|
|
|
|
|
while (StopLow < StopHigh)
|
|
|
|
{
|
|
|
|
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
|
|
|
difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
|
|
|
|
if (difference == 0)
|
2003-08-04 08:43:34 +08:00
|
|
|
return (val->weight && StopMiddle->haspos) ?
|
|
|
|
checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
|
2003-07-21 18:27:44 +08:00
|
|
|
else if (difference < 0)
|
|
|
|
StopLow = StopMiddle + 1;
|
|
|
|
else
|
|
|
|
StopHigh = StopMiddle;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* check for boolean condition
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
|
|
|
|
{
|
|
|
|
if (curitem->type == VAL)
|
|
|
|
return (*chkcond) (checkval, curitem);
|
|
|
|
else if (curitem->val == (int4) '!')
|
|
|
|
{
|
|
|
|
return (calcnot) ?
|
|
|
|
((TS_execute(curitem + 1, checkval, calcnot, chkcond)) ? false : true)
|
|
|
|
: true;
|
|
|
|
}
|
|
|
|
else if (curitem->val == (int4) '&')
|
|
|
|
{
|
|
|
|
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
|
|
|
|
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{ /* |-operator */
|
|
|
|
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* boolean operations
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
rexectsq(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return DirectFunctionCall2(
|
|
|
|
exectsq,
|
|
|
|
PG_GETARG_DATUM(1),
|
|
|
|
PG_GETARG_DATUM(0)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
exectsq(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2003-08-04 08:43:34 +08:00
|
|
|
tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
|
2003-07-21 18:27:44 +08:00
|
|
|
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
|
|
|
|
CHKVAL chkval;
|
|
|
|
bool result;
|
|
|
|
|
|
|
|
if (!val->size || !query->size)
|
|
|
|
{
|
|
|
|
PG_FREE_IF_COPY(val, 0);
|
|
|
|
PG_FREE_IF_COPY(query, 1);
|
|
|
|
PG_RETURN_BOOL(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
chkval.arrb = ARRPTR(val);
|
|
|
|
chkval.arre = chkval.arrb + val->size;
|
|
|
|
chkval.values = STRPTR(val);
|
|
|
|
chkval.operand = GETOPERAND(query);
|
|
|
|
result = TS_execute(
|
2003-08-04 08:43:34 +08:00
|
|
|
GETQUERY(query),
|
|
|
|
&chkval,
|
|
|
|
true,
|
|
|
|
checkcondition_str
|
2003-07-21 18:27:44 +08:00
|
|
|
);
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(val, 0);
|
|
|
|
PG_FREE_IF_COPY(query, 1);
|
|
|
|
PG_RETURN_BOOL(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* find left operand in polish notation view
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
findoprnd(ITEM * ptr, int4 *pos)
|
|
|
|
{
|
|
|
|
#ifdef BS_DEBUG
|
|
|
|
elog(DEBUG3, (ptr[*pos].type == OPR) ?
|
2003-07-25 01:52:50 +08:00
|
|
|
"%d %c" : "%d %d", *pos, ptr[*pos].val);
|
2003-07-21 18:27:44 +08:00
|
|
|
#endif
|
|
|
|
if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
|
|
|
|
{
|
|
|
|
ptr[*pos].left = 0;
|
|
|
|
(*pos)++;
|
|
|
|
}
|
|
|
|
else if (ptr[*pos].val == (int4) '!')
|
|
|
|
{
|
|
|
|
ptr[*pos].left = 1;
|
|
|
|
(*pos)++;
|
|
|
|
findoprnd(ptr, pos);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ITEM *curitem = &ptr[*pos];
|
|
|
|
int4 tmp = *pos;
|
|
|
|
|
|
|
|
(*pos)++;
|
|
|
|
findoprnd(ptr, pos);
|
|
|
|
curitem->left = *pos - tmp;
|
|
|
|
findoprnd(ptr, pos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* input
|
|
|
|
*/
|
|
|
|
static QUERYTYPE *
|
2003-08-04 08:43:34 +08:00
|
|
|
queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
|
2003-07-21 18:27:44 +08:00
|
|
|
{
|
|
|
|
QPRS_STATE state;
|
|
|
|
int4 i;
|
|
|
|
QUERYTYPE *query;
|
|
|
|
int4 commonlen;
|
|
|
|
ITEM *ptr;
|
|
|
|
NODE *tmp;
|
|
|
|
int4 pos = 0;
|
|
|
|
|
|
|
|
#ifdef BS_DEBUG
|
|
|
|
char pbuf[16384],
|
|
|
|
*cur;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* init state */
|
|
|
|
state.buf = buf;
|
|
|
|
state.state = WAITOPERAND;
|
|
|
|
state.count = 0;
|
|
|
|
state.num = 0;
|
|
|
|
state.str = NULL;
|
2003-08-04 08:43:34 +08:00
|
|
|
state.cfg_id = cfg_id;
|
2003-07-21 18:27:44 +08:00
|
|
|
|
|
|
|
/* init value parser's state */
|
|
|
|
state.valstate.oprisdelim = true;
|
|
|
|
state.valstate.len = 32;
|
|
|
|
state.valstate.word = (char *) palloc(state.valstate.len);
|
|
|
|
|
|
|
|
/* init list of operand */
|
|
|
|
state.sumlen = 0;
|
|
|
|
state.lenop = 64;
|
|
|
|
state.curop = state.op = (char *) palloc(state.lenop);
|
|
|
|
*(state.curop) = '\0';
|
|
|
|
|
|
|
|
/* parse query & make polish notation (postfix, but in reverse order) */
|
|
|
|
makepol(&state, pushval);
|
|
|
|
pfree(state.valstate.word);
|
|
|
|
if (!state.num)
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("empty query")));
|
2003-07-21 18:27:44 +08:00
|
|
|
|
|
|
|
/* make finish struct */
|
|
|
|
commonlen = COMPUTESIZE(state.num, state.sumlen);
|
|
|
|
query = (QUERYTYPE *) palloc(commonlen);
|
|
|
|
query->len = commonlen;
|
|
|
|
query->size = state.num;
|
|
|
|
ptr = GETQUERY(query);
|
|
|
|
|
|
|
|
/* set item in polish notation */
|
|
|
|
for (i = 0; i < state.num; i++)
|
|
|
|
{
|
|
|
|
ptr[i].weight = state.str->weight;
|
|
|
|
ptr[i].type = state.str->type;
|
|
|
|
ptr[i].val = state.str->val;
|
|
|
|
ptr[i].distance = state.str->distance;
|
|
|
|
ptr[i].length = state.str->length;
|
|
|
|
tmp = state.str->next;
|
|
|
|
pfree(state.str);
|
|
|
|
state.str = tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* set user friendly-operand view */
|
|
|
|
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
|
|
|
|
pfree(state.op);
|
|
|
|
|
|
|
|
/* set left operand's position for every operator */
|
|
|
|
pos = 0;
|
|
|
|
findoprnd(ptr, &pos);
|
|
|
|
|
|
|
|
#ifdef BS_DEBUG
|
|
|
|
cur = pbuf;
|
|
|
|
*cur = '\0';
|
|
|
|
for (i = 0; i < query->size; i++)
|
|
|
|
{
|
|
|
|
if (ptr[i].type == OPR)
|
|
|
|
sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
|
|
|
|
else
|
|
|
|
sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
|
|
|
|
cur = strchr(cur, '\0');
|
|
|
|
}
|
|
|
|
elog(DEBUG3, "POR: %s", pbuf);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return query;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* in without morphology
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsquery_in(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* out function
|
|
|
|
*/
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
ITEM *curpol;
|
|
|
|
char *buf;
|
|
|
|
char *cur;
|
|
|
|
char *op;
|
|
|
|
int4 buflen;
|
|
|
|
} INFIX;
|
|
|
|
|
|
|
|
#define RESIZEBUF(inf,addsize) \
|
|
|
|
while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
|
|
|
|
{ \
|
|
|
|
int4 len = inf->cur - inf->buf; \
|
|
|
|
inf->buflen *= 2; \
|
|
|
|
inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
|
|
|
|
inf->cur = inf->buf + len; \
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* recursive walk on tree and print it in
|
|
|
|
* infix (human-readable) view
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
infix(INFIX * in, bool first)
|
|
|
|
{
|
|
|
|
if (in->curpol->type == VAL)
|
|
|
|
{
|
|
|
|
char *op = in->op + in->curpol->distance;
|
|
|
|
|
|
|
|
RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
|
|
|
|
*(in->cur) = '\'';
|
|
|
|
in->cur++;
|
|
|
|
while (*op)
|
|
|
|
{
|
|
|
|
if (*op == '\'')
|
|
|
|
{
|
|
|
|
*(in->cur) = '\\';
|
|
|
|
in->cur++;
|
|
|
|
}
|
|
|
|
*(in->cur) = *op;
|
|
|
|
op++;
|
|
|
|
in->cur++;
|
|
|
|
}
|
|
|
|
*(in->cur) = '\'';
|
|
|
|
in->cur++;
|
2003-08-04 08:43:34 +08:00
|
|
|
if (in->curpol->weight)
|
|
|
|
{
|
|
|
|
*(in->cur) = ':';
|
|
|
|
in->cur++;
|
|
|
|
if (in->curpol->weight & (1 << 3))
|
|
|
|
{
|
|
|
|
*(in->cur) = 'A';
|
|
|
|
in->cur++;
|
|
|
|
}
|
|
|
|
if (in->curpol->weight & (1 << 2))
|
|
|
|
{
|
|
|
|
*(in->cur) = 'B';
|
|
|
|
in->cur++;
|
|
|
|
}
|
|
|
|
if (in->curpol->weight & (1 << 1))
|
|
|
|
{
|
|
|
|
*(in->cur) = 'C';
|
|
|
|
in->cur++;
|
|
|
|
}
|
|
|
|
if (in->curpol->weight & 1)
|
|
|
|
{
|
|
|
|
*(in->cur) = 'D';
|
|
|
|
in->cur++;
|
|
|
|
}
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
|
|
|
*(in->cur) = '\0';
|
|
|
|
in->curpol++;
|
|
|
|
}
|
|
|
|
else if (in->curpol->val == (int4) '!')
|
|
|
|
{
|
|
|
|
bool isopr = false;
|
|
|
|
|
|
|
|
RESIZEBUF(in, 1);
|
|
|
|
*(in->cur) = '!';
|
|
|
|
in->cur++;
|
|
|
|
*(in->cur) = '\0';
|
|
|
|
in->curpol++;
|
|
|
|
if (in->curpol->type == OPR)
|
|
|
|
{
|
|
|
|
isopr = true;
|
|
|
|
RESIZEBUF(in, 2);
|
|
|
|
sprintf(in->cur, "( ");
|
|
|
|
in->cur = strchr(in->cur, '\0');
|
|
|
|
}
|
|
|
|
infix(in, isopr);
|
|
|
|
if (isopr)
|
|
|
|
{
|
|
|
|
RESIZEBUF(in, 2);
|
|
|
|
sprintf(in->cur, " )");
|
|
|
|
in->cur = strchr(in->cur, '\0');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int4 op = in->curpol->val;
|
|
|
|
INFIX nrm;
|
|
|
|
|
|
|
|
in->curpol++;
|
|
|
|
if (op == (int4) '|' && !first)
|
|
|
|
{
|
|
|
|
RESIZEBUF(in, 2);
|
|
|
|
sprintf(in->cur, "( ");
|
|
|
|
in->cur = strchr(in->cur, '\0');
|
|
|
|
}
|
|
|
|
|
|
|
|
nrm.curpol = in->curpol;
|
|
|
|
nrm.op = in->op;
|
|
|
|
nrm.buflen = 16;
|
|
|
|
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
|
|
|
|
|
|
|
/* get right operand */
|
|
|
|
infix(&nrm, false);
|
|
|
|
|
|
|
|
/* get & print left operand */
|
|
|
|
in->curpol = nrm.curpol;
|
|
|
|
infix(in, false);
|
|
|
|
|
|
|
|
/* print operator & right operand */
|
|
|
|
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
|
|
|
|
sprintf(in->cur, " %c %s", op, nrm.buf);
|
|
|
|
in->cur = strchr(in->cur, '\0');
|
|
|
|
pfree(nrm.buf);
|
|
|
|
|
|
|
|
if (op == (int4) '|' && !first)
|
|
|
|
{
|
|
|
|
RESIZEBUF(in, 2);
|
|
|
|
sprintf(in->cur, " )");
|
|
|
|
in->cur = strchr(in->cur, '\0');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsquery_out(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
|
|
|
|
INFIX nrm;
|
|
|
|
|
|
|
|
if (query->size == 0)
|
|
|
|
{
|
|
|
|
char *b = palloc(1);
|
|
|
|
|
|
|
|
*b = '\0';
|
|
|
|
PG_RETURN_POINTER(b);
|
|
|
|
}
|
|
|
|
nrm.curpol = GETQUERY(query);
|
|
|
|
nrm.buflen = 32;
|
|
|
|
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
|
|
|
*(nrm.cur) = '\0';
|
|
|
|
nrm.op = GETOPERAND(query);
|
|
|
|
infix(&nrm, true);
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(query, 0);
|
|
|
|
PG_RETURN_POINTER(nrm.buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* debug function, used only for view query
|
|
|
|
* which will be executed in non-leaf pages in index
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsquerytree(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
|
|
|
|
INFIX nrm;
|
|
|
|
text *res;
|
|
|
|
ITEM *q;
|
|
|
|
int4 len;
|
|
|
|
|
|
|
|
|
|
|
|
if (query->size == 0)
|
|
|
|
{
|
|
|
|
res = (text *) palloc(VARHDRSZ);
|
|
|
|
VARATT_SIZEP(res) = VARHDRSZ;
|
|
|
|
PG_RETURN_POINTER(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
q = clean_NOT_v2(GETQUERY(query), &len);
|
|
|
|
|
|
|
|
if (!q)
|
|
|
|
{
|
|
|
|
res = (text *) palloc(1 + VARHDRSZ);
|
|
|
|
VARATT_SIZEP(res) = 1 + VARHDRSZ;
|
|
|
|
*((char *) VARDATA(res)) = 'T';
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
nrm.curpol = q;
|
|
|
|
nrm.buflen = 32;
|
|
|
|
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
|
|
|
*(nrm.cur) = '\0';
|
|
|
|
nrm.op = GETOPERAND(query);
|
|
|
|
infix(&nrm, true);
|
|
|
|
|
|
|
|
res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
|
|
|
|
VARATT_SIZEP(res) = nrm.cur - nrm.buf + VARHDRSZ;
|
|
|
|
strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
|
|
|
|
pfree(q);
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(query, 0);
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
2003-08-04 08:43:34 +08:00
|
|
|
to_tsquery(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *in = PG_GETARG_TEXT_P(1);
|
|
|
|
char *str;
|
2003-07-21 18:27:44 +08:00
|
|
|
QUERYTYPE *query;
|
|
|
|
ITEM *res;
|
|
|
|
int4 len;
|
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
str = text2char(in);
|
|
|
|
PG_FREE_IF_COPY(in, 1);
|
2003-07-21 18:27:44 +08:00
|
|
|
|
|
|
|
query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
|
|
|
|
res = clean_fakeval_v2(GETQUERY(query), &len);
|
|
|
|
if (!res)
|
|
|
|
{
|
|
|
|
query->len = HDRSIZEQT;
|
|
|
|
query->size = 0;
|
|
|
|
PG_RETURN_POINTER(query);
|
|
|
|
}
|
|
|
|
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
|
|
|
|
pfree(res);
|
|
|
|
PG_RETURN_POINTER(query);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
2003-08-04 08:43:34 +08:00
|
|
|
to_tsquery_name(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *name = PG_GETARG_TEXT_P(0);
|
|
|
|
Datum res = DirectFunctionCall2(
|
|
|
|
to_tsquery,
|
|
|
|
Int32GetDatum(name2id_cfg(name)),
|
|
|
|
PG_GETARG_DATUM(1)
|
2003-07-21 18:27:44 +08:00
|
|
|
);
|
2003-08-04 08:43:34 +08:00
|
|
|
|
|
|
|
PG_FREE_IF_COPY(name, 1);
|
2003-07-21 18:27:44 +08:00
|
|
|
PG_RETURN_DATUM(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
2003-08-04 08:43:34 +08:00
|
|
|
to_tsquery_current(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
PG_RETURN_DATUM(DirectFunctionCall2(
|
|
|
|
to_tsquery,
|
|
|
|
Int32GetDatum(get_currcfg()),
|
|
|
|
PG_GETARG_DATUM(0)
|
|
|
|
));
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|