From a8b5d6dc260b740b5d754c7be18321e546dc4408 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 31 Aug 2007 16:33:36 +0000 Subject: [PATCH] Place GiST and GIN text search indexes as secondary items under the main "index" entries for GIN/GiST. --- doc/src/sgml/textsearch.sgml | 69 ++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 04c97df2fa..f3e8d25e67 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -328,7 +328,7 @@ ORDER BY dlm DESC LIMIT 10; CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('english', body)); - + Notice that the 2-argument version of to_tsvector is used. Only text search functions which specify a configuration name can be used in expression indexes (). @@ -405,7 +405,7 @@ ORDER BY rank DESC LIMIT 10; column current anytime title or body changes. Keep in mind that, just like with expression indexes, it is important to specify the configuration name when creating text search data types - inside triggers so the column's contents are not affected by changes to + inside triggers so the column's contents are not affected by changes to default_text_search_config. @@ -448,13 +448,13 @@ SELECT to_tsvector('english', 'a fat cat sat on a mat - it ate a fat rats'); - + In the example above we see that the resulting tsvector does not contain the words a, on, or it, the word rats became rat, and the punctuation sign - was - ignored. - + ignored. + The to_tsvector function internally calls a parser @@ -487,31 +487,31 @@ SELECT to_tsvector('english', 'a fat cat sat on a mat - it ate a fat rats'); SELECT * FROM ts_debug('english','a fat cat sat on a mat - it ate a fat rats'); - Alias | Description | Token | Dictionaries | Lexized token + Alias | Description | Token | Dictionaries | Lexized token -------+---------------+-------+--------------+---------------- lword | Latin word | a | {english} | english: {} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | fat | {english} | english: {fat} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | cat | {english} | english: {cat} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | sat | {english} | english: {sat} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | on | {english} | english: {} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | a | {english} | english: {} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | mat | {english} | english: {mat} - blank | Space symbols | | | - blank | Space symbols | - | | + blank | Space symbols | | | + blank | Space symbols | - | | lword | Latin word | it | {english} | english: {} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | ate | {english} | english: {ate} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | a | {english} | english: {} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | fat | {english} | english: {fat} - blank | Space symbols | | | + blank | Space symbols | | | lword | Latin word | rats | {english} | english: {rat} (24 rows) @@ -688,7 +688,7 @@ SELECT * FROM ts_token_type('default'); {D-weight, C-weight, B-weight, A-weight} - + If no weights are provided, then these defaults are used: @@ -943,7 +943,7 @@ SELECT ts_headline('a b c', 'c'::tsquery); a b <b>c</b> SELECT ts_headline('a b c', 'c'::tsquery, 'StartSel=<,StopSel=>'); - ts_headline + ts_headline ------------- a b <c> @@ -989,7 +989,7 @@ ORDER BY rank DESC LIMIT 10) AS foo; - Some examples of normalization: + Some examples of normalization: @@ -998,7 +998,7 @@ ORDER BY rank DESC LIMIT 10) AS foo; Linguistic - ispell dictionaries try to reduce input words to a normalized form; stemmer dictionaries remove word endings - + Identical URL locations are identified and canonicalized: @@ -1113,7 +1113,7 @@ SELECT ts_lexize('english_stem', 'stars'); Stop Words - + Stop words are words which are very common, appear in almost every document, and have no discrimination value. Therefore, they can be ignored @@ -1207,7 +1207,7 @@ SELECT ts_lexize('public.simple_dict','The'); SELECT * FROM ts_debug('english','Paris'); - Alias | Description | Token | Dictionaries | Lexized token + Alias | Description | Token | Dictionaries | Lexized token -------+-------------+-------+----------------+---------------------- lword | Latin word | Paris | {english_stem} | english_stem: {pari} (1 row) @@ -1219,7 +1219,7 @@ ALTER TEXT SEARCH CONFIGURATION english ALTER MAPPING FOR lword WITH synonym, english_stem; SELECT * FROM ts_debug('english','Paris'); - Alias | Description | Token | Dictionaries | Lexized token + Alias | Description | Token | Dictionaries | Lexized token -------+-------------+-------+------------------------+------------------ lword | Latin word | Paris | {synonym,english_stem} | synonym: {paris} (1 row) @@ -1824,11 +1824,10 @@ SHOW default_text_search_config; GiST - CREATE INDEX name ON table USING gist(column); @@ -1851,11 +1850,11 @@ SHOW default_text_search_config; GIN - + CREATE INDEX name ON table USING gin(column); @@ -2079,7 +2078,7 @@ EXPLAIN SELECT * FROM apod WHERE textsearch @@@ to_tsquery('supernovae:a'); List of fulltext configurations Schema | Name | Description ----------+---------------------------- - fulltext | fulltext_cfg | + fulltext | fulltext_cfg | public | fulltext_cfg | @@ -2482,11 +2481,11 @@ dinit_intdict(PG_FUNCTION_ARGS) { PG_FREE_IF_COPY(in, 0); pcfg=cfg; - while (pcfg->key) + while (pcfg->key) { if (strcasecmp("MAXLEN", pcfg->key) == 0) d->maxlen=atoi(pcfg->value); - else if ( strcasecmp("REJECTLONG", pcfg->key) == 0) + else if ( strcasecmp("REJECTLONG", pcfg->key) == 0) { if ( strcasecmp("true", pcfg->value) == 0 ) d->rejectlong=true; @@ -2522,7 +2521,7 @@ dlexize_intdict(PG_FUNCTION_ARGS) if (PG_GETARG_INT32(2) > d->maxlen) { - if (d->rejectlong) + if (d->rejectlong) { /* stop, return void array */ pfree(txt); res[0].lexeme = NULL; @@ -2798,14 +2797,14 @@ Datum testprs_getlexeme(PG_FUNCTION_ARGS) /* blank type */ type = 12; /* go to the next non-white-space character */ - while ((pst->buffer)[pst->pos] == ' ' && + while ((pst->buffer)[pst->pos] == ' ' && pst->pos < pst->len) (pst->pos)++; } else { /* word type */ type = 3; /* go to the next white-space character */ - while ((pst->buffer)[pst->pos] != ' ' && + while ((pst->buffer)[pst->pos] != ' ' && pst->pos < pst->len) (pst->pos)++; }