From 5e2707c45fb9ef4dd1250560a049265721b60907 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Fri, 27 Jan 2006 16:32:31 +0000 Subject: [PATCH] Snowball multibyte. It's a pity, but the snowball sources are very different for multibyte and single-byte encodings, so we should have a snowball stemmer for each encoding. I hope this finalizes the multibyte support work in tsearch2, but testing is needed... --- contrib/tsearch2/Makefile | 4 +- contrib/tsearch2/dict_snowball.c | 44 +- contrib/tsearch2/expected/tsearch2.out | 22 +- contrib/tsearch2/snowball/Makefile | 4 +- contrib/tsearch2/snowball/russian_stem_UTF8.c | 709 ++++++++++++++++++ contrib/tsearch2/snowball/russian_stem_UTF8.h | 16 + contrib/tsearch2/stopword/russian.stop.utf8 | 151 ++++ contrib/tsearch2/tsearch.sql.in | 51 +- 8 files changed, 974 insertions(+), 27 deletions(-) create mode 100644 contrib/tsearch2/snowball/russian_stem_UTF8.c create mode 100644 contrib/tsearch2/snowball/russian_stem_UTF8.h create mode 100644 contrib/tsearch2/stopword/russian.stop.utf8 diff --git a/contrib/tsearch2/Makefile b/contrib/tsearch2/Makefile index 2ef904ddb4..b7e4915ce7 100644 --- a/contrib/tsearch2/Makefile +++ b/contrib/tsearch2/Makefile @@ -1,4 +1,4 @@ -# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.12 2005/11/21 12:27:57 teodor Exp $ +# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.13 2006/01/27 16:32:31 teodor Exp $ MODULE_big = tsearch2 OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \ @@ -16,7 +16,7 @@ OBJS += $(SUBDIROBJS) PG_CPPFLAGS = -I$(srcdir)/snowball -I$(srcdir)/ispell -I$(srcdir)/wordparser -DATA = stopword/english.stop stopword/russian.stop +DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8 DATA_built = tsearch2.sql untsearch2.sql DOCS = README.tsearch2 REGRESS = tsearch2 diff --git a/contrib/tsearch2/dict_snowball.c b/contrib/tsearch2/dict_snowball.c index bbd44246b8..92e25b6e2e 100644 --- a/contrib/tsearch2/dict_snowball.c +++ b/contrib/tsearch2/dict_snowball.c @@ -10,6 
+10,7 @@ #include "snowball/header.h" #include "snowball/english_stem.h" #include "snowball/russian_stem.h" +#include "snowball/russian_stem_UTF8.h" #include "ts_locale.h" typedef struct @@ -23,8 +24,11 @@ typedef struct PG_FUNCTION_INFO_V1(snb_en_init); Datum snb_en_init(PG_FUNCTION_ARGS); -PG_FUNCTION_INFO_V1(snb_ru_init); -Datum snb_ru_init(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(snb_ru_init_koi8); +Datum snb_ru_init_koi8(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(snb_ru_init_utf8); +Datum snb_ru_init_utf8(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(snb_lexize); Datum snb_lexize(PG_FUNCTION_ARGS); @@ -64,7 +68,7 @@ snb_en_init(PG_FUNCTION_ARGS) } Datum -snb_ru_init(PG_FUNCTION_ARGS) +snb_ru_init_koi8(PG_FUNCTION_ARGS) { DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball)); @@ -97,6 +101,40 @@ snb_ru_init(PG_FUNCTION_ARGS) PG_RETURN_POINTER(d); } +Datum +snb_ru_init_utf8(PG_FUNCTION_ARGS) +{ + DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball)); + + if (!d) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + memset(d, 0, sizeof(DictSnowball)); + d->stoplist.wordop = lowerstr; + + if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL) + { + text *in = PG_GETARG_TEXT_P(0); + + readstoplist(in, &(d->stoplist)); + sortstoplist(&(d->stoplist)); + PG_FREE_IF_COPY(in, 0); + } + + d->z = russian_UTF_8_create_env(); + if (!d->z) + { + freestoplist(&(d->stoplist)); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + } + d->stem = russian_UTF_8_stem; + + PG_RETURN_POINTER(d); +} + Datum snb_lexize(PG_FUNCTION_ARGS) { diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out index a98c2216a8..67b6576e28 100644 --- a/contrib/tsearch2/expected/tsearch2.out +++ b/contrib/tsearch2/expected/tsearch2.out @@ -4,21 +4,21 @@ -- \set ECHO none psql:tsearch2.sql:13: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_dict_pkey" for table "pg_ts_dict" 
-psql:tsearch2.sql:145: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser" -psql:tsearch2.sql:244: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg" -psql:tsearch2.sql:251: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap" -psql:tsearch2.sql:337: NOTICE: type "tsvector" is not yet defined +psql:tsearch2.sql:158: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser" +psql:tsearch2.sql:257: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg" +psql:tsearch2.sql:264: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap" +psql:tsearch2.sql:370: NOTICE: type "tsvector" is not yet defined DETAIL: Creating a shell type definition. -psql:tsearch2.sql:342: NOTICE: argument type tsvector is only a shell -psql:tsearch2.sql:396: NOTICE: type "tsquery" is not yet defined +psql:tsearch2.sql:375: NOTICE: argument type tsvector is only a shell +psql:tsearch2.sql:429: NOTICE: type "tsquery" is not yet defined DETAIL: Creating a shell type definition. -psql:tsearch2.sql:401: NOTICE: argument type tsquery is only a shell -psql:tsearch2.sql:559: NOTICE: type "gtsvector" is not yet defined +psql:tsearch2.sql:434: NOTICE: argument type tsquery is only a shell +psql:tsearch2.sql:592: NOTICE: type "gtsvector" is not yet defined DETAIL: Creating a shell type definition. -psql:tsearch2.sql:564: NOTICE: argument type gtsvector is only a shell -psql:tsearch2.sql:1054: NOTICE: type "gtsq" is not yet defined +psql:tsearch2.sql:597: NOTICE: argument type gtsvector is only a shell +psql:tsearch2.sql:1087: NOTICE: type "gtsq" is not yet defined DETAIL: Creating a shell type definition. 
-psql:tsearch2.sql:1059: NOTICE: argument type gtsq is only a shell +psql:tsearch2.sql:1092: NOTICE: argument type gtsq is only a shell --tsvector SELECT '1'::tsvector; tsvector diff --git a/contrib/tsearch2/snowball/Makefile b/contrib/tsearch2/snowball/Makefile index bb107a1ff9..aa94658f65 100644 --- a/contrib/tsearch2/snowball/Makefile +++ b/contrib/tsearch2/snowball/Makefile @@ -1,6 +1,6 @@ -# $PostgreSQL: pgsql/contrib/tsearch2/snowball/Makefile,v 1.8 2005/10/18 01:30:48 tgl Exp $ +# $PostgreSQL: pgsql/contrib/tsearch2/snowball/Makefile,v 1.9 2006/01/27 16:32:31 teodor Exp $ -SUBOBJS = english_stem.o api.o russian_stem.o utilities.o +SUBOBJS = english_stem.o api.o russian_stem.o russian_stem_UTF8.o utilities.o EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) diff --git a/contrib/tsearch2/snowball/russian_stem_UTF8.c b/contrib/tsearch2/snowball/russian_stem_UTF8.c new file mode 100644 index 0000000000..99f84db790 --- /dev/null +++ b/contrib/tsearch2/snowball/russian_stem_UTF8.c @@ -0,0 +1,709 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +extern int russian_UTF_8_stem(struct SN_env * z); +static int r_tidy_up(struct SN_env * z); +static int r_derivational(struct SN_env * z); +static int r_noun(struct SN_env * z); +static int r_verb(struct SN_env * z); +static int r_reflexive(struct SN_env * z); +static int r_adjectival(struct SN_env * z); +static int r_adjective(struct SN_env * z); +static int r_perfective_gerund(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); + +extern struct SN_env * russian_UTF_8_create_env(void); +extern void russian_UTF_8_close_env(struct SN_env * z); + +static symbol s_0_0[10] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; +static symbol s_0_1[12] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; +static symbol s_0_2[12] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 
0x8C }; +static symbol s_0_3[2] = { 0xD0, 0xB2 }; +static symbol s_0_4[4] = { 0xD1, 0x8B, 0xD0, 0xB2 }; +static symbol s_0_5[4] = { 0xD0, 0xB8, 0xD0, 0xB2 }; +static symbol s_0_6[6] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; +static symbol s_0_7[8] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; +static symbol s_0_8[8] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; + +static struct among a_0[9] = +{ +/* 0 */ { 10, s_0_0, -1, 1, 0}, +/* 1 */ { 12, s_0_1, 0, 2, 0}, +/* 2 */ { 12, s_0_2, 0, 2, 0}, +/* 3 */ { 2, s_0_3, -1, 1, 0}, +/* 4 */ { 4, s_0_4, 3, 2, 0}, +/* 5 */ { 4, s_0_5, 3, 2, 0}, +/* 6 */ { 6, s_0_6, -1, 1, 0}, +/* 7 */ { 8, s_0_7, 6, 2, 0}, +/* 8 */ { 8, s_0_8, 6, 2, 0} +}; + +static symbol s_1_0[6] = { 0xD0, 0xB5, 0xD0, 0xBC, 0xD1, 0x83 }; +static symbol s_1_1[6] = { 0xD0, 0xBE, 0xD0, 0xBC, 0xD1, 0x83 }; +static symbol s_1_2[4] = { 0xD1, 0x8B, 0xD1, 0x85 }; +static symbol s_1_3[4] = { 0xD0, 0xB8, 0xD1, 0x85 }; +static symbol s_1_4[4] = { 0xD1, 0x83, 0xD1, 0x8E }; +static symbol s_1_5[4] = { 0xD1, 0x8E, 0xD1, 0x8E }; +static symbol s_1_6[4] = { 0xD0, 0xB5, 0xD1, 0x8E }; +static symbol s_1_7[4] = { 0xD0, 0xBE, 0xD1, 0x8E }; +static symbol s_1_8[4] = { 0xD1, 0x8F, 0xD1, 0x8F }; +static symbol s_1_9[4] = { 0xD0, 0xB0, 0xD1, 0x8F }; +static symbol s_1_10[4] = { 0xD1, 0x8B, 0xD0, 0xB5 }; +static symbol s_1_11[4] = { 0xD0, 0xB5, 0xD0, 0xB5 }; +static symbol s_1_12[4] = { 0xD0, 0xB8, 0xD0, 0xB5 }; +static symbol s_1_13[4] = { 0xD0, 0xBE, 0xD0, 0xB5 }; +static symbol s_1_14[6] = { 0xD1, 0x8B, 0xD0, 0xBC, 0xD0, 0xB8 }; +static symbol s_1_15[6] = { 0xD0, 0xB8, 0xD0, 0xBC, 0xD0, 0xB8 }; +static symbol s_1_16[4] = { 0xD1, 0x8B, 0xD0, 0xB9 }; +static symbol s_1_17[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; +static symbol s_1_18[4] = { 0xD0, 0xB8, 0xD0, 0xB9 }; +static symbol s_1_19[4] = { 0xD0, 0xBE, 0xD0, 0xB9 }; +static symbol s_1_20[4] = { 0xD1, 0x8B, 0xD0, 0xBC }; +static symbol s_1_21[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; +static symbol s_1_22[4] = { 0xD0, 0xB8, 0xD0, 
0xBC }; +static symbol s_1_23[4] = { 0xD0, 0xBE, 0xD0, 0xBC }; +static symbol s_1_24[6] = { 0xD0, 0xB5, 0xD0, 0xB3, 0xD0, 0xBE }; +static symbol s_1_25[6] = { 0xD0, 0xBE, 0xD0, 0xB3, 0xD0, 0xBE }; + +static struct among a_1[26] = +{ +/* 0 */ { 6, s_1_0, -1, 1, 0}, +/* 1 */ { 6, s_1_1, -1, 1, 0}, +/* 2 */ { 4, s_1_2, -1, 1, 0}, +/* 3 */ { 4, s_1_3, -1, 1, 0}, +/* 4 */ { 4, s_1_4, -1, 1, 0}, +/* 5 */ { 4, s_1_5, -1, 1, 0}, +/* 6 */ { 4, s_1_6, -1, 1, 0}, +/* 7 */ { 4, s_1_7, -1, 1, 0}, +/* 8 */ { 4, s_1_8, -1, 1, 0}, +/* 9 */ { 4, s_1_9, -1, 1, 0}, +/* 10 */ { 4, s_1_10, -1, 1, 0}, +/* 11 */ { 4, s_1_11, -1, 1, 0}, +/* 12 */ { 4, s_1_12, -1, 1, 0}, +/* 13 */ { 4, s_1_13, -1, 1, 0}, +/* 14 */ { 6, s_1_14, -1, 1, 0}, +/* 15 */ { 6, s_1_15, -1, 1, 0}, +/* 16 */ { 4, s_1_16, -1, 1, 0}, +/* 17 */ { 4, s_1_17, -1, 1, 0}, +/* 18 */ { 4, s_1_18, -1, 1, 0}, +/* 19 */ { 4, s_1_19, -1, 1, 0}, +/* 20 */ { 4, s_1_20, -1, 1, 0}, +/* 21 */ { 4, s_1_21, -1, 1, 0}, +/* 22 */ { 4, s_1_22, -1, 1, 0}, +/* 23 */ { 4, s_1_23, -1, 1, 0}, +/* 24 */ { 6, s_1_24, -1, 1, 0}, +/* 25 */ { 6, s_1_25, -1, 1, 0} +}; + +static symbol s_2_0[4] = { 0xD0, 0xB2, 0xD1, 0x88 }; +static symbol s_2_1[6] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88 }; +static symbol s_2_2[6] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88 }; +static symbol s_2_3[2] = { 0xD1, 0x89 }; +static symbol s_2_4[4] = { 0xD1, 0x8E, 0xD1, 0x89 }; +static symbol s_2_5[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x89 }; +static symbol s_2_6[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; +static symbol s_2_7[4] = { 0xD0, 0xBD, 0xD0, 0xBD }; + +static struct among a_2[8] = +{ +/* 0 */ { 4, s_2_0, -1, 1, 0}, +/* 1 */ { 6, s_2_1, 0, 2, 0}, +/* 2 */ { 6, s_2_2, 0, 2, 0}, +/* 3 */ { 2, s_2_3, -1, 1, 0}, +/* 4 */ { 4, s_2_4, 3, 1, 0}, +/* 5 */ { 6, s_2_5, 4, 2, 0}, +/* 6 */ { 4, s_2_6, -1, 1, 0}, +/* 7 */ { 4, s_2_7, -1, 1, 0} +}; + +static symbol s_3_0[4] = { 0xD1, 0x81, 0xD1, 0x8C }; +static symbol s_3_1[4] = { 0xD1, 0x81, 0xD1, 0x8F }; + +static struct among a_3[2] = +{ +/* 0 
*/ { 4, s_3_0, -1, 1, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0} +}; + +static symbol s_4_0[4] = { 0xD1, 0x8B, 0xD1, 0x82 }; +static symbol s_4_1[4] = { 0xD1, 0x8E, 0xD1, 0x82 }; +static symbol s_4_2[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x82 }; +static symbol s_4_3[4] = { 0xD1, 0x8F, 0xD1, 0x82 }; +static symbol s_4_4[4] = { 0xD0, 0xB5, 0xD1, 0x82 }; +static symbol s_4_5[6] = { 0xD1, 0x83, 0xD0, 0xB5, 0xD1, 0x82 }; +static symbol s_4_6[4] = { 0xD0, 0xB8, 0xD1, 0x82 }; +static symbol s_4_7[4] = { 0xD0, 0xBD, 0xD1, 0x8B }; +static symbol s_4_8[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD1, 0x8B }; +static symbol s_4_9[4] = { 0xD1, 0x82, 0xD1, 0x8C }; +static symbol s_4_10[6] = { 0xD1, 0x8B, 0xD1, 0x82, 0xD1, 0x8C }; +static symbol s_4_11[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD1, 0x8C }; +static symbol s_4_12[6] = { 0xD0, 0xB5, 0xD1, 0x88, 0xD1, 0x8C }; +static symbol s_4_13[6] = { 0xD0, 0xB8, 0xD1, 0x88, 0xD1, 0x8C }; +static symbol s_4_14[2] = { 0xD1, 0x8E }; +static symbol s_4_15[4] = { 0xD1, 0x83, 0xD1, 0x8E }; +static symbol s_4_16[4] = { 0xD0, 0xBB, 0xD0, 0xB0 }; +static symbol s_4_17[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB0 }; +static symbol s_4_18[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB0 }; +static symbol s_4_19[4] = { 0xD0, 0xBD, 0xD0, 0xB0 }; +static symbol s_4_20[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xB0 }; +static symbol s_4_21[6] = { 0xD0, 0xB5, 0xD1, 0x82, 0xD0, 0xB5 }; +static symbol s_4_22[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD0, 0xB5 }; +static symbol s_4_23[6] = { 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; +static symbol s_4_24[8] = { 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; +static symbol s_4_25[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; +static symbol s_4_26[4] = { 0xD0, 0xBB, 0xD0, 0xB8 }; +static symbol s_4_27[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB8 }; +static symbol s_4_28[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB8 }; +static symbol s_4_29[2] = { 0xD0, 0xB9 }; +static symbol s_4_30[4] = { 0xD1, 0x83, 0xD0, 0xB9 }; +static symbol s_4_31[4] = { 0xD0, 
0xB5, 0xD0, 0xB9 }; +static symbol s_4_32[2] = { 0xD0, 0xBB }; +static symbol s_4_33[4] = { 0xD1, 0x8B, 0xD0, 0xBB }; +static symbol s_4_34[4] = { 0xD0, 0xB8, 0xD0, 0xBB }; +static symbol s_4_35[4] = { 0xD1, 0x8B, 0xD0, 0xBC }; +static symbol s_4_36[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; +static symbol s_4_37[4] = { 0xD0, 0xB8, 0xD0, 0xBC }; +static symbol s_4_38[2] = { 0xD0, 0xBD }; +static symbol s_4_39[4] = { 0xD0, 0xB5, 0xD0, 0xBD }; +static symbol s_4_40[4] = { 0xD0, 0xBB, 0xD0, 0xBE }; +static symbol s_4_41[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xBE }; +static symbol s_4_42[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xBE }; +static symbol s_4_43[4] = { 0xD0, 0xBD, 0xD0, 0xBE }; +static symbol s_4_44[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xBE }; +static symbol s_4_45[6] = { 0xD0, 0xBD, 0xD0, 0xBD, 0xD0, 0xBE }; + +static struct among a_4[46] = +{ +/* 0 */ { 4, s_4_0, -1, 2, 0}, +/* 1 */ { 4, s_4_1, -1, 1, 0}, +/* 2 */ { 6, s_4_2, 1, 2, 0}, +/* 3 */ { 4, s_4_3, -1, 2, 0}, +/* 4 */ { 4, s_4_4, -1, 1, 0}, +/* 5 */ { 6, s_4_5, 4, 2, 0}, +/* 6 */ { 4, s_4_6, -1, 2, 0}, +/* 7 */ { 4, s_4_7, -1, 1, 0}, +/* 8 */ { 6, s_4_8, 7, 2, 0}, +/* 9 */ { 4, s_4_9, -1, 1, 0}, +/* 10 */ { 6, s_4_10, 9, 2, 0}, +/* 11 */ { 6, s_4_11, 9, 2, 0}, +/* 12 */ { 6, s_4_12, -1, 1, 0}, +/* 13 */ { 6, s_4_13, -1, 2, 0}, +/* 14 */ { 2, s_4_14, -1, 2, 0}, +/* 15 */ { 4, s_4_15, 14, 2, 0}, +/* 16 */ { 4, s_4_16, -1, 1, 0}, +/* 17 */ { 6, s_4_17, 16, 2, 0}, +/* 18 */ { 6, s_4_18, 16, 2, 0}, +/* 19 */ { 4, s_4_19, -1, 1, 0}, +/* 20 */ { 6, s_4_20, 19, 2, 0}, +/* 21 */ { 6, s_4_21, -1, 1, 0}, +/* 22 */ { 6, s_4_22, -1, 2, 0}, +/* 23 */ { 6, s_4_23, -1, 1, 0}, +/* 24 */ { 8, s_4_24, 23, 2, 0}, +/* 25 */ { 8, s_4_25, 23, 2, 0}, +/* 26 */ { 4, s_4_26, -1, 1, 0}, +/* 27 */ { 6, s_4_27, 26, 2, 0}, +/* 28 */ { 6, s_4_28, 26, 2, 0}, +/* 29 */ { 2, s_4_29, -1, 1, 0}, +/* 30 */ { 4, s_4_30, 29, 2, 0}, +/* 31 */ { 4, s_4_31, 29, 2, 0}, +/* 32 */ { 2, s_4_32, -1, 1, 0}, +/* 33 */ { 4, s_4_33, 32, 2, 0}, +/* 34 */ { 4, 
s_4_34, 32, 2, 0}, +/* 35 */ { 4, s_4_35, -1, 2, 0}, +/* 36 */ { 4, s_4_36, -1, 1, 0}, +/* 37 */ { 4, s_4_37, -1, 2, 0}, +/* 38 */ { 2, s_4_38, -1, 1, 0}, +/* 39 */ { 4, s_4_39, 38, 2, 0}, +/* 40 */ { 4, s_4_40, -1, 1, 0}, +/* 41 */ { 6, s_4_41, 40, 2, 0}, +/* 42 */ { 6, s_4_42, 40, 2, 0}, +/* 43 */ { 4, s_4_43, -1, 1, 0}, +/* 44 */ { 6, s_4_44, 43, 2, 0}, +/* 45 */ { 6, s_4_45, 43, 1, 0} +}; + +static symbol s_5_0[2] = { 0xD1, 0x83 }; +static symbol s_5_1[4] = { 0xD1, 0x8F, 0xD1, 0x85 }; +static symbol s_5_2[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD1, 0x85 }; +static symbol s_5_3[4] = { 0xD0, 0xB0, 0xD1, 0x85 }; +static symbol s_5_4[2] = { 0xD1, 0x8B }; +static symbol s_5_5[2] = { 0xD1, 0x8C }; +static symbol s_5_6[2] = { 0xD1, 0x8E }; +static symbol s_5_7[4] = { 0xD1, 0x8C, 0xD1, 0x8E }; +static symbol s_5_8[4] = { 0xD0, 0xB8, 0xD1, 0x8E }; +static symbol s_5_9[2] = { 0xD1, 0x8F }; +static symbol s_5_10[4] = { 0xD1, 0x8C, 0xD1, 0x8F }; +static symbol s_5_11[4] = { 0xD0, 0xB8, 0xD1, 0x8F }; +static symbol s_5_12[2] = { 0xD0, 0xB0 }; +static symbol s_5_13[4] = { 0xD0, 0xB5, 0xD0, 0xB2 }; +static symbol s_5_14[4] = { 0xD0, 0xBE, 0xD0, 0xB2 }; +static symbol s_5_15[2] = { 0xD0, 0xB5 }; +static symbol s_5_16[4] = { 0xD1, 0x8C, 0xD0, 0xB5 }; +static symbol s_5_17[4] = { 0xD0, 0xB8, 0xD0, 0xB5 }; +static symbol s_5_18[2] = { 0xD0, 0xB8 }; +static symbol s_5_19[4] = { 0xD0, 0xB5, 0xD0, 0xB8 }; +static symbol s_5_20[4] = { 0xD0, 0xB8, 0xD0, 0xB8 }; +static symbol s_5_21[6] = { 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 }; +static symbol s_5_22[8] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 }; +static symbol s_5_23[6] = { 0xD0, 0xB0, 0xD0, 0xBC, 0xD0, 0xB8 }; +static symbol s_5_24[2] = { 0xD0, 0xB9 }; +static symbol s_5_25[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; +static symbol s_5_26[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xB9 }; +static symbol s_5_27[4] = { 0xD0, 0xB8, 0xD0, 0xB9 }; +static symbol s_5_28[4] = { 0xD0, 0xBE, 0xD0, 0xB9 }; +static symbol s_5_29[4] = { 0xD1, 0x8F, 0xD0, 0xBC 
}; +static symbol s_5_30[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC }; +static symbol s_5_31[4] = { 0xD0, 0xB0, 0xD0, 0xBC }; +static symbol s_5_32[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; +static symbol s_5_33[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xBC }; +static symbol s_5_34[4] = { 0xD0, 0xBE, 0xD0, 0xBC }; +static symbol s_5_35[2] = { 0xD0, 0xBE }; + +static struct among a_5[36] = +{ +/* 0 */ { 2, s_5_0, -1, 1, 0}, +/* 1 */ { 4, s_5_1, -1, 1, 0}, +/* 2 */ { 6, s_5_2, 1, 1, 0}, +/* 3 */ { 4, s_5_3, -1, 1, 0}, +/* 4 */ { 2, s_5_4, -1, 1, 0}, +/* 5 */ { 2, s_5_5, -1, 1, 0}, +/* 6 */ { 2, s_5_6, -1, 1, 0}, +/* 7 */ { 4, s_5_7, 6, 1, 0}, +/* 8 */ { 4, s_5_8, 6, 1, 0}, +/* 9 */ { 2, s_5_9, -1, 1, 0}, +/* 10 */ { 4, s_5_10, 9, 1, 0}, +/* 11 */ { 4, s_5_11, 9, 1, 0}, +/* 12 */ { 2, s_5_12, -1, 1, 0}, +/* 13 */ { 4, s_5_13, -1, 1, 0}, +/* 14 */ { 4, s_5_14, -1, 1, 0}, +/* 15 */ { 2, s_5_15, -1, 1, 0}, +/* 16 */ { 4, s_5_16, 15, 1, 0}, +/* 17 */ { 4, s_5_17, 15, 1, 0}, +/* 18 */ { 2, s_5_18, -1, 1, 0}, +/* 19 */ { 4, s_5_19, 18, 1, 0}, +/* 20 */ { 4, s_5_20, 18, 1, 0}, +/* 21 */ { 6, s_5_21, 18, 1, 0}, +/* 22 */ { 8, s_5_22, 21, 1, 0}, +/* 23 */ { 6, s_5_23, 18, 1, 0}, +/* 24 */ { 2, s_5_24, -1, 1, 0}, +/* 25 */ { 4, s_5_25, 24, 1, 0}, +/* 26 */ { 6, s_5_26, 25, 1, 0}, +/* 27 */ { 4, s_5_27, 24, 1, 0}, +/* 28 */ { 4, s_5_28, 24, 1, 0}, +/* 29 */ { 4, s_5_29, -1, 1, 0}, +/* 30 */ { 6, s_5_30, 29, 1, 0}, +/* 31 */ { 4, s_5_31, -1, 1, 0}, +/* 32 */ { 4, s_5_32, -1, 1, 0}, +/* 33 */ { 6, s_5_33, 32, 1, 0}, +/* 34 */ { 4, s_5_34, -1, 1, 0}, +/* 35 */ { 2, s_5_35, -1, 1, 0} +}; + +static symbol s_6_0[6] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82 }; +static symbol s_6_1[8] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82, 0xD1, 0x8C }; + +static struct among a_6[2] = +{ +/* 0 */ { 6, s_6_0, -1, 1, 0}, +/* 1 */ { 8, s_6_1, -1, 1, 0} +}; + +static symbol s_7_0[6] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88 }; +static symbol s_7_1[2] = { 0xD1, 0x8C }; +static symbol s_7_2[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 
0x88, 0xD0, 0xB5 }; +static symbol s_7_3[2] = { 0xD0, 0xBD }; + +static struct among a_7[4] = +{ +/* 0 */ { 6, s_7_0, -1, 1, 0}, +/* 1 */ { 2, s_7_1, -1, 3, 0}, +/* 2 */ { 8, s_7_2, -1, 1, 0}, +/* 3 */ { 2, s_7_3, -1, 2, 0} +}; + +static unsigned char g_v[] = { 33, 65, 8, 232 }; + +static symbol s_0[] = { 0xD0, 0xB0 }; +static symbol s_1[] = { 0xD1, 0x8F }; +static symbol s_2[] = { 0xD0, 0xB0 }; +static symbol s_3[] = { 0xD1, 0x8F }; +static symbol s_4[] = { 0xD0, 0xB0 }; +static symbol s_5[] = { 0xD1, 0x8F }; +static symbol s_6[] = { 0xD0, 0xBD }; +static symbol s_7[] = { 0xD0, 0xBD }; +static symbol s_8[] = { 0xD0, 0xBD }; +static symbol s_9[] = { 0xD0, 0xB8 }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { int c = z->c; /* do, line 61 */ + while(1) { /* gopast, line 62 */ + if (!(in_grouping_U(z, g_v, 1072, 1103))) goto lab1; + break; + lab1: + { int c = skip_utf8(z->p, z->c, 0, z->l, 1); + if (c < 0) goto lab0; + z->c = c; /* gopast, line 62 */ + } + } + z->I[0] = z->c; /* setmark pV, line 62 */ + while(1) { /* gopast, line 62 */ + if (!(out_grouping_U(z, g_v, 1072, 1103))) goto lab2; + break; + lab2: + { int c = skip_utf8(z->p, z->c, 0, z->l, 1); + if (c < 0) goto lab0; + z->c = c; /* gopast, line 62 */ + } + } + while(1) { /* gopast, line 63 */ + if (!(in_grouping_U(z, g_v, 1072, 1103))) goto lab3; + break; + lab3: + { int c = skip_utf8(z->p, z->c, 0, z->l, 1); + if (c < 0) goto lab0; + z->c = c; /* gopast, line 63 */ + } + } + while(1) { /* gopast, line 63 */ + if (!(out_grouping_U(z, g_v, 1072, 1103))) goto lab4; + break; + lab4: + { int c = skip_utf8(z->p, z->c, 0, z->l, 1); + if (c < 0) goto lab0; + z->c = c; /* gopast, line 63 */ + } + } + z->I[1] = z->c; /* setmark p2, line 63 */ + lab0: + z->c = c; + } + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_perfective_gerund(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 72 */ 
+ among_var = find_among_b(z, a_0, 9); /* substring, line 72 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 72 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m = z->l - z->c; (void) m; /* or, line 76 */ + if (!(eq_s_b(z, 2, s_0))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m; + if (!(eq_s_b(z, 2, s_1))) return 0; + } + lab0: + { int ret; + ret = slice_del(z); /* delete, line 76 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret; + ret = slice_del(z); /* delete, line 83 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_adjective(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 88 */ + among_var = find_among_b(z, a_1, 26); /* substring, line 88 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 88 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret; + ret = slice_del(z); /* delete, line 97 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_adjectival(struct SN_env * z) { + int among_var; + { int ret = r_adjective(z); + if (ret == 0) return 0; /* call adjective, line 102 */ + if (ret < 0) return ret; + } + { int m = z->l - z->c; (void) m; /* try, line 109 */ + z->ket = z->c; /* [, line 110 */ + among_var = find_among_b(z, a_2, 8); /* substring, line 110 */ + if (!(among_var)) { z->c = z->l - m; goto lab0; } + z->bra = z->c; /* ], line 110 */ + switch(among_var) { + case 0: { z->c = z->l - m; goto lab0; } + case 1: + { int m = z->l - z->c; (void) m; /* or, line 115 */ + if (!(eq_s_b(z, 2, s_2))) goto lab2; + goto lab1; + lab2: + z->c = z->l - m; + if (!(eq_s_b(z, 2, s_3))) { z->c = z->l - m; goto lab0; } + } + lab1: + { int ret; + ret = slice_del(z); /* delete, line 115 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret; + ret = slice_del(z); /* delete, line 122 */ + if (ret < 0) return ret; + } + break; + } + lab0: + ; + } + return 1; +} + +static int r_reflexive(struct SN_env * z) { + int among_var; + z->ket = 
z->c; /* [, line 129 */ + among_var = find_among_b(z, a_3, 2); /* substring, line 129 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 129 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret; + ret = slice_del(z); /* delete, line 132 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_verb(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 137 */ + among_var = find_among_b(z, a_4, 46); /* substring, line 137 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 137 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m = z->l - z->c; (void) m; /* or, line 143 */ + if (!(eq_s_b(z, 2, s_4))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m; + if (!(eq_s_b(z, 2, s_5))) return 0; + } + lab0: + { int ret; + ret = slice_del(z); /* delete, line 143 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret; + ret = slice_del(z); /* delete, line 151 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_noun(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 160 */ + among_var = find_among_b(z, a_5, 36); /* substring, line 160 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 160 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret; + ret = slice_del(z); /* delete, line 167 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_derivational(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 176 */ + among_var = find_among_b(z, a_6, 2); /* substring, line 176 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 176 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 176 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret; + ret = slice_del(z); /* delete, line 179 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_tidy_up(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 
184 */ + among_var = find_among_b(z, a_7, 4); /* substring, line 184 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 184 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret; + ret = slice_del(z); /* delete, line 188 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 189 */ + if (!(eq_s_b(z, 2, s_6))) return 0; + z->bra = z->c; /* ], line 189 */ + if (!(eq_s_b(z, 2, s_7))) return 0; + { int ret; + ret = slice_del(z); /* delete, line 189 */ + if (ret < 0) return ret; + } + break; + case 2: + if (!(eq_s_b(z, 2, s_8))) return 0; + { int ret; + ret = slice_del(z); /* delete, line 192 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret; + ret = slice_del(z); /* delete, line 194 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int russian_UTF_8_stem(struct SN_env * z) { + { int c = z->c; /* do, line 201 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 201 */ + if (ret < 0) return ret; + } + lab0: + z->c = c; + } + z->lb = z->c; z->c = z->l; /* backwards, line 202 */ + + { int m3; /* setlimit, line 202 */ + int m = z->l - z->c; (void) m; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 202 */ + m3 = z->lb; z->lb = z->c; + z->c = z->l - m; + { int m = z->l - z->c; (void) m; /* do, line 203 */ + { int m = z->l - z->c; (void) m; /* or, line 204 */ + { int ret = r_perfective_gerund(z); + if (ret == 0) goto lab3; /* call perfective_gerund, line 204 */ + if (ret < 0) return ret; + } + goto lab2; + lab3: + z->c = z->l - m; + { int m = z->l - z->c; (void) m; /* try, line 205 */ + { int ret = r_reflexive(z); + if (ret == 0) { z->c = z->l - m; goto lab4; } /* call reflexive, line 205 */ + if (ret < 0) return ret; + } + lab4: + ; + } + { int m = z->l - z->c; (void) m; /* or, line 206 */ + { int ret = r_adjectival(z); + if (ret == 0) goto lab6; /* call adjectival, line 206 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m; + { int 
ret = r_verb(z); + if (ret == 0) goto lab7; /* call verb, line 206 */ + if (ret < 0) return ret; + } + goto lab5; + lab7: + z->c = z->l - m; + { int ret = r_noun(z); + if (ret == 0) goto lab1; /* call noun, line 206 */ + if (ret < 0) return ret; + } + } + lab5: + ; + } + lab2: + lab1: + z->c = z->l - m; + } + { int m = z->l - z->c; (void) m; /* try, line 209 */ + z->ket = z->c; /* [, line 209 */ + if (!(eq_s_b(z, 2, s_9))) { z->c = z->l - m; goto lab8; } + z->bra = z->c; /* ], line 209 */ + { int ret; + ret = slice_del(z); /* delete, line 209 */ + if (ret < 0) return ret; + } + lab8: + ; + } + { int m = z->l - z->c; (void) m; /* do, line 212 */ + { int ret = r_derivational(z); + if (ret == 0) goto lab9; /* call derivational, line 212 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m; + } + { int m = z->l - z->c; (void) m; /* do, line 213 */ + { int ret = r_tidy_up(z); + if (ret == 0) goto lab10; /* call tidy_up, line 213 */ + if (ret < 0) return ret; + } + lab10: + z->c = z->l - m; + } + z->lb = m3; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * russian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } + +extern void russian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z); } + diff --git a/contrib/tsearch2/snowball/russian_stem_UTF8.h b/contrib/tsearch2/snowball/russian_stem_UTF8.h new file mode 100644 index 0000000000..4ef774ddcc --- /dev/null +++ b/contrib/tsearch2/snowball/russian_stem_UTF8.h @@ -0,0 +1,16 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#ifdef __cplusplus +extern "C" { +#endif + +extern struct SN_env * russian_UTF_8_create_env(void); +extern void russian_UTF_8_close_env(struct SN_env * z); + +extern int russian_UTF_8_stem(struct SN_env * z); + +#ifdef __cplusplus +} +#endif + diff --git a/contrib/tsearch2/stopword/russian.stop.utf8 b/contrib/tsearch2/stopword/russian.stop.utf8 new file mode 100644 index 0000000000..ecb83d4a7f --- /dev/null +++ 
b/contrib/tsearch2/stopword/russian.stop.utf8 @@ -0,0 +1,151 @@ +и +в +во +не +что +он +на +я +с +со +как +а +то +все +она +так +его +но +да +ты +к +у +же +вы +за +бы +по +только +ее +мне +было +вот +от +меня +еще +нет +о +из +ему +теперь +когда +даже +ну +вдруг +ли +если +уже +или +ни +быть +был +него +до +вас +нибудь +опять +уж +вам +ведь +там +потом +себя +ничего +ей +может +они +тут +где +есть +надо +ней +для +мы +тебя +их +чем +была +сам +чтоб +без +будто +чего +раз +тоже +себе +под +будет +ж +тогда +кто +этот +того +потому +этого +какой +совсем +ним +здесь +этом +один +почти +мой +тем +чтобы +нее +сейчас +были +куда +зачем +всех +никогда +можно +при +наконец +два +об +другой +хоть +после +над +больше +тот +через +эти +нас +про +всего +них +какая +много +разве +три +эту +моя +впрочем +хорошо +свою +этой +перед +иногда +лучше +чуть +том +нельзя +такой +им +более +всегда +конечно +всю +между diff --git a/contrib/tsearch2/tsearch.sql.in b/contrib/tsearch2/tsearch.sql.in index 4fdf974d0d..07290838ff 100644 --- a/contrib/tsearch2/tsearch.sql.in +++ b/contrib/tsearch2/tsearch.sql.in @@ -82,17 +82,30 @@ insert into pg_ts_dict select 'English Stemmer. Snowball.' ; -CREATE FUNCTION snb_ru_init(internal) +CREATE FUNCTION snb_ru_init_koi8(internal) returns internal as 'MODULE_PATHNAME' language 'C'; insert into pg_ts_dict select - 'ru_stem', - 'snb_ru_init(internal)', + 'ru_stem_koi8', + 'snb_ru_init_koi8(internal)', 'contrib/russian.stop', 'snb_lexize(internal,internal,int4)', - 'Russian Stemmer. Snowball.' + 'Russian Stemmer. Snowball. KOI8 Encoding' +; + +CREATE FUNCTION snb_ru_init_utf8(internal) + returns internal + as 'MODULE_PATHNAME' + language 'C'; + +insert into pg_ts_dict select + 'ru_stem_utf8', + 'snb_ru_init_utf8(internal)', + 'contrib/russian.stop.utf8', + 'snb_lexize(internal,internal,int4)', + 'Russian Stemmer. Snowball. 
UTF8 Encoding' ; CREATE FUNCTION spell_init(internal) @@ -270,6 +283,7 @@ CREATE FUNCTION show_curcfg() insert into pg_ts_cfg values ('default', 'default','C'); insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R'); +insert into pg_ts_cfg values ('utf8_russian', 'default','ru_RU.UTF-8'); insert into pg_ts_cfg values ('simple', 'default'); insert into pg_ts_cfgmap values ('default', 'lword', '{en_stem}'); @@ -292,24 +306,43 @@ insert into pg_ts_cfgmap values ('default', 'float', '{simple}'); insert into pg_ts_cfgmap values ('default', 'int', '{simple}'); insert into pg_ts_cfgmap values ('default', 'uint', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'lword', '{en_stem}'); -insert into pg_ts_cfgmap values ('default_russian', 'nlword', '{ru_stem}'); -insert into pg_ts_cfgmap values ('default_russian', 'word', '{ru_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'nlword', '{ru_stem_koi8}'); +insert into pg_ts_cfgmap values ('default_russian', 'word', '{ru_stem_koi8}'); insert into pg_ts_cfgmap values ('default_russian', 'email', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'url', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'host', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'sfloat', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'version', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'part_hword', '{simple}'); -insert into pg_ts_cfgmap values ('default_russian', 'nlpart_hword', '{ru_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'nlpart_hword', '{ru_stem_koi8}'); insert into pg_ts_cfgmap values ('default_russian', 'lpart_hword', '{en_stem}'); -insert into pg_ts_cfgmap values ('default_russian', 'hword', '{ru_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'hword', '{ru_stem_koi8}'); insert into pg_ts_cfgmap values ('default_russian', 'lhword', '{en_stem}'); -insert into pg_ts_cfgmap 
values ('default_russian', 'nlhword', '{ru_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'nlhword', '{ru_stem_koi8}'); insert into pg_ts_cfgmap values ('default_russian', 'uri', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'file', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'float', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'int', '{simple}'); insert into pg_ts_cfgmap values ('default_russian', 'uint', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'lword', '{en_stem}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'nlword', '{ru_stem_utf8}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'word', '{ru_stem_utf8}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'email', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'url', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'host', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'sfloat', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'version', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'part_hword', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'nlpart_hword', '{ru_stem_utf8}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'lpart_hword', '{en_stem}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'hword', '{ru_stem_utf8}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'lhword', '{en_stem}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'nlhword', '{ru_stem_utf8}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'uri', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'file', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'float', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'int', '{simple}'); +insert into pg_ts_cfgmap values ('utf8_russian', 'uint', '{simple}'); insert into pg_ts_cfgmap values ('simple', 'lword', '{simple}'); insert into 
pg_ts_cfgmap values ('simple', 'nlword', '{simple}'); insert into pg_ts_cfgmap values ('simple', 'word', '{simple}');