Unaccent dictionary.

2025-01-12 18:34:36 +08:00 · 2009-08-18 10:34:39 +00:00 · 2009-08-18 10:34:39 +00:00 · 92e05bc6a5
commit 92e05bc6a5
parent a88a48011c
12 changed files with 808 additions and 3 deletions
--- a/contrib/Makefile
+++ b/contrib/Makefile
@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/Makefile,v 1.88 2009/08/07 20:50:21 petere Exp $
+# $PostgreSQL: pgsql/contrib/Makefile,v 1.89 2009/08/18 10:34:39 teodor Exp $
 subdir = contrib
 top_builddir = ..
@ -39,6 +39,7 @@ SUBDIRS = \
 		tablefunc	\
 		test_parser	\
 		tsearch2	\
 		unaccent	\
 		vacuumlo
 ifeq ($(with_openssl),yes)
--- a/contrib/README
+++ b/contrib/README
@ -169,6 +169,10 @@ tsearch2 -
 	Pavel Stehule <pavel.stehule@gmail.com>, based on code originally by
 	Teodor Sigaev <teodor@sigaev.ru> and Oleg Bartunov <oleg@sai.msu.su>.
 unaccent -
 	Unaccent dictionary for text search
 	Teodor Sigaev <teodor@sigaev.ru> and Oleg Bartunov <oleg@sai.msu.su>.
 uuid-ossp -
 	UUID generation functions
 	by Peter Eisentraut <peter_e@gmx.net>
--- a/contrib/unaccent/Makefile
+++ b/contrib/unaccent/Makefile
@ -0,0 +1,24 @@
 # $PostgreSQL: pgsql/contrib/unaccent/Makefile,v 1.1 2009/08/18 10:34:39 teodor Exp $
 MODULE_big = unaccent
 OBJS = unaccent.o
 DATA_built = unaccent.sql
 DATA = uninstall_unaccent.sql
 DATA_TSEARCH = unaccent.rules
 REGRESS = unaccent
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
 include $(PGXS)
 else
 # subdir must name this module's own directory below the top of the tree
 subdir = contrib/unaccent
 top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
 endif
 # Redefine REGRESS_OPTS because the regression test needs a UTF8 database
 REGRESS_OPTS = --dbname=$(CONTRIB_TESTDB) --multibyte=UTF8 --no-locale 
--- a/contrib/unaccent/expected/unaccent.out
+++ b/contrib/unaccent/expected/unaccent.out
@ -0,0 +1,58 @@
 SET client_min_messages = warning;
 \set ECHO none
 RESET client_min_messages;
 SET client_encoding TO 'KOI8';
 SELECT unaccent('foobar');
 unaccent 
 ----------
 foobar
 (1 row)
 SELECT unaccent('Ｌ肆');
 unaccent 
 ----------
 盘肆
 (1 row)
 SELECT unaccent('出殡');
 unaccent 
 ----------
 弼殡
 (1 row)
 SELECT unaccent('unaccent', 'foobar');
 unaccent 
 ----------
 foobar
 (1 row)
 SELECT unaccent('unaccent', 'Ｌ肆');
 unaccent 
 ----------
 盘肆
 (1 row)
 SELECT unaccent('unaccent', '出殡');
 unaccent 
 ----------
 弼殡
 (1 row)
 SELECT ts_lexize('unaccent', 'foobar');
 ts_lexize 
 -----------
 (1 row)
 SELECT ts_lexize('unaccent', 'Ｌ肆');
 ts_lexize 
 -----------
 {盘肆}
 (1 row)
 SELECT ts_lexize('unaccent', '出殡');
 ts_lexize 
 -----------
 {弼殡}
 (1 row)
--- a/contrib/unaccent/sql/unaccent.sql
+++ b/contrib/unaccent/sql/unaccent.sql
@ -0,0 +1,19 @@
 SET client_min_messages = warning;
 \set ECHO none
 \i unaccent.sql
 \set ECHO all
 RESET client_min_messages;
 SET client_encoding TO 'KOI8';
 SELECT unaccent('foobar');
 SELECT unaccent('Ｌ肆');
 SELECT unaccent('出殡');
 SELECT unaccent('unaccent', 'foobar');
 SELECT unaccent('unaccent', 'Ｌ肆');
 SELECT unaccent('unaccent', '出殡');
 SELECT ts_lexize('unaccent', 'foobar');
 SELECT ts_lexize('unaccent', 'Ｌ肆');
 SELECT ts_lexize('unaccent', '出殡');
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@ -0,0 +1,318 @@
 /*-------------------------------------------------------------------------
 *
 * unaccent.c
 *    Text search unaccent dictionary
 *
 * Copyright (c) 2009, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/contrib/unaccent/unaccent.c,v 1.1 2009/08/18 10:34:39 teodor Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"
 #include "fmgr.h"
 #include "catalog/namespace.h"
 #include "commands/defrem.h"
 #include "mb/pg_wchar.h"
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_public.h"
 #include "utils/builtins.h"
 PG_MODULE_MAGIC;
 /*
 * The unaccent dictionary finds the character to replace by walking an
 * uncompressed, byte-indexed tree (called a "suffix tree" throughout this
 * file). Each node of the tree is an array of 256 SuffixChar structs; the
 * n-th element of the array corresponds to the byte value n.
 */
 typedef struct SuffixChar {
 	/* child node (array of 256 entries) for the next byte, or NULL if none */
 	struct SuffixChar	*nextChar;
 	/*
 	 * replacement bytes for the sequence ending at this entry, stored
 	 * without a terminating NUL; NULL when no rule ends here
 	 */
 	char				*replaceTo;
 	/* number of bytes in replaceTo */
 	int					replacelen;
 } SuffixChar;
 /*
 * placeChar - insert one translation rule into the tree.
 *
 * Follows the bytes of str (lenstr bytes long) down the tree, allocating
 * any missing 256-entry node on the way, and stores a copy of replaceTo
 * (replacelen bytes, no terminating NUL) in the entry reached by the last
 * byte. If that entry already holds a replacement, a WARNING is emitted
 * and the existing replacement is kept.
 *
 * node is the root of the (sub)tree, or NULL if it does not exist yet.
 * Returns the root, which is freshly allocated when node was NULL.
 */
 static SuffixChar*
 placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
 {
 	SuffixChar **link = &node;
 	SuffixChar  *entry;
 	for (;;)
 	{
 		/* create the node on demand, with all 256 entries zeroed */
 		if ( *link == NULL )
 		{
 			*link = palloc(sizeof(SuffixChar) * 256);
 			memset(*link, 0, sizeof(SuffixChar) * 256);
 		}
 		entry = *link + *str;
 		/* the last byte of the source sequence selects the target entry */
 		if ( lenstr == 1 )
 			break;
 		/* otherwise descend into the child node for the next byte */
 		link = &entry->nextChar;
 		str++;
 		lenstr--;
 	}
 	if ( entry->replaceTo != NULL )
 	{
 		elog(WARNING, "duplicate TO argument, use first one");
 		return node;
 	}
 	entry->replacelen = replacelen;
 	entry->replaceTo = palloc( replacelen );
 	memcpy(entry->replaceTo, replaceTo, replacelen);
 	return node;
 }
 /*
 * initSuffixTree - build the replacement tree from a rules file.
 *
 * filename is the base name of the rules file; it is resolved with
 * get_tsearch_config_filename() using the "rules" extension. The file is
 * read line by line with tsearch_readline(), which converts the UTF8-encoded
 * file into the current encoding. Each line is expected to hold two
 * whitespace-separated tokens: the source character and its replacement.
 * Lines that do not match are ignored.
 *
 * Raises ERRCODE_CONFIG_FILE_ERROR if the file cannot be opened. Lines
 * containing characters that cannot be translated into the current encoding
 * are skipped; any other error is re-thrown.
 *
 * Returns the root of the tree, or NULL if the file contained no rules.
 */
 static SuffixChar*
 initSuffixTree(char *filename) 
 {
 	/*
 	 * Both variables are modified inside PG_TRY and read after it, so they
 	 * must be volatile to survive the longjmp taken on error.
 	 */
 	SuffixChar * volatile rootSuffixTree = NULL;
 	volatile bool	skip;
 	MemoryContext ccxt = CurrentMemoryContext;
 	tsearch_readline_state	trst;
 	filename = get_tsearch_config_filename(filename, "rules");
 	if (!tsearch_readline_begin(&trst, filename))
 		ereport(ERROR,
 				(errcode(ERRCODE_CONFIG_FILE_ERROR),
 				 errmsg("could not open unaccent file \"%s\": %m",
 						filename)));
 	do	
 	{
 		/* assume the pass is interrupted until it reaches end of file */
 		skip = true;
 		PG_TRY();
 		{
 			char	src[4096];
 			char	trg[4096];
 			char   *line;
 			/*
 			 * pg_do_encoding_conversion() (called by tsearch_readline())
 			 * will emit exception if it finds untranslatable characters in
 			 * current locale. We just skip such lines and continue with the
 			 * next one.
 			 */
 			while ((line = tsearch_readline(&trst)) != NULL)
 			{
 				/*
 				 * The field widths keep an over-long token from overflowing
 				 * the 4096-byte buffers (4095 bytes plus the NUL).
 				 */
 				if ( sscanf(line, "%4095s\t%4095s\n", src, trg) == 2 )
 					rootSuffixTree = placeChar(rootSuffixTree,
 												(unsigned char*)src, (int) strlen(src),
 												trg, (int) strlen(trg));
 				/* free the line whether or not it held a valid rule */
 				pfree(line);
 			}
 			/*
 			 * End of file reached without error. Clearing the flag only
 			 * here (not per line) lets an empty file terminate the loop
 			 * and lets reading resume after a bad line in mid-file.
 			 */
 			skip = false;
 		}
 		PG_CATCH();
 		{
 			ErrorData  *errdata;
 			MemoryContext ecxt;
 			ecxt = MemoryContextSwitchTo(ccxt);
 			errdata = CopyErrorData();
 			if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER)
 			{
 				/* swallow the error; the outer loop resumes reading */
 				FlushErrorState();
 			}
 			else
 			{
 				MemoryContextSwitchTo(ecxt);
 				PG_RE_THROW();
 			}
 		}
 		PG_END_TRY();
 	}
 	while(skip);
 	tsearch_readline_end(&trst);
 	return rootSuffixTree;
 }
 /*
 * findReplaceTo - look up a multibyte character in the tree.
 *
 * Descends one tree level per byte of src (srclen bytes long). Returns the
 * entry selected by the last byte, or NULL if the tree has no node for some
 * earlier byte. The returned entry may have no replacement; callers must
 * check its replaceTo field.
 */
 static SuffixChar * 
 findReplaceTo( SuffixChar *node, unsigned char *src, int srclen )
 {
 	SuffixChar *level;
 	for ( level = node; level != NULL; src++, srclen-- )
 	{
 		SuffixChar *entry = level + *src;
 		/* the final byte selects the entry that is handed back */
 		if ( srclen == 1 )
 			return entry;
 		level = entry->nextChar;
 	}
 	return NULL;
 }
 /*
 * unaccent_init - init method of the unaccent dictionary template.
 *
 * Argument 0 is the list of dictionary options (DefElem nodes). Exactly one
 * option is accepted, "Rules" (matched case-insensitively), naming the rules
 * file that is loaded with initSuffixTree().
 *
 * Raises ERRCODE_INVALID_PARAMETER_VALUE when Rules is given more than
 * once, when an unknown option is given, or when Rules is missing.
 *
 * Returns the root of the replacement tree as the dictionary's private data.
 */
 PG_FUNCTION_INFO_V1(unaccent_init);
 Datum       unaccent_init(PG_FUNCTION_ARGS);
 Datum
 unaccent_init(PG_FUNCTION_ARGS)
 {
 	List       *dictoptions = (List *) PG_GETARG_POINTER(0);
 	SuffixChar *rootSuffixTree = NULL;
 	bool        fileloaded = false;
 	ListCell   *l;
 	foreach(l, dictoptions)
 	{
 		DefElem    *defel = (DefElem *) lfirst(l);
 		if (pg_strcasecmp("Rules", defel->defname) == 0)
 		{
 			if (fileloaded)
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 						 errmsg("multiple Rules parameters")));
 			/* first (and only) Rules option: load the file */
 			rootSuffixTree = initSuffixTree(defGetString(defel));
 			fileloaded = true;
 		}
 		else
 		{
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("unrecognized Unaccent parameter: \"%s\"",
 							defel->defname)));
 		}
 	}
 	if (!fileloaded)
 	{
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("missing Rules parameter")));
 	}
 	PG_RETURN_POINTER(rootSuffixTree);
 }
 /*
 * unaccent_lexize - lexize method of the unaccent dictionary template.
 *
 * Argument 0 is the replacement tree built by unaccent_init, argument 1
 * points to the input bytes and argument 2 is their length in bytes.
 *
 * Scans the input one multibyte character at a time and substitutes every
 * character that has a rule in the tree. Returns NULL when nothing was
 * replaced. Otherwise returns a two-element TSLexeme array whose first
 * element carries the NUL-terminated rewritten string with the TSL_FILTER
 * flag set; the second element is left zeroed.
 */
 PG_FUNCTION_INFO_V1(unaccent_lexize);
 Datum       unaccent_lexize(PG_FUNCTION_ARGS);
 Datum
 unaccent_lexize(PG_FUNCTION_ARGS)
 {
 	SuffixChar *rootSuffixTree = (SuffixChar*)PG_GETARG_POINTER(0);
 	char       *srcchar = (char *) PG_GETARG_POINTER(1);
 	int32		len = PG_GETARG_INT32(2);
 	char	   *srcstart = srcchar;
 	char	   *trgbuf = NULL;
 	size_t		trgsize = 0;
 	size_t		trgused = 0;
 	TSLexeme   *res = NULL;
 	while( srcchar - srcstart < len )
 	{
 		int			remaining = len - (int) (srcchar - srcstart);
 		int			charlen = pg_mblen(srcchar);
 		SuffixChar *node;
 		const char *piece;
 		int			piecelen;
 		/* never read past the end of the input on a truncated character */
 		if ( charlen > remaining )
 			charlen = remaining;
 		node = findReplaceTo( rootSuffixTree, (unsigned char *) srcchar, charlen );
 		if ( node && node->replaceTo )
 		{
 			if ( !res )
 			{
 				/* allocate res only if it's needed */
 				res = palloc0(sizeof(TSLexeme) * 2);
 				res->flags = TSL_FILTER;
 				trgsize = (size_t) len * pg_database_encoding_max_length() + 1 /* \0 */;
 				trgbuf = palloc( trgsize );
 				/* carry over the unchanged prefix seen so far */
 				trgused = srcchar - srcstart;
 				if ( trgused > 0 )
 					memcpy(trgbuf, srcstart, trgused);
 			}
 			piece = node->replaceTo;
 			piecelen = node->replacelen;
 		}
 		else
 		{
 			piece = srcchar;
 			piecelen = charlen;
 		}
 		if ( res )
 		{
 			size_t		needed = trgused + piecelen + 1 /* \0 */;
 			/*
 			 * A replacement taken from the rules file may be longer than
 			 * the initial per-byte estimate allows, so grow the buffer
 			 * instead of writing past its end.
 			 */
 			if ( needed > trgsize )
 			{
 				trgsize = ( needed > trgsize * 2 ) ? needed : trgsize * 2;
 				trgbuf = repalloc(trgbuf, trgsize);
 			}
 			memcpy( trgbuf + trgused, piece, piecelen );
 			trgused += piecelen;
 		}
 		srcchar += charlen;
 	}
 	if ( res )
 	{
 		trgbuf[trgused] = '\0';
 		res->lexeme = trgbuf;
 	}
 	PG_RETURN_POINTER(res);
 }
 /*
 * unaccent_dict - SQL-callable function wrapped around the dictionary.
 *
 * Called either as unaccent(text), in which case the dictionary named
 * "unaccent" is looked up, or as unaccent(regdictionary, text). Runs the
 * dictionary's lexize method over the text and returns the rewritten
 * string, or a copy of the input when the dictionary changed nothing.
 */
 PG_FUNCTION_INFO_V1(unaccent_dict);
 Datum       unaccent_dict(PG_FUNCTION_ARGS);
 Datum
 unaccent_dict(PG_FUNCTION_ARGS)
 {
 	int			textArgNo;
 	Oid			dictId;
 	text	   *input;
 	text	   *output;
 	TSDictionaryCacheEntry	*entry;
 	TSLexeme   *lexemes;
 	if (PG_NARGS() != 1)
 	{
 		/* explicit dictionary, followed by the string */
 		dictId = PG_GETARG_OID(0);
 		textArgNo = 1;
 	}
 	else
 	{
 		/* string only: fall back to the dictionary called "unaccent" */
 		dictId = TSDictionaryGetDictid(stringToQualifiedNameList("unaccent"), false);
 		textArgNo = 0;
 	}
 	input = PG_GETARG_TEXT_P(textArgNo);
 	entry = lookup_ts_dictionary_cache(dictId);
 	lexemes = (TSLexeme *) DatumGetPointer(FunctionCall4(&(entry->lexize),
 														 PointerGetDatum(entry->dictData),
 														 PointerGetDatum(VARDATA(input)),
 														 Int32GetDatum(VARSIZE(input) - VARHDRSZ),
 														 PointerGetDatum(NULL)));
 	PG_FREE_IF_COPY(input, textArgNo);
 	/* the dictionary returned nothing: hand back the input unchanged */
 	if ( lexemes == NULL )
 	{
 		PG_RETURN_TEXT_P(PG_GETARG_TEXT_P_COPY(textArgNo));
 	}
 	/* a result array without a lexeme is treated the same way */
 	if ( lexemes->lexeme == NULL )
 	{
 		pfree(lexemes);
 		PG_RETURN_TEXT_P(PG_GETARG_TEXT_P_COPY(textArgNo));
 	}
 	output = cstring_to_text(lexemes->lexeme);
 	pfree(lexemes->lexeme);
 	pfree(lexemes);
 	PG_RETURN_TEXT_P(output);
 }
--- a/contrib/unaccent/unaccent.rules
+++ b/contrib/unaccent/unaccent.rules
@ -0,0 +1,187 @@
 À	A
 Á	A
 Â	A
 Ã	A
 Ä	A
 Å	A
 Æ	A
 à	a
 á	a
 â	a
 ã	a
 ä	a
 å	a
 æ	a
 Ā	A
 ā	a
 Ă	A
 ă	a
 Ą	A
 ą	a
 Ç	C
 ç	c
 Ć	C
 ć	c
 Ĉ	C
 ĉ	c
 Ċ	C
 ċ	c
 Č	C
 č	c
 Ď	D
 ď	d
 Đ	D
 đ	d
 È	E
 É	E
 Ê	E
 Ë	E
 è	e
 é	e
 ê	e
 ë	e
 Ē	E
 ē	e
 Ĕ	E
 ĕ	e
 Ė	E
 ė	e
 Ę	E
 ę	e
 Ě	E
 ě	e
 Ĝ	G
 ĝ	g
 Ğ	G
 ğ	g
 Ġ	G
 ġ	g
 Ģ	G
 ģ	g
 Ĥ	H
 ĥ	h
 Ħ	H
 ħ	h
 Ĩ	I
 Ì	I
 Í	I
 Î	I
 Ï	I
 ì	i
 í	i
 î	i
 ï	i
 ĩ	i
 Ī	I
 ī	i
 Ĭ	I
 ĭ	i
 Į	I
 į	i
 İ	I
 ı	i
 Ĳ	I
 ĳ	i
 Ĵ	J
 ĵ	j
 Ķ	K
 ķ	k
 ĸ	k
 Ĺ	L
 ĺ	l
 Ļ	L
 ļ	l
 Ľ	L
 ľ	l
 Ŀ	L
 ŀ	l
 Ł	L
 ł	l
 Ñ	N
 ñ	n
 Ń	N
 ń	n
 Ņ	N
 ņ	n
 Ň	N
 ň	n
 ŉ	n
 Ŋ	N
 ŋ	n
 Ò	O
 Ó	O
 Ô	O
 Õ	O
 Ö	O
 ò	o
 ó	o
 ô	o
 õ	o
 ö	o
 Ō	O
 ō	o
 Ŏ	O
 ŏ	o
 Ő	O
 ő	o
 Œ	E
 œ	e
 Ø	O
 ø	o
 Ŕ	R
 ŕ	r
 Ŗ	R
 ŗ	r
 Ř	R
 ř	r
 ß	s
 Ś	S
 ś	s
 Ŝ	S
 ŝ	s
 Ş	S
 ş	s
 Š	S
 š	s
 Ţ	T
 ţ	t
 Ť	T
 ť	t
 Ŧ	T
 ŧ	t
 Ù	U
 Ú	U
 Û	U
 Ü	U
 ù	u
 ú	u
 û	u
 ü	u
 Ũ	U
 ũ	u
 Ū	U
 ū	u
 Ŭ	U
 ŭ	u
 Ů	U
 ů	u
 Ű	U
 ű	u
 Ų	U
 ų	u
 Ŵ	W
 ŵ	w
 Ý	Y
 ý	y
 ÿ	y
 Ŷ	Y
 ŷ	y
 Ÿ	Y
 Ź	Z
 ź	z
 Ż	Z
 ż	z
 Ž	Z
 ž	z
 ё	е
 Ё	Е
--- a/contrib/unaccent/unaccent.sql.in
+++ b/contrib/unaccent/unaccent.sql.in
@ -0,0 +1,33 @@
 /* $PostgreSQL: pgsql/contrib/unaccent/unaccent.sql.in,v 1.1 2009/08/18 10:34:39 teodor Exp $ */
 -- unaccent(dictionary, string): run the given text search dictionary over
 -- the string. Bound to the C function unaccent_dict.
 -- NOTE(review): declared IMMUTABLE although the result depends on the
 -- dictionary's rules file -- confirm that this volatility is intended.
 CREATE OR REPLACE FUNCTION unaccent(regdictionary, text)
 	RETURNS text
 	AS 'MODULE_PATHNAME', 'unaccent_dict'
 	LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE;
 -- unaccent(string): same C entry point; when called with a single argument
 -- unaccent_dict looks up the dictionary named "unaccent" itself.
 CREATE OR REPLACE FUNCTION unaccent(text)
 	RETURNS text
 	AS 'MODULE_PATHNAME', 'unaccent_dict'
 	LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE;
 -- Support functions of the text search template defined in this script;
 -- they are referenced by its INIT and LEXIZE settings.
 -- unaccent_init receives the dictionary options and loads the rules file.
 CREATE OR REPLACE FUNCTION unaccent_init(internal)
 	RETURNS internal
 	AS 'MODULE_PATHNAME', 'unaccent_init'
 	LANGUAGE C;
 -- unaccent_lexize applies the loaded rules to one input string.
 CREATE OR REPLACE FUNCTION unaccent_lexize(internal,internal,internal,internal)
 	RETURNS internal
 	AS 'MODULE_PATHNAME', 'unaccent_lexize'
 	LANGUAGE C;
 -- Template tying the two support functions together.
 CREATE TEXT SEARCH TEMPLATE unaccent (
    INIT = unaccent_init,
 	LEXIZE = unaccent_lexize
 );
 -- Default dictionary built on the template. RULES is the base name of the
 -- rules file; the ".rules" extension is added when the file is opened.
 CREATE TEXT SEARCH DICTIONARY unaccent (
 	TEMPLATE = unaccent,
 	RULES    = 'unaccent'
 );
--- a/contrib/unaccent/uninstall_unaccent.sql
+++ b/contrib/unaccent/uninstall_unaccent.sql
@ -0,0 +1,9 @@
 /* $PostgreSQL: pgsql/contrib/unaccent/uninstall_unaccent.sql,v 1.1 2009/08/18 10:34:39 teodor Exp $ */
 -- Remove the SQL-callable wrapper functions.
 DROP FUNCTION IF EXISTS unaccent(regdictionary, text) CASCADE;
 DROP FUNCTION IF EXISTS unaccent(text) CASCADE;
 -- Remove the dictionary before the template it is based on.
 DROP TEXT SEARCH DICTIONARY IF EXISTS unaccent CASCADE;
 DROP TEXT SEARCH TEMPLATE IF EXISTS unaccent CASCADE;
 -- Finally remove the template's support functions.
 DROP FUNCTION IF EXISTS unaccent_init(internal) CASCADE;
 DROP FUNCTION IF EXISTS unaccent_lexize(internal,internal,internal,internal) CASCADE;
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.13 2009/04/27 16:27:35 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.14 2009/08/18 10:34:39 teodor Exp $ -->
 <appendix id="contrib">
 <title>Additional Supplied Modules</title>
@ -113,6 +113,7 @@ psql -d dbname -f <replaceable>SHAREDIR</>/contrib/<replaceable>module</>.sql
 &tablefunc;
 &test-parser;
 &tsearch2;
 &unaccent;
 &uuid-ossp;
 &vacuumlo;
 &xml2;
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.63 2009/08/17 22:14:44 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.64 2009/08/18 10:34:39 teodor Exp $ -->
 <!entity history    SYSTEM "history.sgml">
 <!entity info       SYSTEM "info.sgml">
@ -126,6 +126,7 @@
 <!entity tablefunc       SYSTEM "tablefunc.sgml">
 <!entity test-parser     SYSTEM "test-parser.sgml">
 <!entity tsearch2        SYSTEM "tsearch2.sgml">
 <!entity unaccent      SYSTEM "unaccent.sgml">
 <!entity uuid-ossp       SYSTEM "uuid-ossp.sgml">
 <!entity vacuumlo        SYSTEM "vacuumlo.sgml">
 <!entity xml2            SYSTEM "xml2.sgml"> 
--- a/doc/src/sgml/unaccent.sgml
+++ b/doc/src/sgml/unaccent.sgml
@ -0,0 +1,150 @@
 <sect1 id="unaccent">
 <title>unaccent</title>
 <indexterm zone="unaccent">
  <primary>unaccent</primary>
 </indexterm>
 <para>
  <filename>unaccent</> removes accents (diacritic signs) from a lexeme.
  It is a filtering dictionary, which means that its output is
  always passed to the next dictionary (if any), contrary to the standard
  behaviour. Currently, it supports the most important accents from European
  languages.
 </para>
 <para>
  Limitation: The current implementation of the <filename>unaccent</>
  dictionary cannot be used as a normalizing dictionary for the
  <filename>thesaurus</filename> dictionary.
 </para>
 <sect2>
  <title>Configuration</title>
  <para>
   An <literal>unaccent</> dictionary accepts the following options:
  </para>
  <itemizedlist>
   <listitem>
    <para>
     <literal>RULES</> is the base name of the file containing the list of
     translation rules.  This file must be stored in
     <filename>$SHAREDIR/tsearch_data/</> (where <literal>$SHAREDIR</> means
     the <productname>PostgreSQL</> installation's shared-data directory).
     Its name must end in <literal>.rules</> (which is not to be included in
     the <literal>RULES</> parameter).
    </para>
   </listitem>
  </itemizedlist>
  <para>
   The rules file has the following format:
  </para>
  <itemizedlist>
   <listitem>
    <para>
     Each line represents a pair: character_with_accent  character_without_accent
    <programlisting>
 &Agrave;	A
 &Aacute; 	A
 &Acirc; 	A
 &Atilde;	A
 &Auml;  	A
 &Aring;		A
 &AElig; 	A
    </programlisting>
    </para>
   </listitem>
  </itemizedlist>
  <para>
   Look at <filename>unaccent.rules</>, which is installed in
   <filename>$SHAREDIR/tsearch_data/</>, for an example.
  </para>
 </sect2>
 <sect2>
  <title>Usage</title>
  <para>
   Running the installation script creates a text search template
   <literal>unaccent</> and a dictionary <literal>unaccent</>
   based on it, with default parameters.  You can alter the
   parameters, for example
 <programlisting>
 =# ALTER TEXT SEARCH DICTIONARY unaccent (RULES='my_rules');
 </programlisting>
   or create new dictionaries based on the template.
  </para>
  <para>
   To test the dictionary, you can try
 <programlisting>
 =# select ts_lexize('unaccent','Hôtel');
 ts_lexize 
 -----------
 {Hotel}
 (1 row)
 </programlisting>
  </para>
  <para>
  Filtering dictionaries are useful for the correct operation of the
  <function>ts_headline</function> function.
 <programlisting>
 =# CREATE TEXT SEARCH CONFIGURATION fr ( COPY = french );
 =# ALTER TEXT SEARCH CONFIGURATION fr
 	ALTER MAPPING FOR hword, hword_part, word
 	WITH unaccent, french_stem;
 =# select to_tsvector('fr','Hôtels de la Mer');
    to_tsvector    
 -------------------
 'hotel':1 'mer':4
 (1 row)
 =# select to_tsvector('fr','Hôtel de la Mer') @@ to_tsquery('fr','Hotels');
 ?column? 
 ----------
 t
 (1 row)
 =# select ts_headline('fr','Hôtel de la Mer',to_tsquery('fr','Hotels'));
      ts_headline       
 ------------------------
  &lt;b&gt;Hôtel&lt;/b&gt; de la Mer
 (1 row)
 </programlisting>
  </para>
 </sect2>
 <sect2>
 <title>Function</title>
 <para>
  The <function>unaccent</> function removes accents (diacritic signs) from
  its argument string. Basically, it is a wrapper around the
  <filename>unaccent</> dictionary.
 </para>
 <indexterm>
  <primary>unaccent</primary>
 </indexterm>
 <synopsis>
   unaccent(<optional><replaceable class="PARAMETER">dictionary</replaceable>,
   </optional> <replaceable class="PARAMETER">string</replaceable>) 
  returns <type>text</type>
 </synopsis>  
 <para>
 <programlisting>
 SELECT unaccent('unaccent','Hôtel');
 SELECT unaccent('Hôtel');
 </programlisting>
 </para>
 </sect2>
 </sect1>