postgresql/contrib/unaccent/unaccent.c

/*-------------------------------------------------------------------------
 *
 * unaccent.c
 *	  Text search unaccent dictionary
 *
 * Copyright (c) 2009-2012, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/unaccent/unaccent.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "utils/builtins.h"

PG_MODULE_MAGIC;

/*
 * The unaccent dictionary finds the character to replace by walking an
 * uncompressed, byte-indexed tree (called a "suffix tree" throughout this
 * file).  Each node of the tree is an array of 256 SuffixChar structs; the
 * n-th element of the array corresponds to byte value n.
 */
typedef struct SuffixChar
{
	struct SuffixChar *nextChar;	/* array for the following byte, or NULL */
	char	   *replaceTo;		/* replacement bytes (copied with memcpy, no
								 * terminator added), or NULL if no rule
								 * ends at this entry */
	int			replacelen;		/* number of bytes in replaceTo */
} SuffixChar;

/*
 * placeChar - record one replacement rule in the tree.
 *
 * Descends one tree level per byte of str, allocating a zero-filled
 * 256-entry level wherever one does not exist yet, and stores a private
 * copy of replaceTo (replacelen bytes) in the entry selected by the last
 * byte.  If that entry already carries a replacement, the existing one is
 * kept and a WARNING is emitted.
 *
 * Callers are expected to pass lenstr >= 1.
 *
 * Returns the root level: the passed-in node, or a freshly allocated one
 * when node was NULL.
 */
static SuffixChar *
placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
{
	SuffixChar *root = node;
	SuffixChar **level = &root;	/* link that points at the current level */
	SuffixChar *entry;

	for (;;)
	{
		/* create the current level on demand */
		if (*level == NULL)
		{
			*level = palloc(sizeof(SuffixChar) * 256);
			memset(*level, 0, sizeof(SuffixChar) * 256);
		}

		entry = *level + *str;

		/* last byte of the source string: this entry gets the rule */
		if (lenstr == 1)
			break;

		/* otherwise continue with the next byte, one level deeper */
		level = &entry->nextChar;
		str++;
		lenstr--;
	}

	if (entry->replaceTo != NULL)
		elog(WARNING, "duplicate TO argument, use first one");
	else
	{
		entry->replaceTo = palloc(replacelen);
		memcpy(entry->replaceTo, replaceTo, replacelen);
		entry->replacelen = replacelen;
	}

	return root;
}

/*
 * initSuffixTree  - create suffix tree from file. Function converts
 * UTF8-encoded file into current encoding.
 *
 * filename is the rules file name as given in the dictionary options; it is
 * resolved with get_tsearch_config_filename() using the "rules" extension.
 * An ERROR is raised if the file cannot be opened.
 *
 * Every well-formed line ("src trg") is entered into the tree with
 * placeChar().  Malformed lines are silently ignored, and so are lines whose
 * reading fails with ERRCODE_UNTRANSLATABLE_CHARACTER; any other error is
 * re-thrown.
 *
 * Returns the root of the tree, or NULL if no rule was loaded.
 */
static SuffixChar *
initSuffixTree(char *filename)
{
	/* volatile: modified inside PG_TRY and read after a caught error */
	SuffixChar *volatile rootSuffixTree = NULL;
	MemoryContext ccxt = CurrentMemoryContext;
	tsearch_readline_state trst;
	volatile bool skip;

	filename = get_tsearch_config_filename(filename, "rules");
	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open unaccent file \"%s\": %m",
						filename)));

	do
	{
		/*
		 * pg_do_encoding_conversion() (called by tsearch_readline()) will
		 * emit exception if it finds untranslatable characters in current
		 * locale. We just skip such lines, continuing with the next.
		 *
		 * skip stays true unless the read loop below runs to end of file, so
		 * a caught error makes the outer loop resume reading.
		 */
		skip = true;

		PG_TRY();
		{
			char	   *line;

			while ((line = tsearch_readline(&trst)) != NULL)
			{
				/*
				 * The format of each line must be "src trg" where src and trg
				 * are sequences of one or more non-whitespace characters,
				 * separated by whitespace.  Whitespace at start or end of
				 * line is ignored.
				 *
				 * Parser states: 0 = before src, 1 = inside src, 2 = between
				 * src and trg, 3 = inside trg, 4 = after trg, -1 = bogus
				 * line (anything non-blank found after trg).
				 */
				int			state;
				char	   *ptr;
				char	   *src = NULL;
				char	   *trg = NULL;
				int			ptrlen;
				int			srclen = 0;
				int			trglen = 0;

				state = 0;
				for (ptr = line; *ptr; ptr += ptrlen)
				{
					ptrlen = pg_mblen(ptr);
					/* ignore whitespace, but end src or trg */
					if (t_isspace(ptr))
					{
						if (state == 1)
							state = 2;
						else if (state == 3)
							state = 4;
						continue;
					}
					switch (state)
					{
						case 0:
							/* start of src */
							src = ptr;
							srclen = ptrlen;
							state = 1;
							break;
						case 1:
							/* continue src */
							srclen += ptrlen;
							break;
						case 2:
							/* start of trg */
							trg = ptr;
							trglen = ptrlen;
							state = 3;
							break;
						case 3:
							/* continue trg */
							trglen += ptrlen;
							break;
						default:
							/* bogus line format */
							state = -1;
							break;
					}
				}

				/* only lines that ended in or after a complete trg are used */
				if (state >= 3)
					rootSuffixTree = placeChar(rootSuffixTree,
											   (unsigned char *) src, srclen,
											   trg, trglen);

				pfree(line);
			}
			skip = false;
		}
		PG_CATCH();
		{
			ErrorData  *errdata;
			MemoryContext ecxt;

			/* copy the error out of the error context so we can inspect it */
			ecxt = MemoryContextSwitchTo(ccxt);
			errdata = CopyErrorData();
			if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER)
			{
				FlushErrorState();

				/*
				 * The copy was needed only to look at the error code.  It
				 * lives in ccxt, the same context the tree is built in, so
				 * release it rather than leaving one dead copy per skipped
				 * line.
				 */
				FreeErrorData(errdata);
			}
			else
			{
				MemoryContextSwitchTo(ecxt);
				PG_RE_THROW();
			}
		}
		PG_END_TRY();
	}
	while (skip);

	tsearch_readline_end(&trst);

	return rootSuffixTree;
}

/*
 * findReplaceTo - look up one (possibly multibyte) character in the tree.
 *
 * Follows the srclen bytes starting at src, one tree level per byte.
 * Returns the entry selected by the last byte, whose replaceTo may still be
 * NULL, or NULL if the tree has no level for one of the bytes.
 */
static SuffixChar *
findReplaceTo(SuffixChar *node, unsigned char *src, int srclen)
{
	SuffixChar *level = node;
	int			remaining = srclen;

	for (; level != NULL; src++, remaining--)
	{
		SuffixChar *entry = &level[*src];

		/* reached the final byte of the character */
		if (remaining == 1)
			return entry;

		level = entry->nextChar;
	}

	return NULL;
}

PG_FUNCTION_INFO_V1(unaccent_init);
Datum		unaccent_init(PG_FUNCTION_ARGS);

/*
 * unaccent_init - build the dictionary data from its option list.
 *
 * Argument 0 is a List of DefElem options.  Exactly one "Rules" option
 * (name compared case-insensitively) must be present; its value names the
 * rules file handed to initSuffixTree().  Any other option, a repeated
 * "Rules", or a missing "Rules" raises ERRCODE_INVALID_PARAMETER_VALUE.
 *
 * Returns the root of the loaded tree as a pointer Datum.
 */
Datum
unaccent_init(PG_FUNCTION_ARGS)
{
	List	   *options = (List *) PG_GETARG_POINTER(0);
	SuffixChar *tree = NULL;
	bool		haveRules = false;
	ListCell   *cell;

	foreach(cell, options)
	{
		DefElem    *opt = (DefElem *) lfirst(cell);

		/* "Rules" is the only option this dictionary understands */
		if (pg_strcasecmp("Rules", opt->defname) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("unrecognized Unaccent parameter: \"%s\"",
							opt->defname)));

		if (haveRules)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("multiple Rules parameters")));

		tree = initSuffixTree(defGetString(opt));
		haveRules = true;
	}

	/* a separate flag is needed: the loaded tree itself may be NULL */
	if (!haveRules)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("missing Rules parameter")));

	PG_RETURN_POINTER(tree);
}

PG_FUNCTION_INFO_V1(unaccent_lexize);
Datum		unaccent_lexize(PG_FUNCTION_ARGS);
Datum
unaccent_lexize(PG_FUNCTION_ARGS)
{
	SuffixChar *rootSuffixTree = (SuffixChar *) PG_GETARG_POINTER(0);
	char	   *srcchar = (char *) PG_GETARG_POINTER(1);
	int32		len = PG_GETARG_INT32(2);
	char	   *srcstart,
			   *trgchar = NULL;
	int			charlen;
	TSLexeme   *res = NULL;
	SuffixChar *node;

	srcstart = srcchar;
	while (srcchar - srcstart < len)
	{
		charlen = pg_mblen(srcchar);

		node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen);
		if (node && node->replaceTo)
		{
			if (!res)
			{
				/* allocate res only it it's needed */
				res = palloc0(sizeof(TSLexeme) * 2);
				res->lexeme = trgchar = palloc(len * pg_database_encoding_max_length() + 1 /* \0 */ );
				res->flags = TSL_FILTER;
				if (srcchar != srcstart)
				{
					memcpy(trgchar, srcstart, srcchar - srcstart);
					trgchar += (srcchar - srcstart);
				}
			}
			memcpy(trgchar, node->replaceTo, node->replacelen);
			trgchar += node->replacelen;
		}
		else if (res)
		{
			memcpy(trgchar, srcchar, charlen);
			trgchar += charlen;
		}

		srcchar += charlen;
	}

	if (res)
		*trgchar = '\0';

	PG_RETURN_POINTER(res);
}

/*
 * Function-like wrapper for dictionary
 *
 * Callable as f(text) or f(dictionary oid, text).  The one-argument form
 * looks up the dictionary named "unaccent".  The text is run through the
 * dictionary's lexize method; if that yields a lexeme it is returned as
 * text, otherwise a copy of the input is returned.
 */
PG_FUNCTION_INFO_V1(unaccent_dict);
Datum		unaccent_dict(PG_FUNCTION_ARGS);
Datum
unaccent_dict(PG_FUNCTION_ARGS)
{
	int			textArgNo;
	Oid			dictId;
	text	   *input;
	TSDictionaryCacheEntry *entry;
	TSLexeme   *lexemes;
	text	   *result;

	/* work out which dictionary to use and where the text argument is */
	if (PG_NARGS() != 1)
	{
		dictId = PG_GETARG_OID(0);
		textArgNo = 1;
	}
	else
	{
		dictId = get_ts_dict_oid(stringToQualifiedNameList("unaccent"), false);
		textArgNo = 0;
	}
	input = PG_GETARG_TEXT_P(textArgNo);

	entry = lookup_ts_dictionary_cache(dictId);

	lexemes = (TSLexeme *) DatumGetPointer(FunctionCall4(&(entry->lexize),
											 PointerGetDatum(entry->dictData),
											 PointerGetDatum(VARDATA(input)),
									Int32GetDatum(VARSIZE(input) - VARHDRSZ),
											 PointerGetDatum(NULL)));

	PG_FREE_IF_COPY(input, textArgNo);

	if (lexemes != NULL && lexemes->lexeme != NULL)
	{
		/* the dictionary rewrote the string: hand back the new text */
		result = cstring_to_text(lexemes->lexeme);

		pfree(lexemes->lexeme);
		pfree(lexemes);
	}
	else
	{
		/* nothing was replaced: return the input unchanged */
		if (lexemes != NULL)
			pfree(lexemes);
		result = PG_GETARG_TEXT_P_COPY(textArgNo);
	}

	PG_RETURN_TEXT_P(result);
}
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`/*-------------------------------------------------------------------------`
			`*`
			`* unaccent.c`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`* Text search unaccent dictionary`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`*`
Update copyright notices for year 2012. 2012-01-02 07:01:58 +08:00			`* Copyright (c) 2009-2012, PostgreSQL Global Development Group`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`*`
			`* IDENTIFICATION`
Remove cvs keywords from all files. 2010-09-21 04:08:53 +08:00			`* contrib/unaccent/unaccent.c`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`*`
			`*-------------------------------------------------------------------------`
			`*/`

			`#include "postgres.h"`

			`#include "catalog/namespace.h"`
			`#include "commands/defrem.h"`
			`#include "tsearch/ts_cache.h"`
			`#include "tsearch/ts_locale.h"`
			`#include "tsearch/ts_public.h"`
			`#include "utils/builtins.h"`

			`PG_MODULE_MAGIC;`

			`/*`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`* Unaccent dictionary uses uncompressed suffix tree to find a`
			`* character to replace. Each node of tree is an array of`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`* SuffixChar struct with length = 256 (n-th element of array`
			`* corresponds to byte)`
			`*/`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`typedef struct SuffixChar`
			`{`
			`struct SuffixChar *nextChar;`
			`char *replaceTo;`
			`int replacelen;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`} SuffixChar;`

			`/*`
			`* placeChar - put str into tree's structure, byte by byte.`
			`*/`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`static SuffixChar *`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`placeChar(SuffixChar node, unsigned char str, int lenstr, char *replaceTo, int replacelen)`
			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`SuffixChar *curnode;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`if (!node)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
			`node = palloc(sizeof(SuffixChar) * 256);`
			`memset(node, 0, sizeof(SuffixChar) * 256);`
			`}`

			`curnode = node + *str;`

pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`if (lenstr == 1)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`if (curnode->replaceTo)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`elog(WARNING, "duplicate TO argument, use first one");`
			`else`
			`{`
			`curnode->replacelen = replacelen;`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`curnode->replaceTo = palloc(replacelen);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`memcpy(curnode->replaceTo, replaceTo, replacelen);`
			`}`
			`}`
			`else`
			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`curnode->nextChar = placeChar(curnode->nextChar, str + 1, lenstr - 1, replaceTo, replacelen);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`}`

			`return node;`
			`}`

			`/*`
			`* initSuffixTree - create suffix tree from file. Function converts`
			`* UTF8-encoded file into current encoding.`
			`*/`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`static SuffixChar *`
			`initSuffixTree(char *filename)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`SuffixChar *volatile rootSuffixTree = NULL;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`MemoryContext ccxt = CurrentMemoryContext;`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`tsearch_readline_state trst;`
			`volatile bool skip;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00
			`filename = get_tsearch_config_filename(filename, "rules");`
			`if (!tsearch_readline_begin(&trst, filename))`
			`ereport(ERROR,`
			`(errcode(ERRCODE_CONFIG_FILE_ERROR),`
			`errmsg("could not open unaccent file \"%s\": %m",`
			`filename)));`

pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`do`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
Fix assorted bugs in contrib/unaccent's configuration file parsing. Make it use t_isspace() to identify whitespace, rather than relying on sscanf which is known to get it wrong on some platform/locale combinations. Get rid of fixed-size buffers. Make it actually continue to parse the file after ignoring a line with untranslatable characters, as was obviously intended. The first of these issues is per gripe from J Smith, though not exactly either of his proposed patches. 2011-11-08 00:48:53 +08:00			`/*`
			`* pg_do_encoding_conversion() (called by tsearch_readline()) will`
			`* emit exception if it finds untranslatable characters in current`
			`* locale. We just skip such lines, continuing with the next.`
			`*/`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`skip = true;`

			`PG_TRY();`
			`{`
Fix assorted bugs in contrib/unaccent's configuration file parsing. Make it use t_isspace() to identify whitespace, rather than relying on sscanf which is known to get it wrong on some platform/locale combinations. Get rid of fixed-size buffers. Make it actually continue to parse the file after ignoring a line with untranslatable characters, as was obviously intended. The first of these issues is per gripe from J Smith, though not exactly either of his proposed patches. 2011-11-08 00:48:53 +08:00			`char *line;`

Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`while ((line = tsearch_readline(&trst)) != NULL)`
			`{`
Fix assorted bugs in contrib/unaccent's configuration file parsing. Make it use t_isspace() to identify whitespace, rather than relying on sscanf which is known to get it wrong on some platform/locale combinations. Get rid of fixed-size buffers. Make it actually continue to parse the file after ignoring a line with untranslatable characters, as was obviously intended. The first of these issues is per gripe from J Smith, though not exactly either of his proposed patches. 2011-11-08 00:48:53 +08:00			`/*`
			`* The format of each line must be "src trg" where src and trg`
			`* are sequences of one or more non-whitespace characters,`
			`* separated by whitespace. Whitespace at start or end of`
			`* line is ignored.`
			`*/`
			`int state;`
			`char *ptr;`
			`char *src = NULL;`
			`char *trg = NULL;`
			`int ptrlen;`
			`int srclen = 0;`
			`int trglen = 0;`

			`state = 0;`
			`for (ptr = line; *ptr; ptr += ptrlen)`
			`{`
			`ptrlen = pg_mblen(ptr);`
			`/* ignore whitespace, but end src or trg */`
			`if (t_isspace(ptr))`
			`{`
			`if (state == 1)`
			`state = 2;`
			`else if (state == 3)`
			`state = 4;`
			`continue;`
			`}`
			`switch (state)`
			`{`
			`case 0:`
			`/* start of src */`
			`src = ptr;`
			`srclen = ptrlen;`
			`state = 1;`
			`break;`
			`case 1:`
			`/* continue src */`
			`srclen += ptrlen;`
			`break;`
			`case 2:`
			`/* start of trg */`
			`trg = ptr;`
			`trglen = ptrlen;`
			`state = 3;`
			`break;`
			`case 3:`
			`/* continue trg */`
			`trglen += ptrlen;`
			`break;`
			`default:`
			`/* bogus line format */`
			`state = -1;`
			`break;`
			`}`
			`}`

			`if (state >= 3)`
			`rootSuffixTree = placeChar(rootSuffixTree,`
			`(unsigned char *) src, srclen,`
			`trg, trglen);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00
			`pfree(line);`
			`}`
Fix assorted bugs in contrib/unaccent's configuration file parsing. Make it use t_isspace() to identify whitespace, rather than relying on sscanf which is known to get it wrong on some platform/locale combinations. Get rid of fixed-size buffers. Make it actually continue to parse the file after ignoring a line with untranslatable characters, as was obviously intended. The first of these issues is per gripe from J Smith, though not exactly either of his proposed patches. 2011-11-08 00:48:53 +08:00			`skip = false;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`}`
			`PG_CATCH();`
			`{`
			`ErrorData *errdata;`
			`MemoryContext ecxt;`

			`ecxt = MemoryContextSwitchTo(ccxt);`
			`errdata = CopyErrorData();`
			`if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER)`
			`{`
			`FlushErrorState();`
			`}`
			`else`
			`{`
			`MemoryContextSwitchTo(ecxt);`
			`PG_RE_THROW();`
			`}`
			`}`
			`PG_END_TRY();`
			`}`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`while (skip);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00
			`tsearch_readline_end(&trst);`

			`return rootSuffixTree;`
			`}`

			`/*`
			`* findReplaceTo - find multibyte character in tree`
			`*/`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`static SuffixChar *`
			`findReplaceTo(SuffixChar node, unsigned char src, int srclen)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`while (node)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
			`node = node + *src;`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`if (srclen == 1)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`return node;`

			`src++;`
			`srclen--;`
			`node = node->nextChar;`
			`}`

			`return NULL;`
			`}`

			`PG_FUNCTION_INFO_V1(unaccent_init);`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`Datum unaccent_init(PG_FUNCTION_ARGS);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`Datum`
			`unaccent_init(PG_FUNCTION_ARGS)`
			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`List dictoptions = (List ) PG_GETARG_POINTER(0);`
Suppress compiler warnings about uninitialized variables. 2009-08-18 23:37:04 +08:00			`SuffixChar *rootSuffixTree = NULL;`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`bool fileloaded = false;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`ListCell *l;`

			`foreach(l, dictoptions)`
			`{`
			`DefElem defel = (DefElem ) lfirst(l);`

			`if (pg_strcasecmp("Rules", defel->defname) == 0)`
			`{`
			`if (fileloaded)`
			`ereport(ERROR,`
			`(errcode(ERRCODE_INVALID_PARAMETER_VALUE),`
			`errmsg("multiple Rules parameters")));`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`rootSuffixTree = initSuffixTree(defGetString(defel));`
			`fileloaded = true;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`}`
			`else`
			`{`
			`ereport(ERROR,`
			`(errcode(ERRCODE_INVALID_PARAMETER_VALUE),`
			`errmsg("unrecognized Unaccent parameter: \"%s\"",`
			`defel->defname)));`
			`}`
			`}`

			`if (!fileloaded)`
			`{`
			`ereport(ERROR,`
			`(errcode(ERRCODE_INVALID_PARAMETER_VALUE),`
			`errmsg("missing Rules parameter")));`
			`}`

			`PG_RETURN_POINTER(rootSuffixTree);`
			`}`

			`PG_FUNCTION_INFO_V1(unaccent_lexize);`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`Datum unaccent_lexize(PG_FUNCTION_ARGS);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`Datum`
			`unaccent_lexize(PG_FUNCTION_ARGS)`
			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`SuffixChar rootSuffixTree = (SuffixChar ) PG_GETARG_POINTER(0);`
			`char srcchar = (char ) PG_GETARG_POINTER(1);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`int32 len = PG_GETARG_INT32(2);`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`char *srcstart,`
			`*trgchar = NULL;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`int charlen;`
			`TSLexeme *res = NULL;`
			`SuffixChar *node;`

			`srcstart = srcchar;`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`while (srcchar - srcstart < len)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
			`charlen = pg_mblen(srcchar);`

pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen);`
			`if (node && node->replaceTo)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`if (!res)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
			`/* allocate res only it it's needed */`
			`res = palloc0(sizeof(TSLexeme) * 2);`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`res->lexeme = trgchar = palloc(len * pg_database_encoding_max_length() + 1 /* \0 */ );`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`res->flags = TSL_FILTER;`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`if (srcchar != srcstart)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
			`memcpy(trgchar, srcstart, srcchar - srcstart);`
			`trgchar += (srcchar - srcstart);`
			`}`
			`}`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`memcpy(trgchar, node->replaceTo, node->replacelen);`
			`trgchar += node->replacelen;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`}`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`else if (res)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`memcpy(trgchar, srcchar, charlen);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`trgchar += charlen;`
			`}`

			`srcchar += charlen;`
			`}`

pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`if (res)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`*trgchar = '\0';`

			`PG_RETURN_POINTER(res);`
			`}`

			`/*`
			`* Function-like wrapper for dictionary`
			`*/`
			`PG_FUNCTION_INFO_V1(unaccent_dict);`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`Datum unaccent_dict(PG_FUNCTION_ARGS);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`Datum`
			`unaccent_dict(PG_FUNCTION_ARGS)`
			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`text *str;`
			`int strArg;`
			`Oid dictOid;`
			`TSDictionaryCacheEntry *dict;`
			`TSLexeme *res;`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00
			`if (PG_NARGS() == 1)`
			`{`
Standardize get_whatever_oid functions for other object types. - Rename TSParserGetPrsid to get_ts_parser_oid. - Rename TSDictionaryGetDictid to get_ts_dict_oid. - Rename TSTemplateGetTmplid to get_ts_template_oid. - Rename TSConfigGetCfgid to get_ts_config_oid. - Rename FindConversionByName to get_conversion_oid. - Rename GetConstraintName to get_constraint_oid. - Add new functions get_opclass_oid, get_opfamily_oid, get_rewrite_oid, get_rewrite_oid_without_relid, get_trigger_oid, and get_cast_oid. The name of each function matches the corresponding catalog. Thanks to KaiGai Kohei for the review. 2010-08-05 23:25:36 +08:00			`dictOid = get_ts_dict_oid(stringToQualifiedNameList("unaccent"), false);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`strArg = 0;`
			`}`
			`else`
			`{`
			`dictOid = PG_GETARG_OID(0);`
			`strArg = 1;`
			`}`
			`str = PG_GETARG_TEXT_P(strArg);`

			`dict = lookup_ts_dictionary_cache(dictOid);`

			`res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`PointerGetDatum(dict->dictData),`
			`PointerGetDatum(VARDATA(str)),`
			`Int32GetDatum(VARSIZE(str) - VARHDRSZ),`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`PointerGetDatum(NULL)));`

			`PG_FREE_IF_COPY(str, strArg);`

pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`if (res == NULL)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
			`PG_RETURN_TEXT_P(PG_GETARG_TEXT_P_COPY(strArg));`
			`}`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`else if (res->lexeme == NULL)`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00			`{`
			`pfree(res);`
			`PG_RETURN_TEXT_P(PG_GETARG_TEXT_P_COPY(strArg));`
			`}`
			`else`
			`{`
pgindent run for 9.0 2010-02-26 10:01:40 +08:00			`text *txt = cstring_to_text(res->lexeme);`
Unaccent dictionary. 2009-08-18 18:34:39 +08:00
			`pfree(res->lexeme);`
			`pfree(res);`

			`PG_RETURN_TEXT_P(txt);`
			`}`
			`}`