2004-07-01 11:25:48 +08:00
|
|
|
/*
|
|
|
|
* This is a port of the Double Metaphone algorithm for use in PostgreSQL.
|
2004-08-29 13:07:03 +08:00
|
|
|
*
|
2010-09-21 04:08:53 +08:00
|
|
|
* contrib/fuzzystrmatch/dmetaphone.c
|
2006-03-11 12:38:42 +08:00
|
|
|
*
|
2004-07-01 11:25:48 +08:00
|
|
|
* Double Metaphone computes 2 "sounds like" strings - a primary and an
|
|
|
|
* alternate. In most cases they are the same, but for foreign names
|
|
|
|
* especially they can be a bit different, depending on pronunciation.
|
|
|
|
*
|
|
|
|
* Information on using Double Metaphone can be found at
|
2005-10-01 06:38:44 +08:00
|
|
|
* http://www.codeproject.com/string/dmetaphone1.asp
|
2004-07-01 11:25:48 +08:00
|
|
|
* and the original article describing it can be found at
|
2013-09-11 09:34:01 +08:00
|
|
|
* http://drdobbs.com/184401251
|
2004-07-01 11:25:48 +08:00
|
|
|
*
|
2005-10-01 06:38:44 +08:00
|
|
|
* For PostgreSQL we provide 2 functions - one for the primary and one for
|
2004-07-01 11:25:48 +08:00
|
|
|
* the alternate. That way the functions are pure text->text mappings that
|
|
|
|
* are useful in functional indexes. These are 'dmetaphone' for the
|
|
|
|
* primary and 'dmetaphone_alt' for the alternate.
|
|
|
|
*
|
2004-08-29 13:07:03 +08:00
|
|
|
* Assuming that dmetaphone.so is in $libdir, the SQL to set up the
|
2004-07-01 11:25:48 +08:00
|
|
|
* functions looks like this:
|
|
|
|
*
|
2004-08-29 13:07:03 +08:00
|
|
|
* CREATE FUNCTION dmetaphone (text) RETURNS text
|
|
|
|
* LANGUAGE C IMMUTABLE STRICT
|
|
|
|
* AS '$libdir/dmetaphone', 'dmetaphone';
|
2004-07-01 11:25:48 +08:00
|
|
|
*
|
2004-08-29 13:07:03 +08:00
|
|
|
* CREATE FUNCTION dmetaphone_alt (text) RETURNS text
|
|
|
|
* LANGUAGE C IMMUTABLE STRICT
|
|
|
|
* AS '$libdir/dmetaphone', 'dmetaphone_alt';
|
2004-07-01 11:25:48 +08:00
|
|
|
*
|
|
|
|
* Note that you have to declare the functions IMMUTABLE if you want to
|
|
|
|
* use them in functional indexes, and you have to declare them as STRICT
|
2004-08-29 13:07:03 +08:00
|
|
|
* as they do not check for NULL input, and will segfault if given NULL input.
|
|
|
|
* (See below for an alternative.) Declaring them as STRICT means PostgreSQL
|
|
|
|
* will never call them with NULL, but instead assume the result is NULL,
|
2004-07-01 11:25:48 +08:00
|
|
|
* which is what we (I) want.
|
|
|
|
*
|
|
|
|
* Alternatively, compile with -DDMETAPHONE_NOSTRICT and the functions
|
|
|
|
* will detect NULL input and return NULL. Then you don't have to declare them
|
|
|
|
* as STRICT.
|
|
|
|
*
|
|
|
|
* There is a small inefficiency here - each function call actually computes
|
|
|
|
* both the primary and the alternate and then throws away the one it doesn't
|
|
|
|
* need. That's the way the perl module was written, because perl can handle
|
|
|
|
* a list return more easily than we can in PostgreSQL. The result has been
|
|
|
|
* fast enough for my needs, but it could maybe be optimized a bit to remove
|
2004-08-29 13:07:03 +08:00
|
|
|
* that behaviour.
|
2004-07-01 11:25:48 +08:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/***************************** COPYRIGHT NOTICES ***********************
|
|
|
|
|
|
|
|
Most of this code is directly from the Text::DoubleMetaphone perl module
|
2004-08-29 13:07:03 +08:00
|
|
|
version 0.05 available from http://www.cpan.org.
|
2004-07-01 11:25:48 +08:00
|
|
|
It bears this copyright notice:
|
|
|
|
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
Copyright 2000, Maurice Aubrey <maurice@hevanet.com>.
|
2004-07-01 11:25:48 +08:00
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
This code is based heavily on the C++ implementation by
|
|
|
|
Lawrence Philips and incorporates several bug fixes courtesy
|
|
|
|
of Kevin Atkinson <kevina@users.sourceforge.net>.
|
|
|
|
|
|
|
|
This module is free software; you may redistribute it and/or
|
|
|
|
modify it under the same terms as Perl itself.
|
|
|
|
|
|
|
|
The remaining code is authored by Andrew Dunstan <amdunstan@ncshp.org> and
|
|
|
|
<andrew@dunslane.net> and is covered this copyright:
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
Copyright 2003, North Carolina State Highway Patrol.
|
2004-07-01 11:25:48 +08:00
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
Permission to use, copy, modify, and distribute this software and its
|
|
|
|
documentation for any purpose, without fee, and without a written agreement
|
|
|
|
is hereby granted, provided that the above copyright notice and this
|
|
|
|
paragraph and the following two paragraphs appear in all copies.
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
IN NO EVENT SHALL THE NORTH CAROLINA STATE HIGHWAY PATROL BE LIABLE TO ANY
|
|
|
|
PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
|
2004-07-01 11:25:48 +08:00
|
|
|
INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
|
2004-08-29 13:07:03 +08:00
|
|
|
DOCUMENTATION, EVEN IF THE NORTH CAROLINA STATE HIGHWAY PATROL HAS BEEN
|
2004-07-01 11:25:48 +08:00
|
|
|
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
THE NORTH CAROLINA STATE HIGHWAY PATROL SPECIFICALLY DISCLAIMS ANY
|
|
|
|
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
2004-07-01 11:25:48 +08:00
|
|
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED
|
|
|
|
HEREUNDER IS ON AN "AS IS" BASIS, AND THE NORTH CAROLINA STATE HIGHWAY PATROL
|
|
|
|
HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
|
|
|
|
MODIFICATIONS.
|
|
|
|
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* include these first, according to the docs */
|
|
|
|
#ifndef DMETAPHONE_MAIN
|
|
|
|
|
|
|
|
#include "postgres.h"
|
2006-07-14 13:28:29 +08:00
|
|
|
|
2008-03-26 06:42:46 +08:00
|
|
|
#include "utils/builtins.h"
|
2004-07-01 11:25:48 +08:00
|
|
|
|
|
|
|
/* turn off assertions for embedded function */
|
|
|
|
#define NDEBUG
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
/* prototype for the main function we got from the perl module */
|
2008-03-26 06:42:46 +08:00
|
|
|
static void DoubleMetaphone(char *, char **);
|
2004-07-01 11:25:48 +08:00
|
|
|
|
|
|
|
#ifndef DMETAPHONE_MAIN
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The PostgreSQL visible dmetaphone function.
|
|
|
|
*/
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(dmetaphone);
|
|
|
|
|
|
|
|
Datum
|
|
|
|
dmetaphone(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2008-03-26 06:42:46 +08:00
|
|
|
text *arg;
|
2004-08-29 13:07:03 +08:00
|
|
|
char *aptr,
|
|
|
|
*codes[2],
|
2008-03-26 06:42:46 +08:00
|
|
|
*code;
|
2004-08-21 03:48:14 +08:00
|
|
|
|
2004-07-01 11:25:48 +08:00
|
|
|
#ifdef DMETAPHONE_NOSTRICT
|
|
|
|
if (PG_ARGISNULL(0))
|
2008-03-26 06:42:46 +08:00
|
|
|
PG_RETURN_NULL();
|
2004-07-01 11:25:48 +08:00
|
|
|
#endif
|
2004-08-21 03:48:14 +08:00
|
|
|
arg = PG_GETARG_TEXT_P(0);
|
2008-03-26 06:42:46 +08:00
|
|
|
aptr = text_to_cstring(arg);
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
DoubleMetaphone(aptr, codes);
|
2004-08-21 03:48:14 +08:00
|
|
|
code = codes[0];
|
2004-07-01 11:25:48 +08:00
|
|
|
if (!code)
|
|
|
|
code = "";
|
2008-03-26 06:42:46 +08:00
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(cstring_to_text(code));
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The PostgreSQL visible dmetaphone_alt function.
|
|
|
|
*/
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(dmetaphone_alt);
|
|
|
|
|
|
|
|
Datum
|
|
|
|
dmetaphone_alt(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2008-03-26 06:42:46 +08:00
|
|
|
text *arg;
|
2004-08-29 13:07:03 +08:00
|
|
|
char *aptr,
|
|
|
|
*codes[2],
|
2008-03-26 06:42:46 +08:00
|
|
|
*code;
|
2004-08-21 03:48:14 +08:00
|
|
|
|
2004-07-01 11:25:48 +08:00
|
|
|
#ifdef DMETAPHONE_NOSTRICT
|
|
|
|
if (PG_ARGISNULL(0))
|
2008-03-26 06:42:46 +08:00
|
|
|
PG_RETURN_NULL();
|
2004-07-01 11:25:48 +08:00
|
|
|
#endif
|
2004-08-21 03:48:14 +08:00
|
|
|
arg = PG_GETARG_TEXT_P(0);
|
2008-03-26 06:42:46 +08:00
|
|
|
aptr = text_to_cstring(arg);
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
DoubleMetaphone(aptr, codes);
|
2004-08-21 03:48:14 +08:00
|
|
|
code = codes[1];
|
2004-07-01 11:25:48 +08:00
|
|
|
if (!code)
|
|
|
|
code = "";
|
2008-03-26 06:42:46 +08:00
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(cstring_to_text(code));
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* here is where we start the code imported from the perl module */
|
|
|
|
|
|
|
|
/*
 * All memory handling in the imported code goes through these three macros.
 * In this branch (DMETAPHONE_MAIN not defined) allocation is done with
 * palloc/repalloc.
 *
 * v is the pointer variable to assign, n the element count and t the
 * element type.
 */
#define META_MALLOC(v,n,t) \
	((v) = (t *) palloc((n) * sizeof(t)))

#define META_REALLOC(v,n,t) \
	((v) = (t *) repalloc((v), (n) * sizeof(t)))

/*
 * META_FREE is deliberately a no-op here: calling pfree seemed to cause a
 * segv sometimes (which might just have been caused by reloading the module
 * during development).  We rely on memory context cleanup instead - Tom Lane
 * says pfree shouldn't be necessary in a case like this.
 */
#define META_FREE(x) ((void)true)	/* pfree((x)) */
|
2004-08-29 13:07:03 +08:00
|
|
|
#else /* not defined DMETAPHONE_MAIN */
|
2004-07-01 11:25:48 +08:00
|
|
|
|
|
|
|
/*
 * Use the standard malloc library when not running in PostgreSQL.
 *
 * v is the pointer variable to assign, n the element count and t the
 * element type.
 */
#define META_MALLOC(v,n,t) \
	((v) = (t *) malloc((n) * sizeof(t)))

#define META_REALLOC(v,n,t) \
	((v) = (t *) realloc((v), (n) * sizeof(t)))

#define META_FREE(x) free((x))
|
2004-08-29 13:07:03 +08:00
|
|
|
#endif /* defined DMETAPHONE_MAIN */
|
2004-07-01 11:25:48 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
2005-10-01 06:38:44 +08:00
|
|
|
/*
 * This typedef was originally in the perl module's .h file.
 *
 * A metastring is a heap-allocated, NUL-terminated character buffer together
 * with its bookkeeping.
 */
typedef struct
{
	char	   *str;			/* the NUL-terminated character data */
	int			length;			/* characters currently in str, not
								 * counting the terminating NUL */
	int			bufsize;		/* number of chars allocated for str */
	int			free_string_on_destroy; /* nonzero: DestroyMetaString()
										 * releases str as well */
} metastring;
|
2004-08-29 13:07:03 +08:00
|
|
|
|
|
|
|
/*
|
2004-07-01 11:25:48 +08:00
|
|
|
* remaining perl module funcs unchanged except for declaring them static
|
|
|
|
* and reformatting to PostgreSQL indentation and to fit in 80 cols.
|
|
|
|
*
|
2004-08-29 13:07:03 +08:00
|
|
|
*/
|
2004-07-01 11:25:48 +08:00
|
|
|
|
|
|
|
static metastring *
|
|
|
|
NewMetaString(char *init_str)
|
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
metastring *s;
|
|
|
|
char empty_string[] = "";
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
META_MALLOC(s, 1, metastring);
|
|
|
|
assert(s != NULL);
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
if (init_str == NULL)
|
2004-07-01 11:25:48 +08:00
|
|
|
init_str = empty_string;
|
2004-08-29 13:07:03 +08:00
|
|
|
s->length = strlen(init_str);
|
|
|
|
/* preallocate a bit more for potential growth */
|
|
|
|
s->bufsize = s->length + 7;
|
|
|
|
|
|
|
|
META_MALLOC(s->str, s->bufsize, char);
|
|
|
|
assert(s->str != NULL);
|
2004-07-01 11:25:48 +08:00
|
|
|
|
Replace a bunch more uses of strncpy() with safer coding.
strncpy() has a well-deserved reputation for being unsafe, so make an
effort to get rid of nearly all occurrences in HEAD.
A large fraction of the remaining uses were passing length less than or
equal to the known strlen() of the source, in which case no null-padding
can occur and the behavior is equivalent to memcpy(), though doubtless
slower and certainly harder to reason about. So just use memcpy() in
these cases.
In other cases, use either StrNCpy() or strlcpy() as appropriate (depending
on whether padding to the full length of the destination buffer seems
useful).
I left a few strncpy() calls alone in the src/timezone/ code, to keep it
in sync with upstream (the IANA tzcode distribution). There are also a
few such calls in ecpg that could possibly do with more analysis.
AFAICT, none of these changes are more than cosmetic, except for the four
occurrences in fe-secure-openssl.c, which are in fact buggy: an overlength
source leads to a non-null-terminated destination buffer and ensuing
misbehavior. These don't seem like security issues, first because no stack
clobber is possible and second because if your values of sslcert etc are
coming from untrusted sources then you've got problems way worse than this.
Still, it's undesirable to have unpredictable behavior for overlength
inputs, so back-patch those four changes to all active branches.
2015-01-25 02:05:42 +08:00
|
|
|
memcpy(s->str, init_str, s->length + 1);
|
2004-08-29 13:07:03 +08:00
|
|
|
s->free_string_on_destroy = 1;
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
return s;
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2009-06-11 22:49:15 +08:00
|
|
|
DestroyMetaString(metastring *s)
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
if (s == NULL)
|
2004-07-01 11:25:48 +08:00
|
|
|
return;
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
if (s->free_string_on_destroy && (s->str != NULL))
|
2004-07-01 11:25:48 +08:00
|
|
|
META_FREE(s->str);
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
META_FREE(s);
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2009-06-11 22:49:15 +08:00
|
|
|
IncreaseBuffer(metastring *s, int chars_needed)
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
|
|
|
|
assert(s->str != NULL);
|
|
|
|
s->bufsize = s->bufsize + chars_needed + 10;
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2009-06-11 22:49:15 +08:00
|
|
|
MakeUpper(metastring *s)
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
char *i;
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
for (i = s->str; *i; i++)
|
2006-09-23 05:39:58 +08:00
|
|
|
*i = toupper((unsigned char) *i);
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
2009-06-11 22:49:15 +08:00
|
|
|
IsVowel(metastring *s, int pos)
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
char c;
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
if ((pos < 0) || (pos >= s->length))
|
2004-07-01 11:25:48 +08:00
|
|
|
return 0;
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
c = *(s->str + pos);
|
|
|
|
if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
|
|
|
|
(c == 'U') || (c == 'Y'))
|
2004-07-01 11:25:48 +08:00
|
|
|
return 1;
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
return 0;
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
2009-06-11 22:49:15 +08:00
|
|
|
SlavoGermanic(metastring *s)
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
if ((char *) strstr(s->str, "W"))
|
2004-07-01 11:25:48 +08:00
|
|
|
return 1;
|
2004-08-29 13:07:03 +08:00
|
|
|
else if ((char *) strstr(s->str, "K"))
|
2004-07-01 11:25:48 +08:00
|
|
|
return 1;
|
2004-08-29 13:07:03 +08:00
|
|
|
else if ((char *) strstr(s->str, "CZ"))
|
2004-07-01 11:25:48 +08:00
|
|
|
return 1;
|
2004-08-29 13:07:03 +08:00
|
|
|
else if ((char *) strstr(s->str, "WITZ"))
|
2004-07-01 11:25:48 +08:00
|
|
|
return 1;
|
2004-08-29 13:07:03 +08:00
|
|
|
else
|
2004-07-01 11:25:48 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static char
|
2009-06-11 22:49:15 +08:00
|
|
|
GetAt(metastring *s, int pos)
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
if ((pos < 0) || (pos >= s->length))
|
2004-07-01 11:25:48 +08:00
|
|
|
return '\0';
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
return ((char) *(s->str + pos));
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2009-06-11 22:49:15 +08:00
|
|
|
SetAt(metastring *s, int pos, char c)
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
if ((pos < 0) || (pos >= s->length))
|
2004-07-01 11:25:48 +08:00
|
|
|
return;
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
*(s->str + pos) = c;
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
/*
|
2004-07-01 11:25:48 +08:00
|
|
|
Caveats: the START value is 0 based
|
|
|
|
*/
|
|
|
|
static int
|
2009-06-11 22:49:15 +08:00
|
|
|
StringAt(metastring *s, int start, int length,...)
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
char *test;
|
|
|
|
char *pos;
|
|
|
|
va_list ap;
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
if ((start < 0) || (start >= s->length))
|
|
|
|
return 0;
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
pos = (s->str + start);
|
|
|
|
va_start(ap, length);
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
do
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
test = va_arg(ap, char *);
|
|
|
|
if (*test && (strncmp(pos, test, length) == 0))
|
2015-01-04 22:35:46 +08:00
|
|
|
{
|
|
|
|
va_end(ap);
|
2004-07-01 11:25:48 +08:00
|
|
|
return 1;
|
2015-01-04 22:35:46 +08:00
|
|
|
}
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
2011-12-28 03:19:09 +08:00
|
|
|
while (strcmp(test, "") != 0);
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
va_end(ap);
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
return 0;
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2009-06-11 22:49:15 +08:00
|
|
|
MetaphAdd(metastring *s, char *new_str)
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
int add_length;
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
if (new_str == NULL)
|
2004-07-01 11:25:48 +08:00
|
|
|
return;
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
add_length = strlen(new_str);
|
|
|
|
if ((s->length + add_length) > (s->bufsize - 1))
|
2004-07-01 11:25:48 +08:00
|
|
|
IncreaseBuffer(s, add_length);
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
strcat(s->str, new_str);
|
|
|
|
s->length += add_length;
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
DoubleMetaphone(char *str, char **codes)
|
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
int length;
|
|
|
|
metastring *original;
|
|
|
|
metastring *primary;
|
|
|
|
metastring *secondary;
|
|
|
|
int current;
|
|
|
|
int last;
|
|
|
|
|
|
|
|
current = 0;
|
|
|
|
/* we need the real length and last prior to padding */
|
|
|
|
length = strlen(str);
|
|
|
|
last = length - 1;
|
|
|
|
original = NewMetaString(str);
|
|
|
|
/* Pad original so we can index beyond end */
|
|
|
|
MetaphAdd(original, " ");
|
|
|
|
|
|
|
|
primary = NewMetaString("");
|
|
|
|
secondary = NewMetaString("");
|
|
|
|
primary->free_string_on_destroy = 0;
|
|
|
|
secondary->free_string_on_destroy = 0;
|
|
|
|
|
|
|
|
MakeUpper(original);
|
|
|
|
|
|
|
|
/* skip these when at start of word */
|
|
|
|
if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
|
2004-07-01 11:25:48 +08:00
|
|
|
current += 1;
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
/* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
|
|
|
|
if (GetAt(original, 0) == 'X')
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S"); /* 'Z' maps to 'S' */
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
current += 1;
|
|
|
|
}
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
/* main loop */
|
|
|
|
while ((primary->length < 4) || (secondary->length < 4))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
if (current >= length)
|
|
|
|
break;
|
|
|
|
|
|
|
|
switch (GetAt(original, current))
|
2004-08-29 13:07:03 +08:00
|
|
|
{
|
2004-07-01 11:25:48 +08:00
|
|
|
case 'A':
|
|
|
|
case 'E':
|
|
|
|
case 'I':
|
|
|
|
case 'O':
|
|
|
|
case 'U':
|
|
|
|
case 'Y':
|
|
|
|
if (current == 0)
|
|
|
|
{
|
|
|
|
/* all init vowels now map to 'A' */
|
|
|
|
MetaphAdd(primary, "A");
|
|
|
|
MetaphAdd(secondary, "A");
|
|
|
|
}
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'B':
|
|
|
|
|
|
|
|
/* "-mb", e.g", "dumb", already skipped over... */
|
|
|
|
MetaphAdd(primary, "P");
|
|
|
|
MetaphAdd(secondary, "P");
|
|
|
|
|
|
|
|
if (GetAt(original, current + 1) == 'B')
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
2010-07-07 03:19:02 +08:00
|
|
|
case '\xc7': /* C with cedilla */
|
2004-07-01 11:25:48 +08:00
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'C':
|
|
|
|
/* various germanic */
|
|
|
|
if ((current > 1)
|
|
|
|
&& !IsVowel(original, current - 2)
|
|
|
|
&& StringAt(original, (current - 1), 3, "ACH", "")
|
|
|
|
&& ((GetAt(original, current + 2) != 'I')
|
|
|
|
&& ((GetAt(original, current + 2) != 'E')
|
2005-10-15 10:49:52 +08:00
|
|
|
|| StringAt(original, (current - 2), 6, "BACHER",
|
|
|
|
"MACHER", ""))))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* special case 'caesar' */
|
|
|
|
if ((current == 0)
|
|
|
|
&& StringAt(original, current, 6, "CAESAR", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* italian 'chianti' */
|
|
|
|
if (StringAt(original, current, 4, "CHIA", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, current, 2, "CH", ""))
|
|
|
|
{
|
|
|
|
/* find 'michael' */
|
|
|
|
if ((current > 0)
|
|
|
|
&& StringAt(original, current, 4, "CHAE", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* greek roots e.g. 'chemistry', 'chorus' */
|
|
|
|
if ((current == 0)
|
2004-08-29 13:07:03 +08:00
|
|
|
&& (StringAt(original, (current + 1), 5,
|
2004-07-01 11:25:48 +08:00
|
|
|
"HARAC", "HARIS", "")
|
|
|
|
|| StringAt(original, (current + 1), 3, "HOR",
|
|
|
|
"HYM", "HIA", "HEM", ""))
|
|
|
|
&& !StringAt(original, 0, 5, "CHORE", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* germanic, greek, or otherwise 'ch' for 'kh' sound */
|
|
|
|
if (
|
|
|
|
(StringAt(original, 0, 4, "VAN ", "VON ", "")
|
|
|
|
|| StringAt(original, 0, 3, "SCH", ""))
|
2004-08-29 13:07:03 +08:00
|
|
|
/* 'architect but not 'arch', 'orchestra', 'orchid' */
|
2004-07-01 11:25:48 +08:00
|
|
|
|| StringAt(original, (current - 2), 6, "ORCHES",
|
|
|
|
"ARCHIT", "ORCHID", "")
|
|
|
|
|| StringAt(original, (current + 2), 1, "T", "S",
|
|
|
|
"")
|
2004-08-29 13:07:03 +08:00
|
|
|
|| ((StringAt(original, (current - 1), 1,
|
|
|
|
"A", "O", "U", "E", "")
|
2004-07-01 11:25:48 +08:00
|
|
|
|| (current == 0))
|
2004-08-29 13:07:03 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* e.g., 'wachtler', 'wechsler', but not 'tichner'
|
|
|
|
*/
|
2005-10-15 10:49:52 +08:00
|
|
|
&& StringAt(original, (current + 2), 1, "L", "R",
|
|
|
|
"N", "M", "B", "H", "F", "V", "W",
|
|
|
|
" ", "")))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (current > 0)
|
|
|
|
{
|
|
|
|
if (StringAt(original, 0, 2, "MC", ""))
|
|
|
|
{
|
|
|
|
/* e.g., "McHugh" */
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* e.g, 'czerny' */
|
|
|
|
if (StringAt(original, current, 2, "CZ", "")
|
|
|
|
&& !StringAt(original, (current - 2), 4, "WICZ", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* e.g., 'focaccia' */
|
|
|
|
if (StringAt(original, (current + 1), 3, "CIA", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* double 'C', but not if e.g. 'McClellan' */
|
|
|
|
if (StringAt(original, current, 2, "CC", "")
|
|
|
|
&& !((current == 1) && (GetAt(original, 0) == 'M')))
|
|
|
|
{
|
|
|
|
/* 'bellocchio' but not 'bacchus' */
|
|
|
|
if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
|
2005-10-15 10:49:52 +08:00
|
|
|
&& !StringAt(original, (current + 2), 2, "HU", ""))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
/* 'accident', 'accede' 'succeed' */
|
|
|
|
if (
|
|
|
|
((current == 1)
|
|
|
|
&& (GetAt(original, current - 1) == 'A'))
|
2005-10-15 10:49:52 +08:00
|
|
|
|| StringAt(original, (current - 1), 5, "UCCEE",
|
|
|
|
"UCCES", ""))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "KS");
|
|
|
|
MetaphAdd(secondary, "KS");
|
|
|
|
/* 'bacci', 'bertucci', other italian */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
}
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
2004-08-29 13:07:03 +08:00
|
|
|
{ /* Pierce's rule */
|
2004-07-01 11:25:48 +08:00
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
|
|
|
|
{
|
|
|
|
/* italian vs. english */
|
|
|
|
if (StringAt
|
|
|
|
(original, current, 3, "CIO", "CIE", "CIA", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
}
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* else */
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
|
|
|
|
/* name sent in 'mac caffrey', 'mac gregor */
|
|
|
|
if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
|
|
|
|
current += 3;
|
2004-08-29 13:07:03 +08:00
|
|
|
else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
|
|
|
|
&& !StringAt(original, (current + 1), 2,
|
|
|
|
"CE", "CI", ""))
|
|
|
|
current += 2;
|
2004-07-01 11:25:48 +08:00
|
|
|
else
|
2004-08-29 13:07:03 +08:00
|
|
|
current += 1;
|
2004-07-01 11:25:48 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case 'D':
|
|
|
|
if (StringAt(original, current, 2, "DG", ""))
|
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
if (StringAt(original, (current + 2), 1,
|
2004-07-01 11:25:48 +08:00
|
|
|
"I", "E", "Y", ""))
|
|
|
|
{
|
|
|
|
/* e.g. 'edge' */
|
|
|
|
MetaphAdd(primary, "J");
|
|
|
|
MetaphAdd(secondary, "J");
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* e.g. 'edgar' */
|
|
|
|
MetaphAdd(primary, "TK");
|
|
|
|
MetaphAdd(secondary, "TK");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, current, 2, "DT", "DD", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "T");
|
|
|
|
MetaphAdd(secondary, "T");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* else */
|
|
|
|
MetaphAdd(primary, "T");
|
|
|
|
MetaphAdd(secondary, "T");
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'F':
|
|
|
|
if (GetAt(original, current + 1) == 'F')
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "F");
|
|
|
|
MetaphAdd(secondary, "F");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'G':
|
|
|
|
if (GetAt(original, current + 1) == 'H')
|
|
|
|
{
|
|
|
|
if ((current > 0) && !IsVowel(original, current - 1))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (current < 3)
|
|
|
|
{
|
|
|
|
/* 'ghislane', ghiradelli */
|
|
|
|
if (current == 0)
|
|
|
|
{
|
|
|
|
if (GetAt(original, current + 2) == 'I')
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "J");
|
|
|
|
MetaphAdd(secondary, "J");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
}
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2004-08-29 13:07:03 +08:00
|
|
|
|
|
|
|
/*
|
2005-10-15 10:49:52 +08:00
|
|
|
* Parker's rule (with some further refinements) - e.g.,
|
|
|
|
* 'hugh'
|
2004-08-29 13:07:03 +08:00
|
|
|
*/
|
2004-07-01 11:25:48 +08:00
|
|
|
if (
|
|
|
|
((current > 1)
|
2004-08-29 13:07:03 +08:00
|
|
|
&& StringAt(original, (current - 2), 1,
|
2004-07-01 11:25:48 +08:00
|
|
|
"B", "H", "D", ""))
|
2004-08-29 13:07:03 +08:00
|
|
|
/* e.g., 'bough' */
|
2004-07-01 11:25:48 +08:00
|
|
|
|| ((current > 2)
|
2004-08-29 13:07:03 +08:00
|
|
|
&& StringAt(original, (current - 3), 1,
|
2004-07-01 11:25:48 +08:00
|
|
|
"B", "H", "D", ""))
|
2004-08-29 13:07:03 +08:00
|
|
|
/* e.g., 'broughton' */
|
2004-07-01 11:25:48 +08:00
|
|
|
|| ((current > 3)
|
2004-08-29 13:07:03 +08:00
|
|
|
&& StringAt(original, (current - 4), 1,
|
2004-07-01 11:25:48 +08:00
|
|
|
"B", "H", "")))
|
|
|
|
{
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2004-08-29 13:07:03 +08:00
|
|
|
/*
|
|
|
|
* e.g., 'laugh', 'McLaughlin', 'cough', 'gough',
|
|
|
|
* 'rough', 'tough'
|
|
|
|
*/
|
2004-07-01 11:25:48 +08:00
|
|
|
if ((current > 2)
|
|
|
|
&& (GetAt(original, current - 1) == 'U')
|
|
|
|
&& StringAt(original, (current - 3), 1, "C",
|
|
|
|
"G", "L", "R", "T", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "F");
|
|
|
|
MetaphAdd(secondary, "F");
|
|
|
|
}
|
|
|
|
else if ((current > 0)
|
|
|
|
&& GetAt(original, current - 1) != 'I')
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
}
|
|
|
|
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (GetAt(original, current + 1) == 'N')
|
|
|
|
{
|
|
|
|
if ((current == 1) && IsVowel(original, 0)
|
|
|
|
&& !SlavoGermanic(original))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "KN");
|
|
|
|
MetaphAdd(secondary, "N");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
/* not e.g. 'cagney' */
|
|
|
|
if (!StringAt(original, (current + 2), 2, "EY", "")
|
|
|
|
&& (GetAt(original, current + 1) != 'Y')
|
|
|
|
&& !SlavoGermanic(original))
|
2004-08-29 13:07:03 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "N");
|
|
|
|
MetaphAdd(secondary, "KN");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "KN");
|
|
|
|
MetaphAdd(secondary, "KN");
|
|
|
|
}
|
2004-07-01 11:25:48 +08:00
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 'tagliaro' */
|
|
|
|
if (StringAt(original, (current + 1), 2, "LI", "")
|
|
|
|
&& !SlavoGermanic(original))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "KL");
|
|
|
|
MetaphAdd(secondary, "L");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* -ges-,-gep-,-gel-, -gie- at beginning */
|
|
|
|
if ((current == 0)
|
|
|
|
&& ((GetAt(original, current + 1) == 'Y')
|
2005-10-15 10:49:52 +08:00
|
|
|
|| StringAt(original, (current + 1), 2, "ES", "EP",
|
|
|
|
"EB", "EL", "EY", "IB", "IL", "IN", "IE",
|
|
|
|
"EI", "ER", "")))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "J");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
/* -ger-, -gy- */
|
2004-07-01 11:25:48 +08:00
|
|
|
if (
|
|
|
|
(StringAt(original, (current + 1), 2, "ER", "")
|
|
|
|
|| (GetAt(original, current + 1) == 'Y'))
|
2004-08-29 13:07:03 +08:00
|
|
|
&& !StringAt(original, 0, 6,
|
2004-07-01 11:25:48 +08:00
|
|
|
"DANGER", "RANGER", "MANGER", "")
|
|
|
|
&& !StringAt(original, (current - 1), 1, "E", "I", "")
|
|
|
|
&& !StringAt(original, (current - 1), 3, "RGY", "OGY",
|
|
|
|
""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "J");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
/* italian e.g, 'biaggi' */
|
2004-07-01 11:25:48 +08:00
|
|
|
if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
|
2004-08-29 13:07:03 +08:00
|
|
|
|| StringAt(original, (current - 1), 4,
|
2004-07-01 11:25:48 +08:00
|
|
|
"AGGI", "OGGI", ""))
|
|
|
|
{
|
|
|
|
/* obvious germanic */
|
|
|
|
if (
|
|
|
|
(StringAt(original, 0, 4, "VAN ", "VON ", "")
|
|
|
|
|| StringAt(original, 0, 3, "SCH", ""))
|
|
|
|
|| StringAt(original, (current + 1), 2, "ET", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* always soft if french ending */
|
|
|
|
if (StringAt
|
|
|
|
(original, (current + 1), 4, "IER ", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "J");
|
|
|
|
MetaphAdd(secondary, "J");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "J");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (GetAt(original, current + 1) == 'G')
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'H':
|
|
|
|
/* only keep if first & before vowel or btw. 2 vowels */
|
|
|
|
if (((current == 0) || IsVowel(original, current - 1))
|
|
|
|
&& IsVowel(original, current + 1))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "H");
|
|
|
|
MetaphAdd(secondary, "H");
|
|
|
|
current += 2;
|
|
|
|
}
|
2004-08-29 13:07:03 +08:00
|
|
|
else
|
2004-10-07 23:21:58 +08:00
|
|
|
/* also takes care of 'HH' */
|
2004-07-01 11:25:48 +08:00
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'J':
|
|
|
|
/* obvious spanish, 'jose', 'san jacinto' */
|
|
|
|
if (StringAt(original, current, 4, "JOSE", "")
|
|
|
|
|| StringAt(original, 0, 4, "SAN ", ""))
|
|
|
|
{
|
|
|
|
if (((current == 0)
|
|
|
|
&& (GetAt(original, current + 4) == ' '))
|
|
|
|
|| StringAt(original, 0, 4, "SAN ", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "H");
|
|
|
|
MetaphAdd(secondary, "H");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "J");
|
|
|
|
MetaphAdd(secondary, "H");
|
|
|
|
}
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((current == 0)
|
|
|
|
&& !StringAt(original, current, 4, "JOSE", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "J"); /* Yankelovich/Jankelowicz */
|
|
|
|
MetaphAdd(secondary, "A");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* spanish pron. of e.g. 'bajador' */
|
|
|
|
if (IsVowel(original, current - 1)
|
|
|
|
&& !SlavoGermanic(original)
|
|
|
|
&& ((GetAt(original, current + 1) == 'A')
|
|
|
|
|| (GetAt(original, current + 1) == 'O')))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "J");
|
|
|
|
MetaphAdd(secondary, "H");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (current == last)
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "J");
|
|
|
|
MetaphAdd(secondary, "");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (!StringAt(original, (current + 1), 1, "L", "T",
|
2005-10-15 10:49:52 +08:00
|
|
|
"K", "S", "N", "M", "B", "Z", "")
|
2004-07-01 11:25:48 +08:00
|
|
|
&& !StringAt(original, (current - 1), 1,
|
2004-08-29 13:07:03 +08:00
|
|
|
"S", "K", "L", ""))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "J");
|
|
|
|
MetaphAdd(secondary, "J");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
if (GetAt(original, current + 1) == 'J') /* it could happen! */
|
2004-07-01 11:25:48 +08:00
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'K':
|
|
|
|
if (GetAt(original, current + 1) == 'K')
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'L':
|
|
|
|
if (GetAt(original, current + 1) == 'L')
|
|
|
|
{
|
|
|
|
/* spanish e.g. 'cabrillo', 'gallegos' */
|
|
|
|
if (((current == (length - 3))
|
|
|
|
&& StringAt(original, (current - 1), 4, "ILLO",
|
|
|
|
"ILLA", "ALLE", ""))
|
2005-10-15 10:49:52 +08:00
|
|
|
|| ((StringAt(original, (last - 1), 2, "AS", "OS", "")
|
|
|
|
|| StringAt(original, last, 1, "A", "O", ""))
|
|
|
|
&& StringAt(original, (current - 1), 4,
|
|
|
|
"ALLE", "")))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "L");
|
|
|
|
MetaphAdd(secondary, "");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
current += 2;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "L");
|
|
|
|
MetaphAdd(secondary, "L");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'M':
|
|
|
|
if ((StringAt(original, (current - 1), 3, "UMB", "")
|
|
|
|
&& (((current + 1) == last)
|
2005-10-15 10:49:52 +08:00
|
|
|
|| StringAt(original, (current + 2), 2, "ER", "")))
|
2004-08-29 13:07:03 +08:00
|
|
|
/* 'dumb','thumb' */
|
2004-07-01 11:25:48 +08:00
|
|
|
|| (GetAt(original, current + 1) == 'M'))
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "M");
|
|
|
|
MetaphAdd(secondary, "M");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'N':
|
|
|
|
if (GetAt(original, current + 1) == 'N')
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "N");
|
|
|
|
MetaphAdd(secondary, "N");
|
|
|
|
break;
|
|
|
|
|
2010-07-07 03:19:02 +08:00
|
|
|
case '\xd1': /* N with tilde */
|
2004-07-01 11:25:48 +08:00
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "N");
|
|
|
|
MetaphAdd(secondary, "N");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'P':
|
|
|
|
if (GetAt(original, current + 1) == 'H')
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "F");
|
|
|
|
MetaphAdd(secondary, "F");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* also account for "campbell", "raspberry" */
|
|
|
|
if (StringAt(original, (current + 1), 1, "P", "B", ""))
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "P");
|
|
|
|
MetaphAdd(secondary, "P");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'Q':
|
|
|
|
if (GetAt(original, current + 1) == 'Q')
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "K");
|
|
|
|
MetaphAdd(secondary, "K");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'R':
|
|
|
|
/* french e.g. 'rogier', but exclude 'hochmeier' */
|
|
|
|
if ((current == last)
|
|
|
|
&& !SlavoGermanic(original)
|
|
|
|
&& StringAt(original, (current - 2), 2, "IE", "")
|
2005-10-15 10:49:52 +08:00
|
|
|
&& !StringAt(original, (current - 4), 2, "ME", "MA", ""))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "");
|
|
|
|
MetaphAdd(secondary, "R");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "R");
|
|
|
|
MetaphAdd(secondary, "R");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (GetAt(original, current + 1) == 'R')
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'S':
|
|
|
|
/* special cases 'island', 'isle', 'carlisle', 'carlysle' */
|
|
|
|
if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
|
|
|
|
{
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* special case 'sugar-' */
|
|
|
|
if ((current == 0)
|
|
|
|
&& StringAt(original, current, 5, "SUGAR", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, current, 2, "SH", ""))
|
|
|
|
{
|
|
|
|
/* germanic */
|
|
|
|
if (StringAt
|
2005-10-15 10:49:52 +08:00
|
|
|
(original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
|
|
|
|
"HOLZ", ""))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
}
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* italian & armenian */
|
|
|
|
if (StringAt(original, current, 3, "SIO", "SIA", "")
|
|
|
|
|| StringAt(original, current, 4, "SIAN", ""))
|
|
|
|
{
|
|
|
|
if (!SlavoGermanic(original))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
}
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
/*
|
|
|
|
* german & anglicisations, e.g. 'smith' match 'schmidt',
|
2005-10-15 10:49:52 +08:00
|
|
|
* 'snider' match 'schneider' also, -sz- in slavic language
|
|
|
|
* although in hungarian it is pronounced 's'
|
2004-08-29 13:07:03 +08:00
|
|
|
*/
|
2004-07-01 11:25:48 +08:00
|
|
|
if (((current == 0)
|
2004-08-29 13:07:03 +08:00
|
|
|
&& StringAt(original, (current + 1), 1,
|
2004-07-01 11:25:48 +08:00
|
|
|
"M", "N", "L", "W", ""))
|
|
|
|
|| StringAt(original, (current + 1), 1, "Z", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
if (StringAt(original, (current + 1), 1, "Z", ""))
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, current, 2, "SC", ""))
|
|
|
|
{
|
|
|
|
/* Schlesinger's rule */
|
|
|
|
if (GetAt(original, current + 2) == 'H')
|
|
|
|
{
|
|
|
|
/* dutch origin, e.g. 'school', 'schooner' */
|
2004-08-29 13:07:03 +08:00
|
|
|
if (StringAt(original, (current + 3), 2,
|
2004-07-01 11:25:48 +08:00
|
|
|
"OO", "ER", "EN",
|
|
|
|
"UY", "ED", "EM", ""))
|
|
|
|
{
|
|
|
|
/* 'schermerhorn', 'schenker' */
|
2004-08-29 13:07:03 +08:00
|
|
|
if (StringAt(original, (current + 3), 2,
|
2004-07-01 11:25:48 +08:00
|
|
|
"ER", "EN", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "SK");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "SK");
|
|
|
|
MetaphAdd(secondary, "SK");
|
|
|
|
}
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if ((current == 0) && !IsVowel(original, 3)
|
|
|
|
&& (GetAt(original, 3) != 'W'))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
}
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
if (StringAt(original, (current + 2), 1,
|
2004-07-01 11:25:48 +08:00
|
|
|
"I", "E", "Y", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* else */
|
|
|
|
MetaphAdd(primary, "SK");
|
|
|
|
MetaphAdd(secondary, "SK");
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* french e.g. 'resnais', 'artois' */
|
|
|
|
if ((current == last)
|
2005-10-15 10:49:52 +08:00
|
|
|
&& StringAt(original, (current - 2), 2, "AI", "OI", ""))
|
2004-07-01 11:25:48 +08:00
|
|
|
{
|
|
|
|
MetaphAdd(primary, "");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, (current + 1), 1, "S", "Z", ""))
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'T':
|
|
|
|
if (StringAt(original, current, 4, "TION", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, current, 3, "TIA", "TCH", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "X");
|
|
|
|
MetaphAdd(secondary, "X");
|
|
|
|
current += 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, current, 2, "TH", "")
|
|
|
|
|| StringAt(original, current, 3, "TTH", ""))
|
|
|
|
{
|
|
|
|
/* special case 'thomas', 'thames' or germanic */
|
|
|
|
if (StringAt(original, (current + 2), 2, "OM", "AM", "")
|
|
|
|
|| StringAt(original, 0, 4, "VAN ", "VON ", "")
|
|
|
|
|| StringAt(original, 0, 3, "SCH", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "T");
|
|
|
|
MetaphAdd(secondary, "T");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "0");
|
|
|
|
MetaphAdd(secondary, "T");
|
|
|
|
}
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StringAt(original, (current + 1), 1, "T", "D", ""))
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "T");
|
|
|
|
MetaphAdd(secondary, "T");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'V':
|
|
|
|
if (GetAt(original, current + 1) == 'V')
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
MetaphAdd(primary, "F");
|
|
|
|
MetaphAdd(secondary, "F");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'W':
|
|
|
|
/* can also be in middle of word */
|
|
|
|
if (StringAt(original, current, 2, "WR", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "R");
|
|
|
|
MetaphAdd(secondary, "R");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((current == 0)
|
|
|
|
&& (IsVowel(original, current + 1)
|
|
|
|
|| StringAt(original, current, 2, "WH", "")))
|
|
|
|
{
|
|
|
|
/* Wasserman should match Vasserman */
|
|
|
|
if (IsVowel(original, current + 1))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "A");
|
|
|
|
MetaphAdd(secondary, "F");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* need Uomo to match Womo */
|
|
|
|
MetaphAdd(primary, "A");
|
|
|
|
MetaphAdd(secondary, "A");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Arnow should match Arnoff */
|
|
|
|
if (((current == last) && IsVowel(original, current - 1))
|
2005-10-15 10:49:52 +08:00
|
|
|
|| StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
|
|
|
|
"OWSKI", "OWSKY", "")
|
2004-07-01 11:25:48 +08:00
|
|
|
|| StringAt(original, 0, 3, "SCH", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "");
|
|
|
|
MetaphAdd(secondary, "F");
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* polish e.g. 'filipowicz' */
|
|
|
|
if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "TS");
|
|
|
|
MetaphAdd(secondary, "FX");
|
|
|
|
current += 4;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* else skip it */
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'X':
|
|
|
|
/* french e.g. breaux */
|
|
|
|
if (!((current == last)
|
2004-08-29 13:07:03 +08:00
|
|
|
&& (StringAt(original, (current - 3), 3,
|
2004-07-01 11:25:48 +08:00
|
|
|
"IAU", "EAU", "")
|
2004-08-29 13:07:03 +08:00
|
|
|
|| StringAt(original, (current - 2), 2,
|
2004-07-01 11:25:48 +08:00
|
|
|
"AU", "OU", ""))))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "KS");
|
|
|
|
MetaphAdd(secondary, "KS");
|
|
|
|
}
|
2004-08-29 13:07:03 +08:00
|
|
|
|
2004-07-01 11:25:48 +08:00
|
|
|
|
|
|
|
if (StringAt(original, (current + 1), 1, "C", "X", ""))
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'Z':
|
|
|
|
/* chinese pinyin e.g. 'zhao' */
|
|
|
|
if (GetAt(original, current + 1) == 'H')
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "J");
|
|
|
|
MetaphAdd(secondary, "J");
|
|
|
|
current += 2;
|
|
|
|
break;
|
|
|
|
}
|
2004-08-29 13:07:03 +08:00
|
|
|
else if (StringAt(original, (current + 1), 2,
|
2004-07-01 11:25:48 +08:00
|
|
|
"ZO", "ZI", "ZA", "")
|
|
|
|
|| (SlavoGermanic(original)
|
|
|
|
&& ((current > 0)
|
|
|
|
&& GetAt(original, current - 1) != 'T')))
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "TS");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MetaphAdd(primary, "S");
|
|
|
|
MetaphAdd(secondary, "S");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (GetAt(original, current + 1) == 'Z')
|
|
|
|
current += 2;
|
|
|
|
else
|
|
|
|
current += 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
current += 1;
|
2004-08-29 13:07:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2005-10-15 10:49:52 +08:00
|
|
|
* printf("PRIMARY: %s\n", primary->str); printf("SECONDARY: %s\n",
|
|
|
|
* secondary->str);
|
2004-08-29 13:07:03 +08:00
|
|
|
*/
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
if (primary->length > 4)
|
2004-07-01 11:25:48 +08:00
|
|
|
SetAt(primary, 4, '\0');
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
if (secondary->length > 4)
|
2004-07-01 11:25:48 +08:00
|
|
|
SetAt(secondary, 4, '\0');
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
*codes = primary->str;
|
|
|
|
*++codes = secondary->str;
|
2004-07-01 11:25:48 +08:00
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
DestroyMetaString(original);
|
|
|
|
DestroyMetaString(primary);
|
|
|
|
DestroyMetaString(secondary);
|
2004-07-01 11:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef DMETAPHONE_MAIN
|
|
|
|
|
|
|
|
/* just for testing - not part of the perl code */
|
|
|
|
|
2004-08-29 13:07:03 +08:00
|
|
|
/*
 * Stand-alone test driver, compiled only when DMETAPHONE_MAIN is defined.
 *
 * When at least one command-line argument is given, argv[1] is passed to
 * DoubleMetaphone() and the two resulting codes are printed on one line as
 * "primary|secondary".  With no argument, nothing is printed.
 *
 * Always returns 0.
 */
int
main(int argc, char **argv)
{
	char	   *codes[2];

	if (argc > 1)
	{
		/* codes[0] receives the primary code, codes[1] the secondary one */
		DoubleMetaphone(argv[1], codes);
		printf("%s|%s\n", codes[0], codes[1]);
	}

	return 0;
}
|
|
|
|
|
|
|
|
#endif
|