mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-30 19:00:29 +08:00
Sorry - I should have gotten to this sooner. Here's a patch which you should
be able to apply against what you just committed. It rolls soundex into fuzzystrmatch. Remove soundex/metaphone and merge into fuzzystrmatch. Joe Conway
This commit is contained in:
parent
fb5b85a8f2
commit
cdd02cdf00
@ -57,7 +57,7 @@ fulltextindex -
|
||||
|
||||
fuzzystrmatch -
|
||||
Levenshtein and Metaphone fuzzy string matching
|
||||
by Joe Conway <joseph.conway@home.com>
|
||||
by Joe Conway <joseph.conway@home.com>, Joel Burton <jburton@scw.org>
|
||||
|
||||
intarray -
|
||||
Index support for arrays of int4, using GiST
|
||||
@ -86,7 +86,6 @@ mac -
|
||||
|
||||
metaphone -
|
||||
Improved Soundex function
|
||||
by Joel Burton <jburton@scw.org>
|
||||
|
||||
miscutil -
|
||||
PostgreSQL assert checking and various utility functions
|
||||
@ -149,9 +148,6 @@ seg -
|
||||
Confidence-interval datatype (GiST indexing example)
|
||||
by Gene Selkov, Jr. <selkovjr@mcs.anl.gov>
|
||||
|
||||
soundex -
|
||||
Soundex function
|
||||
|
||||
spi -
|
||||
Various trigger functions, examples for using SPI.
|
||||
|
||||
|
@ -20,6 +20,11 @@
|
||||
* Metaphone was originally created by Lawrence Philips and presented in article
|
||||
* in "Computer Language" December 1990 issue.
|
||||
*
|
||||
* soundex()
|
||||
* -----------
|
||||
* Folded existing soundex contrib into this one. Renamed text_soundex() (C function)
|
||||
* to soundex() for consistency.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and its
|
||||
* documentation for any purpose, without fee, and without a written agreement
|
||||
* is hereby granted, provided that the above copyright notice and this
|
||||
@ -40,12 +45,15 @@
|
||||
*/
|
||||
|
||||
|
||||
Version 0.1 (3 August, 2001):
|
||||
Version 0.2 (7 August, 2001):
|
||||
Functions to calculate the degree to which two strings match in a "fuzzy" way
|
||||
Tested under Linux (Red Hat 6.2 and 7.0) and PostgreSQL 7.2devel
|
||||
|
||||
Release Notes:
|
||||
|
||||
Version 0.2
|
||||
- folded soundex contrib into this one
|
||||
|
||||
Version 0.1
|
||||
- initial release
|
||||
|
||||
|
62
contrib/fuzzystrmatch/README.soundex
Normal file
62
contrib/fuzzystrmatch/README.soundex
Normal file
@ -0,0 +1,62 @@
|
||||
NOTE: Modified August 07, 2001 by Joe Conway. Updated for accuracy
|
||||
after combining soundex code into the fuzzystrmatch contrib
|
||||
---------------------------------------------------------------------
|
||||
The Soundex system is a method of matching similar sounding names
|
||||
(or any words) to the same code. It was initially used by the
|
||||
United States Census in 1880, 1900, and 1910, but it has little use
|
||||
beyond English names (or the English pronunciation of names), and
|
||||
it is not a linguistic tool.
|
||||
|
||||
The following are some usage examples:
|
||||
|
||||
SELECT soundex('hello world!');
|
||||
|
||||
CREATE TABLE s (nm text)\g
|
||||
|
||||
insert into s values ('john')\g
|
||||
insert into s values ('joan')\g
|
||||
insert into s values ('wobbly')\g
|
||||
|
||||
select * from s
|
||||
where soundex(nm) = soundex('john')\g
|
||||
|
||||
select a.nm, b.nm from s a, s b
|
||||
where soundex(a.nm) = soundex(b.nm)
|
||||
and a.oid <> b.oid\g
|
||||
|
||||
CREATE FUNCTION text_sx_eq(text, text) RETURNS bool AS
|
||||
'select soundex($1) = soundex($2)'
|
||||
LANGUAGE 'sql'\g
|
||||
|
||||
CREATE FUNCTION text_sx_lt(text,text) RETURNS bool AS
|
||||
'select soundex($1) < soundex($2)'
|
||||
LANGUAGE 'sql'\g
|
||||
|
||||
CREATE FUNCTION text_sx_gt(text,text) RETURNS bool AS
|
||||
'select soundex($1) > soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_sx_le(text,text) RETURNS bool AS
|
||||
'select soundex($1) <= soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_sx_ge(text,text) RETURNS bool AS
|
||||
'select soundex($1) >= soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_sx_ne(text,text) RETURNS bool AS
|
||||
'select soundex($1) <> soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
DROP OPERATOR #= (text,text)\g
|
||||
|
||||
CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_sx_eq,
|
||||
commutator = #=)\g
|
||||
|
||||
SELECT *
|
||||
FROM s
|
||||
WHERE text_sx_eq(nm,'john')\g
|
||||
|
||||
SELECT *
|
||||
from s
|
||||
where s.nm #= 'john';
|
@ -629,3 +629,71 @@ int _metaphone (
|
||||
|
||||
return(META_SUCCESS);
|
||||
} /* END metaphone */
|
||||
|
||||
|
||||
/*
|
||||
* SQL function: soundex(text) returns text
|
||||
*/
|
||||
PG_FUNCTION_INFO_V1(soundex);
|
||||
|
||||
Datum
|
||||
soundex(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char outstr[SOUNDEX_LEN + 1];
|
||||
char *arg;
|
||||
|
||||
arg = _textout(PG_GETARG_TEXT_P(0));
|
||||
|
||||
_soundex(arg, outstr);
|
||||
|
||||
PG_RETURN_TEXT_P(_textin(outstr));
|
||||
}
|
||||
|
||||
static void
|
||||
_soundex(const char *instr, char *outstr)
|
||||
{
|
||||
int count;
|
||||
|
||||
AssertArg(instr);
|
||||
AssertArg(outstr);
|
||||
|
||||
outstr[SOUNDEX_LEN] = '\0';
|
||||
|
||||
/* Skip leading non-alphabetic characters */
|
||||
while (!isalpha((unsigned char) instr[0]) && instr[0])
|
||||
++instr;
|
||||
|
||||
/* No string left */
|
||||
if (!instr[0])
|
||||
{
|
||||
outstr[0] = (char) 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Take the first letter as is */
|
||||
*outstr++ = (char) toupper((unsigned char) *instr++);
|
||||
|
||||
count = 1;
|
||||
while (*instr && count < SOUNDEX_LEN)
|
||||
{
|
||||
if (isalpha((unsigned char) *instr) &&
|
||||
soundex_code(*instr) != soundex_code(*(instr - 1)))
|
||||
{
|
||||
*outstr = soundex_code(instr[0]);
|
||||
if (*outstr != '0')
|
||||
{
|
||||
++outstr;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
++instr;
|
||||
}
|
||||
|
||||
/* Fill with 0's */
|
||||
while (count < SOUNDEX_LEN)
|
||||
{
|
||||
*outstr = '0';
|
||||
++outstr;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
@ -51,32 +51,43 @@
|
||||
#include "utils/builtins.h"
|
||||
|
||||
|
||||
#define MAX_LEVENSHTEIN_STRLEN 255
|
||||
#define MAX_METAPHONE_STRLEN 255
|
||||
|
||||
typedef struct dynmatrix
|
||||
{
|
||||
int value;
|
||||
} dynmat;
|
||||
|
||||
|
||||
/*
|
||||
* External declarations
|
||||
*/
|
||||
extern Datum levenshtein(PG_FUNCTION_ARGS);
|
||||
extern Datum metaphone(PG_FUNCTION_ARGS);
|
||||
extern Datum soundex(PG_FUNCTION_ARGS);
|
||||
|
||||
/*
|
||||
* Internal declarations
|
||||
* Soundex
|
||||
*/
|
||||
static void _soundex(const char *instr, char *outstr);
|
||||
|
||||
#define SOUNDEX_LEN 4
|
||||
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
|
||||
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
|
||||
|
||||
/*
 * Soundex digit for each letter: entry i is the code for the letter 'A' + i.
 * The string holds exactly 26 entries, one per letter A..Z.
 */
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
|
||||
static const char *soundex_table = "01230120022455012623010202";
|
||||
|
||||
/*
 * Look up the Soundex digit of a letter (case-insensitive).  The index is
 * toupper(letter) - 'A', so the argument must be an ASCII letter; any other
 * character indexes outside soundex_table.
 */
#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Levenshtein
|
||||
*/
|
||||
#define STRLEN(p) strlen(p)
|
||||
#define CHAREQ(p1, p2) (*(p1) == *(p2))
|
||||
#define NextChar(p) ((p)++)
|
||||
#define MAX_LEVENSHTEIN_STRLEN 255
|
||||
|
||||
|
||||
/*
|
||||
* Metaphone
|
||||
*/
|
||||
#define MAX_METAPHONE_STRLEN 255
|
||||
|
||||
/*
|
||||
* Original code by Michael G Schwern starts here.
|
||||
* Code slightly modified for use as PostgreSQL
|
||||
|
@ -3,3 +3,9 @@ CREATE FUNCTION levenshtein (text,text) RETURNS int
|
||||
|
||||
CREATE FUNCTION metaphone (text,int) RETURNS text
|
||||
AS 'MODULE_PATHNAME','metaphone' LANGUAGE 'c' with (iscachable, isstrict);
|
||||
|
||||
CREATE FUNCTION soundex(text) RETURNS text
|
||||
AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict);
|
||||
|
||||
-- Backward-compatible alias for soundex(text).  It must carry the same
-- attributes as soundex(): the C function fetches its argument without a
-- NULL check, so the function has to be declared strict.
CREATE FUNCTION text_soundex(text) RETURNS text
  AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict);
|
||||
|
@ -1,40 +0,0 @@
|
||||
#
|
||||
# $Header: /cvsroot/pgsql/contrib/metaphone/Attic/Makefile,v 1.2 2001/06/20 00:04:44 momjian Exp $
|
||||
#
|
||||
|
||||
subdir = contrib/metaphone
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
|
||||
NAME := metaphone
|
||||
SONAME := $(NAME)$(DLSUFFIX)
|
||||
|
||||
override CPPFLAGS += -I$(srcdir)
|
||||
override CFLAGS += $(CFLAGS_SL)
|
||||
override DLLLIBS := $(BE_DLLLIBS) $(DLLLIBS)
|
||||
|
||||
all: $(SONAME) $(NAME).sql
|
||||
|
||||
$(NAME).sql: $(NAME).sql.in
|
||||
sed 's,@MODULE_FILENAME@,$(libdir)/contrib/$(SONAME),g' $< >$@
|
||||
|
||||
install: all installdirs
|
||||
$(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib
|
||||
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
|
||||
$(INSTALL_DATA) README.$(NAME) $(docdir)/contrib
|
||||
|
||||
installdirs:
|
||||
$(mkinstalldirs) $(libdir)/contrib $(datadir)/contrib $(docdir)/contrib
|
||||
|
||||
uninstall:
|
||||
rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME)
|
||||
|
||||
clean distclean maintainer-clean:
|
||||
rm -f $(SONAME) $(NAME).o $(NAME).sql
|
||||
|
||||
depend dep:
|
||||
$(CC) -MM -MG $(CFLAGS) *.c > depend
|
||||
|
||||
ifeq (depend,$(wildcard depend))
|
||||
include depend
|
||||
endif
|
@ -1,79 +0,0 @@
|
||||
This directory contains a module that implements the "Metaphone" code as
|
||||
a PostgreSQL user-defined function. The Metaphone system is a method of
|
||||
matching similar sounding names (or any words) to the same code.
|
||||
|
||||
Metaphone was invented by Lawrence Philips as an improvement to the popular
|
||||
name-hashing routine, Soundex.
|
||||
|
||||
This metaphone code is from Michael Kuhn, and is detailed at
|
||||
http://aspell.sourceforge.net/metaphone/metaphone-kuhn.txt
|
||||
|
||||
Code for this (including this help file!) was liberally borrowed from
|
||||
the soundex() module for PostgreSQL.
|
||||
|
||||
There are two functions:
|
||||
text_metaphone(text) : returns hash of a name
|
||||
text_metaphone(text,int) : returns hash (maximum length of int) of name
|
||||
|
||||
---
|
||||
|
||||
To install it, first configure the main source tree, then run make;
|
||||
make install in this directory. Finally, load the function definition
|
||||
with psql:
|
||||
|
||||
psql -f PREFIX/share/contrib/metaphone.sql
|
||||
|
||||
The following are some usage examples:
|
||||
|
||||
SELECT text_metaphone('hello world!');
|
||||
SELECT text_metaphone('hello world!', 4);
|
||||
|
||||
CREATE TABLE s (nm text)\g
|
||||
|
||||
insert into s values ('john')\g
|
||||
insert into s values ('joan')\g
|
||||
insert into s values ('wobbly')\g
|
||||
|
||||
select * from s
|
||||
where text_metaphone(nm) = text_metaphone('john')\g
|
||||
|
||||
select a.nm, b.nm from s a, s b
|
||||
where text_metaphone(a.nm) = text_metaphone(b.nm)
|
||||
and a.oid <> b.oid\g
|
||||
|
||||
CREATE FUNCTION text_mp_eq(text, text) RETURNS bool AS
|
||||
'select text_metaphone($1) = text_metaphone($2)'
|
||||
LANGUAGE 'sql'\g
|
||||
|
||||
CREATE FUNCTION text_mp_lt(text,text) RETURNS bool AS
|
||||
'select text_metaphone($1) < text_metaphone($2)'
|
||||
LANGUAGE 'sql'\g
|
||||
|
||||
CREATE FUNCTION text_mp_gt(text,text) RETURNS bool AS
|
||||
'select text_metaphone($1) > text_metaphone($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_mp_le(text,text) RETURNS bool AS
|
||||
'select text_metaphone($1) <= text_metaphone($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_mp_ge(text,text) RETURNS bool AS
|
||||
'select text_metaphone($1) >= text_metaphone($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_mp_ne(text,text) RETURNS bool AS
|
||||
'select text_metaphone($1) <> text_metaphone($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
DROP OPERATOR #= (text,text)\g
|
||||
|
||||
CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_mp_eq,
|
||||
commutator = #=)\g
|
||||
|
||||
SELECT *
|
||||
FROM s
|
||||
WHERE text_mp_eq(nm,'pillsbury')\g
|
||||
|
||||
SELECT *
|
||||
from s
|
||||
where s.nm #= 'pillsbury';
|
@ -1,321 +0,0 @@
|
||||
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
Datum text_metaphone(PG_FUNCTION_ARGS);
|
||||
Datum text_metaphone_length(PG_FUNCTION_ARGS);
|
||||
|
||||
void phonetic(char *name, char *metaph, int metalen);
|
||||
|
||||
#define METAPHONE_LEN 50
|
||||
#undef METAPHONE_TEST
|
||||
|
||||
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
|
||||
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
|
||||
|
||||
#define NULLCHAR (char *) 0

/*
 * Size of phonetic()'s work buffer, including the terminating NUL.  Only
 * the first PHONETIC_WORK_LEN - 1 letters of a name are examined.
 */
#define PHONETIC_WORK_LEN 256

char	   *VOWELS = "AEIOU",
		   *FRONTV = "EIY",		/* special cases for letters in FRONT of these */
		   *VARSON = "CSPTG",	/* variable sound--those modified by adding an "h" */
		   *DOUBLE = ".";		/* let these double letters through */

char	   *excpPAIR = "AGKPW",	/* exceptions "ae-", "gn-", "kn-", "pn-", "wr-" */
		   *nextLTR = "ENNNR";
char	   *chrptr,
		   *chrptr1;

/*
 * phonetic_append
 *		Append s to metaph without letting the code grow beyond metalen
 *		characters.  *outlen is the current length of metaph and is kept
 *		up to date; metaph is always left NUL-terminated.
 */
static void
phonetic_append(char *metaph, int *outlen, int metalen, const char *s)
{
	while (*s != '\0' && *outlen < metalen)
		metaph[(*outlen)++] = *s++;
	metaph[*outlen] = '\0';
}

/*
 * phonetic
 *		Append the Metaphone code of name to metaph.
 *
 * name:	NUL-terminated input; non-alphabetic characters are ignored and
 *			letters are upper-cased.  Only the first PHONETIC_WORK_LEN - 1
 *			letters are examined.
 * metaph:	NUL-terminated output string (normally "") that the code is
 *			appended to.  It must have room for metalen characters plus the
 *			terminating NUL.
 * metalen: maximum length of metaph after the call.  Values <= 0 append
 *			nothing.
 *
 * Compared with the earlier version this one never writes more than
 * metalen characters (the two-character "KS" code used to overshoot by
 * one), never overruns the work buffer on long names, and shifts the work
 * buffer with memmove() rather than an overlapping strcpy().
 */
void
phonetic(char *name, char *metaph, int metalen)
{
	int			ii,
				jj,
				silent,
				hard,
				Lng,
				lastChr,
				outlen;
	char		curLtr,
				prevLtr,
				nextLtr,
				nextLtr2,
				nextLtr3;
	char		cur[2];			/* curLtr as a one-character string */
	int			vowelAfter,
				vowelBefore,
				frontvAfter;
	char		wname[PHONETIC_WORK_LEN];
	char	   *ename = wname;

	/* Keep only the letters, upper-cased, bounded by the work buffer */
	jj = 0;
	for (ii = 0; name[ii] != '\0' && jj < PHONETIC_WORK_LEN - 1; ii++)
	{
		unsigned char ch = (unsigned char) name[ii];

		if (isalpha(ch))
			ename[jj++] = (char) toupper(ch);
	}
	ename[jj] = '\0';

	if (jj == 0)
		return;

	/* if ae, gn, kn, pn, wr then drop the first letter */
	if ((chrptr = strchr(excpPAIR, ename[0])) != NULLCHAR)
	{
		chrptr1 = nextLTR + (chrptr - excpPAIR);
		if (*chrptr1 == ename[1])
			memmove(ename, ename + 1, strlen(ename + 1) + 1);
	}
	/* change x to s */
	if (ename[0] == 'X')
		ename[0] = 'S';
	/* get rid of the "h" in "wh" */
	if (strncmp(ename, "WH", 2) == 0)
		memmove(ename + 1, ename + 2, strlen(ename + 2) + 1);

	Lng = (int) strlen(ename);
	lastChr = Lng - 1;			/* index of the last character */

	/* Remove an S from the end of the string */
	if (ename[lastChr] == 'S')
	{
		ename[lastChr] = '\0';
		Lng = (int) strlen(ename);
		lastChr = Lng - 1;
	}

	outlen = (int) strlen(metaph);
	cur[1] = '\0';

	for (ii = 0; outlen < metalen && ii < Lng; ii++)
	{
		curLtr = ename[ii];
		cur[0] = curLtr;

		vowelBefore = 0;
		prevLtr = ' ';
		if (ii > 0)
		{
			prevLtr = ename[ii - 1];
			if (strchr(VOWELS, prevLtr) != NULLCHAR)
				vowelBefore = 1;
		}

		/* if first letter is a vowel KEEP it */
		if (ii == 0 && strchr(VOWELS, curLtr) != NULLCHAR)
		{
			phonetic_append(metaph, &outlen, metalen, cur);
			continue;
		}

		vowelAfter = 0;
		frontvAfter = 0;
		nextLtr = ' ';
		if (ii < lastChr)
		{
			nextLtr = ename[ii + 1];
			if (strchr(VOWELS, nextLtr) != NULLCHAR)
				vowelAfter = 1;
			if (strchr(FRONTV, nextLtr) != NULLCHAR)
				frontvAfter = 1;
		}

		/* skip double letters except ones in list */
		if (curLtr == nextLtr && strchr(DOUBLE, nextLtr) == NULLCHAR)
			continue;

		nextLtr2 = ' ';
		if (ii < lastChr - 1)
			nextLtr2 = ename[ii + 2];

		nextLtr3 = ' ';
		if (ii < lastChr - 2)
			nextLtr3 = ename[ii + 3];

		switch (curLtr)
		{
			case 'B':
				/* silent in a trailing -mb */
				silent = (ii == lastChr && prevLtr == 'M');
				if (!silent)
					phonetic_append(metaph, &outlen, metalen, cur);
				break;

			case 'C':
				/* silent -sci-,-sce-,-scy-; sci-, etc OK */
				if (ii > 1 && prevLtr == 'S' && frontvAfter)
					break;

				if (ii > 0 && nextLtr == 'I' && nextLtr2 == 'A')
					phonetic_append(metaph, &outlen, metalen, "X");
				else if (frontvAfter)
					phonetic_append(metaph, &outlen, metalen, "S");
				else if (ii > 1 && prevLtr == 'S' && nextLtr == 'H')
					phonetic_append(metaph, &outlen, metalen, "K");
				else if (nextLtr == 'H')
				{
					if (ii == 0 && strchr(VOWELS, nextLtr2) == NULLCHAR)
						phonetic_append(metaph, &outlen, metalen, "K");
					else
						phonetic_append(metaph, &outlen, metalen, "X");
				}
				else if (prevLtr == 'C')
					phonetic_append(metaph, &outlen, metalen, "C");
				else
					phonetic_append(metaph, &outlen, metalen, "K");
				break;

			case 'D':
				if (nextLtr == 'G' && strchr(FRONTV, nextLtr2) != NULLCHAR)
					phonetic_append(metaph, &outlen, metalen, "J");
				else
					phonetic_append(metaph, &outlen, metalen, "T");
				break;

			case 'G':
				silent = 0;
				/* SILENT -gh- except for -gh and no vowel after h */
				if (ii < lastChr - 1 && nextLtr == 'H' &&
					strchr(VOWELS, nextLtr2) == NULLCHAR)
					silent = 1;

				/* silent in a trailing -gned or -gn */
				if (ii == lastChr - 3 &&
					nextLtr == 'N' && nextLtr2 == 'E' && nextLtr3 == 'D')
					silent = 1;
				else if (ii == lastChr - 1 && nextLtr == 'N')
					silent = 1;

				if (prevLtr == 'D' && frontvAfter)
					silent = 1;

				hard = (prevLtr == 'G');

				if (!silent)
				{
					if (frontvAfter && !hard)
						phonetic_append(metaph, &outlen, metalen, "J");
					else
						phonetic_append(metaph, &outlen, metalen, "K");
				}
				break;

			case 'H':
				silent = 0;
				if (strchr(VARSON, prevLtr) != NULLCHAR)
					silent = 1;
				if (vowelBefore && !vowelAfter)
					silent = 1;
				if (!silent)
					phonetic_append(metaph, &outlen, metalen, cur);
				break;

			case 'F':
			case 'J':
			case 'L':
			case 'M':
			case 'N':
			case 'R':
				phonetic_append(metaph, &outlen, metalen, cur);
				break;

			case 'K':
				if (prevLtr != 'C')
					phonetic_append(metaph, &outlen, metalen, cur);
				break;

			case 'P':
				if (nextLtr == 'H')
					phonetic_append(metaph, &outlen, metalen, "F");
				else
					phonetic_append(metaph, &outlen, metalen, "P");
				break;

			case 'Q':
				phonetic_append(metaph, &outlen, metalen, "K");
				break;

			case 'S':
				if (ii > 1 && nextLtr == 'I' &&
					(nextLtr2 == 'O' || nextLtr2 == 'A'))
					phonetic_append(metaph, &outlen, metalen, "X");
				else if (nextLtr == 'H')
					phonetic_append(metaph, &outlen, metalen, "X");
				else
					phonetic_append(metaph, &outlen, metalen, "S");
				break;

			case 'T':
				if (ii > 1 && nextLtr == 'I' &&
					(nextLtr2 == 'O' || nextLtr2 == 'A'))
					phonetic_append(metaph, &outlen, metalen, "X");
				else if (nextLtr == 'H')
				{
					/* "th" codes as '0' unless word-initial before a non-vowel */
					if (ii > 0 || strchr(VOWELS, nextLtr2) != NULLCHAR)
						phonetic_append(metaph, &outlen, metalen, "0");
					else
						phonetic_append(metaph, &outlen, metalen, "T");
				}
				else if (!(ii < lastChr - 2 && nextLtr == 'C' && nextLtr2 == 'H'))
					phonetic_append(metaph, &outlen, metalen, "T");
				break;

			case 'V':
				phonetic_append(metaph, &outlen, metalen, "F");
				break;

			case 'W':
			case 'Y':
				if (ii < lastChr && vowelAfter)
					phonetic_append(metaph, &outlen, metalen, cur);
				break;

			case 'X':
				phonetic_append(metaph, &outlen, metalen, "KS");
				break;

			case 'Z':
				phonetic_append(metaph, &outlen, metalen, "S");
				break;

			default:
				/* vowels after the first letter produce no code */
				break;
		}
	}

	/*
	 * A trailing 'S' is deliberately NOT stripped from the finished code:
	 * the plural 'S' was already removed from the input above, and any
	 * embedded S sounds must stay in the Metaphone code.
	 */
	return;
}
|
||||
|
||||
|
||||
#ifdef METAPHONE_TEST
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2)
|
||||
{
|
||||
fprintf(stderr, "usage: %s string\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
char output[51]="";
|
||||
|
||||
phonetic(argv[1], output, 50);
|
||||
printf("metaphone(%s) = %s\n", argv[1], output);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* METAPHONE_TEST */
|
||||
|
||||
#ifndef METAPHONE_TEST
|
||||
/*
|
||||
* SQL function: text_metaphone(text) returns text
|
||||
*/
|
||||
PG_FUNCTION_INFO_V1(text_metaphone);
|
||||
|
||||
Datum
|
||||
text_metaphone(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char outstr[51]="";
|
||||
char *arg;
|
||||
|
||||
arg = _textout(PG_GETARG_TEXT_P(0));
|
||||
|
||||
phonetic(arg, outstr, 50);
|
||||
|
||||
PG_RETURN_TEXT_P(_textin(outstr));
|
||||
}
|
||||
|
||||
/*
|
||||
char outstr[51]="";
|
||||
char *arg;
|
||||
int32 metalen;
|
||||
|
||||
arg = _textout(PG_GETARG_TEXT_P(0));
|
||||
metalen = PG_GETARG_INT32(1);
|
||||
|
||||
|
||||
phonetic(arg, outstr, metalen);
|
||||
*/
|
||||
|
||||
PG_FUNCTION_INFO_V1(text_metaphone_length);
|
||||
|
||||
Datum
|
||||
text_metaphone_length(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char outstr[51]="";
|
||||
char *arg;
|
||||
int32 metalen;
|
||||
|
||||
arg = _textout(PG_GETARG_TEXT_P(0));
|
||||
metalen = PG_GETARG_INT32(1);
|
||||
|
||||
phonetic(arg, outstr, metalen);
|
||||
|
||||
PG_RETURN_TEXT_P(_textin(outstr));
|
||||
}
|
||||
|
||||
|
||||
#endif /* not METAPHONE_TEST */
|
@ -1,3 +0,0 @@
|
||||
-- The SQL names must match the C entry points they expose (the single
-- argument form was previously created under the name text_soundex).
-- Both functions are strict because the C code does not check for NULL.
CREATE FUNCTION text_metaphone(text) RETURNS text
  AS '@MODULE_FILENAME@', 'text_metaphone' LANGUAGE 'C' WITH (iscachable, isstrict);

CREATE FUNCTION text_metaphone(text, int) RETURNS text
  AS '@MODULE_FILENAME@', 'text_metaphone_length' LANGUAGE 'C' WITH (iscachable, isstrict);
|
||||
|
@ -1,40 +0,0 @@
|
||||
#
|
||||
# $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.10 2001/06/18 21:38:02 momjian Exp $
|
||||
#
|
||||
|
||||
subdir = contrib/soundex
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
|
||||
NAME := soundex
|
||||
SONAME := $(NAME)$(DLSUFFIX)
|
||||
|
||||
override CPPFLAGS += -I$(srcdir)
|
||||
override CFLAGS += $(CFLAGS_SL)
|
||||
override DLLLIBS := $(BE_DLLLIBS) $(DLLLIBS)
|
||||
|
||||
all: $(SONAME) $(NAME).sql
|
||||
|
||||
$(NAME).sql: $(NAME).sql.in
|
||||
sed 's,@MODULE_FILENAME@,$(libdir)/contrib/$(SONAME),g' $< >$@
|
||||
|
||||
install: all installdirs
|
||||
$(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib
|
||||
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
|
||||
$(INSTALL_DATA) README.$(NAME) $(docdir)/contrib
|
||||
|
||||
installdirs:
|
||||
$(mkinstalldirs) $(libdir)/contrib $(datadir)/contrib $(docdir)/contrib
|
||||
|
||||
uninstall:
|
||||
rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME)
|
||||
|
||||
clean distclean maintainer-clean:
|
||||
rm -f $(SONAME) $(NAME).o $(NAME).sql
|
||||
|
||||
depend dep:
|
||||
$(CC) -MM -MG $(CFLAGS) *.c > depend
|
||||
|
||||
ifeq (depend,$(wildcard depend))
|
||||
include depend
|
||||
endif
|
@ -1,66 +0,0 @@
|
||||
This directory contains a module that implements the "Soundex" code as
|
||||
a PostgreSQL user-defined function. The Soundex system is a method of
|
||||
matching similar sounding names (or any words) to the same code. It
|
||||
was initially used by the United States Census in 1880, 1900, and
|
||||
1910, but it has little use beyond English names (or the English
|
||||
pronunciation of names), and it is not a linguistic tool.
|
||||
|
||||
To install it, first configure the main source tree, then run make;
|
||||
make install in this directory. Finally, load the function definition
|
||||
with psql:
|
||||
|
||||
psql -f PREFIX/share/contrib/soundex.sql
|
||||
|
||||
The following are some usage examples:
|
||||
|
||||
SELECT text_soundex('hello world!');
|
||||
|
||||
CREATE TABLE s (nm text)\g
|
||||
|
||||
insert into s values ('john')\g
|
||||
insert into s values ('joan')\g
|
||||
insert into s values ('wobbly')\g
|
||||
|
||||
select * from s
|
||||
where text_soundex(nm) = text_soundex('john')\g
|
||||
|
||||
select a.nm, b.nm from s a, s b
|
||||
where text_soundex(a.nm) = text_soundex(b.nm)
|
||||
and a.oid <> b.oid\g
|
||||
|
||||
CREATE FUNCTION text_sx_eq(text, text) RETURNS bool AS
|
||||
'select text_soundex($1) = text_soundex($2)'
|
||||
LANGUAGE 'sql'\g
|
||||
|
||||
CREATE FUNCTION text_sx_lt(text,text) RETURNS bool AS
|
||||
'select text_soundex($1) < text_soundex($2)'
|
||||
LANGUAGE 'sql'\g
|
||||
|
||||
CREATE FUNCTION text_sx_gt(text,text) RETURNS bool AS
|
||||
'select text_soundex($1) > text_soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_sx_le(text,text) RETURNS bool AS
|
||||
'select text_soundex($1) <= text_soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_sx_ge(text,text) RETURNS bool AS
|
||||
'select text_soundex($1) >= text_soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_sx_ne(text,text) RETURNS bool AS
|
||||
'select text_soundex($1) <> text_soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
DROP OPERATOR #= (text,text)\g
|
||||
|
||||
CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_sx_eq,
|
||||
commutator = #=)\g
|
||||
|
||||
SELECT *
|
||||
FROM s
|
||||
WHERE text_sx_eq(nm,'john')\g
|
||||
|
||||
SELECT *
|
||||
from s
|
||||
where s.nm #= 'john';
|
@ -1,119 +0,0 @@
|
||||
/* $Header: /cvsroot/pgsql/contrib/soundex/Attic/soundex.c,v 1.11 2001/03/22 03:59:10 momjian Exp $ */
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "fmgr.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
|
||||
Datum text_soundex(PG_FUNCTION_ARGS);
|
||||
|
||||
static void soundex(const char *instr, char *outstr);
|
||||
|
||||
#define SOUNDEX_LEN 4
|
||||
|
||||
|
||||
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
|
||||
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
|
||||
|
||||
|
||||
#ifndef SOUNDEX_TEST
|
||||
/*
|
||||
* SQL function: text_soundex(text) returns text
|
||||
*/
|
||||
PG_FUNCTION_INFO_V1(text_soundex);
|
||||
|
||||
Datum
|
||||
text_soundex(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char outstr[SOUNDEX_LEN + 1];
|
||||
char *arg;
|
||||
|
||||
arg = _textout(PG_GETARG_TEXT_P(0));
|
||||
|
||||
soundex(arg, outstr);
|
||||
|
||||
PG_RETURN_TEXT_P(_textin(outstr));
|
||||
}
|
||||
|
||||
#endif /* not SOUNDEX_TEST */
|
||||
|
||||
|
||||
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
|
||||
static const char *soundex_table = "01230120022455012623010202";
|
||||
|
||||
#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
|
||||
|
||||
|
||||
static void
|
||||
soundex(const char *instr, char *outstr)
|
||||
{
|
||||
int count;
|
||||
|
||||
AssertArg(instr);
|
||||
AssertArg(outstr);
|
||||
|
||||
outstr[SOUNDEX_LEN] = '\0';
|
||||
|
||||
/* Skip leading non-alphabetic characters */
|
||||
while (!isalpha((unsigned char) instr[0]) && instr[0])
|
||||
++instr;
|
||||
|
||||
/* No string left */
|
||||
if (!instr[0])
|
||||
{
|
||||
outstr[0] = (char) 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Take the first letter as is */
|
||||
*outstr++ = (char) toupper((unsigned char) *instr++);
|
||||
|
||||
count = 1;
|
||||
while (*instr && count < SOUNDEX_LEN)
|
||||
{
|
||||
if (isalpha((unsigned char) *instr) &&
|
||||
soundex_code(*instr) != soundex_code(*(instr - 1)))
|
||||
{
|
||||
*outstr = soundex_code(instr[0]);
|
||||
if (*outstr != '0')
|
||||
{
|
||||
++outstr;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
++instr;
|
||||
}
|
||||
|
||||
/* Fill with 0's */
|
||||
while (count < SOUNDEX_LEN)
|
||||
{
|
||||
*outstr = '0';
|
||||
++outstr;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef SOUNDEX_TEST
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2)
|
||||
{
|
||||
fprintf(stderr, "usage: %s string\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
char output[SOUNDEX_LEN + 1];
|
||||
|
||||
soundex(argv[1], output);
|
||||
printf("soundex(%s) = %s\n", argv[1], output);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* SOUNDEX_TEST */
|
@ -1,5 +0,0 @@
|
||||
-- Both names map to the same C function.  They are declared strict because
-- the C code fetches its argument without checking for NULL, and cachable
-- because the result depends only on the argument.
CREATE FUNCTION text_soundex(text) RETURNS text
  AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'C' WITH (iscachable, isstrict);

CREATE FUNCTION soundex(text) RETURNS text
  AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'C' WITH (iscachable, isstrict);
|
Loading…
Reference in New Issue
Block a user