mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-30 19:00:29 +08:00
Defend against non-ASCII letters in fuzzystrmatch code. The functions
still don't behave very sanely for multibyte encodings, but at least they won't be indexing off the ends of static arrays.
This commit is contained in:
parent
4bc176deae
commit
6a42ab4eb8
@ -5,7 +5,7 @@
|
||||
*
|
||||
* Joe Conway <mail@joeconway.com>
|
||||
*
|
||||
* $PostgreSQL: pgsql/contrib/fuzzystrmatch/fuzzystrmatch.h,v 1.14 2006/07/10 18:40:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/contrib/fuzzystrmatch/fuzzystrmatch.h,v 1.14.2.1 2009/04/07 15:54:07 tgl Exp $
|
||||
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
||||
* ALL RIGHTS RESERVED;
|
||||
*
|
||||
@ -75,7 +75,15 @@ static void _soundex(const char *instr, char *outstr);
|
||||
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
|
||||
static const char *soundex_table = "01230120022455012623010202";
|
||||
|
||||
#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
|
||||
/*
 * soundex_code - translate one character into its Soundex code.
 *
 * The character is upper-cased with toupper(); the cast to unsigned char
 * keeps a negative char value from being handed to toupper().  If the
 * upper-cased result lies in 'A'..'Z', the corresponding entry of
 * soundex_table is returned.  Any other value is returned as-is (after
 * upper-casing), so soundex_table is never indexed outside the A-Z range.
 */
static char
|
||||
soundex_code(char letter)
|
||||
{
|
||||
letter = toupper((unsigned char) letter);
|
||||
/* Defend against non-ASCII letters */
|
||||
if (letter >= 'A' && letter <= 'Z')
|
||||
return soundex_table[letter - 'A'];
|
||||
return letter;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
@ -148,26 +156,36 @@ _metaphone(
|
||||
|
||||
/*-- Character encoding array & accessing macros --*/
|
||||
/* Stolen directly out of the book... */
|
||||
char _codes[26] = {
|
||||
static const char _codes[26] = {
|
||||
1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
|
||||
/* a b c d e f g h i j k l m n o p q r s t u v w x y z */
|
||||
};
|
||||
|
||||
/*
 * getcode - fetch the _codes flag value for a character.
 *
 * If c is alphabetic according to isalpha() and its upper-cased form lies
 * in 'A'..'Z', the matching _codes entry is returned.  Every other
 * character, including letters that toupper() maps outside 'A'..'Z',
 * yields 0, so _codes is never indexed out of bounds.
 *
 * The result is tested bitwise by the isvowel, NOCHANGE, AFFECTH, MAKESOFT
 * and NOGHTOF macros.
 */
static int
|
||||
getcode(char c)
|
||||
{
|
||||
if (isalpha((unsigned char) c))
|
||||
{
|
||||
c = toupper((unsigned char) c);
|
||||
/* Defend against non-ASCII letters */
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
return _codes[c - 'A'];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define ENCODE(c) (isalpha((unsigned char) (c)) ? _codes[((toupper((unsigned char) (c))) - 'A')] : 0)
|
||||
|
||||
#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */
|
||||
#define isvowel(c) (getcode(c) & 1) /* AEIOU */
|
||||
|
||||
/* These letters are passed through unchanged */
|
||||
#define NOCHANGE(c) (ENCODE(c) & 2) /* FJMNR */
|
||||
#define NOCHANGE(c) (getcode(c) & 2) /* FJMNR */
|
||||
|
||||
/* These form diphthongs when preceding H */
|
||||
#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */
|
||||
#define AFFECTH(c) (getcode(c) & 4) /* CGPST */
|
||||
|
||||
/* These make C and G soft */
|
||||
#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */
|
||||
#define MAKESOFT(c) (getcode(c) & 8) /* EIY */
|
||||
|
||||
/* These prevent GH from becoming F */
|
||||
#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */
|
||||
#define NOGHTOF(c) (getcode(c) & 16) /* BDH */
|
||||
|
||||
#endif /* FUZZYSTRMATCH_H */
|
||||
|
Loading…
Reference in New Issue
Block a user