From 6a42ab4eb8dc0b192f61671890bd34d495426f96 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 7 Apr 2009 15:54:07 +0000 Subject: [PATCH] Defend against non-ASCII letters in fuzzystrmatch code. The functions still don't behave very sanely for multibyte encodings, but at least they won't be indexing off the ends of static arrays. --- contrib/fuzzystrmatch/fuzzystrmatch.h | 38 ++++++++++++++++++++------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.h b/contrib/fuzzystrmatch/fuzzystrmatch.h index a8038d23f3..c58158e88e 100644 --- a/contrib/fuzzystrmatch/fuzzystrmatch.h +++ b/contrib/fuzzystrmatch/fuzzystrmatch.h @@ -5,7 +5,7 @@ * * Joe Conway * - * $PostgreSQL: pgsql/contrib/fuzzystrmatch/fuzzystrmatch.h,v 1.14 2006/07/10 18:40:16 momjian Exp $ + * $PostgreSQL: pgsql/contrib/fuzzystrmatch/fuzzystrmatch.h,v 1.14.2.1 2009/04/07 15:54:07 tgl Exp $ * Copyright (c) 2001-2006, PostgreSQL Global Development Group * ALL RIGHTS RESERVED; * @@ -75,7 +75,15 @@ static void _soundex(const char *instr, char *outstr); /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ static const char *soundex_table = "01230120022455012623010202"; -#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A'] +static char +soundex_code(char letter) +{ + letter = toupper((unsigned char) letter); + /* Defend against non-ASCII letters */ + if (letter >= 'A' && letter <= 'Z') + return soundex_table[letter - 'A']; + return letter; +} /* @@ -148,26 +156,36 @@ _metaphone( /*-- Character encoding array & accessing macros --*/ /* Stolen directly out of the book... */ -char _codes[26] = { +static const char _codes[26] = { 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0 /* a b c d e f g h i j k l m n o p q r s t u v w x y z */ }; +static int +getcode(char c) +{ + if (isalpha((unsigned char) c)) + { + c = toupper((unsigned char) c); + /* Defend against non-ASCII letters */ + if (c >= 'A' && c <= 'Z') + return _codes[c - 'A']; + } + return 0; +} -#define ENCODE(c) (isalpha((unsigned char) (c)) ? _codes[((toupper((unsigned char) (c))) - 'A')] : 0) - -#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */ +#define isvowel(c) (getcode(c) & 1) /* AEIOU */ /* These letters are passed through unchanged */ -#define NOCHANGE(c) (ENCODE(c) & 2) /* FJMNR */ +#define NOCHANGE(c) (getcode(c) & 2) /* FJMNR */ /* These form dipthongs when preceding H */ -#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */ +#define AFFECTH(c) (getcode(c) & 4) /* CGPST */ /* These make C and G soft */ -#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */ +#define MAKESOFT(c) (getcode(c) & 8) /* EIY */ /* These prevent GH from becoming F */ -#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */ +#define NOGHTOF(c) (getcode(c) & 16) /* BDH */ #endif /* FUZZYSTRMATCH_H */