postgresql/contrib/tsearch/dict/porter_english.dct

/*
 * ----START-LICENCE----
 * Copyright 1999,2000 BrightStation PLC
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 * -----END-LICENCE-----
 */
/* Version 1: see http://open.muscat.com/ for further information */


#ifdef DICT_BODY
#include <ctype.h> /* tolower */

static void * setup_english_stemmer();

static const char * english_stem(void * z, const char * q, int i0, int i1);

static void closedown_english_stemmer(void * z);


/* To set up the english stemming process:

       void * z = setup_stemmer();

   to use it:

       char * p = stem(z, q, i0, i1);

   The word to be stemmed is in byte address q offsets i0 to i1
   inclusive (i.e. from q[i0] to q[i1]). The stemmed result is the
   C string at address p.

   To close down the stemming process:

   closedown_stemmer(z);

*/

/* The English stemming algorithm is essentially the Porter stemming
 * algorithm, and has been coded up by its author. It follows the algorithm
 * presented in
 *
 * Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
 * no. 3, pp 130-137,
 *
 * only differing from it at the points marked -DEPARTURE- and -NEW-
 * below.
 *
 * For a more faithful version of the Porter algorithm, see
 *
 *     http://www.muscat.com/~martin/stem.html
 *
 */

/* Later additions:

   June 2000

   The 'l' of the 'logi' -> 'log' rule is put with the stem, so that
   short stems like 'geo' 'theo' etc work like 'archaeo' 'philo' etc.

   This follows a suggestion of Barry Wilkins, reasearch student at
   Birmingham.


   February 2000

   the cvc test for not dropping final -e now looks after vc at the
   beginning of a word, so are, eve, ice, ore, use keep final -e. In this
   test c is any consonant, including w, x and y. This extension was
   suggested by Chris Emerson.

   -fully    -> -ful   treated like  -fulness -> -ful, and
   -tionally -> -tion  treated like  -tional  -> -tion

   both in Step 2. These were suggested by Hiranmay Ghosh, of New Delhi.

   Invariants proceed, succeed, exceed. Also suggested by Hiranmay Ghosh.

*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pool {

    int size;
    struct pool_entry * entries;

};

/*  This is used as a library to resolve exceptions in the various
    stemming algorithms. Typical use is,

        struct pool * p = create_pool(t);
        char * s_translated = search_pool(p, strlen(s), s);
        ...
        free_pool(p);

    t is an array of strings, e.g.

        static char * t[] = {

            "sky",     "sky/skies/",
            "die",     "dying/",
            "lie",     "lying/",
            "tie",     "tying/",
            ....
            0, 0

        };

    if s is "sky", "skies", "dying" etc., translated_s is becomes "sky",
    "sky", "die" etc.

    The code includes a sort/merge capability which may be turned into
    (or replaced by) something more general later on.

*/

/*  merge(n, p, q, r, l, k, f) repeatedly merges n-byte sequences of items of
    size k from addresses p and q into r. f is the comparison routine and
    l is the limit point for q.
*/

static void merge(int n, char * p, char * q, char * r, char * l, int k,
                  int (*f)(char *, char *))
{  char * q0 = q;
   if (q0 > l) { memmove(r, p, l-p); return; }
   while (p < q0)
   {  char * pl = n+p;
      char * ql = n+q;
      if (ql > l) ql = l;
      while(true)
      {  if (p >= pl) { memmove(r, q, ql-q); r += ql-q; q = ql; break; }
         if (q >= ql) { memmove(r, p, pl-p); r += pl-p; p = pl; break; }
         if (f(p, q)) { memmove(r, p, k); p += k; }
                else { memmove(r, q, k); q += k; }
         r += k;
      }
   }
   memmove(r, q, l-q);
}

/*  In sort(p, c, k, f), p+c is a byte address at which begin a sequence of
    items of size k to be sorted. p+l is the address of the byte after the
    last of these items, so l - c is divisible by k. f is a comparison function
    for a pair of these items: f(p+i, q+j) is true if the item at p+i is before
    the item at q+j, false if it is equal to or after it.
*/

static void sort(char * p, int c, int l, int k,
                 int (*f)(char *, char *))
{
    char * q = malloc(l-c);  /* temporary work space */
    int j = k;
    int w = l-c;
    while (j < w)
    {   int cycle;
        for (cycle = 1; cycle <= 2; cycle++)
        {   int h = (w+j-1) / j / 2 * j;     /* half way */
            if (cycle == 1) merge(j, p+c, p+c+h, q, p+l, k, f);
                       else merge(j, q, q+h, p+c, q+w, k, f);
            j *= 2;
        }
    }
    free(q);
}

struct pool_entry {

    const char * translation;
    const char * pointer;
    int length;

};

static void print_entry(struct pool_entry * p)
    {
       { int j; for (j=0;j<p->length;j++) fprintf(stderr, "%c", (p->pointer)[j]); }
       fprintf(stderr, " --> %s\n", p->translation);
    }

/*  - debugging aid
    static void print_pool(struct pool * p)
    {   int i;
        int size = p->size;
        struct pool_entry * q = p->entries;
        fprintf(stderr, "\nPool:\n");
        for (i = 0; i < size; i++) print_entry(q+i);
    }
*/

/* compare(p, q) is our comparison function, used for f above
*/

static int compare(char * char_p, char * char_q)
{   struct pool_entry * p = (struct pool_entry *) char_p;
    struct pool_entry * q = (struct pool_entry *) char_q;
    if (p->length == q->length) return memcmp(p->pointer, q->pointer, p->length) < 0;
    return p->length < q->length;
}

static int count_slashes(const char * s[])
{   int slash_count = 0;
    int i;
    for (i = 1; s[i] != 0; i += 2)
    {   const char * p = s[i];
        int j = 0;
        while (p[j] != 0) if (p[j++] == '/') slash_count++;
    }
    return slash_count;
}

static struct pool * create_pool(const char * s[])
{   int size = count_slashes(s);
    struct pool_entry * z = (struct pool_entry *) malloc(size * sizeof(struct pool_entry));
    struct pool_entry * q = z;
    int i;
    for (i = 1; s[i] != 0; i += 2)
    {   const char * p = s[i];
        int j = 0;
        int j0 = 0;
        while(true)
        {   if (p[j] == 0)
            {  if (j0 != j) { fprintf(stderr, "%s lacks final '/'\n", p); exit(1); }
               break;
            }
            if (p[j] == '/')
            {
                q->translation = s[i-1];
                q->pointer = p+j0; q->length = j-j0;
                q++;
                j0 = j+1;
            }
            j++;
        }
    }
    sort((char *) z, 0, size * sizeof(struct pool_entry), sizeof(struct pool_entry), compare);

    /* now validate the contents */

    for (i = 1; i < size; i++)
    {   struct pool_entry * p = z+i;
        struct pool_entry * q = z+i-1;
        if (p->length == q->length && memcmp(p->pointer, q->pointer, p->length) == 0)
        {   fprintf(stderr, "warning: "); print_entry(p);
            fprintf(stderr, " and "); print_entry(q);
        }
    }

    {   struct pool * p = (struct pool *) malloc(sizeof(struct pool));
        p->entries = z;
        p->size = size;
        return p;
    }
}

static int compare_to_pool(int length, const char * s, int length_p, const char * s_p)
{   if (length != length_p) return length-length_p;
    return memcmp(s, s_p, length);
}

static const char * search_pool(struct pool * p, int length, char * s)
{   int i = 0;
    int j = p->size;
    struct pool_entry * q = p->entries;
    if (j == 0) return 0;  /* empty pool */
    if (compare_to_pool(length, s, q->length, q->pointer) < 0) return 0;
    while(true)
    {
        int h = (i+j)/2;
        int diff = compare_to_pool(length, s, (q+h)->length, (q+h)->pointer);
        if (diff == 0) return (q+h)->translation;
        if (j-i <= 1) return 0;
        if (diff < 0) j = h; else i = h;
    }
}

static void free_pool(struct pool * p)
{   free(p->entries);
    free(p);
}

struct english_stemmer
{
    char * p;
    int p_size;
    int k;
    int j;
    struct pool * irregulars;
};

/* The main part of the stemming algorithm starts here. z->p is a buffer
   holding a word to be stemmed. The letters are in z->p[0], z->p[1] ...
   ending at z->p[z->k]. z->k is readjusted downwards as the stemming
   progresses. Zero termination is not in fact used in the algorithm.

   Note that only lower case sequences are stemmed. Forcing to lower case
   should be done before english_stem(...) is called.

   We will write p, k etc in place of z->p, z->k in the comments.
*/

/* cons(z, i) is true <=> p[i] is a consonant.
*/

static int cons(struct english_stemmer * z, int i)
{   switch (z->p[i])
    {   case 'a': case 'e': case 'i': case 'o': case 'u':
            return false;
        case 'y':
            return (i==0) ? true : !cons(z, i - 1);
        default: return true;
    }
}

/* m(z) measures the number of consonant sequences between 0 and j. if c is
   a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
   presence,

      <c><v>       gives 0
      <c>vc<v>     gives 1
      <c>vcvc<v>   gives 2
      <c>vcvcvc<v> gives 3
      ....
*/

static int m(struct english_stemmer * z)
{   int n = 0;
    int i = 0;
    while(true)
    {   if (i > z->j) return n;
        if (! cons(z, i)) break; i++;
    }
    i++;
    while(true)
    {   while(true)
        {   if (i > z->j) return n;
            if (cons(z, i)) break;
            i++;
        }
        i++;
        n++;
        while(true)
        {   if (i > z->j) return n;
            if (! cons(z, i)) break;
            i++;
        }
        i++;
    }
}

/* vowelinstem(z) is true p[0], ... p[j] contains a vowel
*/

static int vowelinstem(struct english_stemmer * z)
{   int i;
    for (i = 0; i <= z->j; i++) if (! cons(z, i)) return true;
    return false;
}

/* doublec(z, i) is true <=> p[i], p[i - 1] contain a double consonant.
*/

static int doublec(struct english_stemmer * z, int i)
{   if (i < 1) return false;
    if (z->p[i] != z->p[i - 1]) return false;
    return cons(z, i);
}

/* cvc(z, i) is true <=>

   a) ( -NEW- ) i == 1, and p[0] p[1] is vowel consonant, or

   b) p[i - 2], p[i - 1], p[i] has the form consonant -
      vowel - consonant and also if the second c is not w, x or y. this is used
      when trying to restore an e at the end of a short word. e.g.

         cav(e), lov(e), hop(e), crim(e), but
         snow, box, tray.

*/

static int cvc(struct english_stemmer * z, int i)
{
    if (i == 0) return false;   /* i == 0 never happens perhaps */

    if (i == 1) return !cons(z, 0) && cons(z, 1);

    if (!cons(z, i) || cons(z, i - 1) || !cons(z, i - 2)) return false;
    {   int ch = z->p[i];
        if (ch == 'w' || ch == 'x' || ch == 'y') return false;
    }
    return true;
}

/* ends(z, s, length) is true <=> p[0], ... p[k] ends with the string s.
*/

static int ends(struct english_stemmer * z, const char * s, int length)
{
    if (length > z->k + 1) return false;
    if (memcmp(z->p + z->k - length + 1, s, length) != 0) return false;
    z->j = z->k - length;
    return true;
}

/* setto(z, s, length) sets p[j + 1] ... to the characters in the string s,
   readjusting k.
*/

static void setto(struct english_stemmer * z, const char * s, int length)
{
    memmove(z->p + z->j + 1, s, length);
    z->k = z->j + length;
}

/* r(z, s, length) is used further down. */

static void r(struct english_stemmer * z, const char * s, int length)
{
    if (m(z) > 0) setto(z, s, length);
}

/* step_1ab(z) gets rid of plurals and -ed or -ing. e.g.

       caresses  ->  caress
       ponies    ->  poni
       sties     ->  sti
       tie       ->  tie       (-NEW-: see below)
       caress    ->  caress
       cats      ->  cat

       feed      ->  feed
       agreed    ->  agree
       disabled  ->  disable

       matting   ->  mat
       mating    ->  mate
       meeting   ->  meet
       milling   ->  mill
       messing   ->  mess

       meetings  ->  meet

*/

static void step_1ab(struct english_stemmer * z)
{   if (z->p[z->k] == 's')
    {   if (ends(z, "sses", 4)) z->k -= 2; else
        if (ends(z, "ies", 3))
            if (z->j == 0) z->k--; else z->k -= 2;

        /* this line extends the original algorithm, so that 'flies'->'fli' but
           'dies'->'die' etc */

        else
            if (z->p[z->k - 1] != 's') z->k--;
    }

    if (ends(z, "ied", 3)) { if (z->j == 0) z->k--; else z->k -= 2; } else

    /* this line extends the original algorithm, so that 'spied'->'spi' but
       'died'->'die' etc */

    if (ends(z, "eed", 3)) { if (m(z) > 0) z->k--; } else
    if ((ends(z, "ed", 2) || ends(z, "ing", 3)) && vowelinstem(z))
    {   z->k = z->j;
        if (ends(z, "at", 2)) setto(z, "ate", 3); else
        if (ends(z, "bl", 2)) setto(z, "ble", 3); else
        if (ends(z, "iz", 2)) setto(z, "ize", 3); else
        if (doublec(z, z->k))
        {   z->k--;
            {   int ch = z->p[z->k];
                if (ch == 'l' || ch == 's' || ch == 'z') z->k++;
            }
        }
        else if (m(z) == 1 && cvc(z, z->k)) setto(z, "e", 1);
    }
}

/* step_1c(z) turns terminal y to i when there is another vowel in the stem.

   -NEW-: This has been modified from the original Porter algorithm so that y->i
   is only done when y is preceded by a consonant, but not if the stem
   is only a single consonant, i.e.

       (*c and not c) Y -> I

   So 'happy' -> 'happi', but
      'enjoy' -> 'enjoy'  etc

   This is a much better rule. Formerly 'enjoy'->'enjoi' and 'enjoyment'->
   'enjoy'. Step 1c is perhaps done too soon; but with this modification that
   no longer really matters.

   Also, the removal of the vowelinstem(z) condition means that 'spy', 'fly',
   'try' ... stem to 'spi', 'fli', 'tri' and conflate with 'spied', 'tried',
   'flies' ...

*/

static void step_1c(struct english_stemmer * z)
{
    if (ends(z, "y", 1) && z->j > 0 && cons(z, z->k - 1)) z->p[z->k] = 'i';
}


/* step_2(z) maps double suffices to single ones. so -ization ( = -ize plus
   -ation) maps to -ize etc. Note that the string before the suffix must give
   m(z) > 0.
*/

static void step_2(struct english_stemmer * z)
{   switch (z->p[z->k - 1])
    {
        case 'a':
            if (ends(z, "ational", 7)) { r(z, "ate", 3); break; }
            if (ends(z, "tional", 6)) { r(z, "tion", 4); break; }
            break;
        case 'c':
            if (ends(z, "enci", 4)) { r(z, "ence", 4); break; }
            if (ends(z, "anci", 4)) { r(z, "ance", 4); break; }
            break;
        case 'e':
            if (ends(z, "izer", 4)) { r(z, "ize", 3); break; }
            break;
        case 'l':
            if (ends(z, "bli", 3)) { r(z, "ble", 3); break; } /*-DEPARTURE-*/

     /* To match the published algorithm, replace this line with
        case 'l':
            if (ends(z, "abli", 4)) { r(z, "able", 4); break; }
     */
            if (ends(z, "alli", 4))
            {
                if (m(z) > 0) { setto(z, "al", 2); step_2(z); } /*-NEW-*/
                break;
            }

            if (ends(z, "fulli", 5)) { r(z, "ful", 3); break; } /*-NEW-*/
            if (ends(z, "entli", 5)) { r(z, "ent", 3); break; }
            if (ends(z, "eli", 3)) { r(z, "e", 1); break; }
            if (ends(z, "ousli", 5)) { r(z, "ous", 3); break; }
            break;
        case 'o':
            if (ends(z, "ization", 7)) { r(z, "ize", 3); break; }
            if (ends(z, "ation", 5)) { r(z, "ate", 3); break; }
            if (ends(z, "ator", 4)) { r(z, "ate", 3); break; }
            break;
        case 's':
            if (ends(z, "alism", 5)) { r(z, "al", 2); break; }
            if (ends(z, "iveness", 7)) { r(z, "ive", 3); break; }
            if (ends(z, "fulness", 7)) { r(z, "ful", 3); break; }
            if (ends(z, "ousness", 7)) { r(z, "ous", 3); break; }
            break;
        case 't':
            if (ends(z, "aliti", 5)) { r(z, "al", 2); break; }
            if (ends(z, "iviti", 5)) { r(z, "ive", 3); break; }
            if (ends(z, "biliti", 6)) { r(z, "ble", 3); break; }
            break;
        case 'g':
            if (ends(z, "logi", 4))
            {   z->j++;                /*-NEW-*/ /*(Barry Wilkins)*/
                r(z, "og", 2); break;
            } /*-DEPARTURE-*/

     /* To match the published algorithm, delete this line */

    }
}

/* step_3(z) deals with -ic-, -full, -ness etc. Similar strategy to step_2.
*/

static void step_3(struct english_stemmer * z)
{   switch (z->p[z->k])
    {
        case 'e':
            if (ends(z, "icate", 5)) { r(z, "ic", 2); break; }
            if (ends(z, "ative", 5)) { r(z, "", 0); break; }
            if (ends(z, "alize", 5)) { r(z, "al", 2); break; }
            break;
        case 'i':
            if (ends(z, "iciti", 5)) { r(z, "ic", 2); break; }
            break;
        case 'l':
            if (ends(z, "ical", 4)) { r(z, "ic", 2); break; }
            if (ends(z, "ful", 3)) { r(z, "", 0); break; }
            break;
        case 's':
            if (ends(z, "ness", 4)) { r(z, "", 0); break; }
            break;
    }
}

/* step_4() takes off -ant, -ence etc., in context <c>vcvc<v>.
*/

static void step_4(struct english_stemmer * z)
{   switch (z->p[z->k - 1])
    {   case 'a':
            if (ends(z, "al", 2)) break; return;
        case 'c':
            if (ends(z, "ance", 4)) break;
            if (ends(z, "ence", 4)) break; return;
        case 'e':
            if (ends(z, "er", 2)) break; return;
        case 'i':
            if (ends(z, "ic", 2)) break; return;
        case 'l':
            if (ends(z, "able", 4)) break;
            if (ends(z, "ible", 4)) break; return;
        case 'n':
            if (ends(z, "ant", 3)) break;
            if (ends(z, "ement", 5)) break;
            if (ends(z, "ment", 4)) break;
            if (ends(z, "ent", 3)) break; return;
        case 'o':
            if (ends(z, "ion", 3) && (z->p[z->j] == 's' ||
                                    z->p[z->j] == 't')) break;
            if (ends(z, "ou", 2)) break; return;
            /* takes care of -ous */
        case 's':
            if (ends(z, "ism", 3)) break; return;
        case 't':
            if (ends(z, "ate", 3)) break;
            if (ends(z, "iti", 3)) break; return;
        case 'u':
            if (ends(z, "ous", 3)) break; return;
        case 'v':
            if (ends(z, "ive", 3)) break; return;
        case 'z':
            if (ends(z, "ize", 3)) break; return;
        default:
            return;
    }
    if (m(z) > 1) z->k = z->j;
}

/* step_5(z) removes a final -e if m(z) > 1, and changes -ll to -l if
   m(z) > 1.
*/

static void step_5(struct english_stemmer * z)
{   z->j = z->k;
    if (z->p[z->k] == 'e')
    {   int a = m(z);
        if (a > 1 || (a == 1 && !cvc(z, z->k - 1))) z->k--;
    }
    if (z->p[z->k] == 'l' && doublec(z, z->k) && m(z) > 1) z->k--;
}

static const char * english_stem(void * z_, const char * q, int i0, int i1)
{
    struct english_stemmer * z = (struct english_stemmer *) z_;
    int p_size = z->p_size;

    if (i1 - i0 + 50 > p_size)
    {   free(z->p);
        p_size = i1 - i0 + 75; /* ample */ z->p_size = p_size;
        z->p = (char *) malloc(p_size);
    }

    memmove(z->p, q + i0, i1 - i0 + 1);

    z->k = i1 - i0;


    {   const char * t = search_pool(z->irregulars, z->k + 1, z->p);
        if (t != 0)  {
		z->k = strlen(t) - 1;
		return t;
	}
    }

    if (z->k > 1) /*-DEPARTURE-*/

   /* With this line, strings of length 1 or 2 don't go through the
      stemming process, although no mention is made of this in the
      published algorithm. Remove the line to match the published
      algorithm. */

    {   step_1ab(z); step_1c(z);
        step_2(z);
        step_3(z);
        step_4(z);
        step_5(z);
    }

    z->p[z->k + 1] = 0; /* C string form for now */
    return z->p;
}

/* -NEW-
   This is a table of irregular forms. It is quite short, but still
   reflects the errors actually drawn to Martin Porter's attention over
   a 20 year period!

   Extend it as necessary.

   The form of the table is:

     "p1" "s11/s12/s13/ ... /"
     "p2" "s21/s22/s23/ ... /"
     ...
     "pn" "sn1/sn2/sn3/ ... /"
     0, 0

   String sij is mapped to paradigm form pi, and the main stemming
   process is then bypassed.
*/

static const char * irregular_forms[] = {

    "sky",     "sky/skies/",
    "die",     "dying/",
    "lie",     "lying/",
    "tie",     "tying/",
    "news",    "news/",
    "inning",  "innings/inning/",
    "outing",  "outings/outing/",
    "canning", "cannings/canning/",
    "howe",    "howe/",

    /*-NEW-*/
    "proceed", "proceed/",
    "exceed",  "exceed/",
    "succeed", "succeed/",  /* Hiranmay Ghosh */

    0, 0  /* terminator */

};


/*
 * is_stopword part
 */
typedef struct {
	unsigned char	val;
	unsigned char	flag;
	unsigned char	right;

	unsigned char	child;
} ESWNODE;

/* is exists left tree ? */
#define L	0x01
/* finish word flag */
#define F	0x02
#define ISLEFT(x)	(((ESWNODE*)x)->flag & L)
#define ISFINISH(x)	(((ESWNODE*)x)->flag & F)


static ESWNODE engstoptree[] = {
	{'m',L,9,126},
	{'d',L,4,71},
	{'b',L,2,40},
	{'a',F,0,14},
	{'c',0,0,62},
	{'f',L,2,79},
	{'e',0,0,75},
	{'h',0,1,90},
	{'i',F,0,108},
	{'t',L,4,177},
	{'o',L,2,135},
	{'n',0,0,131},
	{'s',0,0,156},
	{'v',L,2,210},
	{'u',0,0,201},
	{'w',0,1,211},
	{'y',0,0,237},

	{'m',L|F,5,0},
	{'f',L,2,12},
	{'b',0,0,7},
	{'g',0,1,13},
	{'l',0,0,17},
	{'r',L,2,19},
	{'n',F,0,16},
	{'s',F,1,0},
	{'t',F,0,0},

	{'o',0,0,1},

	{'u',0,1,2},
	{'v',F,0,0},

	{'t',F,0,0},

	{'t',0,0,1},

	{'e',0,0,1},

	{'r',F,0,0},

	{'a',0,0,1},

	{'i',0,0,1},

	{'n',F,0,1},

	{'s',0,0,1},

	{'t',F,0,0},

	{'l',F,0,0},

	{'d',F,1,0},
	{'i',F,0,0},

	{'e',F,0,0},

	{'o',L,2,21},
	{'e',F,0,3},
	{'u',0,1,21},
	{'y',F,0,0},

	{'f',L,3,9},
	{'c',0,1,4},
	{'e',0,0,6},
	{'l',0,1,8},
	{'t',0,0,9},

	{'a',0,0,1},

	{'u',0,0,1},

	{'s',F,0,0},

	{'n',F,0,0},

	{'o',0,0,1},

	{'r',F,0,0},

	{'o',0,0,1},

	{'w',F,0,0},

	{'w',0,0,1},

	{'e',0,0,1},

	{'e',0,0,1},

	{'n',F,0,0},

	{'t',0,0,1},

	{'h',F,0,0},

	{'t',F,0,0},

	{'a',0,1,2},
	{'o',0,0,2},

	{'n',F,0,0},

	{'u',0,0,1},

	{'l',0,0,1},

	{'d',F,0,0},

	{'o',L|F,2,4},
	{'i',0,0,2},
	{'u',0,0,5},

	{'d',F,0,0},

	{'e',F,1,0},
	{'w',0,0,1},

	{'n',F,0,0},

	{'r',0,0,1},

	{'e',F,0,0},

	{'a',0,0,1},

	{'c',0,0,1},

	{'h',F,0,0},

	{'o',L,2,5},
	{'e',0,0,3},
	{'r',0,1,4},
	{'u',0,0,5},

	{'w',F,0,0},

	{'r',F,0,0},

	{'o',0,0,1},

	{'m',F,0,0},

	{'r',0,0,1},

	{'t',0,0,1},

	{'h',0,0,1},

	{'e',0,0,1},

	{'r',F,0,0},

	{'e',L|F,2,7},
	{'a',F,0,3},
	{'i',F,1,11},
	{'o',0,0,15},

	{'d',F,1,0},
	{'v',0,0,1},

	{'e',F,0,0},

	{'r',F,0,1},

	{'e',F,1,0},
	{'s',0,0,1},

	{'e',0,0,1},

	{'l',0,0,1},

	{'f',F,0,0},

	{'m',F,0,1},

	{'s',0,0,1},

	{'e',0,0,1},

	{'l',0,0,1},

	{'f',F,0,0},

	{'w',F,0,0},

	{'n',L|F,2,4},
	{'f',F,0,0},
	{'s',F,1,0},
	{'t',F,0,3},

	{'t',0,0,1},

	{'o',F,0,0},

	{'s',0,0,1},

	{'e',0,0,1},

	{'l',0,0,1},

	{'f',F,0,0},

	{'o',L,3,6},
	{'a',0,1,4},
	{'e',F,0,0},
	{'u',0,1,7},
	{'y',F,0,8},

	{'y',F,0,0},

	{'r',0,1,2},
	{'s',0,0,2},

	{'e',F,0,0},

	{'t',F,0,0},

	{'s',0,0,1},

	{'t',F,0,0},

	{'s',0,0,1},

	{'e',0,0,1},

	{'l',0,0,1},

	{'f',F,0,0},

	{'o',F,0,1},

	{'r',F,1,0},
	{'t',F,0,0},

	{'t',L,4,11},
	{'n',L|F,2,7},
	{'f',F,0,5},
	{'r',F,0,0},
	{'v',L,2,16},
	{'u',0,0,9},
	{'w',0,0,16},

	{'f',F,0,0},

	{'c',F,1,0},
	{'l',0,0,1},

	{'i',F,0,0},

	{'h',0,0,1},

	{'e',0,0,1},

	{'r',F,0,0},

	{'r',F,1,2},
	{'t',F,0,0},

	{'s',0,0,1},

	{'e',0,0,1},

	{'l',0,0,1},

	{'v',F,0,0},

	{'e',0,0,1},

	{'r',F,0,0},

	{'n',F,0,0},

	{'h',L,2,6},
	{'a',0,0,3},
	{'o',F,1,12},
	{'u',0,0,13},

	{'m',0,0,1},

	{'e',F,0,0},

	{'e',L|F,2,0},
	{'a',0,0,2},
	{'o',0,0,3},

	{'l',0,0,1},

	{'l',F,0,0},

	{'u',0,0,1},

	{'l',0,0,1},

	{'d',F,0,0},

	{'m',0,0,1},

	{'e',F,0,0},

	{'c',0,0,1},

	{'h',F,0,0},

	{'h',0,1,2},
	{'o',F,0,27},

	{'i',L|F,3,0},
	{'a',0,1,4},
	{'e',F,0,5},
	{'o',0,1,17},
	{'r',0,0,18},

	{'n',F,1,0},
	{'t',F,0,0},

	{'n',L|F,3,0},
	{'i',0,1,5},
	{'m',F,0,5},
	{'s',L,2,9},
	{'r',0,0,7},
	{'y',F,0,0},

	{'r',F,0,0},

	{'s',0,0,1},

	{'e',0,0,1},

	{'l',0,0,1},

	{'v',F,0,0},

	{'e',F,0,0},

	{'e',F,0,0},

	{'s',0,0,1},

	{'e',F,0,0},

	{'o',0,0,1},

	{'u',0,0,1},

	{'g',0,0,1},

	{'h',F,0,0},

	{'o',F,0,0},

	{'n',0,1,2},
	{'p',F,0,0},

	{'d',0,1,2},
	{'t',0,0,3},

	{'e',0,0,1},

	{'r',F,0,0},

	{'i',0,0,1},

	{'l',F,0,0},

	{'e',0,0,1},

	{'r',0,0,1},

	{'i',F,0,0},

	{'h',L,3,7},
	{'a',F,1,0},
	{'e',F,0,3},
	{'i',0,1,17},
	{'o',0,0,20},

	{'r',0,0,1},

	{'e',F,0,0},

	{'e',L,2,5},
	{'a',0,0,3},
	{'i',F,1,6},
	{'o',F,0,9},

	{'t',F,0,0},

	{'n',F,1,0},
	{'r',0,0,1},

	{'e',F,0,0},

	{'c',0,1,2},
	{'l',0,0,2},

	{'h',F,0,0},

	{'e',F,0,0},

	{'m',F,0,0},

	{'l',0,1,2},
	{'t',0,0,2},

	{'l',F,0,0},

	{'h',F,0,0},

	{'u',0,0,1},

	{'l',0,0,1},

	{'d',F,0,0},

	{'o',0,0,1},

	{'u',F,0,1},

	{'r',F,0,1},

	{'s',0,0,1},

	{'e',0,0,1},

	{'l',0,0,1},

	{'f',F,1,0},
	{'v',F,0,0}
};

static unsigned int
find_english_stopword( unsigned char *buf, int len ) {
	ESWNODE    *ptr = engstoptree;
	int     result = 0;
	unsigned char *cur = buf;

	while( cur - buf < len ) {
		if ( ptr->val == *cur ) {
			cur++;
			if ( ISFINISH(ptr) ) result = cur - buf;
			if ( ! ptr->child ) break;
			ptr += ptr->child;
		} else if ( ptr->val > *cur ) {
			if ( ISLEFT(ptr) )
				ptr++;
			else
				break;
		} else {
			if ( ptr->right )
				ptr += ptr->right;
			else
				break;
		}
	}
	return result;
}

#undef L
#undef F
#undef ISLEFT
#undef ISFINISH

static int
is_stopengword(void* obj,char* word,int len) {
	return ( len == find_english_stopword((unsigned char*)word, len) ) ? 1 : 0;
}

static void * setup_english_stemmer()
{
    struct english_stemmer * z = (struct english_stemmer *) malloc(sizeof(struct english_stemmer));
    z->p = 0; z->p_size = 0;
    z->irregulars = create_pool(irregular_forms);
    return (void *) z;
}

static void closedown_english_stemmer(void * z_)
{
    struct english_stemmer * z = (struct english_stemmer *) z_;
    free_pool(z->irregulars);
    free(z->p);
    free(z);
}

static char*
engstemming(void* obj, char *word, int *len) {
	struct english_stemmer * z = (struct english_stemmer *) obj;
	const char* stemmed_word;
	char *result = word;

	while(result-word < *len) {
		*result = tolower((unsigned char) *result);
		result++;
	}
	stemmed_word = english_stem(obj, word, 0, *len-1);
	*len = z->k + 1;

	result = (char*)palloc( *len );
	memcpy((void*)result, (void*)stemmed_word, *len);
	return result;
}
#endif /* DICT_BODY */

#ifdef DICT_TABLE
TABLE_DICT_START
	"C",
	setup_english_stemmer,
	closedown_english_stemmer,
	engstemming,
	NULL,
	is_stopengword
TABLE_DICT_END
#endif