From 031cc55bbea6b3a6b67c700498a78fb1d4399476 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 1 Jul 2013 11:00:14 +0300 Subject: [PATCH] Optimize pglz compressor for small inputs. The pglz compressor has a significant startup cost, because it has to initialize to zeros the history-tracking hash table. On a 64-bit system, the hash table was 64kB in size. While clearing memory is pretty fast, for very short inputs the relative cost of that was quite large. This patch alleviates that in two ways. First, instead of storing pointers in the hash table, store 16-bit indexes into the hist_entries array. That slashes the size of the hash table to 1/2 or 1/4 of the original, depending on the pointer width. Secondly, adjust the size of the hash table based on input size. For very small inputs, you don't need a large hash table to avoid collisions. Review by Amit Kapila. --- src/backend/utils/adt/pg_lzcompress.c | 95 +++++++++++++++++++-------- 1 file changed, 66 insertions(+), 29 deletions(-) diff --git a/src/backend/utils/adt/pg_lzcompress.c b/src/backend/utils/adt/pg_lzcompress.c index 66c64c198f..ae6751929e 100644 --- a/src/backend/utils/adt/pg_lzcompress.c +++ b/src/backend/utils/adt/pg_lzcompress.c @@ -112,7 +112,7 @@ * of identical bytes like trailing spaces) and for bigger ones * our 4K maximum look-back distance is too small. * - * The compressor creates a table for 8192 lists of positions. + * The compressor creates a table for lists of positions. * For each input position (except the last 3), a hash key is * built from the 4 next input bytes and the position remembered * in the appropriate list. Thus, the table points to linked @@ -120,9 +120,12 @@ * matching strings. This is done on the fly while the input * is compressed into the output area. Table entries are only * kept for the last 4096 input positions, since we cannot use - * back-pointers larger than that anyway. + * back-pointers larger than that anyway. The size of the hash + * table is chosen based on the size of the input - a larger table + * has a larger startup cost, as it needs to be initialized to + * zero, but reduces the number of hash collisions on long inputs. * - * For each byte in the input, it's hash key (built from this + * For each byte in the input, its hash key (built from this * byte and the next 3) is used to find the appropriate list * in the table. The lists remember the positions of all bytes * that had the same hash key in the past in increasing backward @@ -180,8 +183,7 @@ * Local definitions * ---------- */ -#define PGLZ_HISTORY_LISTS 8192 /* must be power of 2 */ -#define PGLZ_HISTORY_MASK (PGLZ_HISTORY_LISTS - 1) +#define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */ #define PGLZ_HISTORY_SIZE 4096 #define PGLZ_MAX_MATCH 273 @@ -241,9 +243,15 @@ const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data; * Statically allocated work arrays for history * ---------- */ -static PGLZ_HistEntry *hist_start[PGLZ_HISTORY_LISTS]; -static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE]; +static int16 hist_start[PGLZ_MAX_HISTORY_LISTS]; +static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1]; +/* + * Element 0 in hist_entries is unused, and means 'invalid'. Likewise, + * INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'. + */ +#define INVALID_ENTRY 0 +#define INVALID_ENTRY_PTR (&hist_entries[INVALID_ENTRY]) /* ---------- * pglz_hist_idx - @@ -257,10 +265,10 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE]; * hash keys more. * ---------- */ -#define pglz_hist_idx(_s,_e) ( \ +#define pglz_hist_idx(_s,_e, _mask) ( \ ((((_e) - (_s)) < 4) ? (int) (_s)[0] : \ - (((_s)[0] << 9) ^ ((_s)[1] << 6) ^ \ - ((_s)[2] << 3) ^ (_s)[3])) & (PGLZ_HISTORY_MASK) \ + (((_s)[0] << 6) ^ ((_s)[1] << 4) ^ \ + ((_s)[2] << 2) ^ (_s)[3])) & (_mask) \ ) @@ -276,28 +284,35 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE]; * _hn and _recycle are modified in the macro. * ---------- */ -#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e) \ +#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask) \ do { \ - int __hindex = pglz_hist_idx((_s),(_e)); \ - PGLZ_HistEntry **__myhsp = &(_hs)[__hindex]; \ + int __hindex = pglz_hist_idx((_s),(_e), (_mask)); \ + int16 *__myhsp = &(_hs)[__hindex]; \ PGLZ_HistEntry *__myhe = &(_he)[_hn]; \ if (_recycle) { \ if (__myhe->prev == NULL) \ - (_hs)[__myhe->hindex] = __myhe->next; \ + (_hs)[__myhe->hindex] = __myhe->next - (_he); \ else \ __myhe->prev->next = __myhe->next; \ if (__myhe->next != NULL) \ __myhe->next->prev = __myhe->prev; \ } \ - __myhe->next = *__myhsp; \ + __myhe->next = &(_he)[*__myhsp]; \ __myhe->prev = NULL; \ __myhe->hindex = __hindex; \ __myhe->pos = (_s); \ - if (*__myhsp != NULL) \ - (*__myhsp)->prev = __myhe; \ - *__myhsp = __myhe; \ - if (++(_hn) >= PGLZ_HISTORY_SIZE) { \ - (_hn) = 0; \ + /* If there was an existing entry in this hash slot, link */ \ + /* this new entry to it. However, the 0th entry in the */ \ + /* entries table is unused, so we can freely scribble on it. */ \ + /* So don't bother checking if the slot was used - we'll */ \ + /* scribble on the unused entry if it was not, but that's */ \ + /* harmless. Avoiding the branch in this critical path */ \ + /* speeds this up a little bit. */ \ + /* if (*__myhsp != INVALID_ENTRY) */ \ + (_he)[(*__myhsp)].prev = __myhe; \ + *__myhsp = _hn; \ + if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) { \ + (_hn) = 1; \ (_recycle) = true; \ } \ } while (0) @@ -372,18 +387,20 @@ do { \ * ---------- */ static inline int -pglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end, - int *lenp, int *offp, int good_match, int good_drop) +pglz_find_match(int16 *hstart, const char *input, const char *end, + int *lenp, int *offp, int good_match, int good_drop, int mask) { PGLZ_HistEntry *hent; + int16 hentno; int32 len = 0; int32 off = 0; /* * Traverse the linked history list until a good enough match is found. */ - hent = hstart[pglz_hist_idx(input, end)]; - while (hent) + hentno = hstart[pglz_hist_idx(input, end, mask)]; + hent = &hist_entries[hentno]; + while (hent != INVALID_ENTRY_PTR) { const char *ip = input; const char *hp = hent->pos; @@ -484,7 +501,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, { unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header); unsigned char *bstart = bp; - int hist_next = 0; + int hist_next = 1; bool hist_recycle = false; const char *dp = source; const char *dend = source + slen; @@ -500,6 +517,8 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, int32 result_size; int32 result_max; int32 need_rate; + int hashsz; + int mask; /* * Our fallback strategy is the default. @@ -555,11 +574,29 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, else result_max = (slen * (100 - need_rate)) / 100; + /* + * Experiments suggest that these hash sizes work pretty well. A large + * hash table minimizes collision, but has a higher startup cost. For + * a small input, the startup cost dominates. The table size must be + * a power of two. + */ + if (slen < 128) + hashsz = 512; + else if (slen < 256) + hashsz = 1024; + else if (slen < 512) + hashsz = 2048; + else if (slen < 1024) + hashsz = 4096; + else + hashsz = 8192; + mask = hashsz - 1; + /* * Initialize the history lists to empty. We do not need to zero the * hist_entries[] array; its entries are initialized as they are used. */ - memset(hist_start, 0, sizeof(hist_start)); + memset(hist_start, 0, hashsz * sizeof(int16)); /* * Compress the source directly into the output buffer. @@ -589,7 +626,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, * Try to find a match in the history */ if (pglz_find_match(hist_start, dp, dend, &match_len, - &match_off, good_match, good_drop)) + &match_off, good_match, good_drop, mask)) { /* * Create the tag and add history entries for all matched @@ -600,7 +637,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, { pglz_hist_add(hist_start, hist_entries, hist_next, hist_recycle, - dp, dend); + dp, dend, mask); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ } @@ -614,7 +651,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); pglz_hist_add(hist_start, hist_entries, hist_next, hist_recycle, - dp, dend); + dp, dend, mask); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ }