Add support to dynahash.c for partitioning shared hashtables according
to the low-order bits of the entry hash value.  Also make some incidental
cleanups in the dynahash API, such as not exporting the hash header
structs to the world.
Tom Lane 2006-07-22 23:04:39 +00:00
parent c0e9b3139f
commit 51ee9fa157
4 changed files with 387 additions and 180 deletions
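
For orientation, here is a minimal sketch (not part of the commit) of how a caller might create a shared table in the new partitioned mode; MyEntry, MY_NUM_PARTITIONS, MyShmemHash, and MyShmemHashInit are hypothetical names, not code from this commit.

#include "postgres.h"
#include "storage/shmem.h"
#include "utils/hsearch.h"

#define MY_NUM_PARTITIONS 16            /* hypothetical; must be a power of 2 */

typedef struct MyEntry                  /* hypothetical entry layout */
{
	uint32		key;					/* hash key must be the first field */
	int			payload;
} MyEntry;

static HTAB *MyShmemHash;

void
MyShmemHashInit(long init_size, long max_size)
{
	HASHCTL		info;
	int			hash_flags;

	MemSet(&info, 0, sizeof(info));
	info.keysize = sizeof(uint32);
	info.entrysize = sizeof(MyEntry);
	info.hash = tag_hash;				/* standard hash function from hashfn.c */
	info.num_partitions = MY_NUM_PARTITIONS;
	hash_flags = HASH_ELEM | HASH_FUNCTION | HASH_PARTITION;

	/* ShmemInitHash supplies the shared header and fixed-size directory */
	MyShmemHash = ShmemInitHash("My Shared Hash",
								init_size, max_size,
								&info, hash_flags);
}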

src/backend/storage/ipc/shmem.c

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/ipc/shmem.c,v 1.93 2006/07/14 14:52:22 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/ipc/shmem.c,v 1.94 2006/07/22 23:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -211,9 +211,6 @@ InitShmemIndex(void)
{
HASHCTL info;
int hash_flags;
ShmemIndexEnt *result,
item;
bool found;
/*
* Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
@ -227,32 +224,11 @@ InitShmemIndex(void)
info.entrysize = sizeof(ShmemIndexEnt);
hash_flags = HASH_ELEM;
/* This will acquire the shmem index lock, but not release it. */
ShmemIndex = ShmemInitHash("ShmemIndex",
SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
&info, hash_flags);
if (!ShmemIndex)
elog(FATAL, "could not initialize Shmem Index");
/*
* Now, create an entry in the hashtable for the index itself.
*/
if (!IsUnderPostmaster)
{
MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE);
result = (ShmemIndexEnt *)
hash_search(ShmemIndex, (void *) &item, HASH_ENTER, &found);
Assert(!found);
result->location = MAKE_OFFSET(ShmemIndex->hctl);
result->size = SHMEM_INDEX_SIZE;
}
/* now release the lock acquired in ShmemInitStruct */
LWLockRelease(ShmemIndexLock);
}
/*
@ -295,7 +271,7 @@ ShmemInitHash(const char *name, /* table string name for shmem index */
/* look it up in the shmem index */
location = ShmemInitStruct(name,
sizeof(HASHHDR) + infoP->dsize * sizeof(HASHSEGMENT),
hash_get_shared_size(infoP, hash_flags),
&found);
/*
@ -312,9 +288,8 @@ ShmemInitHash(const char *name, /* table string name for shmem index */
if (found)
hash_flags |= HASH_ATTACH;
/* Now provide the header and directory pointers */
/* Pass location of hashtable header to hash_create */
infoP->hctl = (HASHHDR *) location;
infoP->dir = (HASHSEGMENT *) (((char *) location) + sizeof(HASHHDR));
return hash_create(name, init_size, infoP, hash_flags);
}
@ -363,14 +338,16 @@ ShmemInitStruct(const char *name, Size size, bool *foundPtr)
* If the shmem index doesn't exist, we are bootstrapping: we must
* be trying to init the shmem index itself.
*
* Notice that the ShmemIndexLock is held until the shmem index
* has been completely initialized.
* Notice that the ShmemIndexLock is released before the shmem
* index has been initialized. This should be OK because no
* other process can be accessing shared memory yet.
*/
Assert(shmemseghdr->indexoffset == 0);
structPtr = ShmemAlloc(size);
shmemseghdr->indexoffset = MAKE_OFFSET(structPtr);
*foundPtr = FALSE;
}
LWLockRelease(ShmemIndexLock);
return structPtr;
}

src/backend/storage/lmgr/lock.c

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.166 2006/07/14 14:52:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.167 2006/07/22 23:04:39 tgl Exp $
*
* NOTES
* A lock table is a shared memory hash table. When
@ -1958,7 +1958,7 @@ GetLockStatusData(void)
{
LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
proclockTable = LockMethodProcLockHash[i];
els += proclockTable->hctl->nentries;
els += hash_get_num_entries(proclockTable);
}
data->nelements = els;

src/backend/utils/hash/dynahash.c

@ -3,17 +3,36 @@
* dynahash.c
* dynamic hash tables
*
* dynahash.c supports both local-to-a-backend hash tables and hash tables in
* shared memory. For shared hash tables, it is the caller's responsibility
* to provide appropriate access interlocking. The simplest convention is
* that a single LWLock protects the whole hash table. Searches (HASH_FIND or
* hash_seq_search) need only shared lock, but any update requires exclusive
* lock. For heavily-used shared tables, the single-lock approach creates a
* concurrency bottleneck, so we also support "partitioned" locking wherein
* there are multiple LWLocks guarding distinct subsets of the table. To use
* a hash table in partitioned mode, the HASH_PARTITION flag must be given
* to hash_create. This prevents any attempt to split buckets on-the-fly.
* Therefore, each hash bucket chain operates independently, and no fields
* of the hash header change after init except nentries and freeList.
* A partitioned table uses a spinlock to guard changes of those two fields.
* This lets any subset of the hash buckets be treated as a separately
* lockable partition. We expect callers to use the low-order bits of a
* lookup key's hash value as a partition number --- this will work because
* of the way calc_bucket() maps hash values to bucket numbers.
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.69 2006/07/14 14:52:25 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.70 2006/07/22 23:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* Original comments:
*
* Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
* Coded into C, with minor code improvements, and with hsearch(3) interface,
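
To make the locking convention described in the new header comment concrete, here is a sketch of a caller-side lookup on a partitioned table. The partition lock array and MY_NUM_PARTITIONS are hypothetical and are the caller's responsibility; dynahash itself only provides get_hash_value() and hash_search_with_hash_value().

#include "postgres.h"
#include "storage/lwlock.h"
#include "utils/hsearch.h"

#define MY_NUM_PARTITIONS 16            /* power of 2, must match num_partitions */

static bool
my_partitioned_lookup(HTAB *hashp, const void *key, LWLockId *partition_locks)
{
	uint32		hashvalue = get_hash_value(hashp, key);
	uint32		partition = hashvalue % MY_NUM_PARTITIONS;	/* low-order bits */
	bool		found;

	LWLockAcquire(partition_locks[partition], LW_SHARED);
	(void) hash_search_with_hash_value(hashp, key, hashvalue,
									   HASH_FIND, &found);
	LWLockRelease(partition_locks[partition]);

	return found;
}
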
@ -45,9 +64,107 @@
#include "postgres.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/dynahash.h"
#include "utils/memutils.h"
/*
* Constants
*
* A hash table has a top-level "directory", each of whose entries points
* to a "segment" of ssize bucket headers. The maximum number of hash
* buckets is thus dsize * ssize (but dsize may be expansible). Of course,
* the number of records in the table can be larger, but we don't want a
* whole lot of records per bucket or performance goes down.
*
* In a hash table allocated in shared memory, the directory cannot be
* expanded because it must stay at a fixed address. The directory size
* should be selected using hash_select_dirsize (and you'd better have
* a good idea of the maximum number of entries!). For non-shared hash
* tables, the initial directory size can be left at the default.
*/
#define DEF_SEGSIZE 256
#define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
#define DEF_DIRSIZE 256
#define DEF_FFACTOR 1 /* default fill factor */
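
As a hedged illustration of the sizing guidance above: a module that owns a shared table typically reserves space at startup with hash_estimate_size(), leaving the fixed directory size to be chosen when the table is actually created. MAX_MY_ENTRIES, MyEntry, and MyShmemSize are hypothetical names.

#include "postgres.h"
#include "utils/hsearch.h"

#define MAX_MY_ENTRIES	4096					/* hypothetical table capacity */

typedef struct MyEntry { uint32 key; int payload; } MyEntry;	/* hypothetical */

Size
MyShmemSize(void)
{
	/* estimate covers header, directory, segments, and the entries themselves */
	return hash_estimate_size(MAX_MY_ENTRIES, sizeof(MyEntry));
}
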
/* A hash bucket is a linked list of HASHELEMENTs */
typedef HASHELEMENT *HASHBUCKET;
/* A hash segment is an array of bucket headers */
typedef HASHBUCKET *HASHSEGMENT;
/*
* Header structure for a hash table --- contains all changeable info
*
* In a shared-memory hash table, the HASHHDR is in shared memory, while
* each backend has a local HTAB struct. For a non-shared table, there isn't
* any functional difference between HASHHDR and HTAB, but we separate them
* anyway to share code between shared and non-shared tables.
*/
struct HASHHDR
{
/* In a partitioned table, take this lock to touch nentries or freeList */
slock_t mutex; /* unused if not partitioned table */
/* These fields change during entry addition/deletion */
long nentries; /* number of entries in hash table */
HASHELEMENT *freeList; /* linked list of free elements */
/* These fields can change, but not in a partitioned table */
/* Also, dsize can't change in a shared table, even if unpartitioned */
long dsize; /* directory size */
long nsegs; /* number of allocated segments (<= dsize) */
uint32 max_bucket; /* ID of maximum bucket in use */
uint32 high_mask; /* mask to modulo into entire table */
uint32 low_mask; /* mask to modulo into lower half of table */
/* These fields are fixed at hashtable creation */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
long num_partitions; /* # partitions (must be power of 2), or 0 */
long ffactor; /* target fill factor */
long max_dsize; /* 'dsize' limit if directory is fixed size */
long ssize; /* segment size --- must be power of 2 */
int sshift; /* segment shift = log2(ssize) */
int nelem_alloc; /* number of entries to allocate at once */
#ifdef HASH_STATISTICS
/*
* Count statistics here. NB: stats code doesn't bother with mutex,
* so counts could be corrupted a bit in a partitioned table.
*/
long accesses;
long collisions;
#endif
};
#define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)
/*
* Top control structure for a hashtable --- in a shared table, each backend
* has its own copy (OK since no fields change at runtime)
*/
struct HTAB
{
HASHHDR *hctl; /* => shared control information */
HASHSEGMENT *dir; /* directory of segment starts */
HashValueFunc hash; /* hash function */
HashCompareFunc match; /* key comparison function */
HashCopyFunc keycopy; /* key copying function */
HashAllocFunc alloc; /* memory allocator */
MemoryContext hcxt; /* memory context if default allocator used */
char *tabname; /* table name (for error messages) */
bool isshared; /* true if table is in shared memory */
/* We keep local copies of these fixed values to reduce contention */
Size keysize; /* hash key length in bytes */
long ssize; /* segment size --- must be power of 2 */
int sshift; /* segment shift = log2(ssize) */
};
/*
* Key (also entry) part of a HASHELEMENT
*/
@ -58,6 +175,12 @@
*/
#define MOD(x,y) ((x) & ((y)-1))
#if HASH_STATISTICS
static long hash_accesses,
hash_collisions,
hash_expansions;
#endif
/*
* Private function prototypes
*/
@ -66,6 +189,7 @@ static HASHSEGMENT seg_alloc(HTAB *hashp);
static bool element_alloc(HTAB *hashp, int nelem);
static bool dir_realloc(HTAB *hashp);
static bool expand_table(HTAB *hashp);
static HASHBUCKET get_hash_entry(HTAB *hashp);
static void hdefault(HTAB *hashp);
static int choose_nelem_alloc(Size entrysize);
static bool init_htab(HTAB *hashp, long nelem);
@ -85,13 +209,6 @@ DynaHashAlloc(Size size)
}
#if HASH_STATISTICS
static long hash_accesses,
hash_collisions,
hash_expansions;
#endif
/************************** CREATE ROUTINES **********************/
/*
@ -185,17 +302,26 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
if (flags & HASH_SHARED_MEM)
{
/*
* ctl structure is preallocated for shared memory tables. Note that
* HASH_DIRSIZE and HASH_ALLOC had better be set as well.
* ctl structure and directory are preallocated for shared memory
* tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set
* as well.
*/
hashp->hctl = info->hctl;
hashp->dir = info->dir;
hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
hashp->hcxt = NULL;
hashp->isshared = true;
/* hash table already exists, we're just attaching to it */
if (flags & HASH_ATTACH)
{
/* make local copies of some heavily-used values */
hctl = hashp->hctl;
hashp->keysize = hctl->keysize;
hashp->ssize = hctl->ssize;
hashp->sshift = hctl->sshift;
return hashp;
}
}
else
{
@ -218,9 +344,16 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
hdefault(hashp);
hctl = hashp->hctl;
#ifdef HASH_STATISTICS
hctl->accesses = hctl->collisions = 0;
#endif
if (flags & HASH_PARTITION)
{
/* Doesn't make sense to partition a local hash table */
Assert(flags & HASH_SHARED_MEM);
/* # of partitions had better be a power of 2 */
Assert(info->num_partitions == (1L << my_log2(info->num_partitions)));
hctl->num_partitions = info->num_partitions;
}
if (flags & HASH_SEGMENT)
{
@ -252,6 +385,11 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
hctl->entrysize = info->entrysize;
}
/* make local copies of heavily-used constant fields */
hashp->keysize = hctl->keysize;
hashp->ssize = hctl->ssize;
hashp->sshift = hctl->sshift;
/* Build the hash directory structure */
if (!init_htab(hashp, nelem))
{
@ -292,22 +430,29 @@ hdefault(HTAB *hashp)
MemSet(hctl, 0, sizeof(HASHHDR));
hctl->ssize = DEF_SEGSIZE;
hctl->sshift = DEF_SEGSIZE_SHIFT;
hctl->dsize = DEF_DIRSIZE;
hctl->ffactor = DEF_FFACTOR;
hctl->nentries = 0;
hctl->freeList = NULL;
hctl->dsize = DEF_DIRSIZE;
hctl->nsegs = 0;
/* rather pointless defaults for key & entry size */
hctl->keysize = sizeof(char *);
hctl->entrysize = 2 * sizeof(char *);
hctl->num_partitions = 0; /* not partitioned */
hctl->ffactor = DEF_FFACTOR;
/* table has no fixed maximum size */
hctl->max_dsize = NO_MAX_DSIZE;
/* garbage collection for HASH_REMOVE */
hctl->freeList = NULL;
hctl->ssize = DEF_SEGSIZE;
hctl->sshift = DEF_SEGSIZE_SHIFT;
#ifdef HASH_STATISTICS
hctl->accesses = hctl->collisions = 0;
#endif
}
/*
@ -342,6 +487,10 @@ choose_nelem_alloc(Size entrysize)
return nelem_alloc;
}
/*
* Compute derived fields of hctl and build the initial directory/segment
* arrays
*/
static bool
init_htab(HTAB *hashp, long nelem)
{
@ -351,6 +500,12 @@ init_htab(HTAB *hashp, long nelem)
int nbuckets;
int nsegs;
/*
* initialize mutex if it's a partitioned table
*/
if (IS_PARTITIONED(hctl))
SpinLockInit(&hctl->mutex);
/*
* Divide number of elements by the fill factor to determine a desired
* number of buckets. Allocate space for the next greater power of two
@ -360,6 +515,15 @@ init_htab(HTAB *hashp, long nelem)
nbuckets = 1 << my_log2(lnbuckets);
/*
* In a partitioned table, nbuckets must be at least equal to
* num_partitions; were it less, keys with apparently different partition
* numbers would map to the same bucket, breaking partition independence.
* (Normally nbuckets will be much bigger; this is just a safety check.)
*/
while (nbuckets < hctl->num_partitions)
nbuckets <<= 1;
hctl->max_bucket = hctl->low_mask = nbuckets - 1;
hctl->high_mask = (nbuckets << 1) - 1;
@ -491,6 +655,19 @@ hash_select_dirsize(long num_entries)
return nDirEntries;
}
/*
* Compute the required initial memory allocation for a shared-memory
* hashtable with the given parameters. We need space for the HASHHDR
* and for the (non expansible) directory.
*/
Size
hash_get_shared_size(HASHCTL *info, int flags)
{
Assert(flags & HASH_DIRSIZE);
Assert(info->dsize == info->max_dsize);
return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
}
/********************** DESTROY ROUTINES ************************/
@ -521,7 +698,7 @@ hash_stats(const char *where, HTAB *hashp)
where, hashp->hctl->accesses, hashp->hctl->collisions);
fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
hashp->hctl->nentries, hashp->hctl->keysize,
hashp->hctl->nentries, (long) hashp->hctl->keysize,
hashp->hctl->max_bucket, hashp->hctl->nsegs);
fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
where, hash_accesses, hash_collisions);
@ -533,6 +710,19 @@ hash_stats(const char *where, HTAB *hashp)
/*******************************SEARCH ROUTINES *****************************/
/*
* get_hash_value -- exported routine to calculate a key's hash value
*
* We export this because for partitioned tables, callers need to compute
* the partition number (from the low-order bits of the hash value) before
* searching.
*/
uint32
get_hash_value(HTAB *hashp, const void *keyPtr)
{
return hashp->hash(keyPtr, hashp->keysize);
}
/* Convert a hash value to a bucket number */
static inline uint32
calc_bucket(HASHHDR *hctl, uint32 hash_val)
@ -546,8 +736,9 @@ calc_bucket(HASHHDR *hctl, uint32 hash_val)
return bucket;
}
/*----------
/*
* hash_search -- look up key in table and perform action
* hash_search_with_hash_value -- same, with key's hash value already computed
*
* action is one of:
* HASH_FIND: look up key in table
@ -568,17 +759,32 @@ calc_bucket(HASHHDR *hctl, uint32 hash_val)
* If foundPtr isn't NULL, then *foundPtr is set TRUE if we found an
* existing entry in the table, FALSE otherwise. This is needed in the
* HASH_ENTER case, but is redundant with the return value otherwise.
*----------
*
* For hash_search_with_hash_value, the hashvalue parameter must have been
* calculated with get_hash_value().
*/
void *
hash_search(HTAB *hashp,
const void *keyPtr,
HASHACTION action,
bool *foundPtr)
{
return hash_search_with_hash_value(hashp,
keyPtr,
hashp->hash(keyPtr, hashp->keysize),
action,
foundPtr);
}
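
A brief sketch of the HASH_ENTER path described in the comment above, on an ordinary single-lock table; MyEntry and my_hash are hypothetical, and entries are assumed to start with a uint32 key.

#include "postgres.h"
#include "utils/hsearch.h"

typedef struct MyEntry { uint32 key; int payload; } MyEntry;	/* hypothetical */

static void
my_insert(HTAB *my_hash, uint32 key, int payload)
{
	MyEntry    *entry;
	bool		found;

	/* HASH_ENTER ereports on out-of-memory; HASH_ENTER_NULL returns NULL instead */
	entry = (MyEntry *) hash_search(my_hash, (void *) &key,
									HASH_ENTER, &found);
	if (!found)
		entry->payload = payload;		/* caller fills the new entry's data */
}
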
void *
hash_search_with_hash_value(HTAB *hashp,
const void *keyPtr,
uint32 hashvalue,
HASHACTION action,
bool *foundPtr)
{
HASHHDR *hctl = hashp->hctl;
Size keysize = hctl->keysize;
uint32 hashvalue;
Size keysize;
uint32 bucket;
long segment_num;
long segment_ndx;
@ -595,11 +801,10 @@ hash_search(HTAB *hashp,
/*
* Do the initial lookup
*/
hashvalue = hashp->hash(keyPtr, keysize);
bucket = calc_bucket(hctl, hashvalue);
segment_num = bucket >> hctl->sshift;
segment_ndx = MOD(bucket, hctl->ssize);
segment_num = bucket >> hashp->sshift;
segment_ndx = MOD(bucket, hashp->ssize);
segp = hashp->dir[segment_num];
@ -613,6 +818,7 @@ hash_search(HTAB *hashp,
* Follow collision chain looking for matching key
*/
match = hashp->match; /* save one fetch in inner loop */
keysize = hashp->keysize; /* ditto */
while (currBucket != NULL)
{
@ -643,15 +849,25 @@ hash_search(HTAB *hashp,
case HASH_REMOVE:
if (currBucket != NULL)
{
Assert(hctl->nentries > 0);
hctl->nentries--;
/* use volatile pointer to prevent code rearrangement */
volatile HASHHDR *hctlv = hctl;
/* if partitioned, must lock to touch nentries and freeList */
if (IS_PARTITIONED(hctlv))
SpinLockAcquire(&hctlv->mutex);
Assert(hctlv->nentries > 0);
hctlv->nentries--;
/* remove record from hash bucket's chain. */
*prevBucketPtr = currBucket->link;
/* add the record to the freelist for this table. */
currBucket->link = hctl->freeList;
hctl->freeList = currBucket;
currBucket->link = hctlv->freeList;
hctlv->freeList = currBucket;
if (IS_PARTITIONED(hctlv))
SpinLockRelease(&hctlv->mutex);
/*
* better hope the caller is synchronizing access to this
@ -672,32 +888,23 @@ hash_search(HTAB *hashp,
if (currBucket != NULL)
return (void *) ELEMENTKEY(currBucket);
/* get the next free element */
currBucket = hctl->freeList;
currBucket = get_hash_entry(hashp);
if (currBucket == NULL)
{
/* no free elements. allocate another chunk of buckets */
if (!element_alloc(hashp, hctl->nelem_alloc))
{
/* out of memory */
if (action == HASH_ENTER_NULL)
return NULL;
/* report a generic message */
if (hashp->isshared)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));
else
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
currBucket = hctl->freeList;
Assert(currBucket != NULL);
/* out of memory */
if (action == HASH_ENTER_NULL)
return NULL;
/* report a generic message */
if (hashp->isshared)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));
else
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
hctl->freeList = currBucket->link;
/* link into hashbucket chain */
*prevBucketPtr = currBucket;
currBucket->link = NULL;
@ -708,8 +915,10 @@ hash_search(HTAB *hashp,
/* caller is expected to fill the data field on return */
/* Check if it is time to split the segment */
if (++hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor)
/* Check if it is time to split a bucket */
/* Can't split if running in partitioned mode */
if (!IS_PARTITIONED(hctl) &&
hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor)
{
/*
* NOTE: failure to expand table is not a fatal error, it just
@ -726,6 +935,61 @@ hash_search(HTAB *hashp,
return NULL; /* keep compiler quiet */
}
/*
* create a new entry if possible
*/
static HASHBUCKET
get_hash_entry(HTAB *hashp)
{
/* use volatile pointer to prevent code rearrangement */
volatile HASHHDR *hctlv = hashp->hctl;
HASHBUCKET newElement;
for (;;)
{
/* if partitioned, must lock to touch nentries and freeList */
if (IS_PARTITIONED(hctlv))
SpinLockAcquire(&hctlv->mutex);
/* try to get an entry from the freelist */
newElement = hctlv->freeList;
if (newElement != NULL)
break;
/* no free elements. allocate another chunk of buckets */
if (IS_PARTITIONED(hctlv))
SpinLockRelease(&hctlv->mutex);
if (!element_alloc(hashp, hctlv->nelem_alloc))
{
/* out of memory */
return NULL;
}
}
/* remove entry from freelist, bump nentries */
hctlv->freeList = newElement->link;
hctlv->nentries++;
if (IS_PARTITIONED(hctlv))
SpinLockRelease(&hctlv->mutex);
return newElement;
}
/*
* hash_get_num_entries -- get the number of entries in a hashtable
*/
long
hash_get_num_entries(HTAB *hashp)
{
/*
* We currently don't bother with the mutex; it's only sensible to call
* this function if you've got lock on all partitions of the table.
*/
return hashp->hctl->nentries;
}
/*
* hash_seq_init/_search
* Sequentially search through hash table and return
@ -736,6 +1000,9 @@ hash_search(HTAB *hashp,
* UNDEFINED (it might be the one that curIndex is pointing at!). Also,
* if elements are added to the table while the scan is in progress, it is
* unspecified whether they will be visited by the scan or not.
*
* NOTE: to use this with a partitioned hashtable, caller had better hold
* at least shared lock on all partitions of the table throughout the scan!
*/
void
hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
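
Following the note above about partitioned tables, a sketch of a whole-table scan that first takes every partition lock; the lock array and partition count are hypothetical and managed by the caller.

#include "postgres.h"
#include "storage/lwlock.h"
#include "utils/hsearch.h"

#define MY_NUM_PARTITIONS 16            /* hypothetical, power of 2 */

static long
my_count_entries(HTAB *hashp, LWLockId *partition_locks)
{
	HASH_SEQ_STATUS status;
	long		count = 0;
	int			i;

	/* hold (at least) shared lock on every partition for the whole scan */
	for (i = 0; i < MY_NUM_PARTITIONS; i++)
		LWLockAcquire(partition_locks[i], LW_SHARED);

	hash_seq_init(&status, hashp);
	while (hash_seq_search(&status) != NULL)
		count++;

	for (i = 0; i < MY_NUM_PARTITIONS; i++)
		LWLockRelease(partition_locks[i]);

	return count;
}
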
@ -773,7 +1040,7 @@ hash_seq_search(HASH_SEQ_STATUS *status)
curBucket = status->curBucket;
hashp = status->hashp;
hctl = hashp->hctl;
ssize = hctl->ssize;
ssize = hashp->ssize;
max_bucket = hctl->max_bucket;
if (curBucket > max_bucket)
@ -782,7 +1049,7 @@ hash_seq_search(HASH_SEQ_STATUS *status)
/*
* first find the right segment in the table directory.
*/
segment_num = curBucket >> hctl->sshift;
segment_num = curBucket >> hashp->sshift;
segment_ndx = MOD(curBucket, ssize);
segp = hashp->dir[segment_num];
@ -840,13 +1107,15 @@ expand_table(HTAB *hashp)
HASHBUCKET currElement,
nextElement;
Assert(!IS_PARTITIONED(hctl));
#ifdef HASH_STATISTICS
hash_expansions++;
#endif
new_bucket = hctl->max_bucket + 1;
new_segnum = new_bucket >> hctl->sshift;
new_segndx = MOD(new_bucket, hctl->ssize);
new_segnum = new_bucket >> hashp->sshift;
new_segndx = MOD(new_bucket, hashp->ssize);
if (new_segnum >= hctl->nsegs)
{
@ -885,8 +1154,8 @@ expand_table(HTAB *hashp)
* split at this point. With a different way of reducing the hash value,
* that might not be true!
*/
old_segnum = old_bucket >> hctl->sshift;
old_segndx = MOD(old_bucket, hctl->ssize);
old_segnum = old_bucket >> hashp->sshift;
old_segndx = MOD(old_bucket, hashp->ssize);
old_seg = hashp->dir[old_segnum];
new_seg = hashp->dir[new_segnum];
@ -963,12 +1232,12 @@ seg_alloc(HTAB *hashp)
HASHSEGMENT segp;
CurrentDynaHashCxt = hashp->hcxt;
segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->hctl->ssize);
segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
if (!segp)
return NULL;
MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->hctl->ssize);
MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
return segp;
}
@ -979,29 +1248,44 @@ seg_alloc(HTAB *hashp)
static bool
element_alloc(HTAB *hashp, int nelem)
{
HASHHDR *hctl = hashp->hctl;
/* use volatile pointer to prevent code rearrangement */
volatile HASHHDR *hctlv = hashp->hctl;
Size elementSize;
HASHELEMENT *firstElement;
HASHELEMENT *tmpElement;
HASHELEMENT *prevElement;
int i;
/* Each element has a HASHELEMENT header plus user data. */
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctlv->entrysize);
CurrentDynaHashCxt = hashp->hcxt;
tmpElement = (HASHELEMENT *)
hashp->alloc(nelem * elementSize);
firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
if (!tmpElement)
if (!firstElement)
return false;
/* link all the new entries into the freelist */
/* prepare to link all the new entries into the freelist */
prevElement = NULL;
tmpElement = firstElement;
for (i = 0; i < nelem; i++)
{
tmpElement->link = hctl->freeList;
hctl->freeList = tmpElement;
tmpElement->link = prevElement;
prevElement = tmpElement;
tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
}
/* if partitioned, must lock to touch freeList */
if (IS_PARTITIONED(hctlv))
SpinLockAcquire(&hctlv->mutex);
/* freelist could be nonempty if two backends did this concurrently */
firstElement->link = hctlv->freeList;
hctlv->freeList = prevElement;
if (IS_PARTITIONED(hctlv))
SpinLockRelease(&hctlv->mutex);
return true;
}

src/include/utils/hsearch.h

@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* hsearch.h
* for hash tables, particularly hash tables in shared memory
* exported definitions for utils/hash/dynahash.c; see notes therein
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.43 2006/06/25 18:29:49 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.44 2006/07/22 23:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -43,27 +43,6 @@ typedef void *(*HashCopyFunc) (void *dest, const void *src, Size keysize);
*/
typedef void *(*HashAllocFunc) (Size request);
/*
* Constants
*
* A hash table has a top-level "directory", each of whose entries points
* to a "segment" of ssize bucket headers. The maximum number of hash
* buckets is thus dsize * ssize (but dsize may be expansible). Of course,
* the number of records in the table can be larger, but we don't want a
* whole lot of records per bucket or performance goes down.
*
* In a hash table allocated in shared memory, the directory cannot be
* expanded because it must stay at a fixed address. The directory size
* should be selected using hash_select_dirsize (and you'd better have
* a good idea of the maximum number of entries!). For non-shared hash
* tables, the initial directory size can be left at the default.
*/
#define DEF_SEGSIZE 256
#define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
#define DEF_DIRSIZE 256
#define DEF_FFACTOR 1 /* default fill factor */
/*
* HASHELEMENT is the private part of a hashtable entry. The caller's data
* follows the HASHELEMENT structure (on a MAXALIGN'd boundary). The hash key
@ -75,81 +54,42 @@ typedef struct HASHELEMENT
uint32 hashvalue; /* hash function result for this entry */
} HASHELEMENT;
/* A hash bucket is a linked list of HASHELEMENTs */
typedef HASHELEMENT *HASHBUCKET;
/* Hash table header struct is an opaque type known only within dynahash.c */
typedef struct HASHHDR HASHHDR;
/* A hash segment is an array of bucket headers */
typedef HASHBUCKET *HASHSEGMENT;
/* Header structure for a hash table --- contains all changeable info */
typedef struct HASHHDR
{
long dsize; /* Directory Size */
long ssize; /* Segment Size --- must be power of 2 */
int sshift; /* Segment shift = log2(ssize) */
uint32 max_bucket; /* ID of Maximum bucket in use */
uint32 high_mask; /* Mask to modulo into entire table */
uint32 low_mask; /* Mask to modulo into lower half of table */
long ffactor; /* Fill factor */
long nentries; /* Number of entries in hash table */
long nsegs; /* Number of allocated segments */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
long max_dsize; /* 'dsize' limit if directory is fixed size */
int nelem_alloc; /* number of entries to allocate at once */
HASHELEMENT *freeList; /* linked list of free elements */
#ifdef HASH_STATISTICS
long accesses;
long collisions;
#endif
} HASHHDR;
/*
* Top control structure for a hashtable --- need not be shared, since
* no fields change at runtime
*/
typedef struct HTAB
{
HASHHDR *hctl; /* shared control information */
HASHSEGMENT *dir; /* directory of segment starts */
HashValueFunc hash; /* hash function */
HashCompareFunc match; /* key comparison function */
HashCopyFunc keycopy; /* key copying function */
HashAllocFunc alloc; /* memory allocator */
MemoryContext hcxt; /* memory context if default allocator used */
char *tabname; /* table name (for error messages) */
bool isshared; /* true if table is in shared memory */
} HTAB;
/* Hash table control struct is an opaque type known only within dynahash.c */
typedef struct HTAB HTAB;
/* Parameter data structure for hash_create */
/* Only those fields indicated by hash_flags need be set */
typedef struct HASHCTL
{
long ssize; /* Segment Size */
long dsize; /* (initial) Directory Size */
long max_dsize; /* limit to dsize if directory size is limited */
long ffactor; /* Fill factor */
long num_partitions; /* # partitions (must be power of 2) */
long ssize; /* segment size */
long dsize; /* (initial) directory size */
long max_dsize; /* limit to dsize if dir size is limited */
long ffactor; /* fill factor */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
HashValueFunc hash; /* hash function */
HashCompareFunc match; /* key comparison function */
HashCopyFunc keycopy; /* key copying function */
HashAllocFunc alloc; /* memory allocator */
HASHSEGMENT *dir; /* directory of segment starts */
HASHHDR *hctl; /* location of header in shared mem */
MemoryContext hcxt; /* memory context to use for allocations */
HASHHDR *hctl; /* location of header in shared mem */
} HASHCTL;
/* Flags to indicate which parameters are supplied */
#define HASH_PARTITION 0x001 /* Hashtable is used w/partitioned locking */
#define HASH_SEGMENT 0x002 /* Set segment size */
#define HASH_DIRSIZE 0x004 /* Set directory size */
#define HASH_DIRSIZE 0x004 /* Set directory size (initial and max) */
#define HASH_FFACTOR 0x008 /* Set fill factor */
#define HASH_FUNCTION 0x010 /* Set user defined hash function */
#define HASH_ELEM 0x020 /* Set key/entry size */
#define HASH_ELEM 0x020 /* Set keysize and entrysize */
#define HASH_SHARED_MEM 0x040 /* Hashtable is in shared memory */
#define HASH_ATTACH 0x080 /* Do not initialize hctl */
#define HASH_ALLOC 0x100 /* Set memory allocator */
#define HASH_CONTEXT 0x200 /* Set explicit memory context */
#define HASH_CONTEXT 0x200 /* Set memory allocation context */
#define HASH_COMPARE 0x400 /* Set user defined comparison function */
#define HASH_KEYCOPY 0x800 /* Set user defined key-copying function */
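
For contrast with the shared-memory case, a sketch of creating a plain backend-local table, setting only the HASHCTL fields named by the flags per the comment above; MyEntry and my_create_local_table are hypothetical.

#include "postgres.h"
#include "utils/hsearch.h"

typedef struct MyEntry { uint32 key; int payload; } MyEntry;	/* hypothetical */

static HTAB *
my_create_local_table(void)
{
	HASHCTL		info;

	MemSet(&info, 0, sizeof(info));
	info.keysize = sizeof(uint32);
	info.entrysize = sizeof(MyEntry);
	info.hash = tag_hash;

	/* only keysize/entrysize (HASH_ELEM) and hash (HASH_FUNCTION) are read */
	return hash_create("my local table", 128, &info,
					   HASH_ELEM | HASH_FUNCTION);
}
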
@ -183,10 +123,16 @@ extern void hash_destroy(HTAB *hashp);
extern void hash_stats(const char *where, HTAB *hashp);
extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
bool *foundPtr);
extern uint32 get_hash_value(HTAB *hashp, const void *keyPtr);
extern void *hash_search_with_hash_value(HTAB *hashp, const void *keyPtr,
uint32 hashvalue, HASHACTION action,
bool *foundPtr);
extern long hash_get_num_entries(HTAB *hashp);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
extern Size hash_estimate_size(long num_entries, Size entrysize);
extern long hash_select_dirsize(long num_entries);
extern Size hash_get_shared_size(HASHCTL *info, int flags);
/*
* prototypes for functions in hashfn.c