mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-02-11 19:20:40 +08:00
hash: Refactor hash index creation.
The primary goal here is to move all of the related page modifications to a single section of code, in preparation for adding write-ahead logging. In passing, rename _hash_metapinit to _hash_init, since it initializes more than just the metapage. Patch by Amit Kapila. The larger patch series of which this is a part has been reviewed and tested by Álvaro Herrera, Ashutosh Sharma, Mark Kirkwood, Jeff Janes, and Jesper Pedersen.
This commit is contained in:
parent
2b87dd8d7a
commit
38305398cd
@ -120,7 +120,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
|
||||
|
||||
/* Initialize the hash index metadata page and initial buckets */
|
||||
num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);
|
||||
num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
|
||||
|
||||
/*
|
||||
* If we just insert the tuples into the index in scan order, then
|
||||
@ -182,7 +182,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
void
|
||||
hashbuildempty(Relation index)
|
||||
{
|
||||
_hash_metapinit(index, 0, INIT_FORKNUM);
|
||||
_hash_init(index, 0, INIT_FORKNUM);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -570,68 +570,6 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* _hash_initbitmap()
|
||||
*
|
||||
* Initialize a new bitmap page. The metapage has a write-lock upon
|
||||
* entering the function, and must be written by caller after return.
|
||||
*
|
||||
* 'blkno' is the block number of the new bitmap page.
|
||||
*
|
||||
* All bits in the new bitmap page are set to "1", indicating "in use".
|
||||
*/
|
||||
void
|
||||
_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
|
||||
ForkNumber forkNum)
|
||||
{
|
||||
Buffer buf;
|
||||
Page pg;
|
||||
HashPageOpaque op;
|
||||
uint32 *freep;
|
||||
|
||||
/*
|
||||
* It is okay to write-lock the new bitmap page while holding metapage
|
||||
* write lock, because no one else could be contending for the new page.
|
||||
* Also, the metapage lock makes it safe to extend the index using
|
||||
* _hash_getnewbuf.
|
||||
*
|
||||
* There is some loss of concurrency in possibly doing I/O for the new
|
||||
* page while holding the metapage lock, but this path is taken so seldom
|
||||
* that it's not worth worrying about.
|
||||
*/
|
||||
buf = _hash_getnewbuf(rel, blkno, forkNum);
|
||||
pg = BufferGetPage(buf);
|
||||
|
||||
/* initialize the page's special space */
|
||||
op = (HashPageOpaque) PageGetSpecialPointer(pg);
|
||||
op->hasho_prevblkno = InvalidBlockNumber;
|
||||
op->hasho_nextblkno = InvalidBlockNumber;
|
||||
op->hasho_bucket = -1;
|
||||
op->hasho_flag = LH_BITMAP_PAGE;
|
||||
op->hasho_page_id = HASHO_PAGE_ID;
|
||||
|
||||
/* set all of the bits to 1 */
|
||||
freep = HashPageGetBitmap(pg);
|
||||
MemSet(freep, 0xFF, BMPGSZ_BYTE(metap));
|
||||
|
||||
/* dirty the new bitmap page, and release write lock and pin */
|
||||
MarkBufferDirty(buf);
|
||||
_hash_relbuf(rel, buf);
|
||||
|
||||
/* add the new bitmap page to the metapage's list of bitmaps */
|
||||
/* metapage already has a write lock */
|
||||
if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("out of overflow pages in hash index \"%s\"",
|
||||
RelationGetRelationName(rel))));
|
||||
|
||||
metap->hashm_mapp[metap->hashm_nmaps] = blkno;
|
||||
|
||||
metap->hashm_nmaps++;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* _hash_initbitmapbuffer()
|
||||
*
|
||||
|
@ -156,6 +156,36 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
|
||||
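* _hash_initbuf() -- Initialize an already-acquired buffer as a hash page for the given bucket number (optionally re-initializing the page first); the caller obtains, dirties and releases the buffer.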
|
||||
*/
|
||||
void
|
||||
_hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag,
|
||||
bool initpage)
|
||||
{
|
||||
HashPageOpaque pageopaque;
|
||||
Page page;
|
||||
|
||||
page = BufferGetPage(buf);
|
||||
|
||||
/* initialize the page */
|
||||
if (initpage)
|
||||
_hash_pageinit(page, BufferGetPageSize(buf));
|
||||
|
||||
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* Set hasho_prevblkno with current hashm_maxbucket. This value will
|
||||
* be used to validate cached HashMetaPageData. See
|
||||
* _hash_getbucketbuf_from_hashkey().
|
||||
*/
|
||||
pageopaque->hasho_prevblkno = max_bucket;
|
||||
pageopaque->hasho_nextblkno = InvalidBlockNumber;
|
||||
pageopaque->hasho_bucket = num_bucket;
|
||||
pageopaque->hasho_flag = flag;
|
||||
pageopaque->hasho_page_id = HASHO_PAGE_ID;
|
||||
}
|
||||
|
||||
/*
|
||||
* _hash_getnewbuf() -- Get a new page at the end of the index.
|
||||
*
|
||||
@ -288,7 +318,7 @@ _hash_dropscanbuf(Relation rel, HashScanOpaque so)
|
||||
|
||||
|
||||
/*
|
||||
* _hash_metapinit() -- Initialize the metadata page of a hash index,
|
||||
* _hash_init() -- Initialize the metadata page of a hash index,
|
||||
* the initial buckets, and the initial bitmap page.
|
||||
*
|
||||
* The initial number of buckets is dependent on num_tuples, an estimate
|
||||
@ -300,19 +330,18 @@ _hash_dropscanbuf(Relation rel, HashScanOpaque so)
|
||||
* multiple buffer locks is ignored.
|
||||
*/
|
||||
uint32
|
||||
_hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
|
||||
_hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
|
||||
{
|
||||
HashMetaPage metap;
|
||||
HashPageOpaque pageopaque;
|
||||
Buffer metabuf;
|
||||
Buffer buf;
|
||||
Buffer bitmapbuf;
|
||||
Page pg;
|
||||
HashMetaPage metap;
|
||||
RegProcedure procid;
|
||||
int32 data_width;
|
||||
int32 item_width;
|
||||
int32 ffactor;
|
||||
double dnumbuckets;
|
||||
uint32 num_buckets;
|
||||
uint32 log2_num_buckets;
|
||||
uint32 i;
|
||||
|
||||
/* safety check */
|
||||
@ -334,6 +363,96 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
|
||||
if (ffactor < 10)
|
||||
ffactor = 10;
|
||||
|
||||
procid = index_getprocid(rel, 1, HASHPROC);
|
||||
|
||||
/*
|
||||
* We initialize the metapage, the first N bucket pages, and the first
|
||||
* bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
|
||||
* calls to occur. This ensures that the smgr level has the right idea of
|
||||
* the physical index length.
|
||||
*
|
||||
* Critical section not required, because on error the creation of the
|
||||
* whole relation will be rolled back.
|
||||
*/
|
||||
metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
|
||||
_hash_init_metabuffer(metabuf, num_tuples, procid, ffactor, false);
|
||||
MarkBufferDirty(metabuf);
|
||||
|
||||
pg = BufferGetPage(metabuf);
|
||||
metap = HashPageGetMeta(pg);
|
||||
|
||||
num_buckets = metap->hashm_maxbucket + 1;
|
||||
|
||||
/*
|
||||
* Release buffer lock on the metapage while we initialize buckets.
|
||||
* Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
|
||||
* won't accomplish anything. It's a bad idea to hold buffer locks for
|
||||
* long intervals in any case, since that can block the bgwriter.
|
||||
*/
|
||||
LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
|
||||
|
||||
/*
|
||||
* Initialize the first N buckets (no WAL logging is performed here yet)
|
||||
*/
|
||||
for (i = 0; i < num_buckets; i++)
|
||||
{
|
||||
BlockNumber blkno;
|
||||
|
||||
/* Allow interrupts, in case N is huge */
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
blkno = BUCKET_TO_BLKNO(metap, i);
|
||||
buf = _hash_getnewbuf(rel, blkno, forkNum);
|
||||
_hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
|
||||
MarkBufferDirty(buf);
|
||||
_hash_relbuf(rel, buf);
|
||||
}
|
||||
|
||||
/* Now reacquire buffer lock on metapage */
|
||||
LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Initialize bitmap page
|
||||
*/
|
||||
bitmapbuf = _hash_getnewbuf(rel, num_buckets + 1, forkNum);
|
||||
_hash_initbitmapbuffer(bitmapbuf, metap->hashm_bmsize, false);
|
||||
MarkBufferDirty(bitmapbuf);
|
||||
|
||||
/* add the new bitmap page to the metapage's list of bitmaps */
|
||||
/* metapage already has a write lock */
|
||||
if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("out of overflow pages in hash index \"%s\"",
|
||||
RelationGetRelationName(rel))));
|
||||
|
||||
metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
|
||||
|
||||
metap->hashm_nmaps++;
|
||||
MarkBufferDirty(metabuf);
|
||||
|
||||
/* all done */
|
||||
_hash_relbuf(rel, bitmapbuf);
|
||||
_hash_relbuf(rel, metabuf);
|
||||
|
||||
return num_buckets;
|
||||
}
|
||||
|
||||
/*
|
||||
* _hash_init_metabuffer() -- Initialize the metadata page of a hash index.
|
||||
*/
|
||||
void
|
||||
_hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid,
|
||||
uint16 ffactor, bool initpage)
|
||||
{
|
||||
HashMetaPage metap;
|
||||
HashPageOpaque pageopaque;
|
||||
Page page;
|
||||
double dnumbuckets;
|
||||
uint32 num_buckets;
|
||||
uint32 log2_num_buckets;
|
||||
uint32 i;
|
||||
|
||||
/*
|
||||
* Choose the number of initial bucket pages to match the fill factor
|
||||
* given the estimated number of tuples. We round up the result to the
|
||||
@ -353,30 +472,25 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
|
||||
Assert(num_buckets == (((uint32) 1) << log2_num_buckets));
|
||||
Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS);
|
||||
|
||||
/*
|
||||
* We initialize the metapage, the first N bucket pages, and the first
|
||||
* bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
|
||||
* calls to occur. This ensures that the smgr level has the right idea of
|
||||
* the physical index length.
|
||||
*/
|
||||
metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
|
||||
pg = BufferGetPage(metabuf);
|
||||
page = BufferGetPage(buf);
|
||||
if (initpage)
|
||||
_hash_pageinit(page, BufferGetPageSize(buf));
|
||||
|
||||
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
|
||||
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
|
||||
pageopaque->hasho_prevblkno = InvalidBlockNumber;
|
||||
pageopaque->hasho_nextblkno = InvalidBlockNumber;
|
||||
pageopaque->hasho_bucket = -1;
|
||||
pageopaque->hasho_flag = LH_META_PAGE;
|
||||
pageopaque->hasho_page_id = HASHO_PAGE_ID;
|
||||
|
||||
metap = HashPageGetMeta(pg);
|
||||
metap = HashPageGetMeta(page);
|
||||
|
||||
metap->hashm_magic = HASH_MAGIC;
|
||||
metap->hashm_version = HASH_VERSION;
|
||||
metap->hashm_ntuples = 0;
|
||||
metap->hashm_nmaps = 0;
|
||||
metap->hashm_ffactor = ffactor;
|
||||
metap->hashm_bsize = HashGetMaxBitmapSize(pg);
|
||||
metap->hashm_bsize = HashGetMaxBitmapSize(page);
|
||||
/* find largest bitmap array size that will fit in page size */
|
||||
for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
|
||||
{
|
||||
@ -393,7 +507,7 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
|
||||
* pretty useless for normal operation (in fact, hashm_procid is not used
|
||||
* anywhere), but it might be handy for forensic purposes so we keep it.
|
||||
*/
|
||||
metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
|
||||
metap->hashm_procid = procid;
|
||||
|
||||
/*
|
||||
* We initialize the index with N buckets, 0 .. N-1, occupying physical
|
||||
@ -411,54 +525,9 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
|
||||
metap->hashm_ovflpoint = log2_num_buckets;
|
||||
metap->hashm_firstfree = 0;
|
||||
|
||||
/*
|
||||
* Release buffer lock on the metapage while we initialize buckets.
|
||||
* Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
|
||||
* won't accomplish anything. It's a bad idea to hold buffer locks for
|
||||
* long intervals in any case, since that can block the bgwriter.
|
||||
*/
|
||||
MarkBufferDirty(metabuf);
|
||||
LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
|
||||
|
||||
/*
|
||||
* Initialize the first N buckets
|
||||
*/
|
||||
for (i = 0; i < num_buckets; i++)
|
||||
{
|
||||
/* Allow interrupts, in case N is huge */
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
|
||||
pg = BufferGetPage(buf);
|
||||
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
|
||||
|
||||
/*
|
||||
* Set hasho_prevblkno with current hashm_maxbucket. This value will
|
||||
* be used to validate cached HashMetaPageData. See
|
||||
* _hash_getbucketbuf_from_hashkey().
|
||||
*/
|
||||
pageopaque->hasho_prevblkno = metap->hashm_maxbucket;
|
||||
pageopaque->hasho_nextblkno = InvalidBlockNumber;
|
||||
pageopaque->hasho_bucket = i;
|
||||
pageopaque->hasho_flag = LH_BUCKET_PAGE;
|
||||
pageopaque->hasho_page_id = HASHO_PAGE_ID;
|
||||
MarkBufferDirty(buf);
|
||||
_hash_relbuf(rel, buf);
|
||||
}
|
||||
|
||||
/* Now reacquire buffer lock on metapage */
|
||||
LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Initialize first bitmap page
|
||||
*/
|
||||
_hash_initbitmap(rel, metap, num_buckets + 1, forkNum);
|
||||
|
||||
/* all done */
|
||||
MarkBufferDirty(metabuf);
|
||||
_hash_relbuf(rel, metabuf);
|
||||
|
||||
return num_buckets;
|
||||
/* Set pd_lower just past the end of the metadata. */
|
||||
((PageHeader) page)->pd_lower =
|
||||
((char *) metap + sizeof(HashMetaPageData)) - (char *) page;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -535,7 +604,7 @@ restart_expand:
|
||||
* than a disk block then this would be an independent constraint.
|
||||
*
|
||||
* If you change this, see also the maximum initial number of buckets in
|
||||
* _hash_metapinit().
|
||||
* _hash_init().
|
||||
*/
|
||||
if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
|
||||
goto fail;
|
||||
|
@ -311,8 +311,6 @@ extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool r
|
||||
extern BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
|
||||
Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets,
|
||||
Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy);
|
||||
extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
|
||||
BlockNumber blkno, ForkNumber forkNum);
|
||||
extern void _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage);
|
||||
extern void _hash_squeezebucket(Relation rel,
|
||||
Bucket bucket, BlockNumber bucket_blkno,
|
||||
@ -331,6 +329,8 @@ extern Buffer _hash_getbucketbuf_from_hashkey(Relation rel, uint32 hashkey,
|
||||
int access,
|
||||
HashMetaPage *cachedmetap);
|
||||
extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
|
||||
extern void _hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket,
|
||||
uint32 flag, bool initpage);
|
||||
extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno,
|
||||
ForkNumber forkNum);
|
||||
extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
|
||||
@ -339,8 +339,10 @@ extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
|
||||
extern void _hash_relbuf(Relation rel, Buffer buf);
|
||||
extern void _hash_dropbuf(Relation rel, Buffer buf);
|
||||
extern void _hash_dropscanbuf(Relation rel, HashScanOpaque so);
|
||||
extern uint32 _hash_metapinit(Relation rel, double num_tuples,
|
||||
ForkNumber forkNum);
|
||||
extern uint32 _hash_init(Relation rel, double num_tuples,
|
||||
ForkNumber forkNum);
|
||||
extern void _hash_init_metabuffer(Buffer buf, double num_tuples,
|
||||
RegProcedure procid, uint16 ffactor, bool initpage);
|
||||
extern void _hash_pageinit(Page page, Size size);
|
||||
extern void _hash_expandtable(Relation rel, Buffer metabuf);
|
||||
extern void _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf,
|
||||
|
Loading…
Reference in New Issue
Block a user