mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-30 19:00:29 +08:00
Add support for building GiST index by sorting.
This adds a new optional support function to the GiST access method: sortsupport. If it is defined, the GiST index is built by sorting all data to the order defined by the sortsupport's comparator function, and packing the tuples in that order to GiST pages. This is similar to how B-tree index build works, and is much faster than inserting the tuples one by one. The resulting index is smaller too, because the pages are packed more tightly, up to 'fillfactor'. The normal build method works by splitting pages, which tends to lead to more wasted space. The quality of the resulting index depends on how good the opclass-defined sort order is. A good order preserves locality of the input data. As the first user of this facility, add 'sortsupport' function to the point_ops opclass. It sorts the points in Z-order (aka Morton Code), by interleaving the bits of the X and Y coordinates. Author: Andrey Borodin Reviewed-by: Pavel Borisov, Thomas Munro Discussion: https://www.postgresql.org/message-id/1A36620E-CAD8-4267-9067-FB31385E7C0D%40yandex-team.ru
This commit is contained in:
parent
089da3c477
commit
16fa9b2b30
@ -259,6 +259,8 @@ CREATE INDEX ON my_table USING GIST (my_inet_column inet_ops);
|
||||
<function>compress</function> method is omitted. The optional tenth method
|
||||
<function>options</function> is needed if the operator class provides
|
||||
the user-specified parameters.
|
||||
The <function>sortsupport</function> method is also optional and is used to
|
||||
speed up building a <acronym>GiST</acronym> index.
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
@ -1065,6 +1067,74 @@ my_compress(PG_FUNCTION_ARGS)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><function>sortsupport</function></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Returns a comparator function to sort data in a way that preserves
|
||||
locality. It is used by <command>CREATE INDEX</command> and
|
||||
<command>REINDEX</command> commands. The quality of the created index
|
||||
depends on how well the sort order determined by the comparator function
|
||||
preserves locality of the inputs.
|
||||
</para>
|
||||
<para>
|
||||
The <function>sortsupport</function> method is optional. If it is not
|
||||
provided, <command>CREATE INDEX</command> builds the index by inserting
|
||||
each tuple into the tree using the <function>penalty</function> and
|
||||
<function>picksplit</function> functions, which is much slower.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The <acronym>SQL</acronym> declaration of the function must look like
|
||||
this:
|
||||
|
||||
<programlisting>
|
||||
CREATE OR REPLACE FUNCTION my_sortsupport(internal)
|
||||
RETURNS void
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
</programlisting>
|
||||
|
||||
The argument is a pointer to a <structname>SortSupport</structname>
|
||||
struct. At a minimum, the function must fill in its comparator field.
|
||||
The comparator takes three arguments: two Datums to compare, and
|
||||
a pointer to the <structname>SortSupport</structname> struct. The
|
||||
Datums are the two indexed values in the format that they are stored
|
||||
in the index; that is, in the format returned by the
|
||||
<function>compress</function> method. The full API is defined in
|
||||
<filename>src/include/utils/sortsupport.h</filename>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The matching code in the C module could then follow this skeleton:
|
||||
|
||||
<programlisting>
|
||||
PG_FUNCTION_INFO_V1(my_sortsupport);
|
||||
|
||||
static int
|
||||
my_fastcmp(Datum x, Datum y, SortSupport ssup)
|
||||
{
|
||||
/* establish order between x and y by computing some sorting value z */
|
||||
|
||||
int z1 = ComputeSpatialCode(x);
|
||||
int z2 = ComputeSpatialCode(y);
|
||||
|
||||
return z1 == z2 ? 0 : z1 > z2 ? 1 : -1;
|
||||
}
|
||||
|
||||
Datum
|
||||
my_sortsupport(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
|
||||
ssup->comparator = my_fastcmp;
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
<para>
|
||||
|
@ -3,6 +3,24 @@
|
||||
* gistbuild.c
|
||||
* build algorithm for GiST indexes implementation.
|
||||
*
|
||||
* There are two different strategies:
|
||||
*
|
||||
* 1. Sort all input tuples, pack them into GiST leaf pages in the sorted
|
||||
* order, and create downlinks and internal pages as we go. This builds
|
||||
* the index from the bottom up, similar to how B-tree index build
|
||||
* works.
|
||||
*
|
||||
* 2. Start with an empty index, and insert all tuples one by one.
|
||||
*
|
||||
* The sorted method is used if the operator classes for all columns have
|
||||
* a 'sortsupport' defined. Otherwise, we resort to the second strategy.
|
||||
*
|
||||
* The second strategy can optionally use buffers at different levels of
|
||||
* the tree to reduce I/O, see "Buffering build algorithm" in the README
|
||||
* for a more detailed explanation. It initially calls insert over and
|
||||
* over, but switches to the buffered algorithm after a certain number of
|
||||
* tuples (unless buffering mode is disabled).
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
@ -28,6 +46,7 @@
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/tuplesort.h"
|
||||
|
||||
/* Step of index tuples for check whether to switch to buffering build mode */
|
||||
#define BUFFERING_MODE_SWITCH_CHECK_STEP 256
|
||||
@ -40,8 +59,14 @@
|
||||
*/
|
||||
#define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
|
||||
|
||||
/*
|
||||
* Strategy used to build the index. It can change between the
|
||||
* GIST_BUFFERING_* modes on the fly, but if the Sorted method is used,
|
||||
* that needs to be decided up-front and cannot be changed afterwards.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
GIST_SORTED_BUILD, /* bottom-up build by sorting */
|
||||
GIST_BUFFERING_DISABLED, /* in regular build mode and aren't going to
|
||||
* switch */
|
||||
GIST_BUFFERING_AUTO, /* in regular build mode, but will switch to
|
||||
@ -51,7 +76,7 @@ typedef enum
|
||||
* before switching to the buffering build
|
||||
* mode */
|
||||
GIST_BUFFERING_ACTIVE /* in buffering build mode */
|
||||
} GistBufferingMode;
|
||||
} GistBuildMode;
|
||||
|
||||
/* Working state for gistbuild and its callback */
|
||||
typedef struct
|
||||
@ -60,23 +85,58 @@ typedef struct
|
||||
Relation heaprel;
|
||||
GISTSTATE *giststate;
|
||||
|
||||
int64 indtuples; /* number of tuples indexed */
|
||||
int64 indtuplesSize; /* total size of all indexed tuples */
|
||||
|
||||
Size freespace; /* amount of free space to leave on pages */
|
||||
|
||||
GistBuildMode buildMode;
|
||||
|
||||
int64 indtuples; /* number of tuples indexed */
|
||||
|
||||
/*
|
||||
* Extra data structures used during a buffering build. 'gfbb' contains
|
||||
* information related to managing the build buffers. 'parentMap' is a
|
||||
* lookup table of the parent of each internal page.
|
||||
*/
|
||||
int64 indtuplesSize; /* total size of all indexed tuples */
|
||||
GISTBuildBuffers *gfbb;
|
||||
HTAB *parentMap;
|
||||
|
||||
GistBufferingMode bufferingMode;
|
||||
/*
|
||||
* Extra data structures used during a sorting build.
|
||||
*/
|
||||
Tuplesortstate *sortstate; /* state data for tuplesort.c */
|
||||
|
||||
BlockNumber pages_allocated;
|
||||
BlockNumber pages_written;
|
||||
|
||||
int ready_num_pages;
|
||||
BlockNumber ready_blknos[XLR_MAX_BLOCK_ID];
|
||||
Page ready_pages[XLR_MAX_BLOCK_ID];
|
||||
} GISTBuildState;
|
||||
|
||||
/*
|
||||
* In sorted build, we use a stack of these structs, one for each level,
|
||||
* to hold an in-memory buffer of the righmost page at the level. When the
|
||||
* page fills up, it is written out and a new page is allocated.
|
||||
*/
|
||||
typedef struct GistSortedBuildPageState
|
||||
{
|
||||
Page page;
|
||||
struct GistSortedBuildPageState *parent; /* Upper level, if any */
|
||||
} GistSortedBuildPageState;
|
||||
|
||||
/* prototypes for private functions */
|
||||
|
||||
static void gistSortedBuildCallback(Relation index, ItemPointer tid,
|
||||
Datum *values, bool *isnull,
|
||||
bool tupleIsAlive, void *state);
|
||||
static void gist_indexsortbuild(GISTBuildState *state);
|
||||
static void gist_indexsortbuild_pagestate_add(GISTBuildState *state,
|
||||
GistSortedBuildPageState *pagestate,
|
||||
IndexTuple itup);
|
||||
static void gist_indexsortbuild_pagestate_flush(GISTBuildState *state,
|
||||
GistSortedBuildPageState *pagestate);
|
||||
static void gist_indexsortbuild_flush_ready_pages(GISTBuildState *state);
|
||||
|
||||
static void gistInitBuffering(GISTBuildState *buildstate);
|
||||
static int calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep);
|
||||
static void gistBuildCallback(Relation index,
|
||||
@ -107,10 +167,9 @@ static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child,
|
||||
static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent);
|
||||
static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
|
||||
|
||||
|
||||
/*
|
||||
* Main entry point to GiST index build. Initially calls insert over and over,
|
||||
* but switches to more efficient buffering build algorithm after a certain
|
||||
* number of tuples (unless buffering mode is disabled).
|
||||
* Main entry point to GiST index build.
|
||||
*/
|
||||
IndexBuildResult *
|
||||
gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
@ -118,39 +177,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
IndexBuildResult *result;
|
||||
double reltuples;
|
||||
GISTBuildState buildstate;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
MemoryContext oldcxt = CurrentMemoryContext;
|
||||
int fillfactor;
|
||||
|
||||
buildstate.indexrel = index;
|
||||
buildstate.heaprel = heap;
|
||||
|
||||
if (index->rd_options)
|
||||
{
|
||||
/* Get buffering mode from the options string */
|
||||
GiSTOptions *options = (GiSTOptions *) index->rd_options;
|
||||
|
||||
if (options->buffering_mode == GIST_OPTION_BUFFERING_ON)
|
||||
buildstate.bufferingMode = GIST_BUFFERING_STATS;
|
||||
else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF)
|
||||
buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
|
||||
else
|
||||
buildstate.bufferingMode = GIST_BUFFERING_AUTO;
|
||||
|
||||
fillfactor = options->fillfactor;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* By default, switch to buffering mode when the index grows too large
|
||||
* to fit in cache.
|
||||
*/
|
||||
buildstate.bufferingMode = GIST_BUFFERING_AUTO;
|
||||
fillfactor = GIST_DEFAULT_FILLFACTOR;
|
||||
}
|
||||
/* Calculate target amount of free space to leave on pages */
|
||||
buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
|
||||
Oid SortSupportFnOids[INDEX_MAX_KEYS];
|
||||
bool hasallsortsupports;
|
||||
int keyscount = IndexRelationGetNumberOfKeyAttributes(index);
|
||||
GiSTOptions *options = NULL;
|
||||
|
||||
/*
|
||||
* We expect to be called exactly once for any index relation. If that's
|
||||
@ -160,7 +192,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
elog(ERROR, "index \"%s\" already contains data",
|
||||
RelationGetRelationName(index));
|
||||
|
||||
/* no locking is needed */
|
||||
if (index->rd_options)
|
||||
options = (GiSTOptions *) index->rd_options;
|
||||
|
||||
buildstate.indexrel = index;
|
||||
buildstate.heaprel = heap;
|
||||
buildstate.sortstate = NULL;
|
||||
buildstate.giststate = initGISTstate(index);
|
||||
|
||||
/*
|
||||
@ -170,42 +207,129 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
*/
|
||||
buildstate.giststate->tempCxt = createTempGistContext();
|
||||
|
||||
/* initialize the root page */
|
||||
buffer = gistNewBuffer(index);
|
||||
Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
|
||||
page = BufferGetPage(buffer);
|
||||
/*
|
||||
* Choose build strategy. If all keys support sorting, do that. Otherwise
|
||||
* the default strategy is to switch to buffering mode when the index grows
|
||||
* too large to fit in cache.
|
||||
*/
|
||||
hasallsortsupports = true;
|
||||
for (int i = 0; i < keyscount; i++)
|
||||
{
|
||||
SortSupportFnOids[i] = index_getprocid(index, i + 1,
|
||||
GIST_SORTSUPPORT_PROC);
|
||||
if (!OidIsValid(SortSupportFnOids[i]))
|
||||
{
|
||||
hasallsortsupports = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
START_CRIT_SECTION();
|
||||
if (hasallsortsupports)
|
||||
{
|
||||
buildstate.buildMode = GIST_SORTED_BUILD;
|
||||
}
|
||||
else if (options)
|
||||
{
|
||||
if (options->buffering_mode == GIST_OPTION_BUFFERING_ON)
|
||||
buildstate.buildMode = GIST_BUFFERING_STATS;
|
||||
else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF)
|
||||
buildstate.buildMode = GIST_BUFFERING_DISABLED;
|
||||
else
|
||||
buildstate.buildMode = GIST_BUFFERING_AUTO;
|
||||
}
|
||||
else
|
||||
{
|
||||
buildstate.buildMode = GIST_BUFFERING_AUTO;
|
||||
}
|
||||
|
||||
GISTInitBuffer(buffer, F_LEAF);
|
||||
/*
|
||||
* Calculate target amount of free space to leave on pages.
|
||||
*/
|
||||
fillfactor = options ? options->fillfactor : GIST_DEFAULT_FILLFACTOR;
|
||||
buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
|
||||
|
||||
MarkBufferDirty(buffer);
|
||||
PageSetLSN(page, GistBuildLSN);
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
/* build the index */
|
||||
/*
|
||||
* Build the index using the chosen strategy.
|
||||
*/
|
||||
buildstate.indtuples = 0;
|
||||
buildstate.indtuplesSize = 0;
|
||||
|
||||
/*
|
||||
* Do the heap scan.
|
||||
*/
|
||||
reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
|
||||
gistBuildCallback,
|
||||
(void *) &buildstate, NULL);
|
||||
|
||||
/*
|
||||
* If buffering was used, flush out all the tuples that are still in the
|
||||
* buffers.
|
||||
*/
|
||||
if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE)
|
||||
if (buildstate.buildMode == GIST_SORTED_BUILD)
|
||||
{
|
||||
elog(DEBUG1, "all tuples processed, emptying buffers");
|
||||
gistEmptyAllBuffers(&buildstate);
|
||||
gistFreeBuildBuffers(buildstate.gfbb);
|
||||
/*
|
||||
* Sort all data, build the index from bottom up.
|
||||
*/
|
||||
buildstate.sortstate = tuplesort_begin_index_gist(heap,
|
||||
index,
|
||||
maintenance_work_mem,
|
||||
NULL,
|
||||
false);
|
||||
|
||||
/* Scan the table, adding all tuples to the tuplesort */
|
||||
reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
|
||||
gistSortedBuildCallback,
|
||||
(void *) &buildstate, NULL);
|
||||
|
||||
/*
|
||||
* Perform the sort and build index pages.
|
||||
*/
|
||||
tuplesort_performsort(buildstate.sortstate);
|
||||
|
||||
gist_indexsortbuild(&buildstate);
|
||||
|
||||
tuplesort_end(buildstate.sortstate);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Initialize an empty index and insert all tuples, possibly using
|
||||
* buffers on intermediate levels.
|
||||
*/
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
/* initialize the root page */
|
||||
buffer = gistNewBuffer(index);
|
||||
Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
GISTInitBuffer(buffer, F_LEAF);
|
||||
|
||||
MarkBufferDirty(buffer);
|
||||
PageSetLSN(page, GistBuildLSN);
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
/* Scan the table, inserting all the tuples to the index. */
|
||||
reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
|
||||
gistBuildCallback,
|
||||
(void *) &buildstate, NULL);
|
||||
|
||||
/*
|
||||
* If buffering was used, flush out all the tuples that are still in
|
||||
* the buffers.
|
||||
*/
|
||||
if (buildstate.buildMode == GIST_BUFFERING_ACTIVE)
|
||||
{
|
||||
elog(DEBUG1, "all tuples processed, emptying buffers");
|
||||
gistEmptyAllBuffers(&buildstate);
|
||||
gistFreeBuildBuffers(buildstate.gfbb);
|
||||
}
|
||||
|
||||
/*
|
||||
* We didn't write WAL records as we built the index, so if
|
||||
* WAL-logging is required, write all pages to the WAL now.
|
||||
*/
|
||||
if (RelationNeedsWAL(index))
|
||||
{
|
||||
log_newpage_range(index, MAIN_FORKNUM,
|
||||
0, RelationGetNumberOfBlocks(index),
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
/* okay, all heap tuples are indexed */
|
||||
@ -214,17 +338,6 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
|
||||
freeGISTstate(buildstate.giststate);
|
||||
|
||||
/*
|
||||
* We didn't write WAL records as we built the index, so if WAL-logging is
|
||||
* required, write all pages to the WAL now.
|
||||
*/
|
||||
if (RelationNeedsWAL(index))
|
||||
{
|
||||
log_newpage_range(index, MAIN_FORKNUM,
|
||||
0, RelationGetNumberOfBlocks(index),
|
||||
true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return statistics
|
||||
*/
|
||||
@ -236,6 +349,235 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
return result;
|
||||
}
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Routines for sorted build
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* Per-tuple callback for table_index_build_scan.
|
||||
*/
|
||||
static void
|
||||
gistSortedBuildCallback(Relation index,
|
||||
ItemPointer tid,
|
||||
Datum *values,
|
||||
bool *isnull,
|
||||
bool tupleIsAlive,
|
||||
void *state)
|
||||
{
|
||||
GISTBuildState *buildstate = (GISTBuildState *) state;
|
||||
MemoryContext oldCtx;
|
||||
Datum compressed_values[INDEX_MAX_KEYS];
|
||||
|
||||
oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
|
||||
|
||||
/* Form an index tuple and point it at the heap tuple */
|
||||
gistCompressValues(buildstate->giststate, index,
|
||||
values, isnull,
|
||||
true, compressed_values);
|
||||
|
||||
tuplesort_putindextuplevalues(buildstate->sortstate,
|
||||
buildstate->indexrel,
|
||||
tid,
|
||||
compressed_values, isnull);
|
||||
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
MemoryContextReset(buildstate->giststate->tempCxt);
|
||||
|
||||
/* Update tuple count. */
|
||||
buildstate->indtuples += 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Build GiST index from bottom up from pre-sorted tuples.
|
||||
*/
|
||||
static void
|
||||
gist_indexsortbuild(GISTBuildState *state)
|
||||
{
|
||||
IndexTuple itup;
|
||||
GistSortedBuildPageState *leafstate;
|
||||
GistSortedBuildPageState *pagestate;
|
||||
Page page;
|
||||
|
||||
state->pages_allocated = 0;
|
||||
state->pages_written = 0;
|
||||
state->ready_num_pages = 0;
|
||||
|
||||
/*
|
||||
* Write an empty page as a placeholder for the root page. It will be
|
||||
* replaced with the real root page at the end.
|
||||
*/
|
||||
page = palloc0(BLCKSZ);
|
||||
smgrextend(state->indexrel->rd_smgr, MAIN_FORKNUM, GIST_ROOT_BLKNO,
|
||||
page, true);
|
||||
state->pages_allocated++;
|
||||
state->pages_written++;
|
||||
|
||||
/* Allocate a temporary buffer for the first leaf page. */
|
||||
leafstate = palloc(sizeof(GistSortedBuildPageState));
|
||||
leafstate->page = page;
|
||||
leafstate->parent = NULL;
|
||||
gistinitpage(page, F_LEAF);
|
||||
|
||||
/*
|
||||
* Fill index pages with tuples in the sorted order.
|
||||
*/
|
||||
while ((itup = tuplesort_getindextuple(state->sortstate, true)) != NULL)
|
||||
{
|
||||
gist_indexsortbuild_pagestate_add(state, leafstate, itup);
|
||||
MemoryContextReset(state->giststate->tempCxt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write out the partially full non-root pages.
|
||||
*
|
||||
* Keep in mind that flush can build a new root.
|
||||
*/
|
||||
pagestate = leafstate;
|
||||
while (pagestate->parent != NULL)
|
||||
{
|
||||
GistSortedBuildPageState *parent;
|
||||
|
||||
gist_indexsortbuild_pagestate_flush(state, pagestate);
|
||||
parent = pagestate->parent;
|
||||
pfree(pagestate->page);
|
||||
pfree(pagestate);
|
||||
pagestate = parent;
|
||||
}
|
||||
|
||||
gist_indexsortbuild_flush_ready_pages(state);
|
||||
|
||||
/* Write out the root */
|
||||
PageSetLSN(pagestate->page, GistBuildLSN);
|
||||
smgrwrite(state->indexrel->rd_smgr, MAIN_FORKNUM, GIST_ROOT_BLKNO,
|
||||
pagestate->page, true);
|
||||
if (RelationNeedsWAL(state->indexrel))
|
||||
log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
|
||||
pagestate->page, true);
|
||||
|
||||
pfree(pagestate->page);
|
||||
pfree(pagestate);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add tuple to a page. If the pages is full, write it out and re-initialize
|
||||
* a new page first.
|
||||
*/
|
||||
static void
|
||||
gist_indexsortbuild_pagestate_add(GISTBuildState *state,
|
||||
GistSortedBuildPageState *pagestate,
|
||||
IndexTuple itup)
|
||||
{
|
||||
Size sizeNeeded;
|
||||
|
||||
/* Does the tuple fit? If not, flush */
|
||||
sizeNeeded = IndexTupleSize(itup) + sizeof(ItemIdData) + state->freespace;
|
||||
if (PageGetFreeSpace(pagestate->page) < sizeNeeded)
|
||||
gist_indexsortbuild_pagestate_flush(state, pagestate);
|
||||
|
||||
gistfillbuffer(pagestate->page, &itup, 1, InvalidOffsetNumber);
|
||||
}
|
||||
|
||||
static void
|
||||
gist_indexsortbuild_pagestate_flush(GISTBuildState *state,
|
||||
GistSortedBuildPageState *pagestate)
|
||||
{
|
||||
GistSortedBuildPageState *parent;
|
||||
IndexTuple *itvec;
|
||||
IndexTuple union_tuple;
|
||||
int vect_len;
|
||||
bool isleaf;
|
||||
BlockNumber blkno;
|
||||
MemoryContext oldCtx;
|
||||
|
||||
/* check once per page */
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
if (state->ready_num_pages == XLR_MAX_BLOCK_ID)
|
||||
gist_indexsortbuild_flush_ready_pages(state);
|
||||
|
||||
/*
|
||||
* The page is now complete. Assign a block number to it, and add it to
|
||||
* the list of finished pages. (We don't write it out immediately, because
|
||||
* we want to WAL-log the pages in batches.)
|
||||
*/
|
||||
blkno = state->pages_allocated++;
|
||||
state->ready_blknos[state->ready_num_pages] = blkno;
|
||||
state->ready_pages[state->ready_num_pages] = pagestate->page;
|
||||
state->ready_num_pages++;
|
||||
|
||||
isleaf = GistPageIsLeaf(pagestate->page);
|
||||
|
||||
/*
|
||||
* Form a downlink tuple to represent all the tuples on the page.
|
||||
*/
|
||||
oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt);
|
||||
itvec = gistextractpage(pagestate->page, &vect_len);
|
||||
union_tuple = gistunion(state->indexrel, itvec, vect_len,
|
||||
state->giststate);
|
||||
ItemPointerSetBlockNumber(&(union_tuple->t_tid), blkno);
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
/*
|
||||
* Insert the downlink to the parent page. If this was the root, create a
|
||||
* new page as the parent, which becomes the new root.
|
||||
*/
|
||||
parent = pagestate->parent;
|
||||
if (parent == NULL)
|
||||
{
|
||||
parent = palloc(sizeof(GistSortedBuildPageState));
|
||||
parent->page = (Page) palloc(BLCKSZ);
|
||||
parent->parent = NULL;
|
||||
gistinitpage(parent->page, 0);
|
||||
|
||||
pagestate->parent = parent;
|
||||
}
|
||||
gist_indexsortbuild_pagestate_add(state, parent, union_tuple);
|
||||
|
||||
/* Re-initialize the page buffer for next page on this level. */
|
||||
pagestate->page = palloc(BLCKSZ);
|
||||
gistinitpage(pagestate->page, isleaf ? F_LEAF : 0);
|
||||
}
|
||||
|
||||
static void
|
||||
gist_indexsortbuild_flush_ready_pages(GISTBuildState *state)
|
||||
{
|
||||
if (state->ready_num_pages == 0)
|
||||
return;
|
||||
|
||||
for (int i = 0; i < state->ready_num_pages; i++)
|
||||
{
|
||||
Page page = state->ready_pages[i];
|
||||
|
||||
/* Currently, the blocks must be buffered in order. */
|
||||
if (state->ready_blknos[i] != state->pages_written)
|
||||
elog(ERROR, "unexpected block number to flush GiST sorting build");
|
||||
|
||||
PageSetLSN(page, GistBuildLSN);
|
||||
|
||||
smgrextend(state->indexrel->rd_smgr,
|
||||
MAIN_FORKNUM,
|
||||
state->pages_written++,
|
||||
page,
|
||||
true);
|
||||
}
|
||||
|
||||
if (RelationNeedsWAL(state->indexrel))
|
||||
log_newpages(&state->indexrel->rd_node, MAIN_FORKNUM, state->ready_num_pages,
|
||||
state->ready_blknos, state->ready_pages, true);
|
||||
|
||||
for (int i = 0; i < state->ready_num_pages; i++)
|
||||
pfree(state->ready_pages[i]);
|
||||
|
||||
state->ready_num_pages = 0;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Routines for non-sorted build
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* Attempt to switch to buffering mode.
|
||||
*
|
||||
@ -375,7 +717,7 @@ gistInitBuffering(GISTBuildState *buildstate)
|
||||
if (levelStep <= 0)
|
||||
{
|
||||
elog(DEBUG1, "failed to switch to buffered GiST build");
|
||||
buildstate->bufferingMode = GIST_BUFFERING_DISABLED;
|
||||
buildstate->buildMode = GIST_BUFFERING_DISABLED;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -392,7 +734,7 @@ gistInitBuffering(GISTBuildState *buildstate)
|
||||
|
||||
gistInitParentMap(buildstate);
|
||||
|
||||
buildstate->bufferingMode = GIST_BUFFERING_ACTIVE;
|
||||
buildstate->buildMode = GIST_BUFFERING_ACTIVE;
|
||||
|
||||
elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
|
||||
levelStep, pagesPerBuffer);
|
||||
@ -453,10 +795,12 @@ gistBuildCallback(Relation index,
|
||||
oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
|
||||
|
||||
/* form an index tuple and point it at the heap tuple */
|
||||
itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
|
||||
itup = gistFormTuple(buildstate->giststate, index,
|
||||
values, isnull,
|
||||
true);
|
||||
itup->t_tid = *tid;
|
||||
|
||||
if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
|
||||
if (buildstate->buildMode == GIST_BUFFERING_ACTIVE)
|
||||
{
|
||||
/* We have buffers, so use them. */
|
||||
gistBufferingBuildInsert(buildstate, itup);
|
||||
@ -478,7 +822,7 @@ gistBuildCallback(Relation index,
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
MemoryContextReset(buildstate->giststate->tempCxt);
|
||||
|
||||
if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE &&
|
||||
if (buildstate->buildMode == GIST_BUFFERING_ACTIVE &&
|
||||
buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
|
||||
{
|
||||
/* Adjust the target buffer size now */
|
||||
@ -493,10 +837,10 @@ gistBuildCallback(Relation index,
|
||||
* To avoid excessive calls to smgrnblocks(), only check this every
|
||||
* BUFFERING_MODE_SWITCH_CHECK_STEP index tuples
|
||||
*/
|
||||
if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO &&
|
||||
if ((buildstate->buildMode == GIST_BUFFERING_AUTO &&
|
||||
buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
|
||||
effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) ||
|
||||
(buildstate->bufferingMode == GIST_BUFFERING_STATS &&
|
||||
(buildstate->buildMode == GIST_BUFFERING_STATS &&
|
||||
buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
|
||||
{
|
||||
/*
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/float.h"
|
||||
#include "utils/geo_decls.h"
|
||||
#include "utils/sortsupport.h"
|
||||
|
||||
|
||||
static bool gist_box_leaf_consistent(BOX *key, BOX *query,
|
||||
@ -31,6 +32,15 @@ static bool gist_box_leaf_consistent(BOX *key, BOX *query,
|
||||
static bool rtree_internal_consistent(BOX *key, BOX *query,
|
||||
StrategyNumber strategy);
|
||||
|
||||
static uint64 point_zorder_internal(float4 x, float4 y);
|
||||
static uint64 part_bits32_by2(uint32 x);
|
||||
static uint32 ieee_float32_to_uint32(float f);
|
||||
static int gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup);
|
||||
static Datum gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup);
|
||||
static int gist_bbox_zorder_cmp_abbrev(Datum z1, Datum z2, SortSupport ssup);
|
||||
static bool gist_bbox_zorder_abbrev_abort(int memtupcount, SortSupport ssup);
|
||||
|
||||
|
||||
/* Minimum accepted ratio of split */
|
||||
#define LIMIT_RATIO 0.3
|
||||
|
||||
@ -1540,3 +1550,222 @@ gist_poly_distance(PG_FUNCTION_ARGS)
|
||||
|
||||
PG_RETURN_FLOAT8(distance);
|
||||
}
|
||||
|
||||
/*
|
||||
* Z-order routines for fast index build
|
||||
*/
|
||||
|
||||
/*
|
||||
* Compute Z-value of a point
|
||||
*
|
||||
* Z-order (also known as Morton Code) maps a two-dimensional point to a
|
||||
* single integer, in a way that preserves locality. Points that are close in
|
||||
* the two-dimensional space are mapped to integer that are not far from each
|
||||
* other. We do that by interleaving the bits in the X and Y components.
|
||||
*
|
||||
* Morton Code is normally defined only for integers, but the X and Y values
|
||||
* of a point are floating point. We expect floats to be in IEEE format.
|
||||
*/
|
||||
static uint64
|
||||
point_zorder_internal(float4 x, float4 y)
|
||||
{
|
||||
uint32 ix = ieee_float32_to_uint32(x);
|
||||
uint32 iy = ieee_float32_to_uint32(y);
|
||||
|
||||
/* Interleave the bits */
|
||||
return part_bits32_by2(ix) | (part_bits32_by2(iy) << 1);
|
||||
}
|
||||
|
||||
/* Interleave 32 bits with zeroes */
|
||||
static uint64
|
||||
part_bits32_by2(uint32 x)
|
||||
{
|
||||
uint64 n = x;
|
||||
|
||||
n = (n | (n << 16)) & UINT64CONST(0x0000FFFF0000FFFF);
|
||||
n = (n | (n << 8)) & UINT64CONST(0x00FF00FF00FF00FF);
|
||||
n = (n | (n << 4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F);
|
||||
n = (n | (n << 2)) & UINT64CONST(0x3333333333333333);
|
||||
n = (n | (n << 1)) & UINT64CONST(0x5555555555555555);
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
 * Convert a 32-bit IEEE float to uint32 in a way that preserves the ordering
 *
 * For any two non-NaN inputs a and b, a < b implies that the result for a is
 * smaller than the result for b, and a == b implies equal results (this
 * includes +0 and -0, which compare equal as floats).  All NaNs are mapped
 * to the single value FFFFFFFF, which is larger than any non-NaN result.
 */
static uint32
ieee_float32_to_uint32(float f)
{
    /*----
     *
     * IEEE 754 floating point format
     * ------------------------------
     *
     * IEEE 754 floating point numbers have this format:
     *
     *   exponent (8 bits)
     *   |
     * s eeeeeeee mmmmmmmmmmmmmmmmmmmmmmm
     * |          |
     * sign       mantissa (23 bits)
     *
     * Infinity has all bits in the exponent set and the mantissa is all
     * zeros. Negative infinity is the same but with the sign bit set.
     *
     * NaNs are represented with all bits in the exponent set, and at least
     * one bit in the mantissa also set. The mantissa bits can be used to
     * distinguish different kinds of NaNs.
     *
     * The IEEE format has the nice property that when you take the bit
     * representation and interpret it as an integer, the order is preserved,
     * except for the sign. That holds for the +-Infinity values too.
     *
     * Mapping to uint32
     * -----------------
     *
     * In order to have a smooth transition from negative to positive numbers,
     * we map floats to unsigned integers like this:
     *
     * x < 0 to range 007FFFFF-7FFFFFFE  (-Infinity is 007FFFFF)
     * x = 0 to value 80000000           (both positive and negative zero)
     * x > 0 to range 80000001-FF800000  (+Infinity is FF800000)
     *
     * We don't care to distinguish different kind of NaNs, so they are all
     * mapped to the same arbitrary value, FFFFFFFF. Because of the IEEE bit
     * representation of NaNs, there aren't any non-NaN values that would be
     * mapped to FFFFFFFF. In fact, there is a range of unused values on both
     * ends of the uint32 space.
     */
    union
    {
        float       f;
        uint32      i;
    }           u;

    if (isnan(f))
        return 0xFFFFFFFF;

    /*
     * Negative zero has its sign bit set, so the bit-flip below would send it
     * to 7FFFFFFF while positive zero goes to 80000000.  The two compare
     * equal as floats, so give them the same image.
     */
    if (f == 0)
        return 0x80000000;

    u.f = f;

    /* Check the sign bit */
    if ((u.i & 0x80000000) != 0)
    {
        /*
         * Map the negative value to the lower half of the range.  Inverting
         * every bit reverses the order of the magnitudes and clears the sign
         * bit in the same instruction.
         */
        Assert(f < 0);
        u.i ^= 0xFFFFFFFF;
    }
    else
    {
        /* Map the positive value to the upper half of the range */
        u.i |= 0x80000000;
    }

    return u.i;
}
|
||||
|
||||
/*
|
||||
* Compare the Z-order of points
|
||||
*/
|
||||
static int
|
||||
gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup)
|
||||
{
|
||||
Point *p1 = &(DatumGetBoxP(a)->low);
|
||||
Point *p2 = &(DatumGetBoxP(b)->low);
|
||||
uint64 z1;
|
||||
uint64 z2;
|
||||
|
||||
/*
|
||||
* Do a quick check for equality first. It's not clear if this is worth it
|
||||
* in general, but certainly is when used as tie-breaker with abbreviated
|
||||
* keys,
|
||||
*/
|
||||
if (p1->x == p2->x && p1->y == p2->y)
|
||||
return 0;
|
||||
|
||||
z1 = point_zorder_internal(p1->x, p1->y);
|
||||
z2 = point_zorder_internal(p2->x, p2->y);
|
||||
if (z1 > z2)
|
||||
return 1;
|
||||
else if (z1 < z2)
|
||||
return -1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Abbreviated version of Z-order comparison
|
||||
*
|
||||
* The abbreviated format is a Z-order value computed from the two 32-bit
|
||||
* floats. If SIZEOF_DATUM == 8, the 64-bit Z-order value fits fully in the
|
||||
* abbreviated Datum, otherwise use its most significant bits.
|
||||
*/
|
||||
static Datum
|
||||
gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup)
|
||||
{
|
||||
Point *p = &(DatumGetBoxP(original)->low);
|
||||
uint64 z;
|
||||
|
||||
z = point_zorder_internal(p->x, p->y);
|
||||
|
||||
#if SIZEOF_DATUM == 8
|
||||
return (Datum) z;
|
||||
#else
|
||||
return (Datum) (z >> 32);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
gist_bbox_zorder_cmp_abbrev(Datum z1, Datum z2, SortSupport ssup)
|
||||
{
|
||||
/*
|
||||
* Compare the pre-computed Z-orders as unsigned integers. Datum is a
|
||||
* typedef for 'uintptr_t', so no casting is required.
|
||||
*/
|
||||
if (z1 > z2)
|
||||
return 1;
|
||||
else if (z1 < z2)
|
||||
return -1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* We never consider aborting the abbreviation.
|
||||
*
|
||||
* On 64-bit systems, the abbreviation is not lossy so it is always
|
||||
* worthwhile. (Perhaps it's not on 32-bit systems, but we don't bother
|
||||
* with logic to decide.)
|
||||
*/
|
||||
static bool
|
||||
gist_bbox_zorder_abbrev_abort(int memtupcount, SortSupport ssup)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sort support routine for fast GiST index build by sorting.
|
||||
*/
|
||||
Datum
|
||||
gist_point_sortsupport(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
|
||||
if (ssup->abbreviate)
|
||||
{
|
||||
ssup->comparator = gist_bbox_zorder_cmp_abbrev;
|
||||
ssup->abbrev_converter = gist_bbox_zorder_abbrev_convert;
|
||||
ssup->abbrev_abort = gist_bbox_zorder_abbrev_abort;
|
||||
ssup->abbrev_full_comparator = gist_bbox_zorder_cmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
ssup->comparator = gist_bbox_zorder_cmp;
|
||||
}
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
@ -572,12 +572,31 @@ gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
|
||||
|
||||
IndexTuple
|
||||
gistFormTuple(GISTSTATE *giststate, Relation r,
|
||||
Datum attdata[], bool isnull[], bool isleaf)
|
||||
Datum *attdata, bool *isnull, bool isleaf)
|
||||
{
|
||||
Datum compatt[INDEX_MAX_KEYS];
|
||||
int i;
|
||||
IndexTuple res;
|
||||
|
||||
gistCompressValues(giststate, r, attdata, isnull, isleaf, compatt);
|
||||
|
||||
res = index_form_tuple(isleaf ? giststate->leafTupdesc :
|
||||
giststate->nonLeafTupdesc,
|
||||
compatt, isnull);
|
||||
|
||||
/*
|
||||
* The offset number on tuples on internal pages is unused. For historical
|
||||
* reasons, it is set to 0xffff.
|
||||
*/
|
||||
ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
gistCompressValues(GISTSTATE *giststate, Relation r,
|
||||
Datum *attdata, bool *isnull, bool isleaf, Datum *compatt)
|
||||
{
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Call the compress method on each attribute.
|
||||
*/
|
||||
@ -617,17 +636,6 @@ gistFormTuple(GISTSTATE *giststate, Relation r,
|
||||
compatt[i] = attdata[i];
|
||||
}
|
||||
}
|
||||
|
||||
res = index_form_tuple(isleaf ? giststate->leafTupdesc :
|
||||
giststate->nonLeafTupdesc,
|
||||
compatt, isnull);
|
||||
|
||||
/*
|
||||
* The offset number on tuples on internal pages is unused. For historical
|
||||
* reasons, it is set to 0xffff.
|
||||
*/
|
||||
ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -745,14 +753,11 @@ gistpenalty(GISTSTATE *giststate, int attno,
|
||||
* Initialize a new index page
|
||||
*/
|
||||
void
|
||||
GISTInitBuffer(Buffer b, uint32 f)
|
||||
gistinitpage(Page page, uint32 f)
|
||||
{
|
||||
GISTPageOpaque opaque;
|
||||
Page page;
|
||||
Size pageSize;
|
||||
Size pageSize = BLCKSZ;
|
||||
|
||||
pageSize = BufferGetPageSize(b);
|
||||
page = BufferGetPage(b);
|
||||
PageInit(page, pageSize, sizeof(GISTPageOpaqueData));
|
||||
|
||||
opaque = GistPageGetOpaque(page);
|
||||
@ -763,6 +768,18 @@ GISTInitBuffer(Buffer b, uint32 f)
|
||||
opaque->gist_page_id = GIST_PAGE_ID;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize a new index buffer
|
||||
*/
|
||||
void
|
||||
GISTInitBuffer(Buffer b, uint32 f)
|
||||
{
|
||||
Page page;
|
||||
|
||||
page = BufferGetPage(b);
|
||||
gistinitpage(page, f);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that a freshly-read page looks sane.
|
||||
*/
|
||||
|
@ -143,6 +143,10 @@ gistvalidate(Oid opclassoid)
|
||||
case GIST_OPTIONS_PROC:
|
||||
ok = check_amoptsproc_signature(procform->amproc);
|
||||
break;
|
||||
case GIST_SORTSUPPORT_PROC:
|
||||
ok = check_amproc_signature(procform->amproc, VOIDOID, true,
|
||||
1, 1, INTERNALOID);
|
||||
break;
|
||||
default:
|
||||
ereport(INFO,
|
||||
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||
@ -263,7 +267,7 @@ gistvalidate(Oid opclassoid)
|
||||
continue; /* got it */
|
||||
if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC ||
|
||||
i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC ||
|
||||
i == GIST_OPTIONS_PROC)
|
||||
i == GIST_OPTIONS_PROC || i == GIST_SORTSUPPORT_PROC)
|
||||
continue; /* optional methods */
|
||||
ereport(INFO,
|
||||
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||
|
@ -1019,6 +1019,63 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
return recptr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Like log_newpage(), but allows logging multiple pages in one operation.
|
||||
* It is more efficient than calling log_newpage() for each page separately,
|
||||
* because we can write multiple pages in a single WAL record.
|
||||
*/
|
||||
void
|
||||
log_newpages(RelFileNode *rnode, ForkNumber forkNum, int num_pages,
|
||||
BlockNumber *blknos, Page *pages, bool page_std)
|
||||
{
|
||||
int flags;
|
||||
XLogRecPtr recptr;
|
||||
int i;
|
||||
int j;
|
||||
|
||||
flags = REGBUF_FORCE_IMAGE;
|
||||
if (page_std)
|
||||
flags |= REGBUF_STANDARD;
|
||||
|
||||
/*
|
||||
* Iterate over all the pages. They are collected into batches of
|
||||
* XLR_MAX_BLOCK_ID pages, and a single WAL-record is written for each
|
||||
* batch.
|
||||
*/
|
||||
XLogEnsureRecordSpace(XLR_MAX_BLOCK_ID - 1, 0);
|
||||
|
||||
i = 0;
|
||||
while (i < num_pages)
|
||||
{
|
||||
int batch_start = i;
|
||||
int nbatch;
|
||||
|
||||
XLogBeginInsert();
|
||||
|
||||
nbatch = 0;
|
||||
while (nbatch < XLR_MAX_BLOCK_ID && i < num_pages)
|
||||
{
|
||||
XLogRegisterBlock(nbatch, rnode, forkNum, blknos[i], pages[i], flags);
|
||||
i++;
|
||||
nbatch++;
|
||||
}
|
||||
|
||||
recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
|
||||
|
||||
for (j = batch_start; j < i; j++)
|
||||
{
|
||||
/*
|
||||
* The page may be uninitialized. If so, we can't set the LSN because that
|
||||
* would corrupt the page.
|
||||
*/
|
||||
if (!PageIsNew(pages[j]))
|
||||
{
|
||||
PageSetLSN(pages[j], recptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Write a WAL record containing a full image of a page.
|
||||
*
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/gist.h"
|
||||
#include "access/nbtree.h"
|
||||
#include "catalog/pg_am.h"
|
||||
#include "fmgr.h"
|
||||
@ -175,3 +176,36 @@ PrepareSortSupportFromIndexRel(Relation indexRel, int16 strategy,
|
||||
|
||||
FinishSortSupportFunction(opfamily, opcintype, ssup);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill in SortSupport given a GiST index relation
|
||||
*
|
||||
* Caller must previously have zeroed the SortSupportData structure and then
|
||||
* filled in ssup_cxt, ssup_attno, ssup_collation, and ssup_nulls_first. This
|
||||
* will fill in ssup_reverse (always false for GiST index build), as well as
|
||||
* the comparator function pointer.
|
||||
*/
|
||||
void
|
||||
PrepareSortSupportFromGistIndexRel(Relation indexRel, SortSupport ssup)
|
||||
{
|
||||
Oid opfamily = indexRel->rd_opfamily[ssup->ssup_attno - 1];
|
||||
Oid opcintype = indexRel->rd_opcintype[ssup->ssup_attno - 1];
|
||||
Oid sortSupportFunction;
|
||||
|
||||
Assert(ssup->comparator == NULL);
|
||||
|
||||
if (indexRel->rd_rel->relam != GIST_AM_OID)
|
||||
elog(ERROR, "unexpected non-gist AM: %u", indexRel->rd_rel->relam);
|
||||
ssup->ssup_reverse = false;
|
||||
|
||||
/*
|
||||
* Look up the sort support function. This is simpler than for B-tree
|
||||
* indexes because we don't support the old-style btree comparators.
|
||||
*/
|
||||
sortSupportFunction = get_opfamily_proc(opfamily, opcintype, opcintype,
|
||||
GIST_SORTSUPPORT_PROC);
|
||||
if (!OidIsValid(sortSupportFunction))
|
||||
elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
|
||||
GIST_SORTSUPPORT_PROC, opcintype, opcintype, opfamily);
|
||||
OidFunctionCall1(sortSupportFunction, PointerGetDatum(ssup));
|
||||
}
|
||||
|
@ -1167,6 +1167,63 @@ tuplesort_begin_index_hash(Relation heapRel,
|
||||
return state;
|
||||
}
|
||||
|
||||
Tuplesortstate *
|
||||
tuplesort_begin_index_gist(Relation heapRel,
|
||||
Relation indexRel,
|
||||
int workMem,
|
||||
SortCoordinate coordinate,
|
||||
bool randomAccess)
|
||||
{
|
||||
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
|
||||
randomAccess);
|
||||
MemoryContext oldcontext;
|
||||
int i;
|
||||
|
||||
oldcontext = MemoryContextSwitchTo(state->sortcontext);
|
||||
|
||||
#ifdef TRACE_SORT
|
||||
if (trace_sort)
|
||||
elog(LOG,
|
||||
"begin index sort: workMem = %d, randomAccess = %c",
|
||||
workMem, randomAccess ? 't' : 'f');
|
||||
#endif
|
||||
|
||||
state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
|
||||
|
||||
state->comparetup = comparetup_index_btree;
|
||||
state->copytup = copytup_index;
|
||||
state->writetup = writetup_index;
|
||||
state->readtup = readtup_index;
|
||||
|
||||
state->heapRel = heapRel;
|
||||
state->indexRel = indexRel;
|
||||
|
||||
/* Prepare SortSupport data for each column */
|
||||
state->sortKeys = (SortSupport) palloc0(state->nKeys *
|
||||
sizeof(SortSupportData));
|
||||
|
||||
for (i = 0; i < state->nKeys; i++)
|
||||
{
|
||||
SortSupport sortKey = state->sortKeys + i;
|
||||
|
||||
sortKey->ssup_cxt = CurrentMemoryContext;
|
||||
sortKey->ssup_collation = indexRel->rd_indcollation[i];
|
||||
sortKey->ssup_nulls_first = false;
|
||||
sortKey->ssup_attno = i + 1;
|
||||
/* Convey if abbreviation optimization is applicable in principle */
|
||||
sortKey->abbreviate = (i == 0);
|
||||
|
||||
AssertState(sortKey->ssup_attno != 0);
|
||||
|
||||
/* Look for a sort support function */
|
||||
PrepareSortSupportFromGistIndexRel(indexRel, sortKey);
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
Tuplesortstate *
|
||||
tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
|
||||
bool nullsFirstFlag, int workMem,
|
||||
|
@ -37,7 +37,8 @@
|
||||
#define GIST_DISTANCE_PROC 8
|
||||
#define GIST_FETCH_PROC 9
|
||||
#define GIST_OPTIONS_PROC 10
|
||||
#define GISTNProcs 10
|
||||
#define GIST_SORTSUPPORT_PROC 11
|
||||
#define GISTNProcs 11
|
||||
|
||||
/*
|
||||
* Page opaque data in a GiST index page.
|
||||
|
@ -501,12 +501,15 @@ extern IndexTuple gistgetadjusted(Relation r,
|
||||
GISTSTATE *giststate);
|
||||
extern IndexTuple gistFormTuple(GISTSTATE *giststate,
|
||||
Relation r, Datum *attdata, bool *isnull, bool isleaf);
|
||||
extern void gistCompressValues(GISTSTATE *giststate, Relation r,
|
||||
Datum *attdata, bool *isnull, bool isleaf, Datum *compatt);
|
||||
|
||||
extern OffsetNumber gistchoose(Relation r, Page p,
|
||||
IndexTuple it,
|
||||
GISTSTATE *giststate);
|
||||
|
||||
extern void GISTInitBuffer(Buffer b, uint32 f);
|
||||
extern void gistinitpage(Page page, uint32 f);
|
||||
extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
|
||||
Datum k, Relation r, Page pg, OffsetNumber o,
|
||||
bool l, bool isNull);
|
||||
|
@ -54,6 +54,8 @@ extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
|
||||
|
||||
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
|
||||
BlockNumber blk, char *page, bool page_std);
|
||||
extern void log_newpages(RelFileNode *rnode, ForkNumber forkNum, int num_pages,
|
||||
BlockNumber *blknos, char **pages, bool page_std);
|
||||
extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
|
||||
extern void log_newpage_range(Relation rel, ForkNumber forkNum,
|
||||
BlockNumber startblk, BlockNumber endblk, bool page_std);
|
||||
|
@ -53,6 +53,7 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
/* FIXME: bump this before pushing! */
|
||||
#define CATALOG_VERSION_NO 202009031
|
||||
|
||||
#endif
|
||||
|
@ -480,6 +480,8 @@
|
||||
amproc => 'gist_point_distance' },
|
||||
{ amprocfamily => 'gist/point_ops', amproclefttype => 'point',
|
||||
amprocrighttype => 'point', amprocnum => '9', amproc => 'gist_point_fetch' },
|
||||
{ amprocfamily => 'gist/point_ops', amproclefttype => 'point',
|
||||
amprocrighttype => 'point', amprocnum => '11', amproc => 'gist_point_sortsupport' },
|
||||
{ amprocfamily => 'gist/box_ops', amproclefttype => 'box',
|
||||
amprocrighttype => 'box', amprocnum => '1', amproc => 'gist_box_consistent' },
|
||||
{ amprocfamily => 'gist/box_ops', amproclefttype => 'box',
|
||||
|
@ -8062,6 +8062,9 @@
|
||||
proname => 'gist_poly_distance', prorettype => 'float8',
|
||||
proargtypes => 'internal polygon int2 oid internal',
|
||||
prosrc => 'gist_poly_distance' },
|
||||
{ oid => '3435', descr => 'sort support',
|
||||
proname => 'gist_point_sortsupport', prorettype => 'void',
|
||||
proargtypes => 'internal', prosrc => 'gist_point_sortsupport' },
|
||||
|
||||
# GIN array support
|
||||
{ oid => '2743', descr => 'GIN array support',
|
||||
|
@ -272,5 +272,6 @@ extern void PrepareSortSupportComparisonShim(Oid cmpFunc, SortSupport ssup);
|
||||
extern void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup);
|
||||
extern void PrepareSortSupportFromIndexRel(Relation indexRel, int16 strategy,
|
||||
SortSupport ssup);
|
||||
extern void PrepareSortSupportFromGistIndexRel(Relation indexRel, SortSupport ssup);
|
||||
|
||||
#endif /* SORTSUPPORT_H */
|
||||
|
@ -217,6 +217,10 @@ extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel,
|
||||
uint32 max_buckets,
|
||||
int workMem, SortCoordinate coordinate,
|
||||
bool randomAccess);
|
||||
extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
|
||||
Relation indexRel,
|
||||
int workMem, SortCoordinate coordinate,
|
||||
bool randomAccess);
|
||||
extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
|
||||
Oid sortOperator, Oid sortCollation,
|
||||
bool nullsFirstFlag,
|
||||
|
@ -523,8 +523,8 @@ SELECT * FROM point_tbl ORDER BY f1 <-> '0,1';
|
||||
SELECT * FROM point_tbl ORDER BY f1 <-> '0,1';
|
||||
f1
|
||||
-------------------
|
||||
(0,0)
|
||||
(1e-300,-1e-300)
|
||||
(0,0)
|
||||
(-3,4)
|
||||
(-10,0)
|
||||
(10,10)
|
||||
@ -561,8 +561,8 @@ SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1';
|
||||
SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1';
|
||||
f1
|
||||
-------------------
|
||||
(0,0)
|
||||
(1e-300,-1e-300)
|
||||
(0,0)
|
||||
(-3,4)
|
||||
(-10,0)
|
||||
(10,10)
|
||||
@ -584,8 +584,8 @@ SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0
|
||||
SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1';
|
||||
f1
|
||||
------------------
|
||||
(0,0)
|
||||
(1e-300,-1e-300)
|
||||
(0,0)
|
||||
(-3,4)
|
||||
(-10,0)
|
||||
(10,10)
|
||||
|
Loading…
Reference in New Issue
Block a user