mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-11-27 07:21:09 +08:00
Optimize nbtree backward scan boundary cases.
Teach _bt_binsrch (and related helper routines like _bt_search and
_bt_compare) about the initial positioning requirements of backward
scans. Routines like _bt_binsrch already know all about "nextkey"
searches, so it seems natural to teach them about "goback"/backward
searches, too. These concepts are closely related, and are much easier
to understand when discussed together.
Now that certain implementation details are hidden from _bt_first, it's
straightforward to add a new optimization: backward scans using the <
strategy now avoid extra leaf page accesses in certain "boundary cases".
Consider the following example, which uses the tenk1 table (and its
tenk1_hundred index) from the standard regression tests:
SELECT * FROM tenk1 WHERE hundred < 12 ORDER BY hundred DESC LIMIT 1;
Before this commit, nbtree would scan two leaf pages, even though it was
only really necessary to scan one leaf page. We'll now descend straight
to the leaf page containing a (12, -inf) high key instead. The scan
will locate matching non-pivot tuples with "hundred" values starting
from the value 11. The scan won't waste a page access on the right
sibling leaf page, which cannot possibly contain any matching tuples.
You can think of the optimization added by this commit as disabling an
optimization (the _bt_compare "!pivotsearch" behavior that was added to
Postgres 12 in commit dd299df8
) for a small subset of cases where it was
always counterproductive.
Equivalently, you can think of the new optimization as extending the
"pivotsearch" behavior that page deletion by VACUUM has long required
(since the aforementioned Postgres 12 commit went in) to other, similar
cases. Obviously, this isn't strictly necessary for these new cases
(unlike VACUUM, _bt_first is prepared to move the scan to the left once
on the leaf level), but the underlying principle is the same.
Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Matthias van de Meent <boekewurm+postgres@gmail.com>
Discussion: https://postgr.es/m/CAH2-Wz=XPzM8HzaLPq278Vms420mVSHfgs9wi5tjFKHcapZCEw@mail.gmail.com
This commit is contained in:
parent
b437571714
commit
c9c0589fda
@ -3165,7 +3165,7 @@ invariant_l_offset(BtreeCheckState *state, BTScanInsert key,
|
||||
ItemId itemid;
|
||||
int32 cmp;
|
||||
|
||||
Assert(key->pivotsearch);
|
||||
Assert(!key->nextkey && key->backward);
|
||||
|
||||
/* Verify line pointer before checking tuple */
|
||||
itemid = PageGetItemIdCareful(state, state->targetblock, state->target,
|
||||
@ -3227,7 +3227,7 @@ invariant_leq_offset(BtreeCheckState *state, BTScanInsert key,
|
||||
{
|
||||
int32 cmp;
|
||||
|
||||
Assert(key->pivotsearch);
|
||||
Assert(!key->nextkey && key->backward);
|
||||
|
||||
cmp = _bt_compare(state->rel, key, state->target, upperbound);
|
||||
|
||||
@ -3250,7 +3250,7 @@ invariant_g_offset(BtreeCheckState *state, BTScanInsert key,
|
||||
{
|
||||
int32 cmp;
|
||||
|
||||
Assert(key->pivotsearch);
|
||||
Assert(!key->nextkey && key->backward);
|
||||
|
||||
cmp = _bt_compare(state->rel, key, state->target, lowerbound);
|
||||
|
||||
@ -3288,7 +3288,7 @@ invariant_l_nontarget_offset(BtreeCheckState *state, BTScanInsert key,
|
||||
ItemId itemid;
|
||||
int32 cmp;
|
||||
|
||||
Assert(key->pivotsearch);
|
||||
Assert(!key->nextkey && key->backward);
|
||||
|
||||
/* Verify line pointer before checking tuple */
|
||||
itemid = PageGetItemIdCareful(state, nontargetblock, nontarget,
|
||||
@ -3514,9 +3514,9 @@ palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum)
|
||||
* For example, invariant_g_offset() might miss a cross-page invariant failure
|
||||
* on an internal level if the scankey built from the first item on the
|
||||
* target's right sibling page happened to be equal to (not greater than) the
|
||||
* last item on target page. The !pivotsearch tiebreaker in _bt_compare()
|
||||
* might otherwise cause amcheck to assume (rather than actually verify) that
|
||||
* the scankey is greater.
|
||||
* last item on target page. The !backward tiebreaker in _bt_compare() might
|
||||
* otherwise cause amcheck to assume (rather than actually verify) that the
|
||||
* scankey is greater.
|
||||
*/
|
||||
static inline BTScanInsert
|
||||
bt_mkscankey_pivotsearch(Relation rel, IndexTuple itup)
|
||||
@ -3524,7 +3524,7 @@ bt_mkscankey_pivotsearch(Relation rel, IndexTuple itup)
|
||||
BTScanInsert skey;
|
||||
|
||||
skey = _bt_mkscankey(rel, itup);
|
||||
skey->pivotsearch = true;
|
||||
skey->backward = true;
|
||||
|
||||
return skey;
|
||||
}
|
||||
|
@ -1958,10 +1958,20 @@ _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
|
||||
return;
|
||||
}
|
||||
|
||||
/* we need an insertion scan key for the search, so build one */
|
||||
/*
|
||||
* We need an insertion scan key, so build one.
|
||||
*
|
||||
* _bt_search searches for the leaf page that contains any
|
||||
* matching non-pivot tuples, but we need it to "search" for
|
||||
* the high key pivot from the page that we're set to delete.
|
||||
* Compensate for the mismatch by having _bt_search locate the
|
||||
* last position < equal-to-untruncated-prefix non-pivots.
|
||||
*/
|
||||
itup_key = _bt_mkscankey(rel, targetkey);
|
||||
/* find the leftmost leaf page with matching pivot/high key */
|
||||
itup_key->pivotsearch = true;
|
||||
|
||||
/* Set up a BTLessStrategyNumber-like insertion scan key */
|
||||
itup_key->nextkey = false;
|
||||
itup_key->backward = true;
|
||||
stack = _bt_search(rel, NULL, itup_key, &sleafbuf, BT_READ);
|
||||
/* won't need a second lock or pin on leafbuf */
|
||||
_bt_relbuf(rel, sleafbuf);
|
||||
|
@ -317,18 +317,17 @@ _bt_moveright(Relation rel,
|
||||
/*
|
||||
* _bt_binsrch() -- Do a binary search for a key on a particular page.
|
||||
*
|
||||
* On a leaf page, _bt_binsrch() returns the OffsetNumber of the first
|
||||
* key >= given scankey, or > scankey if nextkey is true. (NOTE: in
|
||||
* particular, this means it is possible to return a value 1 greater than the
|
||||
* number of keys on the page, if the scankey is > all keys on the page.)
|
||||
*
|
||||
* On an internal (non-leaf) page, _bt_binsrch() returns the OffsetNumber
|
||||
* of the last key < given scankey, or last key <= given scankey if nextkey
|
||||
* is true. (Since _bt_compare treats the first data key of such a page as
|
||||
* minus infinity, there will be at least one key < scankey, so the result
|
||||
* always points at one of the keys on the page.) This key indicates the
|
||||
* right place to descend to be sure we find all leaf keys >= given scankey
|
||||
* (or leaf keys > given scankey when nextkey is true).
|
||||
* always points at one of the keys on the page.)
|
||||
*
|
||||
* On a leaf page, _bt_binsrch() returns the final result of the initial
|
||||
* positioning process that started with _bt_first's call to _bt_search.
|
||||
* We're returning a non-pivot tuple offset, so things are a little different.
|
||||
* It is possible that we'll return an offset that's either past the last
|
||||
* non-pivot slot, or (in the case of a backward scan) before the first slot.
|
||||
*
|
||||
* This procedure is not responsible for walking right, it just examines
|
||||
* the given page. _bt_binsrch() has no lock or refcount side effects
|
||||
@ -362,7 +361,7 @@ _bt_binsrch(Relation rel,
|
||||
* this covers two cases: the page is really empty (no keys), or it
|
||||
* contains only a high key. The latter case is possible after vacuuming.
|
||||
* This can never happen on an internal page, however, since they are
|
||||
* never empty (an internal page must have children).
|
||||
* never empty (an internal page must have at least one child).
|
||||
*/
|
||||
if (unlikely(high < low))
|
||||
return low;
|
||||
@ -398,18 +397,45 @@ _bt_binsrch(Relation rel,
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point we have high == low, but be careful: they could point
|
||||
* past the last slot on the page.
|
||||
* At this point we have high == low.
|
||||
*
|
||||
* On a leaf page, we always return the first key >= scan key (resp. >
|
||||
* scan key), which could be the last slot + 1.
|
||||
* On a leaf page we always return the first non-pivot tuple >= scan key
|
||||
* (resp. > scan key) for forward scan callers. For backward scans, it's
|
||||
* always the _last_ non-pivot tuple < scan key (resp. <= scan key).
|
||||
*/
|
||||
if (P_ISLEAF(opaque))
|
||||
{
|
||||
/*
|
||||
* In the backward scan case we're supposed to locate the last
|
||||
* matching tuple on the leaf level -- not the first matching tuple
|
||||
* (the last tuple will be the first one returned by the scan).
|
||||
*
|
||||
* At this point we've located the first non-pivot tuple immediately
|
||||
* after the last matching tuple (which might just be maxoff + 1).
|
||||
* Compensate by stepping back.
|
||||
*/
|
||||
if (key->backward)
|
||||
return OffsetNumberPrev(low);
|
||||
|
||||
return low;
|
||||
}
|
||||
|
||||
/*
|
||||
* On a non-leaf page, return the last key < scan key (resp. <= scan key).
|
||||
* There must be one if _bt_compare() is playing by the rules.
|
||||
*
|
||||
* _bt_compare() will seldom see any exactly-matching pivot tuples, since
|
||||
* a truncated -inf heap TID is usually enough to prevent it altogether.
|
||||
* Even omitted scan key entries are treated as > truncated attributes.
|
||||
*
|
||||
* However, during backward scans _bt_compare() interprets omitted scan
|
||||
* key attributes as == corresponding truncated -inf attributes instead.
|
||||
* This works just like < would work here. Under this scheme, < strategy
|
||||
* backward scans will always directly descend to the correct leaf page.
|
||||
* In particular, they will never incur an "extra" leaf page access with a
|
||||
* scan key that happens to contain the same prefix of values as some
|
||||
* pivot tuple's untruncated prefix. VACUUM relies on this guarantee when
|
||||
* it uses a leaf page high key to "re-find" a page undergoing deletion.
|
||||
*/
|
||||
Assert(low > P_FIRSTDATAKEY(opaque));
|
||||
|
||||
@ -767,10 +793,12 @@ _bt_compare(Relation rel,
|
||||
if (key->scantid == NULL)
|
||||
{
|
||||
/*
|
||||
* Most searches have a scankey that is considered greater than a
|
||||
* Forward scans have a scankey that is considered greater than a
|
||||
* truncated pivot tuple if and when the scankey has equal values for
|
||||
* attributes up to and including the least significant untruncated
|
||||
* attribute in tuple.
|
||||
* attribute in tuple. Even attributes that were omitted from the
|
||||
* scan key are considered greater than -inf truncated attributes.
|
||||
* (See _bt_binsrch for an explanation of our backward scan behavior.)
|
||||
*
|
||||
* For example, if an index has the minimum two attributes (single
|
||||
* user key attribute, plus heap TID attribute), and a page's high key
|
||||
@ -782,25 +810,13 @@ _bt_compare(Relation rel,
|
||||
* doesn't have to descend left because it isn't interested in a match
|
||||
* that has a heap TID value of -inf.
|
||||
*
|
||||
* However, some searches (pivotsearch searches) actually require that
|
||||
* we descend left when this happens. -inf is treated as a possible
|
||||
* match for omitted scankey attribute(s). This is needed by page
|
||||
* deletion, which must re-find leaf pages that are targets for
|
||||
* deletion using their high keys.
|
||||
*
|
||||
* Note: the heap TID part of the test ensures that scankey is being
|
||||
* compared to a pivot tuple with one or more truncated key
|
||||
* attributes.
|
||||
*
|
||||
* Note: pg_upgrade'd !heapkeyspace indexes must always descend to the
|
||||
* left here, since they have no heap TID attribute (and cannot have
|
||||
* any -inf key values in any case, since truncation can only remove
|
||||
* non-key attributes). !heapkeyspace searches must always be
|
||||
* prepared to deal with matches on both sides of the pivot once the
|
||||
* leaf level is reached.
|
||||
* compared to a pivot tuple with one or more truncated -inf key
|
||||
* attributes. The heap TID attribute is the last key attribute in
|
||||
* every index, of course, but other than that it isn't special.
|
||||
*/
|
||||
if (key->heapkeyspace && !key->pivotsearch &&
|
||||
key->keysz == ntupatts && heapTid == NULL)
|
||||
if (!key->backward && key->keysz == ntupatts && heapTid == NULL &&
|
||||
key->heapkeyspace)
|
||||
return 1;
|
||||
|
||||
/* All provided scankey arguments found to be equal */
|
||||
@ -865,12 +881,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
BTStack stack;
|
||||
OffsetNumber offnum;
|
||||
StrategyNumber strat;
|
||||
bool nextkey;
|
||||
bool goback;
|
||||
BTScanInsertData inskey;
|
||||
ScanKey startKeys[INDEX_MAX_KEYS];
|
||||
ScanKeyData notnullkeys[INDEX_MAX_KEYS];
|
||||
int keysCount = 0;
|
||||
int keysz = 0;
|
||||
int i;
|
||||
bool status;
|
||||
StrategyNumber strat_total;
|
||||
@ -1005,7 +1019,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
ScanDirectionIsBackward(dir)))
|
||||
{
|
||||
/* Yes, so build the key in notnullkeys[keysCount] */
|
||||
chosen = ¬nullkeys[keysCount];
|
||||
chosen = ¬nullkeys[keysz];
|
||||
ScanKeyEntryInitialize(chosen,
|
||||
(SK_SEARCHNOTNULL | SK_ISNULL |
|
||||
(impliesNN->sk_flags &
|
||||
@ -1026,7 +1040,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
*/
|
||||
if (chosen == NULL)
|
||||
break;
|
||||
startKeys[keysCount++] = chosen;
|
||||
startKeys[keysz++] = chosen;
|
||||
|
||||
/*
|
||||
* Adjust strat_total, and quit if we have stored a > or <
|
||||
@ -1100,7 +1114,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
* the tree. Walk down that edge to the first or last key, and scan from
|
||||
* there.
|
||||
*/
|
||||
if (keysCount == 0)
|
||||
if (keysz == 0)
|
||||
{
|
||||
bool match;
|
||||
|
||||
@ -1122,8 +1136,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
* identified by startKeys[]. (Remaining insertion scankey fields are
|
||||
* initialized after initial-positioning strategy is finalized.)
|
||||
*/
|
||||
Assert(keysCount <= INDEX_MAX_KEYS);
|
||||
for (i = 0; i < keysCount; i++)
|
||||
Assert(keysz <= INDEX_MAX_KEYS);
|
||||
for (i = 0; i < keysz; i++)
|
||||
{
|
||||
ScanKey cur = startKeys[i];
|
||||
|
||||
@ -1164,7 +1178,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
* did use has to be treated as just a ">=" or "<=" condition, and
|
||||
* so we'd better adjust strat_total accordingly.
|
||||
*/
|
||||
if (i == keysCount - 1)
|
||||
if (i == keysz - 1)
|
||||
{
|
||||
bool used_all_subkeys = false;
|
||||
|
||||
@ -1173,16 +1187,16 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
{
|
||||
subkey++;
|
||||
Assert(subkey->sk_flags & SK_ROW_MEMBER);
|
||||
if (subkey->sk_attno != keysCount + 1)
|
||||
if (subkey->sk_attno != keysz + 1)
|
||||
break; /* out-of-sequence, can't use it */
|
||||
if (subkey->sk_strategy != cur->sk_strategy)
|
||||
break; /* wrong direction, can't use it */
|
||||
if (subkey->sk_flags & SK_ISNULL)
|
||||
break; /* can't use null keys */
|
||||
Assert(keysCount < INDEX_MAX_KEYS);
|
||||
memcpy(inskey.scankeys + keysCount, subkey,
|
||||
Assert(keysz < INDEX_MAX_KEYS);
|
||||
memcpy(inskey.scankeys + keysz, subkey,
|
||||
sizeof(ScanKeyData));
|
||||
keysCount++;
|
||||
keysz++;
|
||||
if (subkey->sk_flags & SK_ROW_END)
|
||||
{
|
||||
used_all_subkeys = true;
|
||||
@ -1263,40 +1277,26 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
/*----------
|
||||
* Examine the selected initial-positioning strategy to determine exactly
|
||||
* where we need to start the scan, and set flag variables to control the
|
||||
* code below.
|
||||
*
|
||||
* If nextkey = false, _bt_search and _bt_binsrch will locate the first
|
||||
* item >= scan key. If nextkey = true, they will locate the first
|
||||
* item > scan key.
|
||||
*
|
||||
* If goback = true, we will then step back one item, while if
|
||||
* goback = false, we will start the scan on the located item.
|
||||
* initial descent by _bt_search (and our _bt_binsrch call for the leaf
|
||||
* page _bt_search returns).
|
||||
*----------
|
||||
*/
|
||||
_bt_metaversion(rel, &inskey.heapkeyspace, &inskey.allequalimage);
|
||||
inskey.anynullkeys = false; /* unused */
|
||||
inskey.scantid = NULL;
|
||||
inskey.keysz = keysz;
|
||||
switch (strat_total)
|
||||
{
|
||||
case BTLessStrategyNumber:
|
||||
|
||||
/*
|
||||
* Find first item >= scankey, then back up one to arrive at last
|
||||
* item < scankey. (Note: this positioning strategy is only used
|
||||
* for a backward scan, so that is always the correct starting
|
||||
* position.)
|
||||
*/
|
||||
nextkey = false;
|
||||
goback = true;
|
||||
inskey.nextkey = false;
|
||||
inskey.backward = true;
|
||||
break;
|
||||
|
||||
case BTLessEqualStrategyNumber:
|
||||
|
||||
/*
|
||||
* Find first item > scankey, then back up one to arrive at last
|
||||
* item <= scankey. (Note: this positioning strategy is only used
|
||||
* for a backward scan, so that is always the correct starting
|
||||
* position.)
|
||||
*/
|
||||
nextkey = true;
|
||||
goback = true;
|
||||
inskey.nextkey = true;
|
||||
inskey.backward = true;
|
||||
break;
|
||||
|
||||
case BTEqualStrategyNumber:
|
||||
@ -1308,41 +1308,37 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
if (ScanDirectionIsBackward(dir))
|
||||
{
|
||||
/*
|
||||
* This is the same as the <= strategy. We will check at the
|
||||
* end whether the found item is actually =.
|
||||
* This is the same as the <= strategy
|
||||
*/
|
||||
nextkey = true;
|
||||
goback = true;
|
||||
inskey.nextkey = true;
|
||||
inskey.backward = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* This is the same as the >= strategy. We will check at the
|
||||
* end whether the found item is actually =.
|
||||
* This is the same as the >= strategy
|
||||
*/
|
||||
nextkey = false;
|
||||
goback = false;
|
||||
inskey.nextkey = false;
|
||||
inskey.backward = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case BTGreaterEqualStrategyNumber:
|
||||
|
||||
/*
|
||||
* Find first item >= scankey. (This is only used for forward
|
||||
* scans.)
|
||||
* Find first item >= scankey
|
||||
*/
|
||||
nextkey = false;
|
||||
goback = false;
|
||||
inskey.nextkey = false;
|
||||
inskey.backward = false;
|
||||
break;
|
||||
|
||||
case BTGreaterStrategyNumber:
|
||||
|
||||
/*
|
||||
* Find first item > scankey. (This is only used for forward
|
||||
* scans.)
|
||||
* Find first item > scankey
|
||||
*/
|
||||
nextkey = true;
|
||||
goback = false;
|
||||
inskey.nextkey = true;
|
||||
inskey.backward = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -1351,18 +1347,11 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Initialize remaining insertion scan key fields */
|
||||
_bt_metaversion(rel, &inskey.heapkeyspace, &inskey.allequalimage);
|
||||
inskey.anynullkeys = false; /* unused */
|
||||
inskey.nextkey = nextkey;
|
||||
inskey.pivotsearch = false;
|
||||
inskey.scantid = NULL;
|
||||
inskey.keysz = keysCount;
|
||||
|
||||
/*
|
||||
* Use the manufactured insertion scan key to descend the tree and
|
||||
* position ourselves on the target leaf page.
|
||||
*/
|
||||
Assert(ScanDirectionIsBackward(dir) == inskey.backward);
|
||||
stack = _bt_search(rel, NULL, &inskey, &buf, BT_READ);
|
||||
|
||||
/* don't need to keep the stack around... */
|
||||
@ -1404,35 +1393,28 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
|
||||
/* position to the precise item on the page */
|
||||
offnum = _bt_binsrch(rel, &inskey, buf);
|
||||
|
||||
/*
|
||||
* If nextkey = false, we are positioned at the first item >= scan key, or
|
||||
* possibly at the end of a page on which all the existing items are less
|
||||
* than the scan key and we know that everything on later pages is greater
|
||||
* than or equal to scan key.
|
||||
*
|
||||
* If nextkey = true, we are positioned at the first item > scan key, or
|
||||
* possibly at the end of a page on which all the existing items are less
|
||||
* than or equal to the scan key and we know that everything on later
|
||||
* pages is greater than scan key.
|
||||
*
|
||||
* The actually desired starting point is either this item or the prior
|
||||
* one, or in the end-of-page case it's the first item on the next page or
|
||||
* the last item on this page. Adjust the starting offset if needed. (If
|
||||
* this results in an offset before the first item or after the last one,
|
||||
* _bt_readpage will report no items found, and then we'll step to the
|
||||
* next page as needed.)
|
||||
*/
|
||||
if (goback)
|
||||
offnum = OffsetNumberPrev(offnum);
|
||||
|
||||
/* remember which buffer we have pinned, if any */
|
||||
Assert(!BTScanPosIsValid(so->currPos));
|
||||
so->currPos.buf = buf;
|
||||
so->firstPage = true;
|
||||
|
||||
/*
|
||||
* Now load data from the first page of the scan.
|
||||
*
|
||||
* If inskey.nextkey = false and inskey.backward = false, offnum is
|
||||
* positioned at the first non-pivot tuple >= inskey.scankeys.
|
||||
*
|
||||
* If inskey.nextkey = false and inskey.backward = true, offnum is
|
||||
* positioned at the last non-pivot tuple < inskey.scankeys.
|
||||
*
|
||||
* If inskey.nextkey = true and inskey.backward = false, offnum is
|
||||
* positioned at the first non-pivot tuple > inskey.scankeys.
|
||||
*
|
||||
* If inskey.nextkey = true and inskey.backward = true, offnum is
|
||||
* positioned at the last non-pivot tuple <= inskey.scankeys.
|
||||
*
|
||||
* It's possible that _bt_binsrch returned an offnum that is out of bounds
|
||||
* for the page. For example, when inskey is both < the leaf page's high
|
||||
* key and > all of its non-pivot tuples, offnum will be "maxoff + 1".
|
||||
*/
|
||||
if (!_bt_readpage(scan, dir, offnum))
|
||||
{
|
||||
@ -1446,7 +1428,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Drop the lock, and maybe the pin, on the current page */
|
||||
/* We have at least one item to return as scan's first item */
|
||||
_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
|
||||
}
|
||||
|
||||
@ -1523,6 +1505,14 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
|
||||
* moreLeft or moreRight (as appropriate) is cleared if _bt_checkkeys reports
|
||||
* that there can be no more matching tuples in the current scan direction.
|
||||
*
|
||||
* _bt_first caller passes us an offnum returned by _bt_binsrch, which might
|
||||
* be an out of bounds offnum such as "maxoff + 1" in certain corner cases.
|
||||
* _bt_checkkeys will stop the scan as soon as an equality qual fails (when
|
||||
* its scan key was marked required), so _bt_first _must_ pass us an offnum
|
||||
* exactly at the beginning of where equal tuples are to be found. When we're
|
||||
* passed an offnum past the end of the page, we might still manage to stop
|
||||
* the scan on this page by calling _bt_checkkeys against the high key.
|
||||
*
|
||||
* In the case of a parallel scan, caller must have called _bt_parallel_seize
|
||||
* prior to calling this function; this function will invoke
|
||||
* _bt_parallel_release before returning.
|
||||
@ -1658,6 +1648,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
|
||||
}
|
||||
|
||||
itup = (IndexTuple) PageGetItem(page, iid);
|
||||
Assert(!BTreeTupleIsPivot(itup));
|
||||
|
||||
passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
|
||||
&continuescan, requiredMatchedByPrecheck);
|
||||
@ -1777,6 +1768,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
|
||||
tuple_alive = true;
|
||||
|
||||
itup = (IndexTuple) PageGetItem(page, iid);
|
||||
Assert(!BTreeTupleIsPivot(itup));
|
||||
|
||||
passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
|
||||
&continuescan, requiredMatchedByPrecheck);
|
||||
@ -2034,7 +2026,7 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
|
||||
if (!_bt_readnextpage(scan, blkno, dir))
|
||||
return false;
|
||||
|
||||
/* Drop the lock, and maybe the pin, on the current page */
|
||||
/* We have at least one item to return as scan's next item */
|
||||
_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
|
||||
|
||||
return true;
|
||||
@ -2235,7 +2227,7 @@ _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
|
||||
if (!_bt_readnextpage(scan, blkno, dir))
|
||||
return false;
|
||||
|
||||
/* Drop the lock, and maybe the pin, on the current page */
|
||||
/* We have at least one item to return as scan's next item */
|
||||
_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
|
||||
|
||||
return true;
|
||||
@ -2525,7 +2517,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Drop the lock, and maybe the pin, on the current page */
|
||||
/* We have at least one item to return as scan's first item */
|
||||
_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
|
||||
}
|
||||
|
||||
|
@ -62,14 +62,6 @@ static int _bt_keep_natts(Relation rel, IndexTuple lastleft,
|
||||
* Build an insertion scan key that contains comparison data from itup
|
||||
* as well as comparator routines appropriate to the key datatypes.
|
||||
*
|
||||
* When itup is a non-pivot tuple, the returned insertion scan key is
|
||||
* suitable for finding a place for it to go on the leaf level. Pivot
|
||||
* tuples can be used to re-find leaf page with matching high key, but
|
||||
* then caller needs to set scan key's pivotsearch field to true. This
|
||||
* allows caller to search for a leaf page with a matching high key,
|
||||
* which is usually to the left of the first leaf page a non-pivot match
|
||||
* might appear on.
|
||||
*
|
||||
* The result is intended for use with _bt_compare() and _bt_truncate().
|
||||
* Callers that don't need to fill out the insertion scankey arguments
|
||||
* (e.g. they use an ad-hoc comparison routine, or only need a scankey
|
||||
@ -120,8 +112,8 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
|
||||
key->allequalimage = false;
|
||||
}
|
||||
key->anynullkeys = false; /* initial assumption */
|
||||
key->nextkey = false;
|
||||
key->pivotsearch = false;
|
||||
key->nextkey = false; /* usual case, required by btinsert */
|
||||
key->backward = false; /* usual case, required by btinsert */
|
||||
key->keysz = Min(indnkeyatts, tupnatts);
|
||||
key->scantid = key->heapkeyspace && itup ?
|
||||
BTreeTupleGetHeapTID(itup) : NULL;
|
||||
|
@ -763,13 +763,8 @@ typedef BTStackData *BTStack;
|
||||
* bit, but may not when inserting into an INCLUDE index (tuple header value
|
||||
* is affected by the NULL-ness of both key and non-key attributes).
|
||||
*
|
||||
* When nextkey is false (the usual case), _bt_search and _bt_binsrch will
|
||||
* locate the first item >= scankey. When nextkey is true, they will locate
|
||||
* the first item > scan key.
|
||||
*
|
||||
* pivotsearch is set to true by callers that want to re-find a leaf page
|
||||
* using a scankey built from a leaf page's high key. Most callers set this
|
||||
* to false.
|
||||
* See comments in _bt_first for an explanation of the nextkey and backward
|
||||
* fields.
|
||||
*
|
||||
* scantid is the heap TID that is used as a final tiebreaker attribute. It
|
||||
* is set to NULL when index scan doesn't need to find a position for a
|
||||
@ -792,7 +787,7 @@ typedef struct BTScanInsertData
|
||||
bool allequalimage;
|
||||
bool anynullkeys;
|
||||
bool nextkey;
|
||||
bool pivotsearch;
|
||||
bool backward; /* backward index scan? */
|
||||
ItemPointer scantid; /* tiebreaker for scankeys */
|
||||
int keysz; /* Size of scankeys array */
|
||||
ScanKeyData scankeys[INDEX_MAX_KEYS]; /* Must appear last */
|
||||
|
@ -142,6 +142,53 @@ SELECT b.*
|
||||
4500 | 2080851358
|
||||
(1 row)
|
||||
|
||||
--
|
||||
-- Add coverage for optimization of backwards scan index descents
|
||||
--
|
||||
-- Here we expect _bt_search to descend straight to a leaf page containing a
|
||||
-- non-pivot tuple with the value '47', which comes last (after 11 similar
|
||||
-- non-pivot tuples). Query execution should only need to visit a single
|
||||
-- leaf page here.
|
||||
--
|
||||
-- Test case relies on tenk1_hundred index having a leaf page whose high key
|
||||
-- is '(48, -inf)'. We use a low cardinality index to make our test case less
|
||||
-- sensitive to implementation details that may change in the future.
|
||||
set enable_seqscan to false;
|
||||
set enable_indexscan to true;
|
||||
set enable_bitmapscan to false;
|
||||
explain (costs off)
|
||||
select hundred, twenty from tenk1 where hundred < 48 order by hundred desc limit 1;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------
|
||||
Limit
|
||||
-> Index Scan Backward using tenk1_hundred on tenk1
|
||||
Index Cond: (hundred < 48)
|
||||
(3 rows)
|
||||
|
||||
select hundred, twenty from tenk1 where hundred < 48 order by hundred desc limit 1;
|
||||
hundred | twenty
|
||||
---------+--------
|
||||
47 | 7
|
||||
(1 row)
|
||||
|
||||
-- This variant of the query need only return a single tuple located to the immediate
|
||||
-- right of the '(48, -inf)' high key. It also only needs to scan one single
|
||||
-- leaf page (the right sibling of the page scanned by the last test case):
|
||||
explain (costs off)
|
||||
select hundred, twenty from tenk1 where hundred <= 48 order by hundred desc limit 1;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------
|
||||
Limit
|
||||
-> Index Scan Backward using tenk1_hundred on tenk1
|
||||
Index Cond: (hundred <= 48)
|
||||
(3 rows)
|
||||
|
||||
select hundred, twenty from tenk1 where hundred <= 48 order by hundred desc limit 1;
|
||||
hundred | twenty
|
||||
---------+--------
|
||||
48 | 8
|
||||
(1 row)
|
||||
|
||||
--
|
||||
-- Check correct optimization of LIKE (special index operator support)
|
||||
-- for both indexscan and bitmapscan cases
|
||||
|
@ -110,6 +110,31 @@ SELECT b.*
|
||||
FROM bt_f8_heap b
|
||||
WHERE b.seqno = '4500'::float8;
|
||||
|
||||
--
|
||||
-- Add coverage for optimization of backwards scan index descents
|
||||
--
|
||||
-- Here we expect _bt_search to descend straight to a leaf page containing a
|
||||
-- non-pivot tuple with the value '47', which comes last (after 11 similar
|
||||
-- non-pivot tuples). Query execution should only need to visit a single
|
||||
-- leaf page here.
|
||||
--
|
||||
-- Test case relies on tenk1_hundred index having a leaf page whose high key
|
||||
-- is '(48, -inf)'. We use a low cardinality index to make our test case less
|
||||
-- sensitive to implementation details that may change in the future.
|
||||
set enable_seqscan to false;
|
||||
set enable_indexscan to true;
|
||||
set enable_bitmapscan to false;
|
||||
explain (costs off)
|
||||
select hundred, twenty from tenk1 where hundred < 48 order by hundred desc limit 1;
|
||||
select hundred, twenty from tenk1 where hundred < 48 order by hundred desc limit 1;
|
||||
|
||||
-- This variant of the query need only return a single tuple located to the immediate
|
||||
-- right of the '(48, -inf)' high key. It also only needs to scan one single
|
||||
-- leaf page (the right sibling of the page scanned by the last test case):
|
||||
explain (costs off)
|
||||
select hundred, twenty from tenk1 where hundred <= 48 order by hundred desc limit 1;
|
||||
select hundred, twenty from tenk1 where hundred <= 48 order by hundred desc limit 1;
|
||||
|
||||
--
|
||||
-- Check correct optimization of LIKE (special index operator support)
|
||||
-- for both indexscan and bitmapscan cases
|
||||
|
Loading…
Reference in New Issue
Block a user