2017-02-03 03:12:58 +08:00
|
|
|
/*
 * hashfuncs.c
 *		Functions to investigate the content of HASH indexes
 *
 * Copyright (c) 2017-2022, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *		contrib/pageinspect/hashfuncs.c
 */
|
|
|
|
|
|
|
|
#include "postgres.h"

#include "access/hash.h"
#include "access/htup_details.h"
#include "catalog/pg_am.h"
#include "catalog/pg_class.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "pageinspect.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/rel.h"
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
/* SQL-callable entry points defined in this file */
PG_FUNCTION_INFO_V1(hash_page_type);
PG_FUNCTION_INFO_V1(hash_page_stats);
PG_FUNCTION_INFO_V1(hash_page_items);
PG_FUNCTION_INFO_V1(hash_bitmap_info);
PG_FUNCTION_INFO_V1(hash_metapage_info);

/* Does relation "r" use the hash access method? */
#define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID)
|
|
|
|
|
|
|
|
/* ------------------------------------------------
|
|
|
|
* structure for single hash page statistics
|
|
|
|
* ------------------------------------------------
|
|
|
|
*/
|
|
|
|
typedef struct HashPageStat
|
|
|
|
{
|
2017-02-03 11:29:29 +08:00
|
|
|
int live_items;
|
|
|
|
int dead_items;
|
|
|
|
int page_size;
|
|
|
|
int free_size;
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
/* opaque data */
|
|
|
|
BlockNumber hasho_prevblkno;
|
|
|
|
BlockNumber hasho_nextblkno;
|
|
|
|
Bucket hasho_bucket;
|
|
|
|
uint16 hasho_flag;
|
|
|
|
uint16 hasho_page_id;
|
|
|
|
} HashPageStat;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify that the given bytea contains a HASH page, or die in the attempt.
|
2017-02-04 00:34:41 +08:00
|
|
|
* A pointer to a palloc'd, properly aligned copy of the page is returned.
|
2017-02-03 03:12:58 +08:00
|
|
|
*/
|
|
|
|
static Page
|
|
|
|
verify_hash_page(bytea *raw_page, int flags)
|
|
|
|
{
|
2017-02-04 00:34:41 +08:00
|
|
|
Page page = get_page_from_raw(raw_page);
|
2017-04-06 02:17:23 +08:00
|
|
|
int pagetype = LH_UNUSED_PAGE;
|
2017-02-03 03:12:58 +08:00
|
|
|
|
2017-04-06 02:17:23 +08:00
|
|
|
/* Treat new pages as unused. */
|
|
|
|
if (!PageIsNew(page))
|
|
|
|
{
|
|
|
|
HashPageOpaque pageopaque;
|
2017-02-03 03:12:58 +08:00
|
|
|
|
2017-04-06 02:17:23 +08:00
|
|
|
if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
|
|
|
|
ereport(ERROR,
|
pageinspect: Add more sanity checks to prevent out-of-bound reads
A couple of code paths use the special area on the page passed by the
function caller, expecting to find some data in it. However, feeding
an incorrect page can lead to out-of-bound reads when trying to access
the page special area (like a heap page that has no special area,
leading PageGetSpecialPointer() to grab a pointer outside the allocated
page).
The functions used for hash and btree indexes have some protection
already against that, while some other functions using a relation OID
as argument would make sure that the access method involved is correct,
but functions taking in input a raw page without knowing the relation
the page is attached to would run into problems.
This commit improves the set of checks used in the code paths of BRIN,
btree (including one check if a leaf page is found with a non-zero
level), GIN and GiST to verify that the page given in input has a
special area size that fits with each access method, which is done
though PageGetSpecialSize(), becore calling PageGetSpecialPointer().
The scope of the checks done is limited to work with pages that one
would pass after getting a block with get_raw_page(), as it is possible
to craft byteas that could bypass existing code paths. Having too many
checks would also impact the usability of pageinspect, as the existing
code is very useful to look at the content details in a corrupted page,
so the focus is really to avoid out-of-bound reads as this is never a
good thing even with functions whose execution is limited to
superusers.
The safest approach could be to rework the functions so as these fetch a
block using a relation OID and a block number, but there are also cases
where using a raw page is useful.
Tests are added to cover all the code paths that needed such checks, and
an error message for hash indexes is reworded to fit better with what
this commit adds.
Reported-By: Alexander Lakhin
Author: Julien Rouhaud, Michael Paquier
Discussion: https://postgr.es/m/16527-ef7606186f0610a1@postgresql.org
Discussion: https://postgr.es/m/561e187b-3549-c8d5-03f5-525c14e65bd0@postgrespro.ru
Backpatch-through: 10
2022-03-27 16:53:40 +08:00
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("input page is not a valid %s page", "hash"),
|
|
|
|
errdetail("Expected special size %d, got %d.",
|
|
|
|
(int) MAXALIGN(sizeof(HashPageOpaqueData)),
|
|
|
|
(int) PageGetSpecialSize(page))));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
2017-04-06 02:17:23 +08:00
|
|
|
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
if (pageopaque->hasho_page_id != HASHO_PAGE_ID)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
pageinspect: Add more sanity checks to prevent out-of-bound reads
A couple of code paths use the special area on the page passed by the
function caller, expecting to find some data in it. However, feeding
an incorrect page can lead to out-of-bound reads when trying to access
the page special area (like a heap page that has no special area,
leading PageGetSpecialPointer() to grab a pointer outside the allocated
page).
The functions used for hash and btree indexes have some protection
already against that, while some other functions using a relation OID
as argument would make sure that the access method involved is correct,
but functions taking in input a raw page without knowing the relation
the page is attached to would run into problems.
This commit improves the set of checks used in the code paths of BRIN,
btree (including one check if a leaf page is found with a non-zero
level), GIN and GiST to verify that the page given in input has a
special area size that fits with each access method, which is done
though PageGetSpecialSize(), becore calling PageGetSpecialPointer().
The scope of the checks done is limited to work with pages that one
would pass after getting a block with get_raw_page(), as it is possible
to craft byteas that could bypass existing code paths. Having too many
checks would also impact the usability of pageinspect, as the existing
code is very useful to look at the content details in a corrupted page,
so the focus is really to avoid out-of-bound reads as this is never a
good thing even with functions whose execution is limited to
superusers.
The safest approach could be to rework the functions so as these fetch a
block using a relation OID and a block number, but there are also cases
where using a raw page is useful.
Tests are added to cover all the code paths that needed such checks, and
an error message for hash indexes is reworded to fit better with what
this commit adds.
Reported-By: Alexander Lakhin
Author: Julien Rouhaud, Michael Paquier
Discussion: https://postgr.es/m/16527-ef7606186f0610a1@postgresql.org
Discussion: https://postgr.es/m/561e187b-3549-c8d5-03f5-525c14e65bd0@postgrespro.ru
Backpatch-through: 10
2022-03-27 16:53:40 +08:00
|
|
|
errmsg("input page is not a valid %s page", "hash"),
|
2017-04-06 02:17:23 +08:00
|
|
|
errdetail("Expected %08x, got %08x.",
|
|
|
|
HASHO_PAGE_ID, pageopaque->hasho_page_id)));
|
|
|
|
|
|
|
|
pagetype = pageopaque->hasho_flag & LH_PAGE_TYPE;
|
|
|
|
}
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
/* Check that page type is sane. */
|
|
|
|
if (pagetype != LH_OVERFLOW_PAGE && pagetype != LH_BUCKET_PAGE &&
|
2017-04-06 02:17:23 +08:00
|
|
|
pagetype != LH_BITMAP_PAGE && pagetype != LH_META_PAGE &&
|
|
|
|
pagetype != LH_UNUSED_PAGE)
|
2017-02-03 03:12:58 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("invalid hash page type %08x", pagetype)));
|
|
|
|
|
|
|
|
/* If requested, verify page type. */
|
|
|
|
if (flags != 0 && (pagetype & flags) == 0)
|
|
|
|
{
|
|
|
|
switch (flags)
|
|
|
|
{
|
|
|
|
case LH_META_PAGE:
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("page is not a hash meta page")));
|
2018-05-02 07:35:08 +08:00
|
|
|
break;
|
2017-02-03 03:12:58 +08:00
|
|
|
case LH_BUCKET_PAGE | LH_OVERFLOW_PAGE:
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("page is not a hash bucket or overflow page")));
|
2018-05-02 07:35:08 +08:00
|
|
|
break;
|
2017-02-03 03:12:58 +08:00
|
|
|
case LH_OVERFLOW_PAGE:
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("page is not a hash overflow page")));
|
2018-05-02 07:35:08 +08:00
|
|
|
break;
|
2017-02-03 03:12:58 +08:00
|
|
|
default:
|
|
|
|
elog(ERROR,
|
|
|
|
"hash page of type %08x not in mask %08x",
|
|
|
|
pagetype, flags);
|
2018-05-02 07:35:08 +08:00
|
|
|
break;
|
2017-02-03 03:12:58 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If it is the metapage, also verify magic number and version.
|
|
|
|
*/
|
|
|
|
if (pagetype == LH_META_PAGE)
|
|
|
|
{
|
|
|
|
HashMetaPage metap = HashPageGetMeta(page);
|
|
|
|
|
|
|
|
if (metap->hashm_magic != HASH_MAGIC)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INDEX_CORRUPTED),
|
|
|
|
errmsg("invalid magic number for metadata"),
|
|
|
|
errdetail("Expected 0x%08x, got 0x%08x.",
|
|
|
|
HASH_MAGIC, metap->hashm_magic)));
|
|
|
|
|
|
|
|
if (metap->hashm_version != HASH_VERSION)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INDEX_CORRUPTED),
|
|
|
|
errmsg("invalid version for metadata"),
|
pageinspect: Add more sanity checks to prevent out-of-bound reads
A couple of code paths use the special area on the page passed by the
function caller, expecting to find some data in it. However, feeding
an incorrect page can lead to out-of-bound reads when trying to access
the page special area (like a heap page that has no special area,
leading PageGetSpecialPointer() to grab a pointer outside the allocated
page).
The functions used for hash and btree indexes have some protection
already against that, while some other functions using a relation OID
as argument would make sure that the access method involved is correct,
but functions taking in input a raw page without knowing the relation
the page is attached to would run into problems.
This commit improves the set of checks used in the code paths of BRIN,
btree (including one check if a leaf page is found with a non-zero
level), GIN and GiST to verify that the page given in input has a
special area size that fits with each access method, which is done
though PageGetSpecialSize(), becore calling PageGetSpecialPointer().
The scope of the checks done is limited to work with pages that one
would pass after getting a block with get_raw_page(), as it is possible
to craft byteas that could bypass existing code paths. Having too many
checks would also impact the usability of pageinspect, as the existing
code is very useful to look at the content details in a corrupted page,
so the focus is really to avoid out-of-bound reads as this is never a
good thing even with functions whose execution is limited to
superusers.
The safest approach could be to rework the functions so as these fetch a
block using a relation OID and a block number, but there are also cases
where using a raw page is useful.
Tests are added to cover all the code paths that needed such checks, and
an error message for hash indexes is reworded to fit better with what
this commit adds.
Reported-By: Alexander Lakhin
Author: Julien Rouhaud, Michael Paquier
Discussion: https://postgr.es/m/16527-ef7606186f0610a1@postgresql.org
Discussion: https://postgr.es/m/561e187b-3549-c8d5-03f5-525c14e65bd0@postgrespro.ru
Backpatch-through: 10
2022-03-27 16:53:40 +08:00
|
|
|
errdetail("Expected %d, got %d.",
|
2017-02-03 03:12:58 +08:00
|
|
|
HASH_VERSION, metap->hashm_version)));
|
|
|
|
}
|
|
|
|
|
|
|
|
return page;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* -------------------------------------------------
|
|
|
|
* GetHashPageStatistics()
|
|
|
|
*
|
|
|
|
* Collect statistics of single hash page
|
|
|
|
* -------------------------------------------------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
GetHashPageStatistics(Page page, HashPageStat *stat)
|
|
|
|
{
|
|
|
|
OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
|
|
|
|
HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
int off;
|
|
|
|
|
|
|
|
stat->dead_items = stat->live_items = 0;
|
|
|
|
stat->page_size = PageGetPageSize(page);
|
|
|
|
|
|
|
|
/* hash page opaque data */
|
|
|
|
stat->hasho_prevblkno = opaque->hasho_prevblkno;
|
|
|
|
stat->hasho_nextblkno = opaque->hasho_nextblkno;
|
|
|
|
stat->hasho_bucket = opaque->hasho_bucket;
|
|
|
|
stat->hasho_flag = opaque->hasho_flag;
|
|
|
|
stat->hasho_page_id = opaque->hasho_page_id;
|
|
|
|
|
|
|
|
/* count live and dead tuples, and free space */
|
|
|
|
for (off = FirstOffsetNumber; off <= maxoff; off++)
|
|
|
|
{
|
|
|
|
ItemId id = PageGetItemId(page, off);
|
|
|
|
|
|
|
|
if (!ItemIdIsDead(id))
|
|
|
|
stat->live_items++;
|
|
|
|
else
|
|
|
|
stat->dead_items++;
|
|
|
|
}
|
|
|
|
stat->free_size = PageGetFreeSpace(page);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ---------------------------------------------------
|
|
|
|
* hash_page_type()
|
|
|
|
*
|
|
|
|
* Usage: SELECT hash_page_type(get_raw_page('con_hash_index', 1));
|
|
|
|
* ---------------------------------------------------
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
hash_page_type(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
bytea *raw_page = PG_GETARG_BYTEA_P(0);
|
|
|
|
Page page;
|
|
|
|
HashPageOpaque opaque;
|
2017-04-15 05:04:25 +08:00
|
|
|
int pagetype;
|
|
|
|
const char *type;
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
if (!superuser())
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2020-01-31 00:32:04 +08:00
|
|
|
errmsg("must be superuser to use raw page functions")));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
page = verify_hash_page(raw_page, 0);
|
2017-04-06 02:17:23 +08:00
|
|
|
|
|
|
|
if (PageIsNew(page))
|
2017-02-03 03:12:58 +08:00
|
|
|
type = "unused";
|
2017-04-06 02:17:23 +08:00
|
|
|
else
|
|
|
|
{
|
|
|
|
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
|
|
|
|
/* page type (flags) */
|
2017-04-15 05:04:25 +08:00
|
|
|
pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
|
|
|
|
if (pagetype == LH_META_PAGE)
|
2017-04-06 02:17:23 +08:00
|
|
|
type = "metapage";
|
2017-04-15 05:04:25 +08:00
|
|
|
else if (pagetype == LH_OVERFLOW_PAGE)
|
2017-04-06 02:17:23 +08:00
|
|
|
type = "overflow";
|
2017-04-15 05:04:25 +08:00
|
|
|
else if (pagetype == LH_BUCKET_PAGE)
|
2017-04-06 02:17:23 +08:00
|
|
|
type = "bucket";
|
2017-04-15 05:04:25 +08:00
|
|
|
else if (pagetype == LH_BITMAP_PAGE)
|
2017-04-06 02:17:23 +08:00
|
|
|
type = "bitmap";
|
|
|
|
else
|
|
|
|
type = "unused";
|
|
|
|
}
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(cstring_to_text(type));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ---------------------------------------------------
|
|
|
|
* hash_page_stats()
|
|
|
|
*
|
|
|
|
* Usage: SELECT * FROM hash_page_stats(get_raw_page('con_hash_index', 1));
|
|
|
|
* ---------------------------------------------------
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
hash_page_stats(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
bytea *raw_page = PG_GETARG_BYTEA_P(0);
|
|
|
|
Page page;
|
|
|
|
int j;
|
|
|
|
Datum values[9];
|
|
|
|
bool nulls[9];
|
|
|
|
HashPageStat stat;
|
|
|
|
HeapTuple tuple;
|
|
|
|
TupleDesc tupleDesc;
|
|
|
|
|
|
|
|
if (!superuser())
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2020-01-31 00:32:04 +08:00
|
|
|
errmsg("must be superuser to use raw page functions")));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
|
|
|
|
|
|
|
|
/* keep compiler quiet */
|
|
|
|
stat.hasho_prevblkno = stat.hasho_nextblkno = InvalidBlockNumber;
|
|
|
|
stat.hasho_flag = stat.hasho_page_id = stat.free_size = 0;
|
|
|
|
|
|
|
|
GetHashPageStatistics(page, &stat);
|
|
|
|
|
|
|
|
/* Build a tuple descriptor for our result type */
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
|
|
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
tupleDesc = BlessTupleDesc(tupleDesc);
|
|
|
|
|
|
|
|
MemSet(nulls, 0, sizeof(nulls));
|
|
|
|
|
|
|
|
j = 0;
|
2017-02-03 11:29:29 +08:00
|
|
|
values[j++] = Int32GetDatum(stat.live_items);
|
|
|
|
values[j++] = Int32GetDatum(stat.dead_items);
|
|
|
|
values[j++] = Int32GetDatum(stat.page_size);
|
|
|
|
values[j++] = Int32GetDatum(stat.free_size);
|
|
|
|
values[j++] = Int64GetDatum((int64) stat.hasho_prevblkno);
|
|
|
|
values[j++] = Int64GetDatum((int64) stat.hasho_nextblkno);
|
|
|
|
values[j++] = Int64GetDatum((int64) stat.hasho_bucket);
|
|
|
|
values[j++] = Int32GetDatum((int32) stat.hasho_flag);
|
|
|
|
values[j++] = Int32GetDatum((int32) stat.hasho_page_id);
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
tuple = heap_form_tuple(tupleDesc, values, nulls);
|
|
|
|
|
|
|
|
PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cross-call data structure for SRF
|
|
|
|
*/
|
|
|
|
struct user_args
|
|
|
|
{
|
|
|
|
Page page;
|
|
|
|
OffsetNumber offset;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*-------------------------------------------------------
|
|
|
|
* hash_page_items()
|
|
|
|
*
|
|
|
|
* Get IndexTupleData set in a hash page
|
|
|
|
*
|
|
|
|
* Usage: SELECT * FROM hash_page_items(get_raw_page('con_hash_index', 1));
|
|
|
|
*-------------------------------------------------------
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
hash_page_items(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
bytea *raw_page = PG_GETARG_BYTEA_P(0);
|
|
|
|
Page page;
|
|
|
|
Datum result;
|
|
|
|
Datum values[3];
|
|
|
|
bool nulls[3];
|
|
|
|
uint32 hashkey;
|
|
|
|
HeapTuple tuple;
|
|
|
|
FuncCallContext *fctx;
|
|
|
|
MemoryContext mctx;
|
|
|
|
struct user_args *uargs;
|
|
|
|
|
|
|
|
if (!superuser())
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2020-01-31 00:32:04 +08:00
|
|
|
errmsg("must be superuser to use raw page functions")));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
|
|
{
|
|
|
|
TupleDesc tupleDesc;
|
|
|
|
|
|
|
|
fctx = SRF_FIRSTCALL_INIT();
|
|
|
|
|
|
|
|
mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
|
|
|
|
|
2018-01-26 22:51:15 +08:00
|
|
|
page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
|
|
|
|
|
2017-02-03 03:12:58 +08:00
|
|
|
uargs = palloc(sizeof(struct user_args));
|
|
|
|
|
|
|
|
uargs->page = page;
|
|
|
|
|
|
|
|
uargs->offset = FirstOffsetNumber;
|
|
|
|
|
|
|
|
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
|
|
|
|
|
|
|
|
/* Build a tuple descriptor for our result type */
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
|
|
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
tupleDesc = BlessTupleDesc(tupleDesc);
|
|
|
|
|
|
|
|
fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
|
|
|
|
|
|
|
|
fctx->user_fctx = uargs;
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(mctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
fctx = SRF_PERCALL_SETUP();
|
|
|
|
uargs = fctx->user_fctx;
|
|
|
|
|
|
|
|
if (fctx->call_cntr < fctx->max_calls)
|
|
|
|
{
|
|
|
|
ItemId id;
|
|
|
|
IndexTuple itup;
|
|
|
|
int j;
|
|
|
|
|
|
|
|
id = PageGetItemId(uargs->page, uargs->offset);
|
|
|
|
|
|
|
|
if (!ItemIdIsValid(id))
|
|
|
|
elog(ERROR, "invalid ItemId");
|
|
|
|
|
|
|
|
itup = (IndexTuple) PageGetItem(uargs->page, id);
|
|
|
|
|
|
|
|
MemSet(nulls, 0, sizeof(nulls));
|
|
|
|
|
|
|
|
j = 0;
|
2017-02-04 05:28:13 +08:00
|
|
|
values[j++] = Int32GetDatum((int32) uargs->offset);
|
2017-02-03 03:12:58 +08:00
|
|
|
values[j++] = PointerGetDatum(&itup->t_tid);
|
|
|
|
|
|
|
|
hashkey = _hash_get_indextuple_hashkey(itup);
|
2017-02-04 05:28:13 +08:00
|
|
|
values[j] = Int64GetDatum((int64) hashkey);
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
tuple = heap_form_tuple(fctx->attinmeta->tupdesc, values, nulls);
|
|
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
|
|
|
|
uargs->offset = uargs->offset + 1;
|
|
|
|
|
|
|
|
SRF_RETURN_NEXT(fctx, result);
|
|
|
|
}
|
2020-03-17 09:36:53 +08:00
|
|
|
|
|
|
|
SRF_RETURN_DONE(fctx);
|
2017-02-03 03:12:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ------------------------------------------------
|
|
|
|
* hash_bitmap_info()
|
|
|
|
*
|
|
|
|
* Get bitmap information for a particular overflow page
|
|
|
|
*
|
|
|
|
* Usage: SELECT * FROM hash_bitmap_info('con_hash_index'::regclass, 5);
|
|
|
|
* ------------------------------------------------
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
hash_bitmap_info(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
Oid indexRelid = PG_GETARG_OID(0);
|
2021-01-19 17:28:05 +08:00
|
|
|
int64 ovflblkno = PG_GETARG_INT64(1);
|
2017-02-03 03:12:58 +08:00
|
|
|
HashMetaPage metap;
|
2017-02-10 03:02:58 +08:00
|
|
|
Buffer metabuf,
|
|
|
|
mapbuf;
|
2017-02-03 03:12:58 +08:00
|
|
|
BlockNumber bitmapblkno;
|
2017-02-10 03:02:58 +08:00
|
|
|
Page mappage;
|
2017-02-03 03:12:58 +08:00
|
|
|
bool bit = false;
|
|
|
|
TupleDesc tupleDesc;
|
|
|
|
Relation indexRel;
|
|
|
|
uint32 ovflbitno;
|
|
|
|
int32 bitmappage,
|
|
|
|
bitmapbit;
|
|
|
|
HeapTuple tuple;
|
2017-02-10 03:02:58 +08:00
|
|
|
int i,
|
|
|
|
j;
|
2017-02-03 03:12:58 +08:00
|
|
|
Datum values[3];
|
|
|
|
bool nulls[3];
|
2017-02-10 03:02:58 +08:00
|
|
|
uint32 *freep;
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
if (!superuser())
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2020-01-31 00:32:04 +08:00
|
|
|
errmsg("must be superuser to use raw page functions")));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
indexRel = index_open(indexRelid, AccessShareLock);
|
|
|
|
|
|
|
|
if (!IS_HASH(indexRel))
|
pageinspect: Fix handling of page sizes and AM types
This commit fixes a set of issues related to the use of the SQL
functions in this module when the caller is able to pass down raw page
data as input argument:
- The page size check was fuzzy in a couple of places, sometimes
looking after only a sub-range, but what we are looking for is an exact
match on BLCKSZ. After considering a few options here, I have settled
down to do a generalization of get_page_from_raw(). Most of the SQL
functions already used that, and this is not strictly required if not
accessing an 8-byte-wide value from a raw page, but this feels safer in
the long run for alignment-picky environment, particularly if a code
path begins to access such values. This also reduces the number of
strings that need to be translated.
- The BRIN function brin_page_items() uses a Relation but it did not
check the access method of the opened index, potentially leading to
crashes. All the other functions in need of a Relation already did
that.
- Some code paths could fail on elog(), but we should to use ereport()
for failures that can be triggered by the user.
Tests are added to stress all the cases that are fixed as of this
commit, with some junk raw pages (\set VERBOSITY ensures that this works
across all page sizes) and unexpected index types when functions open
relations.
Author: Michael Paquier, Justin Prysby
Discussion: https://postgr.es/m/20220218030020.GA1137@telsasoft.com
Backpatch-through: 10
2022-03-16 10:19:39 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
|
|
|
errmsg("\"%s\" is not a %s index",
|
|
|
|
RelationGetRelationName(indexRel), "hash")));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
if (RELATION_IS_OTHER_TEMP(indexRel))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("cannot access temporary tables of other sessions")));
|
|
|
|
|
2021-01-19 17:28:05 +08:00
|
|
|
if (ovflblkno < 0 || ovflblkno > MaxBlockNumber)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("invalid block number")));
|
|
|
|
|
2017-02-03 12:11:08 +08:00
|
|
|
if (ovflblkno >= RelationGetNumberOfBlocks(indexRel))
|
2017-02-03 03:12:58 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
2021-01-19 17:28:05 +08:00
|
|
|
errmsg("block number %lld is out of range for relation \"%s\"",
|
|
|
|
(long long int) ovflblkno, RelationGetRelationName(indexRel))));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
/* Read the metapage so we can determine which bitmap page to use */
|
|
|
|
metabuf = _hash_getbuf(indexRel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
|
|
|
|
metap = HashPageGetMeta(BufferGetPage(metabuf));
|
|
|
|
|
2017-02-10 03:02:58 +08:00
|
|
|
/*
|
|
|
|
* Reject attempt to read the bit for a metapage or bitmap page; this is
|
|
|
|
* only meaningful for overflow pages.
|
|
|
|
*/
|
|
|
|
if (ovflblkno == 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("invalid overflow block number %u",
|
|
|
|
(BlockNumber) ovflblkno)));
|
|
|
|
for (i = 0; i < metap->hashm_nmaps; i++)
|
|
|
|
if (metap->hashm_mapp[i] == ovflblkno)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("invalid overflow block number %u",
|
|
|
|
(BlockNumber) ovflblkno)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Identify overflow bit number. This will error out for primary bucket
|
|
|
|
* pages, and we've already rejected the metapage and bitmap pages above.
|
|
|
|
*/
|
2017-02-03 12:11:08 +08:00
|
|
|
ovflbitno = _hash_ovflblkno_to_bitno(metap, (BlockNumber) ovflblkno);
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
bitmappage = ovflbitno >> BMPG_SHIFT(metap);
|
|
|
|
bitmapbit = ovflbitno & BMPG_MASK(metap);
|
|
|
|
|
|
|
|
if (bitmappage >= metap->hashm_nmaps)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
2017-02-10 03:02:58 +08:00
|
|
|
errmsg("invalid overflow block number %u",
|
|
|
|
(BlockNumber) ovflblkno)));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
bitmapblkno = metap->hashm_mapp[bitmappage];
|
|
|
|
|
|
|
|
_hash_relbuf(indexRel, metabuf);
|
|
|
|
|
2017-02-10 03:02:58 +08:00
|
|
|
/* Check the status of bitmap bit for overflow page */
|
|
|
|
mapbuf = _hash_getbuf(indexRel, bitmapblkno, HASH_READ, LH_BITMAP_PAGE);
|
|
|
|
mappage = BufferGetPage(mapbuf);
|
|
|
|
freep = HashPageGetBitmap(mappage);
|
|
|
|
|
|
|
|
bit = ISSET(freep, bitmapbit) != 0;
|
|
|
|
|
|
|
|
_hash_relbuf(indexRel, mapbuf);
|
2017-02-03 03:12:58 +08:00
|
|
|
index_close(indexRel, AccessShareLock);
|
|
|
|
|
|
|
|
/* Build a tuple descriptor for our result type */
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
|
|
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
tupleDesc = BlessTupleDesc(tupleDesc);
|
|
|
|
|
|
|
|
MemSet(nulls, 0, sizeof(nulls));
|
|
|
|
|
|
|
|
j = 0;
|
2017-02-04 05:28:13 +08:00
|
|
|
values[j++] = Int64GetDatum((int64) bitmapblkno);
|
2017-02-03 03:12:58 +08:00
|
|
|
values[j++] = Int32GetDatum(bitmapbit);
|
|
|
|
values[j++] = BoolGetDatum(bit);
|
|
|
|
|
|
|
|
tuple = heap_form_tuple(tupleDesc, values, nulls);
|
|
|
|
|
|
|
|
PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ------------------------------------------------
|
|
|
|
* hash_metapage_info()
|
|
|
|
*
|
|
|
|
* Get the meta-page information for a hash index
|
|
|
|
*
|
|
|
|
* Usage: SELECT * FROM hash_metapage_info(get_raw_page('con_hash_index', 0))
|
|
|
|
* ------------------------------------------------
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
hash_metapage_info(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
bytea *raw_page = PG_GETARG_BYTEA_P(0);
|
|
|
|
Page page;
|
|
|
|
HashMetaPageData *metad;
|
|
|
|
TupleDesc tupleDesc;
|
|
|
|
HeapTuple tuple;
|
|
|
|
int i,
|
|
|
|
j;
|
|
|
|
Datum values[16];
|
|
|
|
bool nulls[16];
|
|
|
|
Datum spares[HASH_MAX_SPLITPOINTS];
|
|
|
|
Datum mapp[HASH_MAX_BITMAPS];
|
|
|
|
|
|
|
|
if (!superuser())
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2020-01-31 00:32:04 +08:00
|
|
|
errmsg("must be superuser to use raw page functions")));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
page = verify_hash_page(raw_page, LH_META_PAGE);
|
|
|
|
|
|
|
|
/* Build a tuple descriptor for our result type */
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
|
|
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
tupleDesc = BlessTupleDesc(tupleDesc);
|
|
|
|
|
|
|
|
metad = HashPageGetMeta(page);
|
|
|
|
|
|
|
|
MemSet(nulls, 0, sizeof(nulls));
|
|
|
|
|
|
|
|
j = 0;
|
2017-02-04 05:28:13 +08:00
|
|
|
values[j++] = Int64GetDatum((int64) metad->hashm_magic);
|
|
|
|
values[j++] = Int64GetDatum((int64) metad->hashm_version);
|
2017-02-03 03:12:58 +08:00
|
|
|
values[j++] = Float8GetDatum(metad->hashm_ntuples);
|
2017-02-04 05:28:13 +08:00
|
|
|
values[j++] = Int32GetDatum((int32) metad->hashm_ffactor);
|
|
|
|
values[j++] = Int32GetDatum((int32) metad->hashm_bsize);
|
|
|
|
values[j++] = Int32GetDatum((int32) metad->hashm_bmsize);
|
|
|
|
values[j++] = Int32GetDatum((int32) metad->hashm_bmshift);
|
|
|
|
values[j++] = Int64GetDatum((int64) metad->hashm_maxbucket);
|
|
|
|
values[j++] = Int64GetDatum((int64) metad->hashm_highmask);
|
|
|
|
values[j++] = Int64GetDatum((int64) metad->hashm_lowmask);
|
|
|
|
values[j++] = Int64GetDatum((int64) metad->hashm_ovflpoint);
|
|
|
|
values[j++] = Int64GetDatum((int64) metad->hashm_firstfree);
|
|
|
|
values[j++] = Int64GetDatum((int64) metad->hashm_nmaps);
|
|
|
|
values[j++] = ObjectIdGetDatum((Oid) metad->hashm_procid);
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
for (i = 0; i < HASH_MAX_SPLITPOINTS; i++)
|
2017-02-22 14:35:42 +08:00
|
|
|
spares[i] = Int64GetDatum((int64) metad->hashm_spares[i]);
|
2017-02-03 03:12:58 +08:00
|
|
|
values[j++] = PointerGetDatum(construct_array(spares,
|
|
|
|
HASH_MAX_SPLITPOINTS,
|
|
|
|
INT8OID,
|
2020-03-04 23:34:25 +08:00
|
|
|
sizeof(int64),
|
|
|
|
FLOAT8PASSBYVAL,
|
|
|
|
TYPALIGN_DOUBLE));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
for (i = 0; i < HASH_MAX_BITMAPS; i++)
|
2017-02-04 05:28:13 +08:00
|
|
|
mapp[i] = Int64GetDatum((int64) metad->hashm_mapp[i]);
|
2017-02-03 03:12:58 +08:00
|
|
|
values[j++] = PointerGetDatum(construct_array(mapp,
|
|
|
|
HASH_MAX_BITMAPS,
|
|
|
|
INT8OID,
|
2020-03-04 23:34:25 +08:00
|
|
|
sizeof(int64),
|
|
|
|
FLOAT8PASSBYVAL,
|
|
|
|
TYPALIGN_DOUBLE));
|
2017-02-03 03:12:58 +08:00
|
|
|
|
|
|
|
tuple = heap_form_tuple(tupleDesc, values, nulls);
|
|
|
|
|
|
|
|
PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
|
|
|
|
}
|