Get rid of our dependency on type "long" for memory size calculations.

Consistently use "Size" (or size_t, or in some places int64 or double)
as the type for variables holding memory allocation sizes.  In most
places variables' data types were fine already, but we had an ancient
habit of computing bytes from kilobytes-units GUCs with code like
"work_mem * 1024L".  That risks overflow on Win64 where they did not
make "long" as wide as "size_t".  We worked around that by restricting
such GUCs' ranges, so you couldn't set work_mem et al higher than 2GB
on Win64.  This patch removes that restriction, after replacing such
calculations with "work_mem * (Size) 1024" or variants of that.

It should be noted that this patch was constructed by searching
outwards from the GUCs that have MAX_KILOBYTES as upper limit.
So I can't positively guarantee there are no other places doing
memory-size arithmetic in int or long variables.  I do however feel
pretty confident that increasing MAX_KILOBYTES on Win64 is safe now.
Also, nothing in our code should be dealing in multiple-gigabyte
allocations without authorization from a relevant GUC, so it seems
pretty likely that this search caught everything that could be at
risk of overflow.

Author: Vladlen Popolitov <v.popolitov@postgrespro.ru>
Co-authored-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/1a01f0-66ec2d80-3b-68487680@27595217
This commit is contained in:
Tom Lane 2025-01-31 13:52:40 -05:00
parent f8d8581ed8
commit 041e8b95b8
20 changed files with 50 additions and 46 deletions

View File

@ -39,7 +39,7 @@
int gin_pending_list_limit = 0;
#define GIN_PAGE_FREESIZE \
( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )
( (Size) BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )
typedef struct KeyArray
{
@ -456,7 +456,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
* ginInsertCleanup() should not be called inside our CRIT_SECTION.
*/
cleanupSize = GinGetPendingListCleanupSize(index);
if (metadata->nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * 1024L)
if (metadata->nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * (Size) 1024)
needCleanup = true;
UnlockReleaseBuffer(metabuffer);
@ -795,7 +795,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
blknoFinish;
bool cleanupFinish = false;
bool fsm_vac = false;
Size workMemory;
int workMemory;
/*
* We would like to prevent concurrent cleanup process. For that we will
@ -901,7 +901,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
*/
if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
(GinPageHasFullRow(page) &&
(accum.allocatedMemory >= workMemory * 1024L)))
accum.allocatedMemory >= workMemory * (Size) 1024))
{
ItemPointerData *list;
uint32 nlist;

View File

@ -125,7 +125,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
CompactAttribute *attr;
/* Initialize empty bitmap result */
scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
scanEntry->matchBitmap = tbm_create(work_mem * (Size) 1024, NULL);
/* Null query cannot partial-match anything */
if (scanEntry->isPartialMatch &&

View File

@ -288,7 +288,7 @@ ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
values[i], isnull[i], tid);
/* If we've maxed out our available memory, dump everything to the index */
if (buildstate->accum.allocatedMemory >= (Size) maintenance_work_mem * 1024L)
if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
{
ItemPointerData *list;
Datum key;

View File

@ -120,7 +120,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
double reltuples;
double allvisfrac;
uint32 num_buckets;
long sort_threshold;
Size sort_threshold;
HashBuildState buildstate;
/*
@ -155,13 +155,13 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* one page. Also, "initial index size" accounting does not include the
* metapage, nor the first bitmap page.
*/
sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
sort_threshold = (maintenance_work_mem * (Size) 1024) / BLCKSZ;
if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
sort_threshold = Min(sort_threshold, NBuffers);
else
sort_threshold = Min(sort_threshold, NLocBuffer);
if (num_buckets >= (uint32) sort_threshold)
if (num_buckets >= sort_threshold)
buildstate.spool = _h_spoolinit(heap, index, num_buckets);
else
buildstate.spool = NULL;

View File

@ -2070,7 +2070,7 @@ lazy_vacuum(LVRelState *vacrel)
*/
threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
bypass = (vacrel->lpdead_item_pages < threshold &&
(TidStoreMemoryUsage(vacrel->dead_items) < (32L * 1024L * 1024L)));
TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
}
if (bypass)
@ -3037,7 +3037,7 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
*/
dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
dead_items_info->max_bytes = vac_work_mem * 1024L;
dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
dead_items_info->num_items = 0;
vacrel->dead_items_info = dead_items_info;

View File

@ -2953,7 +2953,7 @@ _bt_lock_subtree_parent(Relation rel, Relation heaprel, BlockNumber child,
void
_bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
{
int64 maxbufsize;
Size maxbufsize;
/*
* Don't bother with optimization in cleanup-only case -- we don't expect
@ -2969,12 +2969,13 @@ _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
* int overflow here.
*/
vstate->bufsize = 256;
maxbufsize = (work_mem * 1024L) / sizeof(BTPendingFSM);
maxbufsize = Min(maxbufsize, INT_MAX);
maxbufsize = (work_mem * (Size) 1024) / sizeof(BTPendingFSM);
maxbufsize = Min(maxbufsize, MaxAllocSize / sizeof(BTPendingFSM));
/* BTVacState.maxbufsize has type int */
maxbufsize = Min(maxbufsize, INT_MAX);
/* Stay sane with small work_mem */
maxbufsize = Max(maxbufsize, vstate->bufsize);
vstate->maxbufsize = maxbufsize;
vstate->maxbufsize = (int) maxbufsize;
/* Allocate buffer, indicate that there are currently 0 pending pages */
vstate->pendingpages = palloc(sizeof(BTPendingFSM) * vstate->bufsize);

View File

@ -375,7 +375,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
(nindexes_mwm > 0) ?
maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
maintenance_work_mem;
shared->dead_items_info.max_bytes = vac_work_mem * 1024L;
shared->dead_items_info.max_bytes = vac_work_mem * (size_t) 1024;
/* Prepare DSA space for dead items */
dead_items = TidStoreCreateShared(shared->dead_items_info.max_bytes,

View File

@ -326,7 +326,7 @@ CreateWorkExprContext(EState *estate)
Size maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE;
/* choose the maxBlockSize to be no larger than 1/16 of work_mem */
while (16 * maxBlockSize > work_mem * 1024L)
while (maxBlockSize > work_mem * (Size) 1024 / 16)
maxBlockSize >>= 1;
if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE)

View File

@ -91,7 +91,7 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
else
{
/* XXX should we use less than work_mem for this? */
tbm = tbm_create(work_mem * 1024L,
tbm = tbm_create(work_mem * (Size) 1024,
((BitmapIndexScan *) node->ss.ps.plan)->isshared ?
node->ss.ps.state->es_query_dsa : NULL);
}

View File

@ -143,7 +143,7 @@ MultiExecBitmapOr(BitmapOrState *node)
if (result == NULL) /* first subplan */
{
/* XXX should we use less than work_mem for this? */
result = tbm_create(work_mem * 1024L,
result = tbm_create(work_mem * (Size) 1024,
((BitmapOr *) node->ps.plan)->isshared ?
node->ps.state->es_query_dsa : NULL);
}

View File

@ -263,7 +263,7 @@ static int tbm_shared_comparator(const void *left, const void *right,
* be allocated from the DSA.
*/
TIDBitmap *
tbm_create(long maxbytes, dsa_area *dsa)
tbm_create(Size maxbytes, dsa_area *dsa)
{
TIDBitmap *tbm;
@ -273,7 +273,7 @@ tbm_create(long maxbytes, dsa_area *dsa)
tbm->mcxt = CurrentMemoryContext;
tbm->status = TBM_EMPTY;
tbm->maxentries = (int) tbm_calculate_entries(maxbytes);
tbm->maxentries = tbm_calculate_entries(maxbytes);
tbm->lossify_start = 0;
tbm->dsa = dsa;
tbm->dsapagetable = InvalidDsaPointer;
@ -1539,10 +1539,10 @@ pagetable_free(pagetable_hash *pagetable, void *pointer)
*
* Estimate number of hashtable entries we can have within maxbytes.
*/
long
tbm_calculate_entries(double maxbytes)
int
tbm_calculate_entries(Size maxbytes)
{
long nbuckets;
Size nbuckets;
/*
* Estimate number of hashtable entries we can have within maxbytes. This
@ -1555,7 +1555,7 @@ tbm_calculate_entries(double maxbytes)
nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
nbuckets = Max(nbuckets, 16); /* sanity limit */
return nbuckets;
return (int) nbuckets;
}
/*

View File

@ -1903,7 +1903,7 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
double input_bytes = relation_byte_size(tuples, width);
double output_bytes;
double output_tuples;
long sort_mem_bytes = sort_mem * 1024L;
int64 sort_mem_bytes = sort_mem * (int64) 1024;
/*
* We want to be sure the cost of a sort is never estimated as zero, even
@ -2488,7 +2488,7 @@ cost_material(Path *path,
Cost startup_cost = input_startup_cost;
Cost run_cost = input_total_cost - input_startup_cost;
double nbytes = relation_byte_size(tuples, width);
long work_mem_bytes = work_mem * 1024L;
double work_mem_bytes = work_mem * (Size) 1024;
path->rows = tuples;
@ -4028,7 +4028,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
else if (enable_material && innersortkeys != NIL &&
relation_byte_size(inner_path_rows,
inner_path->pathtarget->width) >
(work_mem * 1024L))
work_mem * (Size) 1024)
path->materialize_inner = true;
else
path->materialize_inner = false;
@ -4663,7 +4663,7 @@ cost_rescan(PlannerInfo *root, Path *path,
Cost run_cost = cpu_tuple_cost * path->rows;
double nbytes = relation_byte_size(path->rows,
path->pathtarget->width);
long work_mem_bytes = work_mem * 1024L;
double work_mem_bytes = work_mem * (Size) 1024;
if (nbytes > work_mem_bytes)
{
@ -4690,7 +4690,7 @@ cost_rescan(PlannerInfo *root, Path *path,
Cost run_cost = cpu_operator_cost * path->rows;
double nbytes = relation_byte_size(path->rows,
path->pathtarget->width);
long work_mem_bytes = work_mem * 1024L;
double work_mem_bytes = work_mem * (Size) 1024;
if (nbytes > work_mem_bytes)
{
@ -6496,7 +6496,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
double pages_fetched;
double tuples_fetched;
double heap_pages;
long maxentries;
double maxentries;
/*
* Fetch total cost of obtaining the bitmap, as well as its total
@ -6527,7 +6527,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
* the bitmap at one time.)
*/
heap_pages = Min(pages_fetched, baserel->pages);
maxentries = tbm_calculate_entries(work_mem * 1024L);
maxentries = tbm_calculate_entries(work_mem * (Size) 1024);
if (loop_count > 1)
{

View File

@ -6887,7 +6887,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid)
* parallel worker to sort.
*/
while (parallel_workers > 0 &&
maintenance_work_mem / (parallel_workers + 1) < 32768L)
maintenance_work_mem / (parallel_workers + 1) < 32 * 1024)
parallel_workers--;
done:

View File

@ -3643,7 +3643,7 @@ ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
* haven't exceeded the memory limit.
*/
if (debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_BUFFERED &&
rb->size < logical_decoding_work_mem * 1024L)
rb->size < logical_decoding_work_mem * (Size) 1024)
return;
/*
@ -3656,7 +3656,7 @@ ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
* because a user can reduce the logical_decoding_work_mem to a smaller
* value before the most recent change.
*/
while (rb->size >= logical_decoding_work_mem * 1024L ||
while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
(debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE &&
rb->size > 0))
{
@ -3699,8 +3699,7 @@ ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
}
/* We must be under the memory limit now. */
Assert(rb->size < logical_decoding_work_mem * 1024L);
Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
}
/*

View File

@ -265,7 +265,7 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
state->truncated = false;
state->usedDisk = false;
state->maxSpace = 0;
state->allowedMem = maxKBytes * 1024L;
state->allowedMem = maxKBytes * (int64) 1024;
state->availMem = state->allowedMem;
state->myfile = NULL;

View File

@ -147,7 +147,7 @@ typedef struct HashMemoryChunkData
typedef struct HashMemoryChunkData *HashMemoryChunk;
#define HASH_CHUNK_SIZE (32 * 1024L)
#define HASH_CHUNK_SIZE ((Size) (32 * 1024))
#define HASH_CHUNK_HEADER_SIZE MAXALIGN(sizeof(HashMemoryChunkData))
#define HASH_CHUNK_DATA(hc) (((char *) (hc)) + HASH_CHUNK_HEADER_SIZE)
/* tuples exceeding HASH_CHUNK_THRESHOLD bytes are put in their own chunk */

View File

@ -62,7 +62,7 @@ typedef struct TBMIterateResult
/* function prototypes in nodes/tidbitmap.c */
extern TIDBitmap *tbm_create(long maxbytes, dsa_area *dsa);
extern TIDBitmap *tbm_create(Size maxbytes, dsa_area *dsa);
extern void tbm_free(TIDBitmap *tbm);
extern void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp);
@ -84,7 +84,7 @@ extern void tbm_end_private_iterate(TBMPrivateIterator *iterator);
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
dsa_pointer dp);
extern long tbm_calculate_entries(double maxbytes);
extern int tbm_calculate_entries(Size maxbytes);
extern TBMIterator tbm_begin_iterate(TIDBitmap *tbm,
dsa_area *dsa, dsa_pointer dsp);

View File

@ -97,7 +97,7 @@ typedef pg_atomic_uint64 dsa_pointer_atomic;
#define DSA_DEFAULT_INIT_SEGMENT_SIZE ((size_t) (1 * 1024 * 1024))
/* The minimum size of a DSM segment. */
#define DSA_MIN_SEGMENT_SIZE ((size_t) (256 * 1024L))
#define DSA_MIN_SEGMENT_SIZE ((size_t) (256 * 1024))
/* The maximum size of a DSM segment. */
#define DSA_MAX_SEGMENT_SIZE ((size_t) 1 << DSA_OFFSET_WIDTH)

View File

@ -17,9 +17,13 @@
#include "utils/array.h"
/* upper limit for GUC variables measured in kilobytes of memory */
/* note that various places assume the byte size fits in a "long" variable */
#if SIZEOF_SIZE_T > 4 && SIZEOF_LONG > 4
/*
* Maximum for integer GUC variables that are measured in kilobytes of memory.
* This value is chosen to ensure that the corresponding number of bytes fits
* into a variable of type size_t or ssize_t. Be sure to compute the number
* of bytes like "guc_var * (Size) 1024" to avoid int-width overflow.
*/
#if SIZEOF_SIZE_T > 4
#define MAX_KILOBYTES INT_MAX
#else
#define MAX_KILOBYTES (INT_MAX / 1024)

View File

@ -76,7 +76,7 @@ create_and_test_bloom(int power, int64 nelements, int callerseed)
int64 nfalsepos;
bloom_filter *filter;
bloom_work_mem = (1L << power) / 8L / 1024L;
bloom_work_mem = ((int64) 1 << power) / (8 * 1024);
elog(DEBUG1, "bloom_work_mem (KB): %d", bloom_work_mem);