Get rid of our dependency on type "long" for memory size calculations.

Consistently use "Size" (or size_t, or in some places int64 or double) as the type for variables holding memory allocation sizes. In most places variables' data types were fine already, but we had an ancient habit of computing bytes from kilobytes-units GUCs with code like "work_mem * 1024L". That risks overflow on Win64 where they did not make "long" as wide as "size_t". We worked around that by restricting such GUCs' ranges, so you couldn't set work_mem et al higher than 2GB on Win64. This patch removes that restriction, after replacing such calculations with "work_mem * (Size) 1024" or variants of that. It should be noted that this patch was constructed by searching outwards from the GUCs that have MAX_KILOBYTES as upper limit. So I can't positively guarantee there are no other places doing memory-size arithmetic in int or long variables. I do however feel pretty confident that increasing MAX_KILOBYTES on Win64 is safe now. Also, nothing in our code should be dealing in multiple-gigabyte allocations without authorization from a relevant GUC, so it seems pretty likely that this search caught everything that could be at risk of overflow. Author: Vladlen Popolitov <v.popolitov@postgrespro.ru> Co-authored-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/1a01f0-66ec2d80-3b-68487680@27595217
2025-04-06 20:31:03 +08:00 · 2025-01-31 13:52:40 -05:00 · 2025-01-31 13:52:40 -05:00 · 041e8b95b8
commit 041e8b95b8
parent f8d8581ed8
20 changed files with 50 additions and 46 deletions
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@ -39,7 +39,7 @@
 int			gin_pending_list_limit = 0;

 #define GIN_PAGE_FREESIZE \
-	( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )
+	( (Size) BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )

 typedef struct KeyArray
 {
@ -456,7 +456,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
 	 * ginInsertCleanup() should not be called inside our CRIT_SECTION.
 	 */
 	cleanupSize = GinGetPendingListCleanupSize(index);
-	if (metadata->nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * 1024L)
+	if (metadata->nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * (Size) 1024)
 		needCleanup = true;

 	UnlockReleaseBuffer(metabuffer);
@ -795,7 +795,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
 				blknoFinish;
 	bool		cleanupFinish = false;
 	bool		fsm_vac = false;
-	Size		workMemory;
+	int			workMemory;

 	/*
 	 * We would like to prevent concurrent cleanup process. For that we will
@ -901,7 +901,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
 		 */
 		if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
 			(GinPageHasFullRow(page) &&
-			 (accum.allocatedMemory >= workMemory * 1024L)))
+			 accum.allocatedMemory >= workMemory * (Size) 1024))
 		{
 			ItemPointerData *list;
 			uint32		nlist;
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@ -125,7 +125,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
 	CompactAttribute *attr;

 	/* Initialize empty bitmap result */
-	scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
+	scanEntry->matchBitmap = tbm_create(work_mem * (Size) 1024, NULL);

 	/* Null query cannot partial-match anything */
 	if (scanEntry->isPartialMatch &&
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@ -288,7 +288,7 @@ ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
 							   values[i], isnull[i], tid);

 	/* If we've maxed out our available memory, dump everything to the index */
-	if (buildstate->accum.allocatedMemory >= (Size) maintenance_work_mem * 1024L)
+	if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
 	{
 		ItemPointerData *list;
 		Datum		key;
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@ -120,7 +120,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	double		reltuples;
 	double		allvisfrac;
 	uint32		num_buckets;
-	long		sort_threshold;
+	Size		sort_threshold;
 	HashBuildState buildstate;

 	/*
@ -155,13 +155,13 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	 * one page.  Also, "initial index size" accounting does not include the
 	 * metapage, nor the first bitmap page.
 	 */
-	sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
+	sort_threshold = (maintenance_work_mem * (Size) 1024) / BLCKSZ;
 	if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
 		sort_threshold = Min(sort_threshold, NBuffers);
 	else
 		sort_threshold = Min(sort_threshold, NLocBuffer);

-	if (num_buckets >= (uint32) sort_threshold)
+	if (num_buckets >= sort_threshold)
 		buildstate.spool = _h_spoolinit(heap, index, num_buckets);
 	else
 		buildstate.spool = NULL;
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@ -2070,7 +2070,7 @@ lazy_vacuum(LVRelState *vacrel)
 		 */
 		threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
 		bypass = (vacrel->lpdead_item_pages < threshold &&
-				  (TidStoreMemoryUsage(vacrel->dead_items) < (32L * 1024L * 1024L)));
+				  TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
 	}

 	if (bypass)
@ -3037,7 +3037,7 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
 	 */

 	dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
-	dead_items_info->max_bytes = vac_work_mem * 1024L;
+	dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
 	dead_items_info->num_items = 0;
 	vacrel->dead_items_info = dead_items_info;

--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@ -2953,7 +2953,7 @@ _bt_lock_subtree_parent(Relation rel, Relation heaprel, BlockNumber child,
 void
 _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
 {
-	int64		maxbufsize;
+	Size		maxbufsize;

 	/*
 	 * Don't bother with optimization in cleanup-only case -- we don't expect
@ -2969,12 +2969,13 @@ _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
 	 * int overflow here.
 	 */
 	vstate->bufsize = 256;
-	maxbufsize = (work_mem * 1024L) / sizeof(BTPendingFSM);
-	maxbufsize = Min(maxbufsize, INT_MAX);
+	maxbufsize = (work_mem * (Size) 1024) / sizeof(BTPendingFSM);
 	maxbufsize = Min(maxbufsize, MaxAllocSize / sizeof(BTPendingFSM));
+	/* BTVacState.maxbufsize has type int */
+	maxbufsize = Min(maxbufsize, INT_MAX);
 	/* Stay sane with small work_mem */
 	maxbufsize = Max(maxbufsize, vstate->bufsize);
-	vstate->maxbufsize = maxbufsize;
+	vstate->maxbufsize = (int) maxbufsize;

 	/* Allocate buffer, indicate that there are currently 0 pending pages */
 	vstate->pendingpages = palloc(sizeof(BTPendingFSM) * vstate->bufsize);
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@ -375,7 +375,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 		(nindexes_mwm > 0) ?
 		maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
 		maintenance_work_mem;
-	shared->dead_items_info.max_bytes = vac_work_mem * 1024L;
+	shared->dead_items_info.max_bytes = vac_work_mem * (size_t) 1024;

 	/* Prepare DSA space for dead items */
 	dead_items = TidStoreCreateShared(shared->dead_items_info.max_bytes,
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@ -326,7 +326,7 @@ CreateWorkExprContext(EState *estate)
 	Size		maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE;

 	/* choose the maxBlockSize to be no larger than 1/16 of work_mem */
-	while (16 * maxBlockSize > work_mem * 1024L)
+	while (maxBlockSize > work_mem * (Size) 1024 / 16)
 		maxBlockSize >>= 1;

 	if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE)
--- a/src/backend/executor/nodeBitmapIndexscan.c
+++ b/src/backend/executor/nodeBitmapIndexscan.c
@ -91,7 +91,7 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
 	else
 	{
 		/* XXX should we use less than work_mem for this? */
-		tbm = tbm_create(work_mem * 1024L,
+		tbm = tbm_create(work_mem * (Size) 1024,
 						 ((BitmapIndexScan *) node->ss.ps.plan)->isshared ?
 						 node->ss.ps.state->es_query_dsa : NULL);
 	}
--- a/src/backend/executor/nodeBitmapOr.c
+++ b/src/backend/executor/nodeBitmapOr.c
@ -143,7 +143,7 @@ MultiExecBitmapOr(BitmapOrState *node)
 			if (result == NULL) /* first subplan */
 			{
 				/* XXX should we use less than work_mem for this? */
-				result = tbm_create(work_mem * 1024L,
+				result = tbm_create(work_mem * (Size) 1024,
 									((BitmapOr *) node->ps.plan)->isshared ?
 									node->ps.state->es_query_dsa : NULL);
 			}
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@ -263,7 +263,7 @@ static int	tbm_shared_comparator(const void *left, const void *right,
 * be allocated from the DSA.
 */
 TIDBitmap *
-tbm_create(long maxbytes, dsa_area *dsa)
+tbm_create(Size maxbytes, dsa_area *dsa)
 {
 	TIDBitmap  *tbm;

@ -273,7 +273,7 @@ tbm_create(long maxbytes, dsa_area *dsa)
 	tbm->mcxt = CurrentMemoryContext;
 	tbm->status = TBM_EMPTY;

-	tbm->maxentries = (int) tbm_calculate_entries(maxbytes);
+	tbm->maxentries = tbm_calculate_entries(maxbytes);
 	tbm->lossify_start = 0;
 	tbm->dsa = dsa;
 	tbm->dsapagetable = InvalidDsaPointer;
@ -1539,10 +1539,10 @@ pagetable_free(pagetable_hash *pagetable, void *pointer)
 *
 * Estimate number of hashtable entries we can have within maxbytes.
 */
-long
-tbm_calculate_entries(double maxbytes)
+int
+tbm_calculate_entries(Size maxbytes)
 {
-	long		nbuckets;
+	Size		nbuckets;

 	/*
 	 * Estimate number of hashtable entries we can have within maxbytes. This
@ -1555,7 +1555,7 @@ tbm_calculate_entries(double maxbytes)
 	nbuckets = Min(nbuckets, INT_MAX - 1);	/* safety limit */
 	nbuckets = Max(nbuckets, 16);	/* sanity limit */

-	return nbuckets;
+	return (int) nbuckets;
 }

 /*
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@ -1903,7 +1903,7 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	double		input_bytes = relation_byte_size(tuples, width);
 	double		output_bytes;
 	double		output_tuples;
-	long		sort_mem_bytes = sort_mem * 1024L;
+	int64		sort_mem_bytes = sort_mem * (int64) 1024;

 	/*
 	 * We want to be sure the cost of a sort is never estimated as zero, even
@ -2488,7 +2488,7 @@ cost_material(Path *path,
 	Cost		startup_cost = input_startup_cost;
 	Cost		run_cost = input_total_cost - input_startup_cost;
 	double		nbytes = relation_byte_size(tuples, width);
-	long		work_mem_bytes = work_mem * 1024L;
+	double		work_mem_bytes = work_mem * (Size) 1024;

 	path->rows = tuples;

@ -4028,7 +4028,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
 	else if (enable_material && innersortkeys != NIL &&
 			 relation_byte_size(inner_path_rows,
 								inner_path->pathtarget->width) >
-			 (work_mem * 1024L))
+			 work_mem * (Size) 1024)
 		path->materialize_inner = true;
 	else
 		path->materialize_inner = false;
@ -4663,7 +4663,7 @@ cost_rescan(PlannerInfo *root, Path *path,
 				Cost		run_cost = cpu_tuple_cost * path->rows;
 				double		nbytes = relation_byte_size(path->rows,
 														path->pathtarget->width);
-				long		work_mem_bytes = work_mem * 1024L;
+				double		work_mem_bytes = work_mem * (Size) 1024;

 				if (nbytes > work_mem_bytes)
 				{
@ -4690,7 +4690,7 @@ cost_rescan(PlannerInfo *root, Path *path,
 				Cost		run_cost = cpu_operator_cost * path->rows;
 				double		nbytes = relation_byte_size(path->rows,
 														path->pathtarget->width);
-				long		work_mem_bytes = work_mem * 1024L;
+				double		work_mem_bytes = work_mem * (Size) 1024;

 				if (nbytes > work_mem_bytes)
 				{
@ -6496,7 +6496,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
 	double		pages_fetched;
 	double		tuples_fetched;
 	double		heap_pages;
-	long		maxentries;
+	double		maxentries;

 	/*
 	 * Fetch total cost of obtaining the bitmap, as well as its total
@ -6527,7 +6527,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
 	 * the bitmap at one time.)
 	 */
 	heap_pages = Min(pages_fetched, baserel->pages);
-	maxentries = tbm_calculate_entries(work_mem * 1024L);
+	maxentries = tbm_calculate_entries(work_mem * (Size) 1024);

 	if (loop_count > 1)
 	{
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -6887,7 +6887,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid)
 	 * parallel worker to sort.
 	 */
 	while (parallel_workers > 0 &&
-		   maintenance_work_mem / (parallel_workers + 1) < 32768L)
+		   maintenance_work_mem / (parallel_workers + 1) < 32 * 1024)
 		parallel_workers--;

 done:
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@ -3643,7 +3643,7 @@ ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
 	 * haven't exceeded the memory limit.
 	 */
 	if (debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_BUFFERED &&
-		rb->size < logical_decoding_work_mem * 1024L)
+		rb->size < logical_decoding_work_mem * (Size) 1024)
 		return;

 	/*
@ -3656,7 +3656,7 @@ ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
 	 * because a user can reduce the logical_decoding_work_mem to a smaller
 	 * value before the most recent change.
 	 */
-	while (rb->size >= logical_decoding_work_mem * 1024L ||
+	while (rb->size >= logical_decoding_work_mem * (Size) 1024 ||
 		   (debug_logical_replication_streaming == DEBUG_LOGICAL_REP_STREAMING_IMMEDIATE &&
 			rb->size > 0))
 	{
@ -3699,8 +3699,7 @@ ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
 	}

 	/* We must be under the memory limit now. */
-	Assert(rb->size < logical_decoding_work_mem * 1024L);
-
+	Assert(rb->size < logical_decoding_work_mem * (Size) 1024);
 }

 /*
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@ -265,7 +265,7 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
 	state->truncated = false;
 	state->usedDisk = false;
 	state->maxSpace = 0;
-	state->allowedMem = maxKBytes * 1024L;
+	state->allowedMem = maxKBytes * (int64) 1024;
 	state->availMem = state->allowedMem;
 	state->myfile = NULL;

--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@ -147,7 +147,7 @@ typedef struct HashMemoryChunkData

 typedef struct HashMemoryChunkData *HashMemoryChunk;

-#define HASH_CHUNK_SIZE			(32 * 1024L)
+#define HASH_CHUNK_SIZE			((Size) (32 * 1024))
 #define HASH_CHUNK_HEADER_SIZE	MAXALIGN(sizeof(HashMemoryChunkData))
 #define HASH_CHUNK_DATA(hc)		(((char *) (hc)) + HASH_CHUNK_HEADER_SIZE)
 /* tuples exceeding HASH_CHUNK_THRESHOLD bytes are put in their own chunk */
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@ -62,7 +62,7 @@ typedef struct TBMIterateResult

 /* function prototypes in nodes/tidbitmap.c */

-extern TIDBitmap *tbm_create(long maxbytes, dsa_area *dsa);
+extern TIDBitmap *tbm_create(Size maxbytes, dsa_area *dsa);
 extern void tbm_free(TIDBitmap *tbm);
 extern void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp);

@ -84,7 +84,7 @@ extern void tbm_end_private_iterate(TBMPrivateIterator *iterator);
 extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
 extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
 													dsa_pointer dp);
-extern long tbm_calculate_entries(double maxbytes);
+extern int	tbm_calculate_entries(Size maxbytes);

 extern TBMIterator tbm_begin_iterate(TIDBitmap *tbm,
 									 dsa_area *dsa, dsa_pointer dsp);
--- a/src/include/utils/dsa.h
+++ b/src/include/utils/dsa.h
@ -97,7 +97,7 @@ typedef pg_atomic_uint64 dsa_pointer_atomic;
 #define DSA_DEFAULT_INIT_SEGMENT_SIZE ((size_t) (1 * 1024 * 1024))

 /* The minimum size of a DSM segment. */
-#define DSA_MIN_SEGMENT_SIZE	((size_t) (256 * 1024L))
+#define DSA_MIN_SEGMENT_SIZE	((size_t) (256 * 1024))

 /* The maximum size of a DSM segment. */
 #define DSA_MAX_SEGMENT_SIZE ((size_t) 1 << DSA_OFFSET_WIDTH)
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@ -17,9 +17,13 @@
 #include "utils/array.h"


-/* upper limit for GUC variables measured in kilobytes of memory */
-/* note that various places assume the byte size fits in a "long" variable */
-#if SIZEOF_SIZE_T > 4 && SIZEOF_LONG > 4
+/*
+ * Maximum for integer GUC variables that are measured in kilobytes of memory.
+ * This value is chosen to ensure that the corresponding number of bytes fits
+ * into a variable of type size_t or ssize_t.  Be sure to compute the number
+ * of bytes like "guc_var * (Size) 1024" to avoid int-width overflow.
+ */
+#if SIZEOF_SIZE_T > 4
 #define MAX_KILOBYTES	INT_MAX
 #else
 #define MAX_KILOBYTES	(INT_MAX / 1024)
--- a/src/test/modules/test_bloomfilter/test_bloomfilter.c
+++ b/src/test/modules/test_bloomfilter/test_bloomfilter.c
@ -76,7 +76,7 @@ create_and_test_bloom(int power, int64 nelements, int callerseed)
 	int64		nfalsepos;
 	bloom_filter *filter;

-	bloom_work_mem = (1L << power) / 8L / 1024L;
+	bloom_work_mem = ((int64) 1 << power) / (8 * 1024);

 	elog(DEBUG1, "bloom_work_mem (KB): %d", bloom_work_mem);