mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-18 18:44:06 +08:00
Restore pg_pread and friends.
Commitscf112c12
anda0dc8271
were a little too hasty in getting rid of the pg_ prefixes where we use pread(), pwrite() and vectored variants. We dropped support for ancient Unixes where we needed to use lseek() to implement replacements for those, but it turns out that Windows also changes the current position even when you pass in an offset to ReadFile() and WriteFile() if the file handle is synchronous, despite its documentation saying otherwise. Switching to asynchronous file handles would fix that, but have other complications. For now let's just put back the pg_ prefix and add some comments to highlight the non-standard side-effect, which we can now describe as Windows-only. Reported-by: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Discussion: https://postgr.es/m/20220923202439.GA1156054%40nathanxps13
This commit is contained in:
parent
3a5817695a
commit
b6d8a60aba
@ -2103,9 +2103,9 @@ qtext_store(const char *query, int query_len,
|
||||
if (fd < 0)
|
||||
goto error;
|
||||
|
||||
if (pwrite(fd, query, query_len, off) != query_len)
|
||||
if (pg_pwrite(fd, query, query_len, off) != query_len)
|
||||
goto error;
|
||||
if (pwrite(fd, "\0", 1, off + query_len) != 1)
|
||||
if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
|
||||
goto error;
|
||||
|
||||
CloseTransientFile(fd);
|
||||
|
@ -1150,7 +1150,7 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
|
||||
/* write out tail end of mapping file (again) */
|
||||
errno = 0;
|
||||
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE);
|
||||
if (pwrite(fd, data, len, xlrec->offset) != len)
|
||||
if (pg_pwrite(fd, data, len, xlrec->offset) != len)
|
||||
{
|
||||
/* if write didn't set errno, assume problem is no disk space */
|
||||
if (errno == 0)
|
||||
|
@ -718,7 +718,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
|
||||
|
||||
errno = 0;
|
||||
pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
|
||||
if (pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
|
||||
if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
|
||||
{
|
||||
pgstat_report_wait_end();
|
||||
slru_errcause = SLRU_READ_FAILED;
|
||||
@ -873,7 +873,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
|
||||
|
||||
errno = 0;
|
||||
pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
|
||||
if (pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
|
||||
if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
|
||||
{
|
||||
pgstat_report_wait_end();
|
||||
/* if write didn't set errno, assume problem is no disk space */
|
||||
|
@ -2196,7 +2196,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
|
||||
INSTR_TIME_SET_CURRENT(start);
|
||||
|
||||
pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
|
||||
written = pwrite(openLogFile, from, nleft, startoffset);
|
||||
written = pg_pwrite(openLogFile, from, nleft, startoffset);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
/*
|
||||
@ -3018,7 +3018,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
|
||||
* enough.
|
||||
*/
|
||||
errno = 0;
|
||||
if (pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1)
|
||||
if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1)
|
||||
{
|
||||
/* if write didn't set errno, assume no disk space */
|
||||
save_errno = errno ? errno : ENOSPC;
|
||||
|
@ -1544,7 +1544,7 @@ WALRead(XLogReaderState *state,
|
||||
|
||||
/* Reset errno first; eases reporting non-errno-affecting errors */
|
||||
errno = 0;
|
||||
readbytes = pread(state->seg.ws_file, p, segbytes, (off_t) startoff);
|
||||
readbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff);
|
||||
|
||||
#ifndef FRONTEND
|
||||
pgstat_report_wait_end();
|
||||
|
@ -3271,7 +3271,7 @@ retry:
|
||||
readOff = targetPageOff;
|
||||
|
||||
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
|
||||
r = pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
|
||||
r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
|
||||
if (r != XLOG_BLCKSZ)
|
||||
{
|
||||
char fname[MAXFNAMELEN];
|
||||
|
@ -1828,7 +1828,7 @@ basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
|
||||
int rc;
|
||||
|
||||
pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ);
|
||||
rc = pread(fd, buf, nbytes, offset);
|
||||
rc = pg_pread(fd, buf, nbytes, offset);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
if (rc < 0)
|
||||
|
@ -915,7 +915,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
|
||||
/* OK to write the logs */
|
||||
errno = 0;
|
||||
|
||||
byteswritten = pwrite(recvFile, buf, segbytes, (off_t) startoff);
|
||||
byteswritten = pg_pwrite(recvFile, buf, segbytes, (off_t) startoff);
|
||||
if (byteswritten <= 0)
|
||||
{
|
||||
char xlogfname[MAXFNAMELEN];
|
||||
|
@ -2053,7 +2053,7 @@ FileRead(File file, char *buffer, int amount, off_t offset,
|
||||
|
||||
retry:
|
||||
pgstat_report_wait_start(wait_event_info);
|
||||
returnCode = pread(vfdP->fd, buffer, amount, offset);
|
||||
returnCode = pg_pread(vfdP->fd, buffer, amount, offset);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
if (returnCode < 0)
|
||||
@ -2135,7 +2135,7 @@ FileWrite(File file, char *buffer, int amount, off_t offset,
|
||||
retry:
|
||||
errno = 0;
|
||||
pgstat_report_wait_start(wait_event_info);
|
||||
returnCode = pwrite(VfdCache[file].fd, buffer, amount, offset);
|
||||
returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
/* if write didn't set errno, assume problem is no disk space */
|
||||
@ -3740,7 +3740,7 @@ data_sync_elevel(int elevel)
|
||||
}
|
||||
|
||||
/*
|
||||
* A convenience wrapper for pwritev() that retries on partial write. If an
|
||||
* A convenience wrapper for pg_pwritev() that retries on partial write. If an
|
||||
* error is returned, it is unspecified how much has been written.
|
||||
*/
|
||||
ssize_t
|
||||
@ -3760,7 +3760,7 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
|
||||
for (;;)
|
||||
{
|
||||
/* Write as much as we can. */
|
||||
part = pwritev(fd, iov, iovcnt, offset);
|
||||
part = pg_pwritev(fd, iov, iovcnt, offset);
|
||||
if (part < 0)
|
||||
return -1;
|
||||
|
||||
|
@ -1527,7 +1527,7 @@ AddToDataDirLockFile(int target_line, const char *str)
|
||||
len = strlen(destbuffer);
|
||||
errno = 0;
|
||||
pgstat_report_wait_start(WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE);
|
||||
if (pwrite(fd, destbuffer, len, 0) != len)
|
||||
if (pg_pwrite(fd, destbuffer, len, 0) != len)
|
||||
{
|
||||
pgstat_report_wait_end();
|
||||
/* if write didn't set errno, assume problem is no disk space */
|
||||
|
@ -312,10 +312,10 @@ test_sync(int writes_per_op)
|
||||
for (ops = 0; alarm_triggered == false; ops++)
|
||||
{
|
||||
for (writes = 0; writes < writes_per_op; writes++)
|
||||
if (pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
if (pg_pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
die("write failed");
|
||||
}
|
||||
STOP_TIMER;
|
||||
@ -337,10 +337,10 @@ test_sync(int writes_per_op)
|
||||
for (ops = 0; alarm_triggered == false; ops++)
|
||||
{
|
||||
for (writes = 0; writes < writes_per_op; writes++)
|
||||
if (pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
if (pg_pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
die("write failed");
|
||||
fdatasync(tmpfile);
|
||||
}
|
||||
@ -359,10 +359,10 @@ test_sync(int writes_per_op)
|
||||
for (ops = 0; alarm_triggered == false; ops++)
|
||||
{
|
||||
for (writes = 0; writes < writes_per_op; writes++)
|
||||
if (pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
if (pg_pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
die("write failed");
|
||||
if (fsync(tmpfile) != 0)
|
||||
die("fsync failed");
|
||||
@ -383,10 +383,10 @@ test_sync(int writes_per_op)
|
||||
for (ops = 0; alarm_triggered == false; ops++)
|
||||
{
|
||||
for (writes = 0; writes < writes_per_op; writes++)
|
||||
if (pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
if (pg_pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
die("write failed");
|
||||
if (pg_fsync_writethrough(tmpfile) != 0)
|
||||
die("fsync failed");
|
||||
@ -415,10 +415,10 @@ test_sync(int writes_per_op)
|
||||
for (ops = 0; alarm_triggered == false; ops++)
|
||||
{
|
||||
for (writes = 0; writes < writes_per_op; writes++)
|
||||
if (pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
if (pg_pwrite(tmpfile,
|
||||
buf,
|
||||
XLOG_BLCKSZ,
|
||||
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
|
||||
/*
|
||||
* This can generate write failures if the filesystem has
|
||||
@ -480,10 +480,10 @@ test_open_sync(const char *msg, int writes_size)
|
||||
for (ops = 0; alarm_triggered == false; ops++)
|
||||
{
|
||||
for (writes = 0; writes < 16 / writes_size; writes++)
|
||||
if (pwrite(tmpfile,
|
||||
buf,
|
||||
writes_size * 1024,
|
||||
writes * writes_size * 1024) !=
|
||||
if (pg_pwrite(tmpfile,
|
||||
buf,
|
||||
writes_size * 1024,
|
||||
writes * writes_size * 1024) !=
|
||||
writes_size * 1024)
|
||||
die("write failed");
|
||||
}
|
||||
@ -582,7 +582,7 @@ test_non_sync(void)
|
||||
START_TIMER;
|
||||
for (ops = 0; alarm_triggered == false; ops++)
|
||||
{
|
||||
if (pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
|
||||
if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
|
||||
die("write failed");
|
||||
}
|
||||
STOP_TIMER;
|
||||
|
@ -378,11 +378,11 @@ extern void XLogReaderResetError(XLogReaderState *state);
|
||||
|
||||
/*
|
||||
* Error information from WALRead that both backend and frontend caller can
|
||||
* process. Currently only errors from pread can be reported.
|
||||
* process. Currently only errors from pg_pread can be reported.
|
||||
*/
|
||||
typedef struct WALReadError
|
||||
{
|
||||
int wre_errno; /* errno set by the last pread() */
|
||||
int wre_errno; /* errno set by the last pg_pread() */
|
||||
int wre_off; /* Offset we tried to read from. */
|
||||
int wre_req; /* Bytes requested to be read. */
|
||||
int wre_read; /* Bytes read by the last read(). */
|
||||
|
@ -213,6 +213,15 @@ extern int pg_fprintf(FILE *stream, const char *fmt,...) pg_attribute_printf(2,
|
||||
extern int pg_vprintf(const char *fmt, va_list args) pg_attribute_printf(1, 0);
|
||||
extern int pg_printf(const char *fmt,...) pg_attribute_printf(1, 2);
|
||||
|
||||
#ifndef WIN32
|
||||
/*
|
||||
* We add a pg_ prefix as a warning that the Windows implementations have the
|
||||
* non-standard side-effect of changing the current file position.
|
||||
*/
|
||||
#define pg_pread pread
|
||||
#define pg_pwrite pwrite
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We use __VA_ARGS__ for printf to prevent replacing references to
|
||||
* the "printf" format archetype in format() attribute declarations.
|
||||
|
@ -35,12 +35,21 @@ struct iovec
|
||||
/* Define a reasonable maximum that is safe to use on the stack. */
|
||||
#define PG_IOV_MAX Min(IOV_MAX, 32)
|
||||
|
||||
#if !HAVE_DECL_PREADV
|
||||
extern ssize_t preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
/*
|
||||
* Note that pg_preadv and pg_writev have a pg_ prefix as a warning that the
|
||||
* Windows implementations have the side-effect of changing the file position.
|
||||
*/
|
||||
|
||||
#if HAVE_DECL_PREADV
|
||||
#define pg_preadv preadv
|
||||
#else
|
||||
extern ssize_t pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
#endif
|
||||
|
||||
#if !HAVE_DECL_PWRITEV
|
||||
extern ssize_t pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
#if HAVE_DECL_PWRITEV
|
||||
#define pg_pwritev pwritev
|
||||
#else
|
||||
extern ssize_t pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
#endif
|
||||
|
||||
#endif /* PG_IOVEC_H */
|
||||
|
@ -564,9 +564,9 @@ typedef unsigned short mode_t;
|
||||
#endif
|
||||
|
||||
/* in port/win32pread.c */
|
||||
extern ssize_t pread(int fd, void *buf, size_t nbyte, off_t offset);
|
||||
extern ssize_t pg_pread(int fd, void *buf, size_t nbyte, off_t offset);
|
||||
|
||||
/* in port/win32pwrite.c */
|
||||
extern ssize_t pwrite(int fd, const void *buf, size_t nbyte, off_t offset);
|
||||
extern ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset);
|
||||
|
||||
#endif /* PG_WIN32_PORT_H */
|
||||
|
@ -19,14 +19,14 @@
|
||||
#include "port/pg_iovec.h"
|
||||
|
||||
ssize_t
|
||||
preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
|
||||
pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
|
||||
{
|
||||
ssize_t sum = 0;
|
||||
ssize_t part;
|
||||
|
||||
for (int i = 0; i < iovcnt; ++i)
|
||||
{
|
||||
part = pread(fd, iov[i].iov_base, iov[i].iov_len, offset);
|
||||
part = pg_pread(fd, iov[i].iov_base, iov[i].iov_len, offset);
|
||||
if (part < 0)
|
||||
{
|
||||
if (i == 0)
|
||||
|
@ -19,14 +19,14 @@
|
||||
#include "port/pg_iovec.h"
|
||||
|
||||
ssize_t
|
||||
pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
|
||||
pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
|
||||
{
|
||||
ssize_t sum = 0;
|
||||
ssize_t part;
|
||||
|
||||
for (int i = 0; i < iovcnt; ++i)
|
||||
{
|
||||
part = pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset);
|
||||
part = pg_pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset);
|
||||
if (part < 0)
|
||||
{
|
||||
if (i == 0)
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include <windows.h>
|
||||
|
||||
ssize_t
|
||||
pread(int fd, void *buf, size_t size, off_t offset)
|
||||
pg_pread(int fd, void *buf, size_t size, off_t offset)
|
||||
{
|
||||
OVERLAPPED overlapped = {0};
|
||||
HANDLE handle;
|
||||
@ -30,6 +30,7 @@ pread(int fd, void *buf, size_t size, off_t offset)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Note that this changes the file position, despite not using it. */
|
||||
overlapped.Offset = offset;
|
||||
if (!ReadFile(handle, buf, size, &result, &overlapped))
|
||||
{
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include <windows.h>
|
||||
|
||||
ssize_t
|
||||
pwrite(int fd, const void *buf, size_t size, off_t offset)
|
||||
pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
|
||||
{
|
||||
OVERLAPPED overlapped = {0};
|
||||
HANDLE handle;
|
||||
@ -30,6 +30,7 @@ pwrite(int fd, const void *buf, size_t size, off_t offset)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Note that this changes the file position, despite not using it. */
|
||||
overlapped.Offset = offset;
|
||||
if (!WriteFile(handle, buf, size, &result, &overlapped))
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user