mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Speed up CREATE DATABASE by deferring the fsyncs until after copying
all the data and using posix_fadvise to nudge the OS into flushing it earlier. This also hopefully makes CREATE DATABASE avoid spamming the cache. Tests show a big speedup on Linux, at least on some filesystems. Idea and patch from Andres Freund.
This commit is contained in:
parent
e26c539e9f
commit
f8c183a1ac
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.153 2010/01/12 02:42:52 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.154 2010/02/15 00:50:57 stark Exp $
|
||||||
*
|
*
|
||||||
* NOTES:
|
* NOTES:
|
||||||
*
|
*
|
||||||
@ -319,6 +319,22 @@ pg_fdatasync(int fd)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pg_flush_data --- advise OS that the data described won't be needed soon
|
||||||
|
*
|
||||||
|
* Not all platforms have posix_fadvise; treat as noop if not available.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
pg_flush_data(int fd, off_t offset, off_t amount)
|
||||||
|
{
|
||||||
|
#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
|
||||||
|
return posix_fadvise(fd, offset, amount, POSIX_FADV_DONTNEED);
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* InitFileAccess --- initialize this module during backend startup
|
* InitFileAccess --- initialize this module during backend startup
|
||||||
*
|
*
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.66 2010/01/02 16:58:08 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.67 2010/02/15 00:50:57 stark Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -98,6 +98,7 @@ extern int pg_fsync(int fd);
|
|||||||
extern int pg_fsync_no_writethrough(int fd);
|
extern int pg_fsync_no_writethrough(int fd);
|
||||||
extern int pg_fsync_writethrough(int fd);
|
extern int pg_fsync_writethrough(int fd);
|
||||||
extern int pg_fdatasync(int fd);
|
extern int pg_fdatasync(int fd);
|
||||||
|
extern int pg_flush_data(int fd, off_t offset, off_t amount);
|
||||||
|
|
||||||
/* Filename components for OpenTemporaryFile */
|
/* Filename components for OpenTemporaryFile */
|
||||||
#define PG_TEMP_FILES_DIR "pgsql_tmp"
|
#define PG_TEMP_FILES_DIR "pgsql_tmp"
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
* as a service.
|
* as a service.
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/port/copydir.c,v 1.25 2010/02/14 17:50:52 stark Exp $
|
* $PostgreSQL: pgsql/src/port/copydir.c,v 1.26 2010/02/15 00:50:57 stark Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -37,6 +37,7 @@
|
|||||||
|
|
||||||
|
|
||||||
static void copy_file(char *fromfile, char *tofile);
|
static void copy_file(char *fromfile, char *tofile);
|
||||||
|
static void fsync_fname(char *fname);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -91,27 +92,32 @@ copydir(char *fromdir, char *todir, bool recurse)
|
|||||||
copy_file(fromfile, tofile);
|
copy_file(fromfile, tofile);
|
||||||
}
|
}
|
||||||
|
|
||||||
FreeDir(xldir);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* fsync the directory to make sure not just the data but also the
|
* Be paranoid here and fsync all files to ensure we catch problems.
|
||||||
* new directory file entries have reached the disk. While needed
|
|
||||||
* by most filesystems, the window got bigger with newer ones like
|
|
||||||
* ext4.
|
|
||||||
*/
|
*/
|
||||||
dirfd = BasicOpenFile(todir,
|
if (xldir == NULL)
|
||||||
O_RDONLY | PG_BINARY,
|
|
||||||
S_IRUSR | S_IWUSR);
|
|
||||||
if(dirfd == -1)
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode_for_file_access(),
|
|
||||||
errmsg("could not open directory for fsync \"%s\": %m", todir)));
|
|
||||||
|
|
||||||
if(pg_fsync(dirfd) == -1)
|
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
errmsg("could not fsync directory \"%s\": %m", todir)));
|
errmsg("could not open directory \"%s\": %m", fromdir)));
|
||||||
close(dirfd);
|
|
||||||
|
while ((xlde = ReadDir(xldir, fromdir)) != NULL)
|
||||||
|
{
|
||||||
|
if (strcmp(xlde->d_name, ".") == 0 ||
|
||||||
|
strcmp(xlde->d_name, "..") == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
snprintf(tofile, MAXPGPATH, "%s/%s", todir, xlde->d_name);
|
||||||
|
fsync_fname(tofile);
|
||||||
|
}
|
||||||
|
FreeDir(xldir);
|
||||||
|
|
||||||
|
/* It's important to fsync the destination directory itself as
|
||||||
|
* individual file fsyncs don't guarantee that the directory entry
|
||||||
|
* for the file is synced. Recent versions of ext4 have made the
|
||||||
|
* window much wider but it's been true for ext3 and other
|
||||||
|
* filesystems in the past.
|
||||||
|
*/
|
||||||
|
fsync_fname(todir);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -124,6 +130,7 @@ copy_file(char *fromfile, char *tofile)
|
|||||||
int srcfd;
|
int srcfd;
|
||||||
int dstfd;
|
int dstfd;
|
||||||
int nbytes;
|
int nbytes;
|
||||||
|
off_t offset;
|
||||||
|
|
||||||
/* Use palloc to ensure we get a maxaligned buffer */
|
/* Use palloc to ensure we get a maxaligned buffer */
|
||||||
#define COPY_BUF_SIZE (8 * BLCKSZ)
|
#define COPY_BUF_SIZE (8 * BLCKSZ)
|
||||||
@ -149,7 +156,7 @@ copy_file(char *fromfile, char *tofile)
|
|||||||
/*
|
/*
|
||||||
* Do the data copying.
|
* Do the data copying.
|
||||||
*/
|
*/
|
||||||
for (;;)
|
for (offset=0; ; offset+=nbytes)
|
||||||
{
|
{
|
||||||
nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
|
nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
|
||||||
if (nbytes < 0)
|
if (nbytes < 0)
|
||||||
@ -168,15 +175,14 @@ copy_file(char *fromfile, char *tofile)
|
|||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
errmsg("could not write to file \"%s\": %m", tofile)));
|
errmsg("could not write to file \"%s\": %m", tofile)));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Be paranoid here to ensure we catch problems.
|
* We fsync the files later but first flush them to avoid spamming
|
||||||
*/
|
* the cache and hopefully get the kernel to start writing them
|
||||||
if (pg_fsync(dstfd) != 0)
|
* out before the fsync comes.
|
||||||
ereport(ERROR,
|
*/
|
||||||
(errcode_for_file_access(),
|
pg_flush_data(dstfd, offset, nbytes);
|
||||||
errmsg("could not fsync file \"%s\": %m", tofile)));
|
}
|
||||||
|
|
||||||
if (close(dstfd))
|
if (close(dstfd))
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
@ -187,3 +193,27 @@ copy_file(char *fromfile, char *tofile)
|
|||||||
|
|
||||||
pfree(buffer);
|
pfree(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* fsync a file or directory, given its path name
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
fsync_fname(char *fname)
|
||||||
|
{
|
||||||
|
int fd = BasicOpenFile(fname,
|
||||||
|
O_RDONLY | PG_BINARY,
|
||||||
|
S_IRUSR | S_IWUSR);
|
||||||
|
|
||||||
|
if (fd < 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not open file \"%s\": %m", fname)));
|
||||||
|
|
||||||
|
if (pg_fsync(fd) != 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not fsync file \"%s\": %m", fname)));
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user