mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Speed up CREATE DATABASE by deferring the fsyncs until after copying
all the data and using posix_fadvise to nudge the OS into flushing it earlier. This also hopefully makes CREATE DATABASE avoid spamming the cache. Tests show a big speedup on Linux at least on some filesystems. Idea and patch from Andres Freund.
This commit is contained in:
parent
e26c539e9f
commit
f8c183a1ac
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.153 2010/01/12 02:42:52 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.154 2010/02/15 00:50:57 stark Exp $
|
||||
*
|
||||
* NOTES:
|
||||
*
|
||||
@ -319,6 +319,22 @@ pg_fdatasync(int fd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * pg_flush_data --- hint to the OS that a file range won't be needed soon
 *
 * When the build defines USE_POSIX_FADVISE and the platform provides
 * POSIX_FADV_DONTNEED, the "amount" bytes starting at "offset" in "fd" are
 * handed to posix_fadvise() with POSIX_FADV_DONTNEED, and that call's
 * result is returned unchanged.
 *
 * Not all platforms have posix_fadvise; where it is unavailable this is a
 * no-op that returns 0.
 */
int
pg_flush_data(int fd, off_t offset, off_t amount)
{
	int			result = 0;

#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
	result = posix_fadvise(fd, offset, amount, POSIX_FADV_DONTNEED);
#endif

	return result;
}
|
||||
|
||||
|
||||
/*
|
||||
* InitFileAccess --- initialize this module during backend startup
|
||||
*
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.66 2010/01/02 16:58:08 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.67 2010/02/15 00:50:57 stark Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -98,6 +98,7 @@ extern int pg_fsync(int fd);
|
||||
extern int pg_fsync_no_writethrough(int fd);
|
||||
extern int pg_fsync_writethrough(int fd);
|
||||
extern int pg_fdatasync(int fd);
|
||||
extern int pg_flush_data(int fd, off_t offset, off_t amount);
|
||||
|
||||
/* Filename components for OpenTemporaryFile */
|
||||
#define PG_TEMP_FILES_DIR "pgsql_tmp"
|
||||
|
@ -11,7 +11,7 @@
|
||||
* as a service.
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/port/copydir.c,v 1.25 2010/02/14 17:50:52 stark Exp $
|
||||
* $PostgreSQL: pgsql/src/port/copydir.c,v 1.26 2010/02/15 00:50:57 stark Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -37,6 +37,7 @@
|
||||
|
||||
|
||||
static void copy_file(char *fromfile, char *tofile);
|
||||
static void fsync_fname(char *fname);
|
||||
|
||||
|
||||
/*
|
||||
@ -91,27 +92,32 @@ copydir(char *fromdir, char *todir, bool recurse)
|
||||
copy_file(fromfile, tofile);
|
||||
}
|
||||
|
||||
/*
|
||||
* Be paranoid here and fsync all files to ensure we catch problems.
|
||||
*/
|
||||
if (xldir == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open directory \"%s\": %m", fromdir)));
|
||||
|
||||
while ((xlde = ReadDir(xldir, fromdir)) != NULL)
|
||||
{
|
||||
if (strcmp(xlde->d_name, ".") == 0 ||
|
||||
strcmp(xlde->d_name, "..") == 0)
|
||||
continue;
|
||||
|
||||
snprintf(tofile, MAXPGPATH, "%s/%s", todir, xlde->d_name);
|
||||
fsync_fname(tofile);
|
||||
}
|
||||
FreeDir(xldir);
|
||||
|
||||
/*
|
||||
* fsync the directory to make sure not just the data but also the
|
||||
* new directory file entries have reached the disk. While needed
|
||||
* by most filesystems, the window got bigger with newer ones like
|
||||
* ext4.
|
||||
/* It's important to fsync the destination directory itself as
|
||||
* individual file fsyncs don't guarantee that the directory entry
|
||||
* for the file is synced. Recent versions of ext4 have made the
|
||||
* window much wider but it's been true for ext3 and other
|
||||
* filesystems in the past.
|
||||
*/
|
||||
dirfd = BasicOpenFile(todir,
|
||||
O_RDONLY | PG_BINARY,
|
||||
S_IRUSR | S_IWUSR);
|
||||
if(dirfd == -1)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open directory for fsync \"%s\": %m", todir)));
|
||||
|
||||
if(pg_fsync(dirfd) == -1)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not fsync directory \"%s\": %m", todir)));
|
||||
close(dirfd);
|
||||
fsync_fname(todir);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -124,6 +130,7 @@ copy_file(char *fromfile, char *tofile)
|
||||
int srcfd;
|
||||
int dstfd;
|
||||
int nbytes;
|
||||
off_t offset;
|
||||
|
||||
/* Use palloc to ensure we get a maxaligned buffer */
|
||||
#define COPY_BUF_SIZE (8 * BLCKSZ)
|
||||
@ -149,7 +156,7 @@ copy_file(char *fromfile, char *tofile)
|
||||
/*
|
||||
* Do the data copying.
|
||||
*/
|
||||
for (;;)
|
||||
for (offset=0; ; offset+=nbytes)
|
||||
{
|
||||
nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
|
||||
if (nbytes < 0)
|
||||
@ -168,15 +175,14 @@ copy_file(char *fromfile, char *tofile)
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not write to file \"%s\": %m", tofile)));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Be paranoid here to ensure we catch problems.
|
||||
* We fsync the files later but first flush them to avoid spamming
|
||||
* the cache and hopefully get the kernel to start writing them
|
||||
* out before the fsync comes.
|
||||
*/
|
||||
if (pg_fsync(dstfd) != 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not fsync file \"%s\": %m", tofile)));
|
||||
pg_flush_data(dstfd, offset, nbytes);
|
||||
}
|
||||
|
||||
if (close(dstfd))
|
||||
ereport(ERROR,
|
||||
@ -187,3 +193,27 @@ copy_file(char *fromfile, char *tofile)
|
||||
|
||||
pfree(buffer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* fsync a file
|
||||
*/
|
||||
static void
|
||||
fsync_fname(char *fname)
|
||||
{
|
||||
int fd = BasicOpenFile(fname,
|
||||
O_RDONLY | PG_BINARY,
|
||||
S_IRUSR | S_IWUSR);
|
||||
|
||||
if (fd < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open file \"%s\": %m", fname)));
|
||||
|
||||
if (pg_fsync(fd) != 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not fsync file \"%s\": %m", fname)));
|
||||
close(fd);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user