mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-24 18:55:04 +08:00
Add --sampling-rate option to pgbench.
This allows logging only some fraction of transactions, greatly reducing the amount of log generated. Tomas Vondra, reviewed by Robert Haas and Jeff Janes.
This commit is contained in:
parent
7ae1815961
commit
e1be1df51f
@ -129,6 +129,11 @@ int foreign_keys = 0;
|
||||
*/
|
||||
int unlogged_tables = 0;
|
||||
|
||||
/*
|
||||
* log sampling rate (1.0 = log everything, 0.0 = option not given)
|
||||
*/
|
||||
double sample_rate = 0.0;
|
||||
|
||||
/*
|
||||
* tablespace selection
|
||||
*/
|
||||
@ -370,6 +375,8 @@ usage(void)
|
||||
" -f FILENAME read transaction script from FILENAME\n"
|
||||
" -j NUM number of threads (default: 1)\n"
|
||||
" -l write transaction times to log file\n"
|
||||
" --sampling-rate NUM\n"
|
||||
" fraction of transactions to log (e.g. 0.01 for 1%% sample)\n"
|
||||
" -M simple|extended|prepared\n"
|
||||
" protocol for submitting queries to server (default: simple)\n"
|
||||
" -n do not run VACUUM before tests\n"
|
||||
@ -883,21 +890,30 @@ top:
|
||||
instr_time diff;
|
||||
double usec;
|
||||
|
||||
INSTR_TIME_SET_CURRENT(now);
|
||||
diff = now;
|
||||
INSTR_TIME_SUBTRACT(diff, st->txn_begin);
|
||||
usec = (double) INSTR_TIME_GET_MICROSEC(diff);
|
||||
/*
|
||||
* write the log entry if this row belongs to the random sample,
|
||||
* or if no sampling rate was given, which means log everything.
|
||||
*/
|
||||
if (sample_rate == 0.0 ||
|
||||
pg_erand48(thread->random_state) <= sample_rate)
|
||||
{
|
||||
|
||||
INSTR_TIME_SET_CURRENT(now);
|
||||
diff = now;
|
||||
INSTR_TIME_SUBTRACT(diff, st->txn_begin);
|
||||
usec = (double) INSTR_TIME_GET_MICROSEC(diff);
|
||||
|
||||
#ifndef WIN32
|
||||
/* This is more than we really ought to know about instr_time */
|
||||
fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
|
||||
st->id, st->cnt, usec, st->use_file,
|
||||
(long) now.tv_sec, (long) now.tv_usec);
|
||||
/* This is more than we really ought to know about instr_time */
|
||||
fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
|
||||
st->id, st->cnt, usec, st->use_file,
|
||||
(long) now.tv_sec, (long) now.tv_usec);
|
||||
#else
|
||||
/* On Windows, instr_time doesn't provide a timestamp anyway */
|
||||
fprintf(logfile, "%d %d %.0f %d 0 0\n",
|
||||
st->id, st->cnt, usec, st->use_file);
|
||||
/* On Windows, instr_time doesn't provide a timestamp anyway */
|
||||
fprintf(logfile, "%d %d %.0f %d 0 0\n",
|
||||
st->id, st->cnt, usec, st->use_file);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (commands[st->state]->type == SQL_COMMAND)
|
||||
@ -1926,6 +1942,7 @@ main(int argc, char **argv)
|
||||
{"index-tablespace", required_argument, NULL, 3},
|
||||
{"tablespace", required_argument, NULL, 2},
|
||||
{"unlogged-tables", no_argument, &unlogged_tables, 1},
|
||||
{"sampling-rate", required_argument, NULL, 4},
|
||||
{NULL, 0, NULL, 0}
|
||||
};
|
||||
|
||||
@ -2131,6 +2148,14 @@ main(int argc, char **argv)
|
||||
case 3: /* index-tablespace */
|
||||
index_tablespace = optarg;
|
||||
break;
|
||||
case 4:
|
||||
sample_rate = atof(optarg);
|
||||
if (sample_rate <= 0.0 || sample_rate > 1.0)
|
||||
{
|
||||
fprintf(stderr, "invalid sampling rate: %f\n", sample_rate);
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
||||
exit(1);
|
||||
@ -2166,6 +2191,13 @@ main(int argc, char **argv)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* --sampling-rate may be used only with -l */
|
||||
if (sample_rate > 0.0 && !use_log)
|
||||
{
|
||||
fprintf(stderr, "log sampling rate is allowed only when logging transactions (-l) \n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* is_latencies only works with multiple threads in thread-based
|
||||
* implementations, not fork-based ones, because it supposes that the
|
||||
|
@ -316,6 +316,24 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--sampling-rate</option> <replaceable>rate</></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Sampling rate, used when writing data into the log, to reduce the
|
||||
amount of log generated. If this option is given, only the specified
|
||||
fraction of transactions are logged. 1.0 means all transactions will
|
||||
be logged, 0.05 means only 5% of the transactions will be logged.
|
||||
</para>
|
||||
<para>
|
||||
Remember to take the sampling rate into account when processing the
|
||||
log file. For example, when computing tps values, you need to multiply
|
||||
the numbers accordingly (e.g. with a 0.01 sample rate, you'll only get
|
||||
1/100 of the actual tps).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-M</option> <replaceable>querymode</></term>
|
||||
<listitem>
|
||||
@ -750,6 +768,12 @@ END;
|
||||
0 201 2513 0 1175850569 608
|
||||
0 202 2038 0 1175850569 2663
|
||||
</screen></para>
|
||||
|
||||
<para>
|
||||
When running a long test on hardware that can handle a lot of transactions,
|
||||
the log files can become very large. The <option>--sampling-rate</> option
|
||||
can be used to log only a random sample of transactions.
|
||||
</para>
|
||||
</refsect2>
|
||||
|
||||
<refsect2>
|
||||
|
Loading…
Reference in New Issue
Block a user