diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile index 241c02587bc..b2bd8794d2a 100644 --- a/contrib/pg_stat_statements/Makefile +++ b/contrib/pg_stat_statements/Makefile @@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS)) REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf REGRESS = select dml cursors utility level_tracking planning \ user_activity wal entry_timestamp privileges extended \ - parallel cleanup oldextversions + parallel cleanup oldextversions squashing # Disabled because these tests require "shared_preload_libraries=pg_stat_statements", # which typical installcheck users do not have (e.g. buildfarm clients). NO_INSTALLCHECK = 1 diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out new file mode 100644 index 00000000000..55aa5109433 --- /dev/null +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -0,0 +1,464 @@ +-- +-- Const squashing functionality +-- +CREATE EXTENSION pg_stat_statements; +CREATE TABLE test_squash (id int, data int); +-- IN queries +-- No squashing is performed, as a baseline result +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1 + SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1 + SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(4 rows) + +-- Normal scenario, too many simple constants for an IN query +SET query_id_squash_values = on; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1 + SELECT * FROM test_squash WHERE id IN ($1) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 4 + SELECT * FROM test_squash WHERE id IN ($1) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +-- More conditions in the query +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2; + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2; + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2; + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +---------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) AND data = $2 | 3 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Multiple squashed intervals +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */)+| 3 + AND data IN ($2 /*, ... */) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- No constants simplification for OpExpr +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- In the following two queries the operator expressions (+) and (@) have +-- different oppno, and will be given different query_id if squashed, even though +-- the normalized query will be the same +SELECT * FROM test_squash WHERE id IN + (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN + (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9'); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN +| 1 + ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | + SELECT * FROM test_squash WHERE id IN +| 1 + (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + +-- FuncExpr +-- Verify multiple type representation end up with the same query_id +CREATE TABLE test_float (data float); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT data FROM test_float WHERE data IN (1, 2); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN (1, '2'); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN ('1', 2); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN ('1', '2'); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN (1.0, 1.0); + data +------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-----------------------------------------------------------+------- + SELECT data FROM test_float WHERE data IN ($1 /*, ... */) | 5 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Numeric type, implicit cast is squashed +CREATE TABLE test_squash_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-----------------------------------------------------------------+------- + SELECT * FROM test_squash_numeric WHERE data IN ($1 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, implicit cast is squashed +CREATE TABLE test_squash_bigint (id int, data bigint); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------+------- + SELECT * FROM test_squash_bigint WHERE data IN ($1 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, explicit cast is not squashed +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_bigint WHERE data IN + (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint, + 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_squash_bigint WHERE data IN +| 1 + ($1 /*, ... */::bigint) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_bigint WHERE id IN + (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700), + abs(800), abs(900), abs(1000), ((abs(1100)))); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------------------------+------- + SELECT * FROM test_squash_bigint WHERE id IN +| 1 + (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+| + abs($8), abs($9), abs($10), ((abs($11)))) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- CoerceViaIO, SubLink instead of a Const +CREATE TABLE test_squash_jsonb (id int, data jsonb); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_jsonb WHERE data IN + ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb, + (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb, + (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb, + (SELECT '"10"')::jsonb); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------------+------- + SELECT * FROM test_squash_jsonb WHERE data IN +| 1 + ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+| + (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+| + (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+| + (SELECT $10)::jsonb) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- CoerceViaIO +-- Create some dummy type to force CoerceViaIO +CREATE TYPE casttesttype; +CREATE FUNCTION casttesttype_in(cstring) + RETURNS casttesttype + AS 'textin' + LANGUAGE internal STRICT IMMUTABLE; +NOTICE: return type casttesttype is only a shell +CREATE FUNCTION casttesttype_out(casttesttype) + RETURNS cstring + AS 'textout' + LANGUAGE internal STRICT IMMUTABLE; +NOTICE: argument type casttesttype is only a shell +LINE 1: CREATE FUNCTION casttesttype_out(casttesttype) + ^ +CREATE TYPE casttesttype ( + internallength = variable, + input = casttesttype_in, + output = casttesttype_out, + alignment = int4 +); +CREATE CAST (int4 AS casttesttype) WITH INOUT; +CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype) +returns boolean language sql immutable as $$ + SELECT true +$$; +CREATE OPERATOR = ( + leftarg = casttesttype, + rightarg = casttesttype, + procedure = casttesttype_eq, + commutator = =); +CREATE TABLE test_squash_cast (id int, data casttesttype); +-- Use the introduced type to construct a list of CoerceViaIO around Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_cast WHERE data IN + (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype, + 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype, + 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype, + 10::int4::casttesttype, 11::int4::casttesttype); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_squash_cast WHERE data IN +| 1 + ($1 /*, ... */::int4::casttesttype) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Some casting expression are simplified to Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_jsonb WHERE data IN + (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb, + ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb, + ( '"9"')::jsonb, ( '"10"')::jsonb); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_squash_jsonb WHERE data IN +| 1 + (($1 /*, ... */)::jsonb) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- RelabelType +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Test constants evaluation in a CTE, which was causing issues in the past +WITH cte AS ( + SELECT 'const' as const FROM test_squash +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + result +-------- +(0 rows) + +-- Simple array would be squashed as well +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + array +------------------------ + {1,2,3,4,5,6,7,8,9,10} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT ARRAY[$1 /*, ... */] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +RESET query_id_squash_values; diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build index 4446af58c58..01a6cbdcf61 100644 --- a/contrib/pg_stat_statements/meson.build +++ b/contrib/pg_stat_statements/meson.build @@ -56,6 +56,7 @@ tests += { 'parallel', 'cleanup', 'oldextversions', + 'squashing', ], 'regress_args': ['--temp-config', files('pg_stat_statements.conf')], # Disabled because these tests require diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index b245d04097d..8ab9ad58e1c 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -296,7 +296,6 @@ static bool pgss_track_planning = false; /* whether to track planning * duration */ static bool pgss_save = true; /* whether to save stats across shutdown */ - #define pgss_enabled(level) \ (!IsParallelWorker() && \ (pgss_track == PGSS_TRACK_ALL || \ @@ -2823,6 +2822,10 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ + bool in_squashed = false; /* in a run of squashed consts? */ + int skipped_constants = 0; /* Position adjustment of later + * constants after squashed ones */ + /* * Get constants' lengths (core system only gives us locations). Note @@ -2836,6 +2839,9 @@ generate_normalized_query(JumbleState *jstate, const char *query, * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We * could refine that limit based on the max value of n for the current * query, but it hardly seems worth any extra effort to do so. + * + * Note this also gives enough room for the commented-out ", ..." list + * syntax used by constant squashing. */ norm_query_buflen = query_len + jstate->clocations_count * 10; @@ -2848,6 +2854,7 @@ generate_normalized_query(JumbleState *jstate, const char *query, tok_len; /* Length (in bytes) of that tok */ off = jstate->clocations[i].location; + /* Adjust recorded location if we're dealing with partial string */ off -= query_loc; @@ -2856,18 +2863,67 @@ generate_normalized_query(JumbleState *jstate, const char *query, if (tok_len < 0) continue; /* ignore any duplicates */ - /* Copy next chunk (what precedes the next constant) */ - len_to_wrt = off - last_off; - len_to_wrt -= last_tok_len; + /* + * What to do next depends on whether we're squashing constant lists, + * and whether we're already in a run of such constants. + */ + if (!jstate->clocations[i].squashed) + { + /* + * This location corresponds to a constant not to be squashed. + * Print what comes before the constant ... + */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; - Assert(len_to_wrt >= 0); - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; + Assert(len_to_wrt >= 0); - /* And insert a param symbol in place of the constant token */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + /* ... and then a param symbol replacing the constant itself */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", + i + 1 + jstate->highest_extern_param_id - skipped_constants); + + /* In case previous constants were merged away, stop doing that */ + in_squashed = false; + } + else if (!in_squashed) + { + /* + * This location is the start position of a run of constants to be + * squashed, so we need to print the representation of starting a + * group of stashed constants. + * + * Print what comes before the constant ... + */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; + Assert(len_to_wrt >= 0); + Assert(i + 1 < jstate->clocations_count); + Assert(jstate->clocations[i + 1].squashed); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* ... and then start a run of squashed constants */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */", + i + 1 + jstate->highest_extern_param_id - skipped_constants); + + /* The next location will match the block below, to end the run */ + in_squashed = true; + + skipped_constants++; + } + else + { + /* + * The second location of a run of squashable elements; this + * indicates its end. + */ + in_squashed = false; + } + + /* Otherwise the constant is squashed away -- move forward */ quer_loc = off + tok_len; last_off = off; last_tok_len = tok_len; diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql new file mode 100644 index 00000000000..56ee8ccb9a1 --- /dev/null +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -0,0 +1,180 @@ +-- +-- Const squashing functionality +-- +CREATE EXTENSION pg_stat_statements; + +CREATE TABLE test_squash (id int, data int); + +-- IN queries + +-- No squashing is performed, as a baseline result +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Normal scenario, too many simple constants for an IN query +SET query_id_squash_values = on; + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash WHERE id IN (1); +SELECT * FROM test_squash WHERE id IN (1, 2, 3); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- More conditions in the query +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2; +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2; +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Multiple squashed intervals +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- No constants simplification for OpExpr +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +-- In the following two queries the operator expressions (+) and (@) have +-- different oppno, and will be given different query_id if squashed, even though +-- the normalized query will be the same +SELECT * FROM test_squash WHERE id IN + (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9); +SELECT * FROM test_squash WHERE id IN + (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9'); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- FuncExpr + +-- Verify multiple type representation end up with the same query_id +CREATE TABLE test_float (data float); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT data FROM test_float WHERE data IN (1, 2); +SELECT data FROM test_float WHERE data IN (1, '2'); +SELECT data FROM test_float WHERE data IN ('1', 2); +SELECT data FROM test_float WHERE data IN ('1', '2'); +SELECT data FROM test_float WHERE data IN (1.0, 1.0); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Numeric type, implicit cast is squashed +CREATE TABLE test_squash_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, implicit cast is squashed +CREATE TABLE test_squash_bigint (id int, data bigint); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, explicit cast is not squashed +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_bigint WHERE data IN + (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint, + 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_bigint WHERE id IN + (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700), + abs(800), abs(900), abs(1000), ((abs(1100)))); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- CoerceViaIO, SubLink instead of a Const +CREATE TABLE test_squash_jsonb (id int, data jsonb); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_jsonb WHERE data IN + ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb, + (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb, + (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb, + (SELECT '"10"')::jsonb); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- CoerceViaIO + +-- Create some dummy type to force CoerceViaIO +CREATE TYPE casttesttype; + +CREATE FUNCTION casttesttype_in(cstring) + RETURNS casttesttype + AS 'textin' + LANGUAGE internal STRICT IMMUTABLE; + +CREATE FUNCTION casttesttype_out(casttesttype) + RETURNS cstring + AS 'textout' + LANGUAGE internal STRICT IMMUTABLE; + +CREATE TYPE casttesttype ( + internallength = variable, + input = casttesttype_in, + output = casttesttype_out, + alignment = int4 +); + +CREATE CAST (int4 AS casttesttype) WITH INOUT; + +CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype) +returns boolean language sql immutable as $$ + SELECT true +$$; + +CREATE OPERATOR = ( + leftarg = casttesttype, + rightarg = casttesttype, + procedure = casttesttype_eq, + commutator = =); + +CREATE TABLE test_squash_cast (id int, data casttesttype); + +-- Use the introduced type to construct a list of CoerceViaIO around Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_cast WHERE data IN + (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype, + 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype, + 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype, + 10::int4::casttesttype, 11::int4::casttesttype); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Some casting expression are simplified to Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_jsonb WHERE data IN + (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb, + ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb, + ( '"9"')::jsonb, ( '"10"')::jsonb); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- RelabelType +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Test constants evaluation in a CTE, which was causing issues in the past +WITH cte AS ( + SELECT 'const' as const FROM test_squash +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + +-- Simple array would be squashed as well +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +RESET query_id_squash_values; diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index cd889142773..9e9c02cde83 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8701,6 +8701,36 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + query_id_squash_values (bool) + + query_id_squash_values configuration parameter + + + + + Specifies how a list of constants (e.g., for an IN + clause) contributes to the query identifier computation. + Normally, every element of such a list contributes to the query + identifier separately, which means that two queries that only differ + in the number of elements in such a list would get different query + identifiers. + If this parameter is on, a list of constants will not contribute + to the query identifier. This means that two queries whose only + difference is the number of constants in such a list are going to get the + same query identifier. + + + Only constants are affected; bind parameters do not benefit from this + functionality. The default value is off. + + + This parameter also affects how + generates normalized query texts. + + + + log_statement_stats (boolean) diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index e2ac1c2d501..f4e384e95ae 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -630,9 +630,27 @@ In some cases, queries with visibly different texts might get merged into a - single pg_stat_statements entry. Normally this will happen - only for semantically equivalent queries, but there is a small chance of - hash collisions causing unrelated queries to be merged into one entry. + single pg_stat_statements entry; as explained above, + this is expected to happen for semantically equivalent queries. + In addition, if query_id_squash_values is enabled + and the only difference between queries is the number of elements in a list + of constants, the list will get squashed down to a single element but shown + with a commented-out list indicator: + + +=# SET query_id_squash_values = on; +=# SELECT pg_stat_statements_reset(); +=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7); +=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8); +=# SELECT query, calls FROM pg_stat_statements + WHERE query LIKE 'SELECT%'; +-[ RECORD 1 ]------------------------------ +query | SELECT * FROM test WHERE a IN ($1 /*, ... */) +calls | 2 + + + In addition to these cases, there is a small chance of hash collisions + causing unrelated queries to be merged into one entry. (This cannot happen for queries belonging to different users or databases, however.) diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 1a657f7e0ae..7e3f335ac09 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -476,6 +476,7 @@ foreach my $infile (@ARGV) equal_ignore_if_zero query_jumble_ignore query_jumble_location + query_jumble_squash read_write_ignore write_only_relids write_only_nondefault_pathtarget @@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node) my @a = @{ $node_type_info{$n}->{field_attrs}{$f} }; my $query_jumble_ignore = $struct_no_query_jumble; my $query_jumble_location = 0; + my $query_jumble_squash = 0; # extract per-field attributes foreach my $a (@a) @@ -1295,14 +1297,27 @@ _jumble${n}(JumbleState *jstate, Node *node) { $query_jumble_location = 1; } + elsif ($a eq 'query_jumble_squash') + { + $query_jumble_squash = 1; + } } # node type if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/) and elem $1, @node_types) { - print $jff "\tJUMBLE_NODE($f);\n" - unless $query_jumble_ignore; + # Squash constants if requested. + if ($query_jumble_squash) + { + print $jff "\tJUMBLE_ELEMENTS($f);\n" + unless $query_jumble_ignore; + } + else + { + print $jff "\tJUMBLE_NODE($f);\n" + unless $query_jumble_ignore; + } } elsif ($t eq 'ParseLoc') { diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index b103a281936..189bfda610a 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -32,9 +32,13 @@ */ #include "postgres.h" +#include "access/transam.h" +#include "catalog/pg_proc.h" #include "common/hashfn.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "nodes/queryjumble.h" +#include "utils/lsyscache.h" #include "parser/scansup.h" #define JUMBLE_SIZE 1024 /* query serialization buffer size */ @@ -42,6 +46,9 @@ /* GUC parameters */ int compute_query_id = COMPUTE_QUERY_ID_AUTO; +/* Whether to merge constants in a list when computing query_id */ +bool query_id_squash_values = false; + /* * True when compute_query_id is ON or AUTO, and a module requests them. * @@ -53,8 +60,10 @@ bool query_id_enabled = false; static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); -static void RecordConstLocation(JumbleState *jstate, int location); +static void RecordConstLocation(JumbleState *jstate, + int location, bool merged); static void _jumbleNode(JumbleState *jstate, Node *node); +static void _jumbleElements(JumbleState *jstate, List *elements); static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); @@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) } /* - * Record location of constant within query string of query tree - * that is currently being walked. + * Record location of constant within query string of query tree that is + * currently being walked. + * + * 'squashed' signals that the constant represents the first or the last + * element in a series of merged constants, and everything but the first/last + * element contributes nothing to the jumble hash. */ static void -RecordConstLocation(JumbleState *jstate, int location) +RecordConstLocation(JumbleState *jstate, int location, bool squashed) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -218,15 +231,99 @@ RecordConstLocation(JumbleState *jstate, int location) } jstate->clocations[jstate->clocations_count].location = location; /* initialize lengths to -1 to simplify third-party module usage */ + jstate->clocations[jstate->clocations_count].squashed = squashed; jstate->clocations[jstate->clocations_count].length = -1; jstate->clocations_count++; } } +/* + * Subroutine for _jumbleElements: Verify a few simple cases where we can + * deduce that the expression is a constant: + * + * - Ignore a possible wrapping RelabelType and CoerceViaIO. + * - If it's a FuncExpr, check that the function is an implicit + * cast and its arguments are Const. + * - Otherwise test if the expression is a simple Const. + */ +static bool +IsSquashableConst(Node *element) +{ + if (IsA(element, RelabelType)) + element = (Node *) ((RelabelType *) element)->arg; + + if (IsA(element, CoerceViaIO)) + element = (Node *) ((CoerceViaIO *) element)->arg; + + if (IsA(element, FuncExpr)) + { + FuncExpr *func = (FuncExpr *) element; + ListCell *temp; + + if (func->funcformat != COERCE_IMPLICIT_CAST && + func->funcformat != COERCE_EXPLICIT_CAST) + return false; + + if (func->funcid > FirstGenbkiObjectId) + return false; + + foreach(temp, func->args) + { + Node *arg = lfirst(temp); + + if (!IsA(arg, Const)) /* XXX we could recurse here instead */ + return false; + } + + return true; + } + + if (!IsA(element, Const)) + return false; + + return true; +} + +/* + * Subroutine for _jumbleElements: Verify whether the provided list + * can be squashed, meaning it contains only constant expressions. + * + * Return value indicates if squashing is possible. + * + * Note that this function searches only for explicit Const nodes with + * possibly very simple decorations on top, and does not try to simplify + * expressions. + */ +static bool +IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) +{ + ListCell *temp; + + /* + * If squashing is disabled, or the list is too short, we don't try to + * squash it. + */ + if (!query_id_squash_values || list_length(elements) < 2) + return false; + + foreach(temp, elements) + { + if (!IsSquashableConst(lfirst(temp))) + return false; + } + + *firstExpr = linitial(elements); + *lastExpr = llast(elements); + + return true; +} + #define JUMBLE_NODE(item) \ _jumbleNode(jstate, (Node *) expr->item) +#define JUMBLE_ELEMENTS(list) \ + _jumbleElements(jstate, (List *) expr->list) #define JUMBLE_LOCATION(location) \ - RecordConstLocation(jstate, expr->location) + RecordConstLocation(jstate, expr->location, false) #define JUMBLE_FIELD(item) \ AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)) #define JUMBLE_FIELD_SINGLE(item) \ @@ -239,6 +336,45 @@ do { \ #include "queryjumblefuncs.funcs.c" +/* + * When query_id_squash_values is enabled, we jumble lists of constant + * elements as one individual item regardless of how many elements are + * in the list. This means different queries jumble to the same query_id, + * if the only difference is the number of elements in the list. + * + * If query_id_squash_values is disabled or the list is not "simple + * enough", we jumble each element normally. + */ +static void +_jumbleElements(JumbleState *jstate, List *elements) +{ + Node *first, + *last; + + if (IsSquashableConstList(elements, &first, &last)) + { + /* + * If this list of elements is squashable, keep track of the location + * of its first and last elements. When reading back the locations + * array, we'll see two consecutive locations with ->squashed set to + * true, indicating the location of initial and final elements of this + * list. + * + * For the limited set of cases we support now (implicit coerce via + * FuncExpr, Const) it's fine to use exprLocation of the 'last' + * expression, but if more complex composite expressions are to be + * supported (e.g., OpExpr or FuncExpr as an explicit call), more + * sophisticated tracking will be needed. + */ + RecordConstLocation(jstate, exprLocation(first), true); + RecordConstLocation(jstate, exprLocation(last), true); + } + else + { + _jumbleNode(jstate, (Node *) elements); + } +} + static void _jumbleNode(JumbleState *jstate, Node *node) { diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index bf6b55ee830..9e6c900d146 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -116,6 +116,7 @@ typedef struct bool redirection_done; bool IsBinaryUpgrade; bool query_id_enabled; + bool query_id_squash_values; int max_safe_fds; int MaxBackends; int num_pmchild_slots; @@ -777,6 +778,7 @@ save_backend_variables(BackendParameters *param, param->redirection_done = redirection_done; param->IsBinaryUpgrade = IsBinaryUpgrade; param->query_id_enabled = query_id_enabled; + param->query_id_squash_values = query_id_squash_values; param->max_safe_fds = max_safe_fds; param->MaxBackends = MaxBackends; @@ -1037,6 +1039,7 @@ restore_backend_variables(BackendParameters *param) redirection_done = param->redirection_done; IsBinaryUpgrade = param->IsBinaryUpgrade; query_id_enabled = param->query_id_enabled; + query_id_squash_values = param->query_id_squash_values; max_safe_fds = param->max_safe_fds; MaxBackends = param->MaxBackends; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index c89316ce294..60a40ed445a 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -2120,6 +2120,16 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"query_id_squash_values", PGC_USERSET, STATS_MONITORING, + gettext_noop("Allows to merge constants in a list when computing " + "query_id."), + }, + &query_id_squash_values, + false, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 4c55d0c1383..beb05a89501 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -659,6 +659,7 @@ # - Monitoring - #compute_query_id = auto +#query_id_squash_values = off #log_statement_stats = off #log_parser_stats = off #log_planner_stats = off diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 580238bfab1..d18044b4e65 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -105,6 +105,8 @@ typedef enum NodeTag * that typmod and collation information are usually irrelevant for the * query jumbling. * + * - query_jumble_squash: Squash multiple values during query jumbling. + * * - query_jumble_location: Mark the field as a location to track. This is * only allowed for integer fields that include "location" in their name. * diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index d0576da3e25..7d3b4198f26 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1394,7 +1394,7 @@ typedef struct ArrayExpr /* common type of array elements */ Oid element_typeid pg_node_attr(query_jumble_ignore); /* the array elements or sub-arrays */ - List *elements; + List *elements pg_node_attr(query_jumble_squash); /* true if elements are sub-arrays */ bool multidims pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 50eb9566587..905f66bc0bd 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -23,6 +23,12 @@ typedef struct LocationLen { int location; /* start offset in query text */ int length; /* length in bytes, or -1 to ignore */ + + /* + * Indicates that this location represents the beginning or end of a run + * of squashed constants. + */ + bool squashed; } LocationLen; /* @@ -68,6 +74,7 @@ extern JumbleState *JumbleQuery(Query *query); extern void EnableQueryId(void); extern PGDLLIMPORT bool query_id_enabled; +extern PGDLLIMPORT bool query_id_squash_values; /* * Returns whether query identifier computation has been enabled, either