postgresql/contrib/tablefunc/tablefunc--1.0.sql

86 lines
2.0 KiB
MySQL
Raw Normal View History

/* contrib/tablefunc/tablefunc--1.0.sql */
CREATE FUNCTION normal_rand(int4, float8, float8)
RETURNS setof float8
AS 'MODULE_PATHNAME','normal_rand'
LANGUAGE C VOLATILE STRICT;
-- the generic crosstab function:
CREATE FUNCTION crosstab(text)
RETURNS setof record
AS 'MODULE_PATHNAME','crosstab'
LANGUAGE C STABLE STRICT;
-- examples of building custom type-specific crosstab functions:
CREATE TYPE tablefunc_crosstab_2 AS
(
row_name TEXT,
category_1 TEXT,
category_2 TEXT
);
CREATE TYPE tablefunc_crosstab_3 AS
(
row_name TEXT,
category_1 TEXT,
category_2 TEXT,
category_3 TEXT
);
CREATE TYPE tablefunc_crosstab_4 AS
(
row_name TEXT,
category_1 TEXT,
category_2 TEXT,
category_3 TEXT,
category_4 TEXT
);
CREATE FUNCTION crosstab2(text)
RETURNS setof tablefunc_crosstab_2
AS 'MODULE_PATHNAME','crosstab'
LANGUAGE C STABLE STRICT;
CREATE FUNCTION crosstab3(text)
RETURNS setof tablefunc_crosstab_3
AS 'MODULE_PATHNAME','crosstab'
LANGUAGE C STABLE STRICT;
CREATE FUNCTION crosstab4(text)
RETURNS setof tablefunc_crosstab_4
AS 'MODULE_PATHNAME','crosstab'
LANGUAGE C STABLE STRICT;
-- obsolete:
CREATE FUNCTION crosstab(text,int)
RETURNS setof record
AS 'MODULE_PATHNAME','crosstab'
LANGUAGE C STABLE STRICT;
CREATE FUNCTION crosstab(text,text)
Attached is an update to contrib/tablefunc. It implements a new hashed version of crosstab. This fixes a major deficiency in real-world use of the original version. Easiest to undestand with an illustration: Data: ------------------------------------------------------------------- select * from cth; id | rowid | rowdt | attribute | val ----+-------+---------------------+----------------+--------------- 1 | test1 | 2003-03-01 00:00:00 | temperature | 42 2 | test1 | 2003-03-01 00:00:00 | test_result | PASS 3 | test1 | 2003-03-01 00:00:00 | volts | 2.6987 4 | test2 | 2003-03-02 00:00:00 | temperature | 53 5 | test2 | 2003-03-02 00:00:00 | test_result | FAIL 6 | test2 | 2003-03-02 00:00:00 | test_startdate | 01 March 2003 7 | test2 | 2003-03-02 00:00:00 | volts | 3.1234 (7 rows) Original crosstab: ------------------------------------------------------------------- SELECT * FROM crosstab( 'SELECT rowid, attribute, val FROM cth ORDER BY 1,2',4) AS c(rowid text, temperature text, test_result text, test_startdate text, volts text); rowid | temperature | test_result | test_startdate | volts -------+-------------+-------------+----------------+-------- test1 | 42 | PASS | 2.6987 | test2 | 53 | FAIL | 01 March 2003 | 3.1234 (2 rows) Hashed crosstab: ------------------------------------------------------------------- SELECT * FROM crosstab( 'SELECT rowid, attribute, val FROM cth ORDER BY 1', 'SELECT DISTINCT attribute FROM cth ORDER BY 1') AS c(rowid text, temperature int4, test_result text, test_startdate timestamp, volts float8); rowid | temperature | test_result | test_startdate | volts -------+-------------+-------------+---------------------+-------- test1 | 42 | PASS | | 2.6987 test2 | 53 | FAIL | 2003-03-01 00:00:00 | 3.1234 (2 rows) Notice that the original crosstab slides data over to the left in the result tuple when it encounters missing data. In order to work around this you have to be make your source sql do all sorts of contortions (cartesian join of distinct rowid with distinct attribute; left join that back to the real source data). The new version avoids this by building a hash table using a second distinct attribute query. The new version also allows for "extra" columns (see the README) and allows the result columns to be coerced into differing datatypes if they are suitable (as shown above). In testing a "real-world" data set (69 distinct rowid's, 27 distinct categories/attributes, multiple missing data points) I saw about a 5-fold improvement in execution time (from about 2200 ms old, to 440 ms new). I left the original version intact because: 1) BC, 2) it is probably slightly faster if you know that you have no missing attributes. README and regression test adjustments included. If there are no objections, please apply. Joe Conway
2003-03-20 14:46:30 +08:00
RETURNS setof record
AS 'MODULE_PATHNAME','crosstab_hash'
LANGUAGE C STABLE STRICT;
Attached is an update to contrib/tablefunc. It implements a new hashed version of crosstab. This fixes a major deficiency in real-world use of the original version. Easiest to undestand with an illustration: Data: ------------------------------------------------------------------- select * from cth; id | rowid | rowdt | attribute | val ----+-------+---------------------+----------------+--------------- 1 | test1 | 2003-03-01 00:00:00 | temperature | 42 2 | test1 | 2003-03-01 00:00:00 | test_result | PASS 3 | test1 | 2003-03-01 00:00:00 | volts | 2.6987 4 | test2 | 2003-03-02 00:00:00 | temperature | 53 5 | test2 | 2003-03-02 00:00:00 | test_result | FAIL 6 | test2 | 2003-03-02 00:00:00 | test_startdate | 01 March 2003 7 | test2 | 2003-03-02 00:00:00 | volts | 3.1234 (7 rows) Original crosstab: ------------------------------------------------------------------- SELECT * FROM crosstab( 'SELECT rowid, attribute, val FROM cth ORDER BY 1,2',4) AS c(rowid text, temperature text, test_result text, test_startdate text, volts text); rowid | temperature | test_result | test_startdate | volts -------+-------------+-------------+----------------+-------- test1 | 42 | PASS | 2.6987 | test2 | 53 | FAIL | 01 March 2003 | 3.1234 (2 rows) Hashed crosstab: ------------------------------------------------------------------- SELECT * FROM crosstab( 'SELECT rowid, attribute, val FROM cth ORDER BY 1', 'SELECT DISTINCT attribute FROM cth ORDER BY 1') AS c(rowid text, temperature int4, test_result text, test_startdate timestamp, volts float8); rowid | temperature | test_result | test_startdate | volts -------+-------------+-------------+---------------------+-------- test1 | 42 | PASS | | 2.6987 test2 | 53 | FAIL | 2003-03-01 00:00:00 | 3.1234 (2 rows) Notice that the original crosstab slides data over to the left in the result tuple when it encounters missing data. In order to work around this you have to be make your source sql do all sorts of contortions (cartesian join of distinct rowid with distinct attribute; left join that back to the real source data). The new version avoids this by building a hash table using a second distinct attribute query. The new version also allows for "extra" columns (see the README) and allows the result columns to be coerced into differing datatypes if they are suitable (as shown above). In testing a "real-world" data set (69 distinct rowid's, 27 distinct categories/attributes, multiple missing data points) I saw about a 5-fold improvement in execution time (from about 2200 ms old, to 440 ms new). I left the original version intact because: 1) BC, 2) it is probably slightly faster if you know that you have no missing attributes. README and regression test adjustments included. If there are no objections, please apply. Joe Conway
2003-03-20 14:46:30 +08:00
CREATE FUNCTION connectby(text,text,text,text,int,text)
RETURNS setof record
AS 'MODULE_PATHNAME','connectby_text'
LANGUAGE C STABLE STRICT;
CREATE FUNCTION connectby(text,text,text,text,int)
RETURNS setof record
AS 'MODULE_PATHNAME','connectby_text'
LANGUAGE C STABLE STRICT;
-- These 2 take the name of a field to ORDER BY as 4th arg (for sorting siblings)
CREATE FUNCTION connectby(text,text,text,text,text,int,text)
RETURNS setof record
AS 'MODULE_PATHNAME','connectby_text_serial'
LANGUAGE C STABLE STRICT;
CREATE FUNCTION connectby(text,text,text,text,text,int)
RETURNS setof record
AS 'MODULE_PATHNAME','connectby_text_serial'
LANGUAGE C STABLE STRICT;