diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 84e58944ce..3a7cfa93c2 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -901,25 +901,40 @@
width_bucket
- width_bucket(op numeric, b1 numeric, b2 numeric, count int)
-
+ width_bucket(operand dp, b1 dp, b2 dp, count int)
int
- return the bucket to which operand> would
- be assigned in an equidepth histogram with count>
- buckets, in the range b1> to b2>
+ return the bucket number to which operand> would
+ be assigned in a histogram having count> equal-width
+ buckets spanning the range b1> to b2>;
+ returns 0> or count>+1 for
+ an input outside the range
width_bucket(5.35, 0.024, 10.06, 5)
3
- width_bucket(op dp, b1 dp, b2 dp, count int)
+ width_bucket(operand numeric, b1 numeric, b2 numeric, count int)
int
- return the bucket to which operand> would
- be assigned in an equidepth histogram with count>
- buckets, in the range b1> to b2>
+ return the bucket number to which operand> would
+ be assigned in a histogram having count> equal-width
+ buckets spanning the range b1> to b2>;
+ returns 0> or count>+1 for
+ an input outside the range
width_bucket(5.35, 0.024, 10.06, 5)
3
+
+
+ width_bucket(operand anyelement, thresholds anyarray)
+ int
+ return the bucket number to which operand> would
+ be assigned given an array listing the lower bounds of the buckets;
+ returns 0> for an input less than the first lower bound;
+ the thresholds> array must be sorted>,
+ smallest first, or unexpected results will be obtained
+ width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[])
+ 2
+
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index f8e94ec365..fafc852a4f 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -15,8 +15,10 @@
#include "postgres.h"
#include
+#include
#include "access/htup_details.h"
+#include "catalog/pg_type.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "utils/array.h"
@@ -130,6 +132,15 @@ static ArrayType *array_replace_internal(ArrayType *array,
Datum replace, bool replace_isnull,
bool remove, Oid collation,
FunctionCallInfo fcinfo);
+static int width_bucket_array_float8(Datum operand, ArrayType *thresholds);
+static int width_bucket_array_fixed(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry);
+static int width_bucket_array_variable(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry);
/*
@@ -5502,3 +5513,235 @@ array_replace(PG_FUNCTION_ARGS)
fcinfo);
PG_RETURN_ARRAYTYPE_P(array);
}
+
+/*
+ * Implements width_bucket(anyelement, anyarray).
+ *
+ * 'thresholds' is an array containing lower bound values for each bucket;
+ * these must be sorted from smallest to largest, or bogus results will be
+ * produced. If N thresholds are supplied, the output is from 0 to N:
+ * 0 is for inputs < first threshold, N is for inputs >= last threshold.
+ */
+Datum
+width_bucket_array(PG_FUNCTION_ARGS)
+{
+ Datum operand = PG_GETARG_DATUM(0);
+ ArrayType *thresholds = PG_GETARG_ARRAYTYPE_P(1);
+ Oid collation = PG_GET_COLLATION();
+ Oid element_type = ARR_ELEMTYPE(thresholds);
+ int result;
+
+ /* Check input */
+ if (ARR_NDIM(thresholds) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("thresholds must be one-dimensional array")));
+
+ if (array_contains_nulls(thresholds))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("thresholds array must not contain NULLs")));
+
+ /* We have a dedicated implementation for float8 data */
+ if (element_type == FLOAT8OID)
+ result = width_bucket_array_float8(operand, thresholds);
+ else
+ {
+ TypeCacheEntry *typentry;
+
+ /* Cache information about the input type */
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_CMP_PROC_FINFO);
+ if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a comparison function for type %s",
+ format_type_be(element_type))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+
+ /*
+ * We have separate implementation paths for fixed- and variable-width
+ * types, since indexing the array is a lot cheaper in the first case.
+ */
+ if (typentry->typlen > 0)
+ result = width_bucket_array_fixed(operand, thresholds,
+ collation, typentry);
+ else
+ result = width_bucket_array_variable(operand, thresholds,
+ collation, typentry);
+ }
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(thresholds, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * width_bucket_array for float8 data.
+ */
+static int
+width_bucket_array_float8(Datum operand, ArrayType *thresholds)
+{
+ float8 op = DatumGetFloat8(operand);
+ float8 *thresholds_data;
+ int left;
+ int right;
+
+ /*
+ * Since we know the array contains no NULLs, we can just index it
+ * directly.
+ */
+ thresholds_data = (float8 *) ARR_DATA_PTR(thresholds);
+
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+
+ /*
+ * If the probe value is a NaN, it's greater than or equal to all possible
+ * threshold values (including other NaNs), so we need not search. Note
+ * that this would give the same result as searching even if the array
+ * contains multiple NaNs (as long as they're correctly sorted), since the
+ * loop logic will find the rightmost of multiple equal threshold values.
+ */
+ if (isnan(op))
+ return right;
+
+ /* Find the bucket */
+ while (left < right)
+ {
+ int mid = (left + right) / 2;
+
+ if (isnan(thresholds_data[mid]) || op < thresholds_data[mid])
+ right = mid;
+ else
+ left = mid + 1;
+ }
+
+ return left;
+}
+
+/*
+ * width_bucket_array for generic fixed-width data types.
+ */
+static int
+width_bucket_array_fixed(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry)
+{
+ char *thresholds_data;
+ int typlen = typentry->typlen;
+ bool typbyval = typentry->typbyval;
+ FunctionCallInfoData locfcinfo;
+ int left;
+ int right;
+
+ /*
+ * Since we know the array contains no NULLs, we can just index it
+ * directly.
+ */
+ thresholds_data = (char *) ARR_DATA_PTR(thresholds);
+
+ InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Find the bucket */
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+ while (left < right)
+ {
+ int mid = (left + right) / 2;
+ char *ptr;
+ int32 cmpresult;
+
+ ptr = thresholds_data + mid * typlen;
+
+ locfcinfo.arg[0] = operand;
+ locfcinfo.arg[1] = fetch_att(ptr, typbyval, typlen);
+ locfcinfo.argnull[0] = false;
+ locfcinfo.argnull[1] = false;
+ locfcinfo.isnull = false;
+
+ cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo));
+
+ if (cmpresult < 0)
+ right = mid;
+ else
+ left = mid + 1;
+ }
+
+ return left;
+}
+
+/*
+ * width_bucket_array for generic variable-width data types.
+ */
+static int
+width_bucket_array_variable(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry)
+{
+ char *thresholds_data;
+ int typlen = typentry->typlen;
+ bool typbyval = typentry->typbyval;
+ char typalign = typentry->typalign;
+ FunctionCallInfoData locfcinfo;
+ int left;
+ int right;
+
+ thresholds_data = (char *) ARR_DATA_PTR(thresholds);
+
+ InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Find the bucket */
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+ while (left < right)
+ {
+ int mid = (left + right) / 2;
+ char *ptr;
+ int i;
+ int32 cmpresult;
+
+ /* Locate mid'th array element by advancing from left element */
+ ptr = thresholds_data;
+ for (i = left; i < mid; i++)
+ {
+ ptr = att_addlength_pointer(ptr, typlen, ptr);
+ ptr = (char *) att_align_nominal(ptr, typalign);
+ }
+
+ locfcinfo.arg[0] = operand;
+ locfcinfo.arg[1] = fetch_att(ptr, typbyval, typlen);
+ locfcinfo.argnull[0] = false;
+ locfcinfo.argnull[1] = false;
+ locfcinfo.isnull = false;
+
+ cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo));
+
+ if (cmpresult < 0)
+ right = mid;
+ else
+ {
+ left = mid + 1;
+
+ /*
+ * Move the thresholds pointer to match new "left" index, so we
+ * don't have to seek over those elements again. This trick
+ * ensures we do only O(N) array indexing work, not O(N^2).
+ */
+ ptr = att_addlength_pointer(ptr, typlen, ptr);
+ thresholds_data = (char *) att_align_nominal(ptr, typalign);
+ }
+ }
+
+ return left;
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index e1b62a505c..5dc0455477 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201408281
+#define CATALOG_VERSION_NO 201409091
#endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 5176ed04dd..bd1b41723c 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -514,7 +514,7 @@ DATA(insert OID = 308 ( float84le PGNSP PGUID 12 1 0 0 0 f f f t t f i 2 0
DATA(insert OID = 309 ( float84gt PGNSP PGUID 12 1 0 0 0 f f f t t f i 2 0 16 "701 700" _null_ _null_ _null_ _null_ float84gt _null_ _null_ _null_ ));
DATA(insert OID = 310 ( float84ge PGNSP PGUID 12 1 0 0 0 f f f t t f i 2 0 16 "701 700" _null_ _null_ _null_ _null_ float84ge _null_ _null_ _null_ ));
DATA(insert OID = 320 ( width_bucket PGNSP PGUID 12 1 0 0 0 f f f f t f i 4 0 23 "701 701 701 23" _null_ _null_ _null_ _null_ width_bucket_float8 _null_ _null_ _null_ ));
-DESCR("bucket number of operand in equidepth histogram");
+DESCR("bucket number of operand in equal-width histogram");
DATA(insert OID = 311 ( float8 PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 701 "700" _null_ _null_ _null_ _null_ ftod _null_ _null_ _null_ ));
DESCR("convert float4 to float8");
@@ -885,6 +885,8 @@ DATA(insert OID = 2334 ( array_agg_finalfn PGNSP PGUID 12 1 0 0 0 f f f f f f
DESCR("aggregate final function");
DATA(insert OID = 2335 ( array_agg PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 2277 "2283" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
DESCR("concatenate aggregate input into an array");
+DATA(insert OID = 3218 ( width_bucket PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "2283 2277" _null_ _null_ _null_ _null_ width_bucket_array _null_ _null_ _null_ ));
+DESCR("bucket number of operand given a sorted array of bucket lower bounds");
DATA(insert OID = 3816 ( array_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 16 "2281" _null_ _null_ _null_ _null_ array_typanalyze _null_ _null_ _null_ ));
DESCR("array typanalyze");
DATA(insert OID = 3817 ( arraycontsel PGNSP PGUID 12 1 0 0 0 f f f f t f s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ arraycontsel _null_ _null_ _null_ ));
@@ -2301,7 +2303,7 @@ DESCR("trunc(x/y)");
DATA(insert OID = 1980 ( numeric_div_trunc PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1700 "1700 1700" _null_ _null_ _null_ _null_ numeric_div_trunc _null_ _null_ _null_ ));
DESCR("trunc(x/y)");
DATA(insert OID = 2170 ( width_bucket PGNSP PGUID 12 1 0 0 0 f f f f t f i 4 0 23 "1700 1700 1700 23" _null_ _null_ _null_ _null_ width_bucket_numeric _null_ _null_ _null_ ));
-DESCR("bucket number of operand in equidepth histogram");
+DESCR("bucket number of operand in equal-width histogram");
DATA(insert OID = 1747 ( time_pl_interval PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1083 "1083 1186" _null_ _null_ _null_ _null_ time_pl_interval _null_ _null_ _null_ ));
DATA(insert OID = 1748 ( time_mi_interval PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1083 "1083 1186" _null_ _null_ _null_ _null_ time_mi_interval _null_ _null_ _null_ ));
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 9bbfaae85e..e744314c51 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -214,6 +214,7 @@ extern Datum array_fill_with_lower_bounds(PG_FUNCTION_ARGS);
extern Datum array_unnest(PG_FUNCTION_ARGS);
extern Datum array_remove(PG_FUNCTION_ARGS);
extern Datum array_replace(PG_FUNCTION_ARGS);
+extern Datum width_bucket_array(PG_FUNCTION_ARGS);
extern Datum array_ref(ArrayType *array, int nSubscripts, int *indx,
int arraytyplen, int elmlen, bool elmbyval, char elmalign,
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 4286691f92..46eff67b42 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1706,3 +1706,126 @@ select length(md5((f1[1]).c2)) from dest;
drop table dest;
drop type textandtext;
+-- Tests for polymorphic-array form of width_bucket()
+-- this exercises the varwidth and float8 code paths
+SELECT
+ op,
+ width_bucket(op::numeric, ARRAY[1, 3, 5, 10.0]::numeric[]) AS wb_n1,
+ width_bucket(op::numeric, ARRAY[0, 5.5, 9.99]::numeric[]) AS wb_n2,
+ width_bucket(op::numeric, ARRAY[-6, -5, 2.0]::numeric[]) AS wb_n3,
+ width_bucket(op::float8, ARRAY[1, 3, 5, 10.0]::float8[]) AS wb_f1,
+ width_bucket(op::float8, ARRAY[0, 5.5, 9.99]::float8[]) AS wb_f2,
+ width_bucket(op::float8, ARRAY[-6, -5, 2.0]::float8[]) AS wb_f3
+FROM (VALUES
+ (-5.2),
+ (-0.0000000001),
+ (0.000000000001),
+ (1),
+ (1.99999999999999),
+ (2),
+ (2.00000000000001),
+ (3),
+ (4),
+ (4.5),
+ (5),
+ (5.5),
+ (6),
+ (7),
+ (8),
+ (9),
+ (9.99999999999999),
+ (10),
+ (10.0000000000001)
+) v(op);
+ op | wb_n1 | wb_n2 | wb_n3 | wb_f1 | wb_f2 | wb_f3
+------------------+-------+-------+-------+-------+-------+-------
+ -5.2 | 0 | 0 | 1 | 0 | 0 | 1
+ -0.0000000001 | 0 | 0 | 2 | 0 | 0 | 2
+ 0.000000000001 | 0 | 1 | 2 | 0 | 1 | 2
+ 1 | 1 | 1 | 2 | 1 | 1 | 2
+ 1.99999999999999 | 1 | 1 | 2 | 1 | 1 | 2
+ 2 | 1 | 1 | 3 | 1 | 1 | 3
+ 2.00000000000001 | 1 | 1 | 3 | 1 | 1 | 3
+ 3 | 2 | 1 | 3 | 2 | 1 | 3
+ 4 | 2 | 1 | 3 | 2 | 1 | 3
+ 4.5 | 2 | 1 | 3 | 2 | 1 | 3
+ 5 | 3 | 1 | 3 | 3 | 1 | 3
+ 5.5 | 3 | 2 | 3 | 3 | 2 | 3
+ 6 | 3 | 2 | 3 | 3 | 2 | 3
+ 7 | 3 | 2 | 3 | 3 | 2 | 3
+ 8 | 3 | 2 | 3 | 3 | 2 | 3
+ 9 | 3 | 2 | 3 | 3 | 2 | 3
+ 9.99999999999999 | 3 | 3 | 3 | 3 | 3 | 3
+ 10 | 4 | 3 | 3 | 4 | 3 | 3
+ 10.0000000000001 | 4 | 3 | 3 | 4 | 3 | 3
+(19 rows)
+
+-- ensure float8 path handles NaN properly
+SELECT
+ op,
+ width_bucket(op, ARRAY[1, 3, 9, 'NaN', 'NaN']::float8[]) AS wb
+FROM (VALUES
+ (-5.2::float8),
+ (4::float8),
+ (77::float8),
+ ('NaN'::float8)
+) v(op);
+ op | wb
+------+----
+ -5.2 | 0
+ 4 | 2
+ 77 | 3
+ NaN | 5
+(4 rows)
+
+-- these exercise the generic fixed-width code path
+SELECT
+ op,
+ width_bucket(op, ARRAY[1, 3, 5, 10]) AS wb_1
+FROM generate_series(0,11) as op;
+ op | wb_1
+----+------
+ 0 | 0
+ 1 | 1
+ 2 | 1
+ 3 | 2
+ 4 | 2
+ 5 | 3
+ 6 | 3
+ 7 | 3
+ 8 | 3
+ 9 | 3
+ 10 | 4
+ 11 | 4
+(12 rows)
+
+SELECT width_bucket(now(),
+ array['yesterday', 'today', 'tomorrow']::timestamptz[]);
+ width_bucket
+--------------
+ 2
+(1 row)
+
+-- corner cases
+SELECT width_bucket(5, ARRAY[3]);
+ width_bucket
+--------------
+ 1
+(1 row)
+
+SELECT width_bucket(5, '{}');
+ width_bucket
+--------------
+ 0
+(1 row)
+
+-- error cases
+SELECT width_bucket('5'::text, ARRAY[3, 4]::integer[]);
+ERROR: function width_bucket(text, integer[]) does not exist
+LINE 1: SELECT width_bucket('5'::text, ARRAY[3, 4]::integer[]);
+ ^
+HINT: No function matches the given name and argument types. You might need to add explicit type casts.
+SELECT width_bucket(5, ARRAY[3, 4, NULL]);
+ERROR: thresholds array must not contain NULLs
+SELECT width_bucket(5, ARRAY[ARRAY[1, 2], ARRAY[3, 4]]);
+ERROR: thresholds must be one-dimensional array
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index d9f7cbfa8d..fa8a20ad1c 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -476,3 +476,65 @@ drop table src;
select length(md5((f1[1]).c2)) from dest;
drop table dest;
drop type textandtext;
+
+-- Tests for polymorphic-array form of width_bucket()
+
+-- this exercises the varwidth and float8 code paths
+SELECT
+ op,
+ width_bucket(op::numeric, ARRAY[1, 3, 5, 10.0]::numeric[]) AS wb_n1,
+ width_bucket(op::numeric, ARRAY[0, 5.5, 9.99]::numeric[]) AS wb_n2,
+ width_bucket(op::numeric, ARRAY[-6, -5, 2.0]::numeric[]) AS wb_n3,
+ width_bucket(op::float8, ARRAY[1, 3, 5, 10.0]::float8[]) AS wb_f1,
+ width_bucket(op::float8, ARRAY[0, 5.5, 9.99]::float8[]) AS wb_f2,
+ width_bucket(op::float8, ARRAY[-6, -5, 2.0]::float8[]) AS wb_f3
+FROM (VALUES
+ (-5.2),
+ (-0.0000000001),
+ (0.000000000001),
+ (1),
+ (1.99999999999999),
+ (2),
+ (2.00000000000001),
+ (3),
+ (4),
+ (4.5),
+ (5),
+ (5.5),
+ (6),
+ (7),
+ (8),
+ (9),
+ (9.99999999999999),
+ (10),
+ (10.0000000000001)
+) v(op);
+
+-- ensure float8 path handles NaN properly
+SELECT
+ op,
+ width_bucket(op, ARRAY[1, 3, 9, 'NaN', 'NaN']::float8[]) AS wb
+FROM (VALUES
+ (-5.2::float8),
+ (4::float8),
+ (77::float8),
+ ('NaN'::float8)
+) v(op);
+
+-- these exercise the generic fixed-width code path
+SELECT
+ op,
+ width_bucket(op, ARRAY[1, 3, 5, 10]) AS wb_1
+FROM generate_series(0,11) as op;
+
+SELECT width_bucket(now(),
+ array['yesterday', 'today', 'tomorrow']::timestamptz[]);
+
+-- corner cases
+SELECT width_bucket(5, ARRAY[3]);
+SELECT width_bucket(5, '{}');
+
+-- error cases
+SELECT width_bucket('5'::text, ARRAY[3, 4]::integer[]);
+SELECT width_bucket(5, ARRAY[3, 4, NULL]);
+SELECT width_bucket(5, ARRAY[ARRAY[1, 2], ARRAY[3, 4]]);