mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-11-27 07:21:09 +08:00
Don't store intermediate hash values in ExprState->resvalue
Commit adf97c156 made it so ExprStates could support hashing and changed
Hash Join to use that instead of manually extracting Datums from tuples
and hashing them one column at a time.
When hashing multiple columns or expressions, the code added in that
commit stored the intermediate hash value in the ExprState's resvalue
field. That was a mistake, as steps that look at or overwrite the stored
intermediate hash value may be injected into the ExprState between
hashing steps. EEOP_PARAM_SET is an example of such a step.
Here we fix this by adding a new dedicated field for storing
intermediate hash values and adjusting the code so that every hashing
step apart from the final one stores its result in the intermediate field.
In passing, rename a variable so that it's better aligned with the
surrounding code and so that a few lines stay within the 80-character margin.
Reported-by: Andres Freund
Reviewed-by: Alena Rybakina <a.rybakina@postgrespro.ru>
Discussion: https://postgr.es/m/CAApHDvqo9eenEFXND5zZ9JxO_k4eTA4jKMGxSyjdTrsmYvnmZw@mail.gmail.com
This commit is contained in:
parent
089aac631b
commit
9ca67658d1
@ -3996,6 +3996,7 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
|
||||
{
|
||||
ExprState *state = makeNode(ExprState);
|
||||
ExprEvalStep scratch = {0};
|
||||
NullableDatum *iresult = NULL;
|
||||
List *adjust_jumps = NIL;
|
||||
ListCell *lc;
|
||||
ListCell *lc2;
|
||||
@ -4009,6 +4010,14 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
|
||||
/* Insert setup steps as needed. */
|
||||
ExecCreateExprSetupSteps(state, (Node *) hash_exprs);
|
||||
|
||||
/*
|
||||
* When hashing more than 1 expression or if we have an init value, we
|
||||
* need somewhere to store the intermediate hash value so that it's
|
||||
* available to be combined with the result of subsequent hashing.
|
||||
*/
|
||||
if (list_length(hash_exprs) > 1 || init_value != 0)
|
||||
iresult = palloc(sizeof(NullableDatum));
|
||||
|
||||
if (init_value == 0)
|
||||
{
|
||||
/*
|
||||
@ -4024,8 +4033,8 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
|
||||
/* Set up operation to set the initial value. */
|
||||
scratch.opcode = EEOP_HASHDATUM_SET_INITVAL;
|
||||
scratch.d.hashdatum_initvalue.init_value = UInt32GetDatum(init_value);
|
||||
scratch.resvalue = &state->resvalue;
|
||||
scratch.resnull = &state->resnull;
|
||||
scratch.resvalue = &iresult->value;
|
||||
scratch.resnull = &iresult->isnull;
|
||||
|
||||
ExprEvalPushStep(state, &scratch);
|
||||
|
||||
@ -4063,8 +4072,26 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
|
||||
&fcinfo->args[0].value,
|
||||
&fcinfo->args[0].isnull);
|
||||
|
||||
scratch.resvalue = &state->resvalue;
|
||||
scratch.resnull = &state->resnull;
|
||||
if (i == list_length(hash_exprs) - 1)
|
||||
{
|
||||
/* the result for hashing the final expr is stored in the state */
|
||||
scratch.resvalue = &state->resvalue;
|
||||
scratch.resnull = &state->resnull;
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(iresult != NULL);
|
||||
|
||||
/* intermediate values are stored in an intermediate result */
|
||||
scratch.resvalue = &iresult->value;
|
||||
scratch.resnull = &iresult->isnull;
|
||||
}
|
||||
|
||||
/*
|
||||
* NEXT32 opcodes need to look at the intermediate result. We might
|
||||
* as well just set this for all ops. FIRSTs won't look at it.
|
||||
*/
|
||||
scratch.d.hashdatum.iresult = iresult;
|
||||
|
||||
/* Initialize function call parameter structure too */
|
||||
InitFunctionCallInfoData(*fcinfo, finfo, 1, inputcollid, NULL, NULL);
|
||||
|
@ -1600,10 +1600,11 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
|
||||
EEO_CASE(EEOP_HASHDATUM_NEXT32)
|
||||
{
|
||||
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
|
||||
uint32 existing_hash = DatumGetUInt32(*op->resvalue);
|
||||
uint32 existinghash;
|
||||
|
||||
existinghash = DatumGetUInt32(op->d.hashdatum.iresult->value);
|
||||
/* combine successive hash values by rotating */
|
||||
existing_hash = pg_rotate_left32(existing_hash, 1);
|
||||
existinghash = pg_rotate_left32(existinghash, 1);
|
||||
|
||||
/* leave the hash value alone on NULL inputs */
|
||||
if (!fcinfo->args[0].isnull)
|
||||
@ -1612,10 +1613,10 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
|
||||
|
||||
/* execute hash func and combine with previous hash value */
|
||||
hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
|
||||
existing_hash = existing_hash ^ hashvalue;
|
||||
existinghash = existinghash ^ hashvalue;
|
||||
}
|
||||
|
||||
*op->resvalue = UInt32GetDatum(existing_hash);
|
||||
*op->resvalue = UInt32GetDatum(existinghash);
|
||||
*op->resnull = false;
|
||||
|
||||
EEO_NEXT();
|
||||
@ -1638,15 +1639,16 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32 existing_hash = DatumGetUInt32(*op->resvalue);
|
||||
uint32 existinghash;
|
||||
uint32 hashvalue;
|
||||
|
||||
existinghash = DatumGetUInt32(op->d.hashdatum.iresult->value);
|
||||
/* combine successive hash values by rotating */
|
||||
existing_hash = pg_rotate_left32(existing_hash, 1);
|
||||
existinghash = pg_rotate_left32(existinghash, 1);
|
||||
|
||||
/* execute hash func and combine with previous hash value */
|
||||
hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
|
||||
*op->resvalue = UInt32GetDatum(existing_hash ^ hashvalue);
|
||||
*op->resvalue = UInt32GetDatum(existinghash ^ hashvalue);
|
||||
*op->resnull = false;
|
||||
}
|
||||
|
||||
|
@ -1940,13 +1940,16 @@ llvm_compile_expr(ExprState *state)
|
||||
{
|
||||
LLVMValueRef v_tmp1;
|
||||
LLVMValueRef v_tmp2;
|
||||
LLVMValueRef tmp;
|
||||
|
||||
tmp = l_ptr_const(&op->d.hashdatum.iresult->value,
|
||||
l_ptr(TypeSizeT));
|
||||
|
||||
/*
|
||||
* Fetch the previously hashed value from where the
|
||||
* EEOP_HASHDATUM_FIRST operation stored it.
|
||||
* previous hash operation stored it.
|
||||
*/
|
||||
v_prevhash = l_load(b, TypeSizeT, v_resvaluep,
|
||||
"prevhash");
|
||||
v_prevhash = l_load(b, TypeSizeT, tmp, "prevhash");
|
||||
|
||||
/*
|
||||
* Rotate bits left by 1 bit. Be careful not to
|
||||
@ -2062,13 +2065,16 @@ llvm_compile_expr(ExprState *state)
|
||||
{
|
||||
LLVMValueRef v_tmp1;
|
||||
LLVMValueRef v_tmp2;
|
||||
LLVMValueRef tmp;
|
||||
|
||||
tmp = l_ptr_const(&op->d.hashdatum.iresult->value,
|
||||
l_ptr(TypeSizeT));
|
||||
|
||||
/*
|
||||
* Fetch the previously hashed value from where the
|
||||
* EEOP_HASHDATUM_FIRST_STRICT operation stored it.
|
||||
* previous hash operation stored it.
|
||||
*/
|
||||
v_prevhash = l_load(b, TypeSizeT, v_resvaluep,
|
||||
"prevhash");
|
||||
v_prevhash = l_load(b, TypeSizeT, tmp, "prevhash");
|
||||
|
||||
/*
|
||||
* Rotate bits left by 1 bit. Be careful not to
|
||||
|
@ -580,6 +580,7 @@ typedef struct ExprEvalStep
|
||||
/* faster to access without additional indirection: */
|
||||
PGFunction fn_addr; /* actual call address */
|
||||
int jumpdone; /* jump here on null */
|
||||
NullableDatum *iresult; /* intermediate hash result */
|
||||
} hashdatum;
|
||||
|
||||
/* for EEOP_CONVERT_ROWTYPE */
|
||||
|
@ -2358,6 +2358,61 @@ where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous;
|
||||
----+----+----------+----------
|
||||
(0 rows)
|
||||
|
||||
--
|
||||
-- Test hash joins with multiple hash keys and subplans.
|
||||
--
|
||||
-- First ensure we get a hash join with multiple hash keys.
|
||||
explain (costs off)
|
||||
select t1.unique1,t2.unique1 from tenk1 t1
|
||||
inner join tenk1 t2 on t1.two = t2.two
|
||||
and t1.unique1 = (select min(unique1) from tenk1
|
||||
where t2.unique1=unique1)
|
||||
where t1.unique1 < 10 and t2.unique1 < 10
|
||||
order by t1.unique1;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: t1.unique1
|
||||
-> Hash Join
|
||||
Hash Cond: ((t1.two = t2.two) AND (t1.unique1 = (SubPlan 2)))
|
||||
-> Bitmap Heap Scan on tenk1 t1
|
||||
Recheck Cond: (unique1 < 10)
|
||||
-> Bitmap Index Scan on tenk1_unique1
|
||||
Index Cond: (unique1 < 10)
|
||||
-> Hash
|
||||
-> Bitmap Heap Scan on tenk1 t2
|
||||
Recheck Cond: (unique1 < 10)
|
||||
-> Bitmap Index Scan on tenk1_unique1
|
||||
Index Cond: (unique1 < 10)
|
||||
SubPlan 2
|
||||
-> Result
|
||||
InitPlan 1
|
||||
-> Limit
|
||||
-> Index Only Scan using tenk1_unique1 on tenk1
|
||||
Index Cond: ((unique1 IS NOT NULL) AND (unique1 = t2.unique1))
|
||||
(19 rows)
|
||||
|
||||
-- Ensure we get the expected result
|
||||
select t1.unique1,t2.unique1 from tenk1 t1
|
||||
inner join tenk1 t2 on t1.two = t2.two
|
||||
and t1.unique1 = (select min(unique1) from tenk1
|
||||
where t2.unique1=unique1)
|
||||
where t1.unique1 < 10 and t2.unique1 < 10
|
||||
order by t1.unique1;
|
||||
unique1 | unique1
|
||||
---------+---------
|
||||
0 | 0
|
||||
1 | 1
|
||||
2 | 2
|
||||
3 | 3
|
||||
4 | 4
|
||||
5 | 5
|
||||
6 | 6
|
||||
7 | 7
|
||||
8 | 8
|
||||
9 | 9
|
||||
(10 rows)
|
||||
|
||||
--
|
||||
-- checks for correct handling of quals in multiway outer joins
|
||||
--
|
||||
|
@ -441,6 +441,27 @@ select a.f1, b.f1, t.thousand, t.tenthous from
|
||||
(select sum(f1) as f1 from int4_tbl i4b) b
|
||||
where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous;
|
||||
|
||||
--
|
||||
-- Test hash joins with multiple hash keys and subplans.
|
||||
--
|
||||
|
||||
-- First ensure we get a hash join with multiple hash keys.
|
||||
explain (costs off)
|
||||
select t1.unique1,t2.unique1 from tenk1 t1
|
||||
inner join tenk1 t2 on t1.two = t2.two
|
||||
and t1.unique1 = (select min(unique1) from tenk1
|
||||
where t2.unique1=unique1)
|
||||
where t1.unique1 < 10 and t2.unique1 < 10
|
||||
order by t1.unique1;
|
||||
|
||||
-- Ensure we get the expected result
|
||||
select t1.unique1,t2.unique1 from tenk1 t1
|
||||
inner join tenk1 t2 on t1.two = t2.two
|
||||
and t1.unique1 = (select min(unique1) from tenk1
|
||||
where t2.unique1=unique1)
|
||||
where t1.unique1 < 10 and t2.unique1 < 10
|
||||
order by t1.unique1;
|
||||
|
||||
--
|
||||
-- checks for correct handling of quals in multiway outer joins
|
||||
--
|
||||
|
Loading…
Reference in New Issue
Block a user