Current vectoriser doesn't support masked loads for SLP.

We should add that support, to allow things like:

    void f (int *restrict x, int *restrict y, int *restrict z, int n)
    {
      for (int i = 0; i < n; i += 2)
        {
          x[i] = y[i] ? z[i] : 1;
          x[i + 1] = y[i + 1] ? z[i + 1] : 2;
        }
    }

to be vectorized using contiguous loads rather than LD2 and ST2.

This patch was motivated by SVE, but it is completely generic and should
apply to any architecture with masked loads.

From-SVN: r271704
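To make the intent concrete, the sketch below is a hand-written illustration (not compiler output) of the contiguous, masked form that the SLP path can now produce for the loop above. It uses the Arm C Language Extensions for SVE; the function name f_sve and the exact intrinsic choices are this sketch's own assumptions. The two different fallback constants (1 and 2) for the even/odd elements are what makes this an SLP opportunity rather than a plain loop-vectorization one.

    #include <arm_sve.h>

    /* Illustrative only: the loop above expressed with contiguous SVE
       loads/stores and a predicated (masked) load of z, instead of LD2/ST2.  */
    void
    f_sve (int *restrict x, int *restrict y, int *restrict z, int n)
    {
      svint32_t alt = svdupq_n_s32 (1, 2, 1, 2);      /* {1, 2, 1, 2, ...} */
      for (int i = 0; i < n; i += svcntw ())
        {
          svbool_t pg = svwhilelt_b32 (i, n);         /* loop predicate */
          svint32_t yv = svld1 (pg, y + i);           /* contiguous ld1w */
          svbool_t take_z = svcmpne (pg, yv, 0);      /* lanes where y[] != 0 */
          svint32_t zv = svld1 (take_z, z + i);       /* masked contiguous ld1w */
          svst1 (pg, x + i, svsel (take_z, zv, alt)); /* contiguous st1w */
        }
    }

The new test added below checks for exactly this shape: contiguous ld1w/st1w and the absence of ld2w/st2w.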
gcc/ChangeLog
@@ -1,3 +1,23 @@
+2019-05-28  Alejandro Martinez  <alejandro.martinezvicente@arm.com>
+
+        * internal-fn.c: Marked mask_load_direct as vectorizable.
+        * tree-data-ref.c (data_ref_compare_tree): Fixed comment typo.
+        * tree-vect-data-refs.c (can_group_stmts_p): Allow masked loads to be
+        combined even if masks different with allow_slp_p param.
+        (vect_analyze_data_ref_accesses): Mark SLP only vectorizable groups.
+        * tree-vect-loop.c (vect_dissolve_slp_only_groups): New function to
+        dissolve SLP-only vectorizable groups when SLP has been discarded.
+        (vect_analyze_loop_2): Call vect_dissolve_slp_only_groups when needed.
+        * tree-vect-slp.c (vect_get_and_check_slp_defs): Check masked loads'
+        masks.
+        (vect_build_slp_tree_1): Fixed comment typo.
+        (vect_build_slp_tree_2): Include masks from masked loads in SLP tree.
+        * tree-vect-stmts.c (vectorizable_load): Allow vectorization of masked
+        loads for SLP only.
+        * tree-vectorizer.h (_stmt_vec_info): Added flag for SLP-only
+        vectorizable.
+        * tree-vectorizer.c (vec_info::new_stmt_vec_info): Likewise.
+
 2019-05-28  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>

         * config/alpha/alpha.c [TARGET_ABI_OSF] (alpha_output_mi_thunk_osf):
gcc/internal-fn.c
@@ -100,7 +100,7 @@ init_internal_fns ()
 /* Create static initializers for the information returned by
    direct_internal_fn.  */
 #define not_direct { -2, -2, false }
-#define mask_load_direct { -1, 2, false }
+#define mask_load_direct { -1, 2, true }
 #define load_lanes_direct { -1, -1, false }
 #define mask_load_lanes_direct { -1, -1, false }
 #define gather_load_direct { -1, -1, false }
gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-05-28  Alejandro Martinez  <alejandro.martinezvicente@arm.com>
+
+        * gcc.target/aarch64/sve/mask_load_slp_1.c: New test for SLP
+        vectorized masked loads.
+
 2019-05-28  Jeff Law  <law@redhat.com>

         * testsuite/gcc.target/sh/pr50749-qihisi-predec-3.c: Disable
gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c (new file, 90 lines)
@@ -0,0 +1,90 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define MASK_SLP_2(TYPE_COND, ALT_VAL) \
+  void __attribute__ ((noinline, noclone)) \
+  mask_slp_##TYPE_COND##_2_##ALT_VAL (int *restrict x, int *restrict y, \
+                                      TYPE_COND *restrict z, int n) \
+  { \
+    for (int i = 0; i < n; i += 2) \
+      { \
+        x[i] = y[i] ? z[i] : 1; \
+        x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \
+      } \
+  }
+
+#define MASK_SLP_4(TYPE_COND, ALT_VAL) \
+  void __attribute__ ((noinline, noclone)) \
+  mask_slp_##TYPE_COND##_4_##ALT_VAL (int *restrict x, int *restrict y, \
+                                      TYPE_COND *restrict z, int n) \
+  { \
+    for (int i = 0; i < n; i += 4) \
+      { \
+        x[i] = y[i] ? z[i] : 1; \
+        x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \
+        x[i + 2] = y[i + 2] ? z[i + 2] : 1; \
+        x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL; \
+      } \
+  }
+
+#define MASK_SLP_8(TYPE_COND, ALT_VAL) \
+  void __attribute__ ((noinline, noclone)) \
+  mask_slp_##TYPE_COND##_8_##ALT_VAL (int *restrict x, int *restrict y, \
+                                      TYPE_COND *restrict z, int n) \
+  { \
+    for (int i = 0; i < n; i += 8) \
+      { \
+        x[i] = y[i] ? z[i] : 1; \
+        x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \
+        x[i + 2] = y[i + 2] ? z[i + 2] : 1; \
+        x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL; \
+        x[i + 4] = y[i + 4] ? z[i + 4] : 1; \
+        x[i + 5] = y[i + 5] ? z[i + 5] : ALT_VAL; \
+        x[i + 6] = y[i + 6] ? z[i + 6] : 1; \
+        x[i + 7] = y[i + 7] ? z[i + 7] : ALT_VAL; \
+      } \
+  }
+
+#define MASK_SLP_FAIL(TYPE_COND) \
+  void __attribute__ ((noinline, noclone)) \
+  mask_slp_##TYPE_COND##_FAIL (int *restrict x, int *restrict y, \
+                               TYPE_COND *restrict z, int n) \
+  { \
+    for (int i = 0; i < n; i += 2) \
+      { \
+        x[i] = y[i] ? z[i] : 1; \
+        x[i + 1] = y[i + 1] ? z[i + 1] : x[z[i + 1]]; \
+      } \
+  }
+
+MASK_SLP_2(int8_t, 1)
+MASK_SLP_2(int8_t, 2)
+MASK_SLP_2(int, 1)
+MASK_SLP_2(int, 2)
+MASK_SLP_2(int64_t, 1)
+MASK_SLP_2(int64_t, 2)
+
+MASK_SLP_4(int8_t, 1)
+MASK_SLP_4(int8_t, 2)
+MASK_SLP_4(int, 1)
+MASK_SLP_4(int, 2)
+MASK_SLP_4(int64_t, 1)
+MASK_SLP_4(int64_t, 2)
+
+MASK_SLP_8(int8_t, 1)
+MASK_SLP_8(int8_t, 2)
+MASK_SLP_8(int, 1)
+MASK_SLP_8(int, 2)
+MASK_SLP_8(int64_t, 1)
+MASK_SLP_8(int64_t, 2)
+
+MASK_SLP_FAIL(int8_t)
+MASK_SLP_FAIL(int)
+MASK_SLP_FAIL(int64_t)
+
+/* { dg-final { scan-assembler-not {\tld2w\t} } } */
+/* { dg-final { scan-assembler-not {\tst2w\t} } } */
+/* { dg-final { scan-assembler-times {\tld1w\t} 48 } } */
+/* { dg-final { scan-assembler-times {\tst1w\t} 40 } } */
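Usage note (an assumption about the local build tree, not part of the patch): since the test lives under the SVE subdirectory, it can typically be run on its own through the aarch64-sve.exp driver from the gcc build directory, e.g.

    make check-gcc RUNTESTFLAGS="aarch64-sve.exp=mask_load_slp_1.c"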
gcc/tree-data-ref.c
@@ -1271,7 +1271,7 @@ create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
   return dr;
 }

-/* A helper function computes order between two tree epxressions T1 and T2.
+/* A helper function computes order between two tree expressions T1 and T2.
    This is used in comparator functions sorting objects based on the order
    of tree expressions.  The function returns -1, 0, or 1.  */

gcc/tree-vect-data-refs.c
@@ -2863,10 +2863,12 @@ strip_conversion (tree op)
 }

 /* Return true if vectorizable_* routines can handle statements STMT1_INFO
-   and STMT2_INFO being in a single group.  */
+   and STMT2_INFO being in a single group.  When ALLOW_SLP_P, masked loads can
+   be grouped in SLP mode.  */

 static bool
-can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
+can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
+                   bool allow_slp_p)
 {
   if (gimple_assign_single_p (stmt1_info->stmt))
     return gimple_assign_single_p (stmt2_info->stmt);
@@ -2888,7 +2890,8 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
          like those created by build_mask_conversion.  */
       tree mask1 = gimple_call_arg (call1, 2);
       tree mask2 = gimple_call_arg (call2, 2);
-      if (!operand_equal_p (mask1, mask2, 0))
+      if (!operand_equal_p (mask1, mask2, 0)
+          && (ifn == IFN_MASK_STORE || !allow_slp_p))
         {
           mask1 = strip_conversion (mask1);
           if (!mask1)
@@ -2974,7 +2977,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
              || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
                                        DR_BASE_ADDRESS (drb)) != 0
              || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
-             || !can_group_stmts_p (stmtinfo_a, stmtinfo_b))
+             || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
            break;

          /* Check that the data-refs have the same constant size.  */
@@ -3059,6 +3062,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
          DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
          lastinfo = stmtinfo_b;

+         STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
+           = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
+
+         if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "Load suitable for SLP vectorization only.\n");
+
          if (init_b == init_prev
              && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
              && dump_enabled_p ())
gcc/tree-vect-loop.c
@@ -1774,6 +1774,50 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
   return opt_result::success ();
 }

+/* Look for SLP-only access groups and turn each individual access into its own
+   group.  */
+static void
+vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
+{
+  unsigned int i;
+  struct data_reference *dr;
+
+  DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
+
+  vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs;
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
+    {
+      gcc_assert (DR_REF (dr));
+      stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr));
+
+      /* Check if the load is a part of an interleaving chain.  */
+      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+        {
+          stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info);
+          unsigned int group_size = DR_GROUP_SIZE (first_element);
+
+          /* Check if SLP-only groups.  */
+          if (!STMT_SLP_TYPE (stmt_info)
+              && STMT_VINFO_SLP_VECT_ONLY (first_element))
+            {
+              /* Dissolve the group.  */
+              STMT_VINFO_SLP_VECT_ONLY (first_element) = false;
+
+              stmt_vec_info vinfo = first_element;
+              while (vinfo)
+                {
+                  stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo);
+                  DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
+                  DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
+                  DR_GROUP_SIZE (vinfo) = 1;
+                  DR_GROUP_GAP (vinfo) = group_size - 1;
+                  vinfo = next;
+                }
+            }
+        }
+    }
+}
+
 /* Function vect_analyze_loop_2.

    Apply a set of analyses on LOOP, and create a loop_vec_info struct
@@ -1990,6 +2034,9 @@ start_over:
        }
     }

+  /* Dissolve SLP-only groups.  */
+  vect_dissolve_slp_only_groups (loop_vinfo);
+
   /* Scan all the remaining operations in the loop that are not subject
      to SLP and make sure they are vectorizable.  */
   ok = vect_analyze_loop_operations (loop_vinfo);
gcc/tree-vect-slp.c
@@ -325,6 +325,14 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
        {
          internal_fn ifn = gimple_call_internal_fn (stmt);
          commutative_op = first_commutative_argument (ifn);
+
+         /* Masked load, only look at mask.  */
+         if (ifn == IFN_MASK_LOAD)
+           {
+             number_of_oprnds = 1;
+             /* Mask operand index.  */
+             first_op_idx = 5;
+           }
        }
     }
   else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
@@ -626,7 +634,7 @@ vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
    is false then this indicates the comparison could not be
    carried out or the stmts will never be vectorized by SLP.

-   Note COND_EXPR is possibly ismorphic to another one after swapping its
+   Note COND_EXPR is possibly isomorphic to another one after swapping its
    operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
    the first stmt by swapping the two operands of comparison; set SWAP[i]
    to 2 if stmt I is isormorphic to the first stmt by inverting the code
@@ -1146,14 +1154,23 @@ vect_build_slp_tree_2 (vec_info *vinfo,
                         &this_max_nunits, matches, &two_operators))
     return NULL;

-  /* If the SLP node is a load, terminate the recursion.  */
+  /* If the SLP node is a load, terminate the recursion unless masked.  */
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
       && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
     {
-      *max_nunits = this_max_nunits;
-      (*tree_size)++;
-      node = vect_create_new_slp_node (stmts);
-      return node;
+      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
+        {
+          /* Masked load.  */
+          gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
+          nops = 1;
+        }
+      else
+        {
+          *max_nunits = this_max_nunits;
+          (*tree_size)++;
+          node = vect_create_new_slp_node (stmts);
+          return node;
+        }
     }

   /* Get at the operands, verifying they are compatible.  */
gcc/tree-vect-stmts.c
@@ -7622,14 +7622,6 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       if (!scalar_dest)
        return false;

-      if (slp_node != NULL)
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "SLP of masked loads not supported.\n");
-         return false;
-       }
-
       int mask_index = internal_fn_mask_index (ifn);
       if (mask_index >= 0)
        {
@@ -7712,6 +7704,15 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
       group_size = DR_GROUP_SIZE (first_stmt_info);

+      /* Refuse non-SLP vectorization of SLP-only groups.  */
+      if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "cannot vectorize load in non-SLP mode.\n");
+         return false;
+       }
+
       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
        slp_perm = true;

@@ -8389,8 +8390,19 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                          simd_lane_access_p,
                                          byte_offset, bump);
          if (mask)
-           vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
-                                                    mask_vectype);
+           {
+             if (slp_node)
+               {
+                 auto_vec<tree> ops (1);
+                 auto_vec<vec<tree> > vec_defs (1);
+                 ops.quick_push (mask);
+                 vect_get_slp_defs (ops, slp_node, &vec_defs);
+                 vec_mask = vec_defs[0][0];
+               }
+             else
+               vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
+                                                        mask_vectype);
+           }
        }
       else
        {
gcc/tree-vectorizer.c
@@ -641,6 +641,7 @@ vec_info::new_stmt_vec_info (gimple *stmt)
   STMT_VINFO_VECTORIZABLE (res) = true;
   STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
+  STMT_VINFO_SLP_VECT_ONLY (res) = false;

   if (gimple_code (stmt) == GIMPLE_PHI
       && is_loop_header_bb_p (gimple_bb (stmt)))
gcc/tree-vectorizer.h
@@ -396,7 +396,7 @@ typedef struct _loop_vec_info : public vec_info {
   /* Condition under which this loop is analyzed and versioned.  */
   tree num_iters_assumptions;

-  /* Threshold of number of iterations below which vectorzation will not be
+  /* Threshold of number of iterations below which vectorization will not be
      performed.  It is calculated from MIN_PROFITABLE_ITERS and
      PARAM_MIN_VECT_LOOP_BOUND.  */
   unsigned int th;
@@ -946,6 +946,9 @@ struct _stmt_vec_info {
      and OPERATION_BITS without changing the result.  */
   unsigned int operation_precision;
   signop operation_sign;
+
+  /* True if this is only suitable for SLP vectorization.  */
+  bool slp_vect_only_p;
 };

 /* Information about a gather/scatter call.  */
@@ -1041,6 +1044,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
 #define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses
 #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type
 #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def
+#define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p

 #define DR_GROUP_FIRST_ELEMENT(S) \
   (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)