md.texi (vec_load_lanes, [...]): Document.
gcc/
	* doc/md.texi (vec_load_lanes, vec_store_lanes): Document.
	* optabs.h (COI_vec_load_lanes, COI_vec_store_lanes): New
	convert_optab_index values.
	(vec_load_lanes_optab, vec_store_lanes_optab): New convert optabs.
	* genopinit.c (optabs): Initialize the new optabs.
	* internal-fn.def (LOAD_LANES, STORE_LANES): New internal functions.
	* internal-fn.c (get_multi_vector_move, expand_LOAD_LANES)
	(expand_STORE_LANES): New functions.
	* tree.h (build_array_type_nelts): Declare.
	* tree.c (build_array_type_nelts): New function.
	* tree-vectorizer.h (vect_model_store_cost): Add a bool argument.
	(vect_model_load_cost): Likewise.
	(vect_store_lanes_supported, vect_load_lanes_supported)
	(vect_record_strided_load_vectors): Declare.
	* tree-vect-data-refs.c (vect_lanes_optab_supported_p)
	(vect_store_lanes_supported, vect_load_lanes_supported): New functions.
	(vect_transform_strided_load): Split out statement recording into...
	(vect_record_strided_load_vectors): ...this new function.
	* tree-vect-stmts.c (create_vector_array, read_vector_array)
	(write_vector_array, create_array_ref): New functions.
	(vect_model_store_cost): Add store_lanes_p argument.
	(vect_model_load_cost): Add load_lanes_p argument.
	(vectorizable_store): Try to use store-lanes functions for
	interleaved stores.
	(vectorizable_load): Likewise load-lanes and loads.
	* tree-vect-slp.c (vect_get_and_check_slp_defs): Update call
	to vect_model_store_cost.
	(vect_build_slp_tree): Likewise vect_model_load_cost.

From-SVN: r173291
commit 272c6793a0
parent 0f6d54f720
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,34 @@
+2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* doc/md.texi (vec_load_lanes, vec_store_lanes): Document.
+	* optabs.h (COI_vec_load_lanes, COI_vec_store_lanes): New
+	convert_optab_index values.
+	(vec_load_lanes_optab, vec_store_lanes_optab): New convert optabs.
+	* genopinit.c (optabs): Initialize the new optabs.
+	* internal-fn.def (LOAD_LANES, STORE_LANES): New internal functions.
+	* internal-fn.c (get_multi_vector_move, expand_LOAD_LANES)
+	(expand_STORE_LANES): New functions.
+	* tree.h (build_array_type_nelts): Declare.
+	* tree.c (build_array_type_nelts): New function.
+	* tree-vectorizer.h (vect_model_store_cost): Add a bool argument.
+	(vect_model_load_cost): Likewise.
+	(vect_store_lanes_supported, vect_load_lanes_supported)
+	(vect_record_strided_load_vectors): Declare.
+	* tree-vect-data-refs.c (vect_lanes_optab_supported_p)
+	(vect_store_lanes_supported, vect_load_lanes_supported): New functions.
+	(vect_transform_strided_load): Split out statement recording into...
+	(vect_record_strided_load_vectors): ...this new function.
+	* tree-vect-stmts.c (create_vector_array, read_vector_array)
+	(write_vector_array, create_array_ref): New functions.
+	(vect_model_store_cost): Add store_lanes_p argument.
+	(vect_model_load_cost): Add load_lanes_p argument.
+	(vectorizable_store): Try to use store-lanes functions for
+	interleaved stores.
+	(vectorizable_load): Likewise load-lanes and loads.
+	* tree-vect-slp.c (vect_get_and_check_slp_defs): Update call
+	to vect_model_store_cost.
+	(vect_build_slp_tree): Likewise vect_model_load_cost.
+
 2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
 
 	* hooks.h (hook_bool_mode_uhwi_false): Declare.
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3846,6 +3846,48 @@ into consecutive memory locations.  Operand 0 is the first of the
 consecutive memory locations, operand 1 is the first register, and
 operand 2 is a constant: the number of consecutive registers.
 
+@cindex @code{vec_load_lanes@var{m}@var{n}} instruction pattern
+@item @samp{vec_load_lanes@var{m}@var{n}}
+Perform an interleaved load of several vectors from memory operand 1
+into register operand 0.  Both operands have mode @var{m}.  The register
+operand is viewed as holding consecutive vectors of mode @var{n},
+while the memory operand is a flat array that contains the same number
+of elements.  The operation is equivalent to:
+
+@smallexample
+int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
+for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
+  for (i = 0; i < c; i++)
+    operand0[i][j] = operand1[j * c + i];
+@end smallexample
+
+For example, @samp{vec_load_lanestiv4hi} loads 8 16-bit values
+from memory into a register of mode @samp{TI}@.  The register
+contains two consecutive vectors of mode @samp{V4HI}@.
+
+This pattern can only be used if:
+@smallexample
+TARGET_ARRAY_MODE_SUPPORTED_P (@var{n}, @var{c})
+@end smallexample
+is true.  GCC assumes that, if a target supports this kind of
+instruction for some mode @var{n}, it also supports unaligned
+loads for vectors of mode @var{n}.
+
+@cindex @code{vec_store_lanes@var{m}@var{n}} instruction pattern
+@item @samp{vec_store_lanes@var{m}@var{n}}
+Equivalent to @samp{vec_load_lanes@var{m}@var{n}}, with the memory
+and register operands reversed.  That is, the instruction is
+equivalent to:
+
+@smallexample
+int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
+for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
+  for (i = 0; i < c; i++)
+    operand0[j * c + i] = operand1[i][j];
+@end smallexample
+
+for a memory operand 0 and register operand 1.
+
 @cindex @code{vec_set@var{m}} instruction pattern
 @item @samp{vec_set@var{m}}
 Set given field in the vector value.  Operand 0 is the vector to modify,
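To make the interleaving concrete, here is a hedged C sketch (not part of the patch; the function and names are illustrative). A loop of this shape is the group-of-3 interleaved access that vec_load_lanes/vec_store_lanes let the vectorizer implement with one multi-vector instruction (for example vld3/vst3 on ARM NEON) instead of a load-and-permute sequence:

/* Each iteration touches a group of 3 interleaved bytes.  With
   lane-wise loads and stores the vectorizer can keep the three lanes
   in three separate vectors directly, with no explicit permutes.  */
void
scale_xyz (unsigned char *p, int n, int scale)
{
  int i;
  for (i = 0; i < n; i++)
    {
      p[3 * i + 0] = (p[3 * i + 0] * scale) >> 8;
      p[3 * i + 1] = (p[3 * i + 1] * scale) >> 8;
      p[3 * i + 2] = (p[3 * i + 2] * scale) >> 8;
    }
}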
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -74,6 +74,8 @@ static const char * const optabs[] =
   "set_convert_optab_handler (fractuns_optab, $B, $A, CODE_FOR_$(fractuns$Q$a$I$b2$))",
   "set_convert_optab_handler (satfract_optab, $B, $A, CODE_FOR_$(satfract$a$Q$b2$))",
   "set_convert_optab_handler (satfractuns_optab, $B, $A, CODE_FOR_$(satfractuns$I$a$Q$b2$))",
+  "set_convert_optab_handler (vec_load_lanes_optab, $A, $B, CODE_FOR_$(vec_load_lanes$a$b$))",
+  "set_convert_optab_handler (vec_store_lanes_optab, $A, $B, CODE_FOR_$(vec_store_lanes$a$b$))",
   "set_optab_handler (add_optab, $A, CODE_FOR_$(add$P$a3$))",
   "set_optab_handler (addv_optab, $A, CODE_FOR_$(add$F$a3$)),\n\
    set_optab_handler (add_optab, $A, CODE_FOR_$(add$F$a3$))",
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -42,6 +42,73 @@ const int internal_fn_flags_array[] = {
   0
 };
 
+/* ARRAY_TYPE is an array of vector modes.  Return the associated insn
+   for load-lanes-style optab OPTAB.  The insn must exist.  */
+
+static enum insn_code
+get_multi_vector_move (tree array_type, convert_optab optab)
+{
+  enum insn_code icode;
+  enum machine_mode imode;
+  enum machine_mode vmode;
+
+  gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE);
+  imode = TYPE_MODE (array_type);
+  vmode = TYPE_MODE (TREE_TYPE (array_type));
+
+  icode = convert_optab_handler (optab, imode, vmode);
+  gcc_assert (icode != CODE_FOR_nothing);
+  return icode;
+}
+
+/* Expand LOAD_LANES call STMT.  */
+
+static void
+expand_LOAD_LANES (gimple stmt)
+{
+  struct expand_operand ops[2];
+  tree type, lhs, rhs;
+  rtx target, mem;
+
+  lhs = gimple_call_lhs (stmt);
+  rhs = gimple_call_arg (stmt, 0);
+  type = TREE_TYPE (lhs);
+
+  target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  mem = expand_normal (rhs);
+
+  gcc_assert (MEM_P (mem));
+  PUT_MODE (mem, TYPE_MODE (type));
+
+  create_output_operand (&ops[0], target, TYPE_MODE (type));
+  create_fixed_operand (&ops[1], mem);
+  expand_insn (get_multi_vector_move (type, vec_load_lanes_optab), 2, ops);
+}
+
+/* Expand STORE_LANES call STMT.  */
+
+static void
+expand_STORE_LANES (gimple stmt)
+{
+  struct expand_operand ops[2];
+  tree type, lhs, rhs;
+  rtx target, reg;
+
+  lhs = gimple_call_lhs (stmt);
+  rhs = gimple_call_arg (stmt, 0);
+  type = TREE_TYPE (rhs);
+
+  target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  reg = expand_normal (rhs);
+
+  gcc_assert (MEM_P (target));
+  PUT_MODE (target, TYPE_MODE (type));
+
+  create_fixed_operand (&ops[0], target);
+  create_input_operand (&ops[1], reg, TYPE_MODE (type));
+  expand_insn (get_multi_vector_move (type, vec_store_lanes_optab), 2, ops);
+}
+
 /* Routines to expand each internal function, indexed by function number.
    Each routine has the prototype:
 
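For orientation, this is roughly the gimple the two expanders receive; the shape follows the statements the vectorizer builds in tree-vect-stmts.c later in this patch, but the types and variable names here are illustrative only:

  /* Load side: the single call argument is an array-typed MEM_REF;
     the lhs is an array of vectors, unpacked element by element.  */
  vect_array = LOAD_LANES (MEM_REF[(short int[8] *) ptr]);
  vect_a = vect_array[0];
  vect_b = vect_array[1];

  /* Store side: the vectors are packed into the array first and the
     call's lhs is the array-typed MEM_REF.  */
  vect_array[0] = vect_a;
  vect_array[1] = vect_b;
  MEM_REF[(short int[8] *) ptr] = STORE_LANES (vect_array);

expand_LOAD_LANES forces the memory operand's mode to the array type's mode, so the vec_load_lanes pattern sees one flat memory operand; expand_STORE_LANES does the same on the output side.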
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -37,3 +37,6 @@ along with GCC; see the file COPYING3.  If not see
      void expand_NAME (gimple stmt)
 
    where STMT is the statement that performs the call. */
+
+DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF)
+DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF)
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -578,6 +578,9 @@ enum convert_optab_index
   COI_satfract,
   COI_satfractuns,
 
+  COI_vec_load_lanes,
+  COI_vec_store_lanes,
+
   COI_MAX
 };
 
@@ -598,6 +601,8 @@ enum convert_optab_index
 #define fractuns_optab (&convert_optab_table[COI_fractuns])
 #define satfract_optab (&convert_optab_table[COI_satfract])
 #define satfractuns_optab (&convert_optab_table[COI_satfractuns])
+#define vec_load_lanes_optab (&convert_optab_table[COI_vec_load_lanes])
+#define vec_store_lanes_optab (&convert_optab_table[COI_vec_store_lanes])
 
 /* Contains the optab used for each rtx code.  */
 extern optab code_to_optab[NUM_RTX_CODE + 1];
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -43,6 +43,45 @@ along with GCC; see the file COPYING3.  If not see
 #include "expr.h"
 #include "optabs.h"
 
+/* Return true if load- or store-lanes optab OPTAB is implemented for
+   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
+
+static bool
+vect_lanes_optab_supported_p (const char *name, convert_optab optab,
+                              tree vectype, unsigned HOST_WIDE_INT count)
+{
+  enum machine_mode mode, array_mode;
+  bool limit_p;
+
+  mode = TYPE_MODE (vectype);
+  limit_p = !targetm.array_mode_supported_p (mode, count);
+  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
+                              MODE_INT, limit_p);
+
+  if (array_mode == BLKmode)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
+                 GET_MODE_NAME (mode), count);
+      return false;
+    }
+
+  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "cannot use %s<%s><%s>",
+                 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
+      return false;
+    }
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "can use %s<%s><%s>",
+             name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
+
+  return true;
+}
+
+
 /* Return the smallest scalar part of STMT.
    This is used to determine the vectype of the stmt.  We generally set the
    vectype according to the type of the result (lhs).  For stmts whose
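A hedged walk-through of the check above, assuming an ARM-like target (the specific modes are illustrative, not part of the patch):

/* vect_lanes_optab_supported_p ("vec_load_lanes",
                                 vec_load_lanes_optab, V4HI, 3):
   mode       = V4HImode, a 64-bit vector;
   limit_p    = !targetm.array_mode_supported_p (V4HImode, 3);
   array_mode = mode_for_size (3 * 64 = 192, MODE_INT, limit_p),
                i.e. a 192-bit integer mode (ARM's EImode) when the
                target hook accepts the (mode, count) pair;
   final test = convert_optab_handler (vec_load_lanes_optab,
                                       EImode, V4HImode).  */

If the target defines no integer mode of that size, mode_for_size returns BLKmode and the function fails the first test.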
@@ -3368,6 +3407,18 @@ vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
 }
 
 
+/* Return TRUE if vec_store_lanes is available for COUNT vectors of
+   type VECTYPE.  */
+
+bool
+vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+{
+  return vect_lanes_optab_supported_p ("vec_store_lanes",
+                                       vec_store_lanes_optab,
+                                       vectype, count);
+}
+
+
 /* Function vect_permute_store_chain.
 
    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
@@ -3822,6 +3873,16 @@ vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
   return true;
 }
 
+/* Return TRUE if vec_load_lanes is available for COUNT vectors of
+   type VECTYPE.  */
+
+bool
+vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+{
+  return vect_lanes_optab_supported_p ("vec_load_lanes",
+                                       vec_load_lanes_optab,
+                                       vectype, count);
+}
+
 /* Function vect_permute_load_chain.
 
@@ -3969,19 +4030,28 @@ void
 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
                              gimple_stmt_iterator *gsi)
 {
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
-  gimple next_stmt, new_stmt;
   VEC(tree,heap) *result_chain = NULL;
-  unsigned int i, gap_count;
-  tree tmp_data_ref;
 
   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
      RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
      vectors, that are ready for vector computation.  */
   result_chain = VEC_alloc (tree, heap, size);
   /* Permute.  */
   vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
+  vect_record_strided_load_vectors (stmt, result_chain);
+  VEC_free (tree, heap, result_chain);
+}
+
+/* RESULT_CHAIN contains the output of a group of strided loads that were
+   generated as part of the vectorization of STMT.  Assign the statement
+   for each vector to the associated scalar statement.  */
+
+void
+vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
+{
+  gimple first_stmt = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+  gimple next_stmt, new_stmt;
+  unsigned int i, gap_count;
+  tree tmp_data_ref;
 
   /* Put a permuted data-ref in the VECTORIZED_STMT field.
      Since we scan the chain starting from it's first node, their order
@@ -4043,8 +4113,6 @@ vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
           break;
         }
     }
-
-  VEC_free (tree, heap, result_chain);
 }
 
 /* Function vect_force_dr_alignment_p.
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -215,7 +215,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
         vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
       else
         /* Store.  */
-        vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
+        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
+                               dt[0], slp_node);
     }
 
   else
@@ -579,7 +580,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
               /* Analyze costs (for the first stmt in the group).  */
               vect_model_load_cost (vinfo_for_stmt (stmt),
-                                    ncopies_for_cost, *node);
+                                    ncopies_for_cost, false, *node);
             }
 
           /* Store the place of this load in the interleaving chain.  In
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -42,6 +42,82 @@ along with GCC; see the file COPYING3.  If not see
 #include "langhooks.h"
 
 
+/* Return a variable of type ELEM_TYPE[NELEMS].  */
+
+static tree
+create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
+{
+  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
+                         "vect_array");
+}
+
+/* ARRAY is an array of vectors created by create_vector_array.
+   Return an SSA_NAME for the vector in index N.  The reference
+   is part of the vectorization of STMT and the vector is associated
+   with scalar destination SCALAR_DEST.  */
+
+static tree
+read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
+                   tree array, unsigned HOST_WIDE_INT n)
+{
+  tree vect_type, vect, vect_name, array_ref;
+  gimple new_stmt;
+
+  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
+  vect_type = TREE_TYPE (TREE_TYPE (array));
+  vect = vect_create_destination_var (scalar_dest, vect_type);
+  array_ref = build4 (ARRAY_REF, vect_type, array,
+                      build_int_cst (size_type_node, n),
+                      NULL_TREE, NULL_TREE);
+
+  new_stmt = gimple_build_assign (vect, array_ref);
+  vect_name = make_ssa_name (vect, new_stmt);
+  gimple_assign_set_lhs (new_stmt, vect_name);
+  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+  mark_symbols_for_renaming (new_stmt);
+
+  return vect_name;
+}
+
+/* ARRAY is an array of vectors created by create_vector_array.
+   Emit code to store SSA_NAME VECT in index N of the array.
+   The store is part of the vectorization of STMT.  */
+
+static void
+write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
+                    tree array, unsigned HOST_WIDE_INT n)
+{
+  tree array_ref;
+  gimple new_stmt;
+
+  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
+                      build_int_cst (size_type_node, n),
+                      NULL_TREE, NULL_TREE);
+
+  new_stmt = gimple_build_assign (array_ref, vect);
+  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+  mark_symbols_for_renaming (new_stmt);
+}
+
+/* PTR is a pointer to an array of type TYPE.  Return a representation
+   of *PTR.  The memory reference replaces those in FIRST_DR
+   (and its group).  */
+
+static tree
+create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
+{
+  struct ptr_info_def *pi;
+  tree mem_ref, alias_ptr_type;
+
+  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
+  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
+  /* Arrays have the same alignment as their type.  */
+  pi = get_ptr_info (ptr);
+  pi->align = TYPE_ALIGN_UNIT (type);
+  pi->misalign = 0;
+  return mem_ref;
+}
+
 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
 
 /* Function vect_mark_relevant.
@@ -648,7 +724,8 @@ vect_cost_strided_group_size (stmt_vec_info stmt_info)
 
 void
 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
-                       enum vect_def_type dt, slp_tree slp_node)
+                       bool store_lanes_p, enum vect_def_type dt,
+                       slp_tree slp_node)
 {
   int group_size;
   unsigned int inside_cost = 0, outside_cost = 0;
@@ -685,9 +762,11 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
       first_dr = STMT_VINFO_DATA_REF (stmt_info);
     }
 
-  /* Is this an access in a group of stores, which provide strided access?
-     If so, add in the cost of the permutes.  */
-  if (group_size > 1)
+  /* We assume that the cost of a single store-lanes instruction is
+     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
+     access is instead being provided by a permute-and-store operation,
+     include the cost of the permutes.  */
+  if (!store_lanes_p && group_size > 1)
     {
       /* Uses a high and low interleave operation for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
@@ -763,8 +842,8 @@ vect_get_store_cost (struct data_reference *dr, int ncopies,
    access scheme chosen.  */
 
 void
-vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
-
+vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
+                      slp_tree slp_node)
 {
   int group_size;
   gimple first_stmt;
@@ -789,9 +868,11 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
       first_dr = dr;
     }
 
-  /* Is this an access in a group of loads providing strided access?
-     If so, add in the cost of the permutes.  */
-  if (group_size > 1)
+  /* We assume that the cost of a single load-lanes instruction is
+     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
+     access is instead being provided by a load-and-permute operation,
+     include the cost of the permutes.  */
+  if (!load_lanes_p && group_size > 1)
     {
       /* Uses an even and odd extract operations for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
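A quick hedged calculation to make the trade-off in the two comments above concrete (illustrative numbers; the per-operation cost multipliers live on the source lines that follow these hunks):

/* group_size = 4, ncopies = 1:
   permute-based:  inside_cost += 1 * exact_log2 (4) * 4 = 8 permute
                   operations on top of the 4 vector loads/stores;
   lanes-based:    no permutes at all; the single load/store-lanes
                   instruction is costed like 4 ordinary accesses.  */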
@@ -3329,6 +3410,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree elem_type;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = NULL;
   enum machine_mode vec_mode;
@@ -3344,6 +3426,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   int j;
   gimple next_stmt, first_stmt = NULL;
   bool strided_store = false;
+  bool store_lanes_p = false;
   unsigned int group_size, i;
   VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
   bool inv_p;
@@ -3351,6 +3434,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   bool slp = (slp_node != NULL);
   unsigned int vec_num;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  tree aggr_type;
 
   if (loop_vinfo)
     loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -3404,7 +3488,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   /* The scalar rhs type needs to be trivially convertible to the vector
      component type.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
+  elem_type = TREE_TYPE (vectype);
+  if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "??? operands of different types");
@@ -3434,7 +3519,9 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!slp && !PURE_SLP_STMT (stmt_info))
     {
       group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
-      if (!vect_strided_store_supported (vectype, group_size))
+      if (vect_store_lanes_supported (vectype, group_size))
+        store_lanes_p = true;
+      else if (!vect_strided_store_supported (vectype, group_size))
         return false;
     }
 
@@ -3462,7 +3549,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (stmt_info, ncopies, dt, NULL);
+      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
       return true;
     }
 
@@ -3517,6 +3604,16 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
   gcc_assert (alignment_support_scheme);
+  /* Targets with store-lane instructions must not require explicit
+     realignment.  */
+  gcc_assert (!store_lanes_p
+              || alignment_support_scheme == dr_aligned
+              || alignment_support_scheme == dr_unaligned_supported);
+
+  if (store_lanes_p)
+    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+  else
+    aggr_type = vectype;
 
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -3605,7 +3702,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                 /* We should have catched mismatched types earlier.  */
                 gcc_assert (useless_type_conversion_p (vectype,
                                                        TREE_TYPE (vec_oprnd)));
-          dataref_ptr = vect_create_data_ref_ptr (first_stmt, vectype, NULL,
+          dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
                                                   NULL_TREE, &dummy, gsi,
                                                   &ptr_incr, false, &inv_p);
           gcc_assert (bb_vinfo || !inv_p);
@@ -3628,70 +3725,93 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
               VEC_replace(tree, dr_chain, i, vec_oprnd);
               VEC_replace(tree, oprnds, i, vec_oprnd);
             }
-          dataref_ptr =
-                bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
+          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
+                                         TYPE_SIZE_UNIT (aggr_type));
         }
 
-      new_stmt = NULL;
-      if (strided_store)
-        {
-          result_chain = VEC_alloc (tree, heap, group_size);
-          /* Permute.  */
-          vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
-                                    &result_chain);
-        }
-
-      next_stmt = first_stmt;
-      for (i = 0; i < vec_num; i++)
-        {
-          struct ptr_info_def *pi;
-
-          if (i > 0)
-            /* Bump the vector pointer.  */
-            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-                                           NULL_TREE);
-
-          if (slp)
-            vec_oprnd = VEC_index (tree, vec_oprnds, i);
-          else if (strided_store)
-            /* For strided stores vectorized defs are interleaved in
-               vect_permute_store_chain().  */
-            vec_oprnd = VEC_index (tree, result_chain, i);
-
-          data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
-                             build_int_cst (reference_alias_ptr_type
                                             (DR_REF (first_dr)), 0));
-          pi = get_ptr_info (dataref_ptr);
-          pi->align = TYPE_ALIGN_UNIT (vectype);
-          if (aligned_access_p (first_dr))
-            pi->misalign = 0;
-          else if (DR_MISALIGNMENT (first_dr) == -1)
-            {
-              TREE_TYPE (data_ref)
-                = build_aligned_type (TREE_TYPE (data_ref),
-                                      TYPE_ALIGN (TREE_TYPE (vectype)));
-              pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
-              pi->misalign = 0;
-            }
-          else
-            {
-              TREE_TYPE (data_ref)
-                = build_aligned_type (TREE_TYPE (data_ref),
-                                      TYPE_ALIGN (TREE_TYPE (vectype)));
-              pi->misalign = DR_MISALIGNMENT (first_dr);
-            }
-
-          /* Arguments are ready.  Create the new vector stmt.  */
-          new_stmt = gimple_build_assign (data_ref, vec_oprnd);
-          vect_finish_stmt_generation (stmt, new_stmt, gsi);
-          mark_symbols_for_renaming (new_stmt);
-
-          if (slp)
-            continue;
-
-          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
-          if (!next_stmt)
-            break;
+      if (store_lanes_p)
+        {
+          tree vec_array;
+
+          /* Combine all the vectors into an array.  */
+          vec_array = create_vector_array (vectype, vec_num);
+          for (i = 0; i < vec_num; i++)
+            {
+              vec_oprnd = VEC_index (tree, dr_chain, i);
+              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
+            }
+
+          /* Emit:
+               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
+          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
+          gimple_call_set_lhs (new_stmt, data_ref);
+          vect_finish_stmt_generation (stmt, new_stmt, gsi);
+          mark_symbols_for_renaming (new_stmt);
+        }
+      else
+        {
+          new_stmt = NULL;
+          if (strided_store)
+            {
+              result_chain = VEC_alloc (tree, heap, group_size);
+              /* Permute.  */
+              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
+                                        &result_chain);
+            }
+
+          next_stmt = first_stmt;
+          for (i = 0; i < vec_num; i++)
+            {
+              struct ptr_info_def *pi;
+
+              if (i > 0)
+                /* Bump the vector pointer.  */
+                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+                                               stmt, NULL_TREE);
+
+              if (slp)
+                vec_oprnd = VEC_index (tree, vec_oprnds, i);
+              else if (strided_store)
+                /* For strided stores vectorized defs are interleaved in
+                   vect_permute_store_chain().  */
+                vec_oprnd = VEC_index (tree, result_chain, i);
+
+              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
+                                 build_int_cst (reference_alias_ptr_type
+                                                (DR_REF (first_dr)), 0));
+              pi = get_ptr_info (dataref_ptr);
+              pi->align = TYPE_ALIGN_UNIT (vectype);
+              if (aligned_access_p (first_dr))
+                pi->misalign = 0;
+              else if (DR_MISALIGNMENT (first_dr) == -1)
+                {
+                  TREE_TYPE (data_ref)
+                    = build_aligned_type (TREE_TYPE (data_ref),
+                                          TYPE_ALIGN (elem_type));
+                  pi->align = TYPE_ALIGN_UNIT (elem_type);
+                  pi->misalign = 0;
+                }
+              else
+                {
+                  TREE_TYPE (data_ref)
+                    = build_aligned_type (TREE_TYPE (data_ref),
+                                          TYPE_ALIGN (elem_type));
+                  pi->misalign = DR_MISALIGNMENT (first_dr);
+                }
+
+              /* Arguments are ready.  Create the new vector stmt.  */
+              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
+              vect_finish_stmt_generation (stmt, new_stmt, gsi);
+              mark_symbols_for_renaming (new_stmt);
+
+              if (slp)
+                continue;
+
+              next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
+              if (!next_stmt)
+                break;
+            }
         }
       if (!slp)
         {
@@ -3810,6 +3930,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   bool nested_in_vect_loop = false;
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree elem_type;
   tree new_temp;
   enum machine_mode mode;
   gimple new_stmt = NULL;
@@ -3826,6 +3947,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   gimple phi = NULL;
   VEC(tree,heap) *dr_chain = NULL;
   bool strided_load = false;
+  bool load_lanes_p = false;
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
@@ -3838,6 +3960,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   enum tree_code code;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   int vf;
+  tree aggr_type;
 
   if (loop_vinfo)
     {
@@ -3914,7 +4037,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   /* The vector component type needs to be trivially convertible to the
      scalar lhs.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
+  elem_type = TREE_TYPE (vectype);
+  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "??? operands of different types");
@@ -3932,7 +4056,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!slp && !PURE_SLP_STMT (stmt_info))
     {
       group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
-      if (!vect_strided_load_supported (vectype, group_size))
+      if (vect_load_lanes_supported (vectype, group_size))
+        load_lanes_p = true;
+      else if (!vect_strided_load_supported (vectype, group_size))
         return false;
     }
 }
@@ -3959,7 +4085,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
-      vect_model_load_cost (stmt_info, ncopies, NULL);
+      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
       return true;
     }
 
@@ -4000,6 +4126,11 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
   gcc_assert (alignment_support_scheme);
+  /* Targets with load-lane instructions must not require explicit
+     realignment.  */
+  gcc_assert (!load_lanes_p
+              || alignment_support_scheme == dr_aligned
+              || alignment_support_scheme == dr_unaligned_supported);
 
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -4131,208 +4262,250 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (negative)
     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
 
+  if (load_lanes_p)
+    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+  else
+    aggr_type = vectype;
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
-      /* 1. Create the vector pointer update chain.  */
+      /* 1. Create the vector or array pointer update chain.  */
       if (j == 0)
-        dataref_ptr = vect_create_data_ref_ptr (first_stmt, vectype, at_loop,
+        dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                                 offset, &dummy, gsi,
                                                 &ptr_incr, false, &inv_p);
       else
-        dataref_ptr =
-                bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
+        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
+                                       TYPE_SIZE_UNIT (aggr_type));
 
       if (strided_load || slp_perm)
         dr_chain = VEC_alloc (tree, heap, vec_num);
 
-      for (i = 0; i < vec_num; i++)
+      if (load_lanes_p)
         {
-          if (i > 0)
-            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-                                           NULL_TREE);
-
-          /* 2. Create the vector-load in the loop.  */
-          switch (alignment_support_scheme)
-            {
-            case dr_aligned:
-            case dr_unaligned_supported:
-              {
-                struct ptr_info_def *pi;
-                data_ref
-                  = build2 (MEM_REF, vectype, dataref_ptr,
-                            build_int_cst (reference_alias_ptr_type
                                           (DR_REF (first_dr)), 0));
-                pi = get_ptr_info (dataref_ptr);
-                pi->align = TYPE_ALIGN_UNIT (vectype);
-                if (alignment_support_scheme == dr_aligned)
-                  {
-                    gcc_assert (aligned_access_p (first_dr));
-                    pi->misalign = 0;
-                  }
-                else if (DR_MISALIGNMENT (first_dr) == -1)
-                  {
-                    TREE_TYPE (data_ref)
-                      = build_aligned_type (TREE_TYPE (data_ref),
-                                            TYPE_ALIGN (TREE_TYPE (vectype)));
-                    pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
-                    pi->misalign = 0;
-                  }
-                else
-                  {
-                    TREE_TYPE (data_ref)
-                      = build_aligned_type (TREE_TYPE (data_ref),
-                                            TYPE_ALIGN (TREE_TYPE (vectype)));
-                    pi->misalign = DR_MISALIGNMENT (first_dr);
-                  }
-                break;
-              }
-            case dr_explicit_realign:
-              {
-                tree ptr, bump;
-                tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
-
-                if (compute_in_loop)
-                  msq = vect_setup_realignment (first_stmt, gsi,
-                                                &realignment_token,
-                                                dr_explicit_realign,
-                                                dataref_ptr, NULL);
-
-                new_stmt = gimple_build_assign_with_ops
-                             (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
-                              build_int_cst
-                                (TREE_TYPE (dataref_ptr),
-                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-                ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
-                gimple_assign_set_lhs (new_stmt, ptr);
-                vect_finish_stmt_generation (stmt, new_stmt, gsi);
-                data_ref
-                  = build2 (MEM_REF, vectype, ptr,
-                            build_int_cst (reference_alias_ptr_type
                                           (DR_REF (first_dr)), 0));
-                vec_dest = vect_create_destination_var (scalar_dest, vectype);
-                new_stmt = gimple_build_assign (vec_dest, data_ref);
-                new_temp = make_ssa_name (vec_dest, new_stmt);
-                gimple_assign_set_lhs (new_stmt, new_temp);
-                gimple_set_vdef (new_stmt, gimple_vdef (stmt));
-                gimple_set_vuse (new_stmt, gimple_vuse (stmt));
-                vect_finish_stmt_generation (stmt, new_stmt, gsi);
-                msq = new_temp;
-
-                bump = size_binop (MULT_EXPR, vs_minus_1,
-                                   TYPE_SIZE_UNIT (scalar_type));
-                ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
-                new_stmt = gimple_build_assign_with_ops
-                             (BIT_AND_EXPR, NULL_TREE, ptr,
-                              build_int_cst
-                                (TREE_TYPE (ptr),
-                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-                ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
-                gimple_assign_set_lhs (new_stmt, ptr);
-                vect_finish_stmt_generation (stmt, new_stmt, gsi);
-                data_ref
-                  = build2 (MEM_REF, vectype, ptr,
-                            build_int_cst (reference_alias_ptr_type
                                           (DR_REF (first_dr)), 0));
-                break;
-              }
-            case dr_explicit_realign_optimized:
-              new_stmt = gimple_build_assign_with_ops
-                           (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
-                            build_int_cst
-                              (TREE_TYPE (dataref_ptr),
-                               -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-              new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
-              gimple_assign_set_lhs (new_stmt, new_temp);
-              vect_finish_stmt_generation (stmt, new_stmt, gsi);
-              data_ref
-                = build2 (MEM_REF, vectype, new_temp,
-                          build_int_cst (reference_alias_ptr_type
                                         (DR_REF (first_dr)), 0));
-              break;
-            default:
-              gcc_unreachable ();
-            }
-          vec_dest = vect_create_destination_var (scalar_dest, vectype);
-          new_stmt = gimple_build_assign (vec_dest, data_ref);
-          new_temp = make_ssa_name (vec_dest, new_stmt);
-          gimple_assign_set_lhs (new_stmt, new_temp);
-          vect_finish_stmt_generation (stmt, new_stmt, gsi);
-          mark_symbols_for_renaming (new_stmt);
-
-          /* 3. Handle explicit realignment if necessary/supported.  Create in
-                loop: vec_dest = realign_load (msq, lsq, realignment_token)  */
-          if (alignment_support_scheme == dr_explicit_realign_optimized
-              || alignment_support_scheme == dr_explicit_realign)
-            {
-              lsq = gimple_assign_lhs (new_stmt);
-              if (!realignment_token)
-                realignment_token = dataref_ptr;
-              vec_dest = vect_create_destination_var (scalar_dest, vectype);
-              new_stmt
-                = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR, vec_dest,
-                                                 msq, lsq, realignment_token);
-              new_temp = make_ssa_name (vec_dest, new_stmt);
-              gimple_assign_set_lhs (new_stmt, new_temp);
-              vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-              if (alignment_support_scheme == dr_explicit_realign_optimized)
-                {
-                  gcc_assert (phi);
-                  if (i == vec_num - 1 && j == ncopies - 1)
-                    add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
-                                 UNKNOWN_LOCATION);
-                  msq = lsq;
-                }
-            }
-
-          /* 4. Handle invariant-load.  */
-          if (inv_p && !bb_vinfo)
-            {
-              gcc_assert (!strided_load);
-              gcc_assert (nested_in_vect_loop_p (loop, stmt));
-              if (j == 0)
-                {
-                  int k;
-                  tree t = NULL_TREE;
-                  tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
-
-                  /* CHECKME: bitpos depends on endianess?  */
-                  bitpos = bitsize_zero_node;
-                  vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
-                                    bitsize, bitpos);
-                  vec_dest =
-                        vect_create_destination_var (scalar_dest, NULL_TREE);
-                  new_stmt = gimple_build_assign (vec_dest, vec_inv);
-                  new_temp = make_ssa_name (vec_dest, new_stmt);
-                  gimple_assign_set_lhs (new_stmt, new_temp);
-                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-                  for (k = nunits - 1; k >= 0; --k)
-                    t = tree_cons (NULL_TREE, new_temp, t);
-                  /* FIXME: use build_constructor directly.  */
-                  vec_inv = build_constructor_from_list (vectype, t);
-                  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
-                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
-                }
-              else
-                gcc_unreachable (); /* FORNOW. */
-            }
-
-          if (negative)
-            {
-              new_temp = reverse_vec_elements (new_temp, stmt, gsi);
-              new_stmt = SSA_NAME_DEF_STMT (new_temp);
-            }
-
-          /* Collect vector loads and later create their permutation in
-             vect_transform_strided_load ().  */
-          if (strided_load || slp_perm)
-            VEC_quick_push (tree, dr_chain, new_temp);
-
-          /* Store vector loads in the corresponding SLP_NODE.  */
-          if (slp && !slp_perm)
-            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
-                            new_stmt);
+          tree vec_array;
+
+          vec_array = create_vector_array (vectype, vec_num);
+
+          /* Emit:
+               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
+          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
+          gimple_call_set_lhs (new_stmt, vec_array);
+          vect_finish_stmt_generation (stmt, new_stmt, gsi);
+          mark_symbols_for_renaming (new_stmt);
+
+          /* Extract each vector into an SSA_NAME.  */
+          for (i = 0; i < vec_num; i++)
+            {
+              new_temp = read_vector_array (stmt, gsi, scalar_dest,
+                                            vec_array, i);
+              VEC_quick_push (tree, dr_chain, new_temp);
+            }
+
+          /* Record the mapping between SSA_NAMEs and statements.  */
+          vect_record_strided_load_vectors (stmt, dr_chain);
+        }
+      else
+        {
+          for (i = 0; i < vec_num; i++)
+            {
+              if (i > 0)
+                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+                                               stmt, NULL_TREE);
+
+              /* 2. Create the vector-load in the loop.  */
+              switch (alignment_support_scheme)
+                {
+                case dr_aligned:
+                case dr_unaligned_supported:
+                  {
+                    struct ptr_info_def *pi;
+                    data_ref
+                      = build2 (MEM_REF, vectype, dataref_ptr,
+                                build_int_cst (reference_alias_ptr_type
+                                               (DR_REF (first_dr)), 0));
+                    pi = get_ptr_info (dataref_ptr);
+                    pi->align = TYPE_ALIGN_UNIT (vectype);
+                    if (alignment_support_scheme == dr_aligned)
+                      {
+                        gcc_assert (aligned_access_p (first_dr));
+                        pi->misalign = 0;
+                      }
+                    else if (DR_MISALIGNMENT (first_dr) == -1)
+                      {
+                        TREE_TYPE (data_ref)
+                          = build_aligned_type (TREE_TYPE (data_ref),
+                                                TYPE_ALIGN (elem_type));
+                        pi->align = TYPE_ALIGN_UNIT (elem_type);
+                        pi->misalign = 0;
+                      }
+                    else
+                      {
+                        TREE_TYPE (data_ref)
+                          = build_aligned_type (TREE_TYPE (data_ref),
+                                                TYPE_ALIGN (elem_type));
+                        pi->misalign = DR_MISALIGNMENT (first_dr);
+                      }
+                    break;
+                  }
+                case dr_explicit_realign:
+                  {
+                    tree ptr, bump;
+                    tree vs_minus_1;
+
+                    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+
+                    if (compute_in_loop)
+                      msq = vect_setup_realignment (first_stmt, gsi,
+                                                    &realignment_token,
+                                                    dr_explicit_realign,
+                                                    dataref_ptr, NULL);
+
+                    new_stmt = gimple_build_assign_with_ops
+                                 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
+                                  build_int_cst
+                                  (TREE_TYPE (dataref_ptr),
+                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
+                    gimple_assign_set_lhs (new_stmt, ptr);
+                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                    data_ref
+                      = build2 (MEM_REF, vectype, ptr,
+                                build_int_cst (reference_alias_ptr_type
+                                               (DR_REF (first_dr)), 0));
+                    vec_dest = vect_create_destination_var (scalar_dest,
+                                                            vectype);
+                    new_stmt = gimple_build_assign (vec_dest, data_ref);
+                    new_temp = make_ssa_name (vec_dest, new_stmt);
+                    gimple_assign_set_lhs (new_stmt, new_temp);
+                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                    msq = new_temp;
+
+                    bump = size_binop (MULT_EXPR, vs_minus_1,
+                                       TYPE_SIZE_UNIT (scalar_type));
+                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
+                    new_stmt = gimple_build_assign_with_ops
+                                 (BIT_AND_EXPR, NULL_TREE, ptr,
+                                  build_int_cst
+                                  (TREE_TYPE (ptr),
+                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
+                    gimple_assign_set_lhs (new_stmt, ptr);
+                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                    data_ref
+                      = build2 (MEM_REF, vectype, ptr,
+                                build_int_cst (reference_alias_ptr_type
+                                               (DR_REF (first_dr)), 0));
+                    break;
+                  }
+                case dr_explicit_realign_optimized:
+                  new_stmt = gimple_build_assign_with_ops
+                               (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
+                                build_int_cst
+                                (TREE_TYPE (dataref_ptr),
+                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+                  new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
+                                            new_stmt);
+                  gimple_assign_set_lhs (new_stmt, new_temp);
+                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                  data_ref
+                    = build2 (MEM_REF, vectype, new_temp,
+                              build_int_cst (reference_alias_ptr_type
+                                             (DR_REF (first_dr)), 0));
+                  break;
+                default:
+                  gcc_unreachable ();
+                }
+              vec_dest = vect_create_destination_var (scalar_dest, vectype);
+              new_stmt = gimple_build_assign (vec_dest, data_ref);
+              new_temp = make_ssa_name (vec_dest, new_stmt);
+              gimple_assign_set_lhs (new_stmt, new_temp);
+              vect_finish_stmt_generation (stmt, new_stmt, gsi);
+              mark_symbols_for_renaming (new_stmt);
+
+              /* 3. Handle explicit realignment if necessary/supported.
+                 Create in loop:
+                   vec_dest = realign_load (msq, lsq, realignment_token)  */
+              if (alignment_support_scheme == dr_explicit_realign_optimized
+                  || alignment_support_scheme == dr_explicit_realign)
+                {
+                  lsq = gimple_assign_lhs (new_stmt);
+                  if (!realignment_token)
+                    realignment_token = dataref_ptr;
+                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
+                  new_stmt
+                    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
+                                                     vec_dest, msq, lsq,
+                                                     realignment_token);
+                  new_temp = make_ssa_name (vec_dest, new_stmt);
+                  gimple_assign_set_lhs (new_stmt, new_temp);
+                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+                  if (alignment_support_scheme == dr_explicit_realign_optimized)
+                    {
+                      gcc_assert (phi);
+                      if (i == vec_num - 1 && j == ncopies - 1)
+                        add_phi_arg (phi, lsq,
+                                     loop_latch_edge (containing_loop),
+                                     UNKNOWN_LOCATION);
+                      msq = lsq;
+                    }
+                }
+
+              /* 4. Handle invariant-load.  */
+              if (inv_p && !bb_vinfo)
+                {
+                  gcc_assert (!strided_load);
+                  gcc_assert (nested_in_vect_loop_p (loop, stmt));
+                  if (j == 0)
+                    {
+                      int k;
+                      tree t = NULL_TREE;
+                      tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
+
+                      /* CHECKME: bitpos depends on endianess?  */
+                      bitpos = bitsize_zero_node;
+                      vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
+                                        bitsize, bitpos);
+                      vec_dest = vect_create_destination_var (scalar_dest,
+                                                              NULL_TREE);
+                      new_stmt = gimple_build_assign (vec_dest, vec_inv);
+                      new_temp = make_ssa_name (vec_dest, new_stmt);
+                      gimple_assign_set_lhs (new_stmt, new_temp);
+                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+                      for (k = nunits - 1; k >= 0; --k)
+                        t = tree_cons (NULL_TREE, new_temp, t);
+                      /* FIXME: use build_constructor directly.  */
+                      vec_inv = build_constructor_from_list (vectype, t);
+                      new_temp = vect_init_vector (stmt, vec_inv,
+                                                   vectype, gsi);
+                      new_stmt = SSA_NAME_DEF_STMT (new_temp);
+                    }
+                  else
+                    gcc_unreachable (); /* FORNOW. */
+                }
+
+              if (negative)
+                {
+                  new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
+                }
+
+              /* Collect vector loads and later create their permutation in
+                 vect_transform_strided_load ().  */
+              if (strided_load || slp_perm)
+                VEC_quick_push (tree, dr_chain, new_temp);
+
+              /* Store vector loads in the corresponding SLP_NODE.  */
+              if (slp && !slp_perm)
+                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+            }
         }
 
       if (slp && !slp_perm)
@@ -4351,7 +4524,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
         {
           if (strided_load)
             {
-              vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
+              if (!load_lanes_p)
+                vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
               *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
             }
           else
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -788,9 +788,9 @@ extern void free_stmt_vec_info (gimple stmt);
 extern tree vectorizable_function (gimple, tree, tree);
 extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
                                     slp_tree);
-extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type,
-                                   slp_tree);
-extern void vect_model_load_cost (stmt_vec_info, int, slp_tree);
+extern void vect_model_store_cost (stmt_vec_info, int, bool,
+                                   enum vect_def_type, slp_tree);
+extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree);
 extern void vect_finish_stmt_generation (gimple, gimple,
                                          gimple_stmt_iterator *);
 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
@@ -829,7 +829,9 @@ extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
 extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree);
 extern tree vect_create_destination_var (tree, tree);
 extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT);
 extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT);
 extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple,
                                       gimple_stmt_iterator *, VEC(tree,heap) **);
 extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *,
@@ -837,6 +839,7 @@ extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *,
                                     struct loop **);
 extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int,
                                          gimple_stmt_iterator *);
+extern void vect_record_strided_load_vectors (gimple, VEC(tree,heap) *);
 extern int vect_get_place_in_interleaving_chain (gimple, gimple);
 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -7340,6 +7340,15 @@ build_nonshared_array_type (tree elt_type, tree index_type)
   return build_array_type_1 (elt_type, index_type, false);
 }
 
+/* Return a representation of ELT_TYPE[NELTS], using indices of type
+   sizetype.  */
+
+tree
+build_array_type_nelts (tree elt_type, unsigned HOST_WIDE_INT nelts)
+{
+  return build_array_type (elt_type, build_index_type (size_int (nelts - 1)));
+}
+
 /* Recursively examines the array elements of TYPE, until a non-array
    element type is found.  */
 
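A minimal usage sketch (the vector type is a stand-in for whatever type the vectorizer picked; not part of the patch):

/* Build the aggregate type vectype[2] that a LOAD_LANES/STORE_LANES
   memory reference uses for a group of two vectors; the index domain
   is sizetype 0..1, so build_index_type gets nelts - 1.  */
tree array_type = build_array_type_nelts (vectype, 2);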
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4250,6 +4250,7 @@ extern tree build_type_no_quals (tree);
 extern tree build_index_type (tree);
 extern tree build_array_type (tree, tree);
 extern tree build_nonshared_array_type (tree, tree);
+extern tree build_array_type_nelts (tree, unsigned HOST_WIDE_INT);
 extern tree build_function_type (tree, tree);
 extern tree build_function_type_list (tree, ...);
 extern tree build_function_type_skip_args (tree, bitmap);