mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-18 23:51:08 +08:00
tree-vectorizer.h (vect_recog_func_ptr): Make last argument to be a pointer.
* tree-vectorizer.h (vect_recog_func_ptr): Make last argument to be a pointer. * tree-vect-patterns.c (vect_recog_widen_sum_pattern, vect_recog_widen_mult_pattern, vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise. (vect_pattern_recog_1): Remove declaration. (widened_name_p): Remove declaration. Add new argument to specify whether to check that both types are either signed or unsigned. (vect_recog_widen_mult_pattern): Update documentation. Handle unsigned patterns and multiplication by constants. (vect_pattern_recog_1): Update vect_recog_func references. Use statement information from the statement returned from pattern detection functions. (vect_pattern_recog): Update vect_recog_func reference. * tree-vect-stmts.c (vectorizable_type_promotion): For widening multiplication by a constant use the type of the other operand. From-SVN: r174735
This commit is contained in:
parent
f527cc50e3
commit
383d9c8375
@ -1,3 +1,22 @@
|
||||
2011-06-07 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
* tree-vectorizer.h (vect_recog_func_ptr): Make last argument to be
|
||||
a pointer.
|
||||
* tree-vect-patterns.c (vect_recog_widen_sum_pattern,
|
||||
vect_recog_widen_mult_pattern, vect_recog_dot_prod_pattern,
|
||||
vect_recog_pow_pattern): Likewise.
|
||||
(vect_pattern_recog_1): Remove declaration.
|
||||
(widened_name_p): Remove declaration. Add new argument to specify
|
||||
whether to check that both types are either signed or unsigned.
|
||||
(vect_recog_widen_mult_pattern): Update documentation. Handle
|
||||
unsigned patterns and multiplication by constants.
|
||||
(vect_pattern_recog_1): Update vect_recog_func references. Use
|
||||
statement information from the statement returned from pattern
|
||||
detection functions.
|
||||
(vect_pattern_recog): Update vect_recog_func reference.
|
||||
* tree-vect-stmts.c (vectorizable_type_promotion): For widening
|
||||
multiplication by a constant use the type of the other operand.
|
||||
|
||||
2011-06-06 Richard Sandiford <rdsandiford@googlemail.com>
|
||||
|
||||
PR rtl-optimization/49145
|
||||
|
@ -1,3 +1,17 @@
|
||||
2011-06-07 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
* lib/target-supports.exp
|
||||
(check_effective_target_vect_widen_mult_qi_to_hi):
|
||||
Add NEON as supporting target.
|
||||
(check_effective_target_vect_widen_mult_hi_to_si): Likewise.
|
||||
(check_effective_target_vect_widen_mult_qi_to_hi_pattern): New.
|
||||
(check_effective_target_vect_widen_mult_hi_to_si_pattern): New.
|
||||
* gcc.dg/vect/vect-widen-mult-u8.c: Expect to be vectorized
|
||||
using widening multiplication on targets that support it.
|
||||
* gcc.dg/vect/vect-widen-mult-u16.c: Likewise.
|
||||
* gcc.dg/vect/vect-widen-mult-const-s16.c: New test.
|
||||
* gcc.dg/vect/vect-widen-mult-const-u16.c: New test.
|
||||
|
||||
2011-06-06 Nicola Pero <nicola.pero@meta-innovation.com>
|
||||
|
||||
Updated all testcases in the ObjC/ObjC++ testsuites to use the
|
||||
|
60
gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
Normal file
60
gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
Normal file
@ -0,0 +1,60 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 32
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
foo (int *__restrict a,
|
||||
short *__restrict b,
|
||||
int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
a[i] = b[i] * 2333;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
if (a[i] != b[i] * 2333)
|
||||
abort ();
|
||||
}
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
bar (int *__restrict a,
|
||||
short *__restrict b,
|
||||
int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
a[i] = b[i] * (short) 2333;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
if (a[i] != b[i] * (short) 2333)
|
||||
abort ();
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
int a[N];
|
||||
short b[N];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 0;
|
||||
b[i] = i;
|
||||
__asm__ volatile ("");
|
||||
}
|
||||
|
||||
foo (a, b, N);
|
||||
bar (a, b, N);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
|
||||
/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
77
gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
Normal file
77
gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
Normal file
@ -0,0 +1,77 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 32
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
foo (unsigned int *__restrict a,
|
||||
unsigned short *__restrict b,
|
||||
int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
a[i] = b[i] * 2333;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
if (a[i] != b[i] * 2333)
|
||||
abort ();
|
||||
}
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
bar (unsigned int *__restrict a,
|
||||
unsigned short *__restrict b,
|
||||
int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
a[i] = (unsigned short) 2333 * b[i];
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
if (a[i] != b[i] * (unsigned short) 2333)
|
||||
abort ();
|
||||
}
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
baz (unsigned int *__restrict a,
|
||||
unsigned short *__restrict b,
|
||||
int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
a[i] = b[i] * 233333333;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
if (a[i] != b[i] * 233333333)
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
unsigned int a[N];
|
||||
unsigned short b[N];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 0;
|
||||
b[i] = i;
|
||||
__asm__ volatile ("");
|
||||
}
|
||||
|
||||
foo (a, b, N);
|
||||
bar (a, b, N);
|
||||
baz (a, b, N);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target vect_widen_mult_hi_to_si } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
|
||||
/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -9,13 +9,11 @@ unsigned short X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
unsigned int result[N];
|
||||
|
||||
/* short->int widening-mult */
|
||||
/* unsigned short->unsigned int widening-mult. */
|
||||
__attribute__ ((noinline)) int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
/* Not vectorized because X[i] and Y[i] are casted to 'int'
|
||||
so the widening multiplication pattern is not recognized. */
|
||||
for (i=0; i<len; i++) {
|
||||
result[i] = (unsigned int)(X[i] * Y[i]);
|
||||
}
|
||||
@ -43,8 +41,8 @@ int main (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*The induction loop is vectorized */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
|
||||
/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
@ -9,7 +9,7 @@ unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
unsigned short result[N];
|
||||
|
||||
/* char->short widening-mult */
|
||||
/* unsigned char-> unsigned short widening-mult. */
|
||||
__attribute__ ((noinline)) int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
@ -28,8 +28,7 @@ int main (void)
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
if (i%4 == 0)
|
||||
X[i] = 5;
|
||||
__asm__ volatile ("");
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
@ -43,5 +42,7 @@ int main (void)
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
|
||||
/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
@ -2668,7 +2668,8 @@ proc check_effective_target_vect_widen_mult_qi_to_hi { } {
|
||||
} else {
|
||||
set et_vect_widen_mult_qi_to_hi_saved 0
|
||||
}
|
||||
if { [istarget powerpc*-*-*] } {
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
|
||||
set et_vect_widen_mult_qi_to_hi_saved 1
|
||||
}
|
||||
}
|
||||
@ -2701,7 +2702,8 @@ proc check_effective_target_vect_widen_mult_hi_to_si { } {
|
||||
|| [istarget spu-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
|| [istarget x86_64-*-*]
|
||||
|| ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
|
||||
set et_vect_widen_mult_hi_to_si_saved 1
|
||||
}
|
||||
}
|
||||
@ -2709,6 +2711,52 @@ proc check_effective_target_vect_widen_mult_hi_to_si { } {
|
||||
return $et_vect_widen_mult_hi_to_si_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# widening multiplication of *char* args into *short* result, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } {
|
||||
global et_vect_widen_mult_qi_to_hi_pattern
|
||||
|
||||
if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] {
|
||||
verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2
|
||||
} else {
|
||||
set et_vect_widen_mult_qi_to_hi_pattern_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
|
||||
set et_vect_widen_mult_qi_to_hi_pattern_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2
|
||||
return $et_vect_widen_mult_qi_to_hi_pattern_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# widening multiplication of *short* args into *int* result, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } {
|
||||
global et_vect_widen_mult_hi_to_si_pattern
|
||||
|
||||
if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] {
|
||||
verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2
|
||||
} else {
|
||||
set et_vect_widen_mult_hi_to_si_pattern_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget spu-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
|
||||
set et_vect_widen_mult_hi_to_si_pattern_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2
|
||||
return $et_vect_widen_mult_hi_to_si_pattern_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# dot-product of signed chars, 0 otherwise.
|
||||
#
|
||||
|
@ -38,16 +38,11 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "recog.h"
|
||||
#include "diagnostic-core.h"
|
||||
|
||||
/* Function prototypes */
|
||||
static void vect_pattern_recog_1
|
||||
(gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator);
|
||||
static bool widened_name_p (tree, gimple, tree *, gimple *);
|
||||
|
||||
/* Pattern recognition functions */
|
||||
static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *);
|
||||
static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *);
|
||||
static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *);
|
||||
static gimple vect_recog_pow_pattern (gimple, tree *, tree *);
|
||||
static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *);
|
||||
static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *);
|
||||
static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *);
|
||||
static gimple vect_recog_pow_pattern (gimple *, tree *, tree *);
|
||||
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
|
||||
vect_recog_widen_mult_pattern,
|
||||
vect_recog_widen_sum_pattern,
|
||||
@ -61,10 +56,12 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
|
||||
is a result of a type-promotion, such that:
|
||||
DEF_STMT: NAME = NOP (name0)
|
||||
where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
|
||||
*/
|
||||
If CHECK_SIGN is TRUE, check that either both types are signed or both are
|
||||
unsigned. */
|
||||
|
||||
static bool
|
||||
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt)
|
||||
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
|
||||
bool check_sign)
|
||||
{
|
||||
tree dummy;
|
||||
gimple dummy_gimple;
|
||||
@ -98,7 +95,7 @@ widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt)
|
||||
|
||||
*half_type = TREE_TYPE (oprnd0);
|
||||
if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
|
||||
|| (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type))
|
||||
|| ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
|
||||
|| (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
|
||||
return false;
|
||||
|
||||
@ -168,12 +165,12 @@ vect_recog_temp_ssa_var (tree type, gimple stmt)
|
||||
inner-loop nested in an outer-loop that is being vectorized). */
|
||||
|
||||
static gimple
|
||||
vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
|
||||
{
|
||||
gimple stmt;
|
||||
tree oprnd0, oprnd1;
|
||||
tree oprnd00, oprnd01;
|
||||
stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
|
||||
stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
|
||||
tree type, half_type;
|
||||
gimple pattern_stmt;
|
||||
tree prod_type;
|
||||
@ -181,10 +178,10 @@ vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
|
||||
tree var;
|
||||
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
if (!is_gimple_assign (*last_stmt))
|
||||
return NULL;
|
||||
|
||||
type = gimple_expr_type (last_stmt);
|
||||
type = gimple_expr_type (*last_stmt);
|
||||
|
||||
/* Look for the following pattern
|
||||
DX = (TYPE1) X;
|
||||
@ -210,7 +207,7 @@ vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
/* Starting from LAST_STMT, follow the defs of its uses in search
|
||||
of the above pattern. */
|
||||
|
||||
if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
|
||||
if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
|
||||
return NULL;
|
||||
|
||||
if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
|
||||
@ -231,14 +228,14 @@ vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
|
||||
return NULL;
|
||||
oprnd0 = gimple_assign_rhs1 (last_stmt);
|
||||
oprnd1 = gimple_assign_rhs2 (last_stmt);
|
||||
oprnd0 = gimple_assign_rhs1 (*last_stmt);
|
||||
oprnd1 = gimple_assign_rhs2 (*last_stmt);
|
||||
if (!types_compatible_p (TREE_TYPE (oprnd0), type)
|
||||
|| !types_compatible_p (TREE_TYPE (oprnd1), type))
|
||||
return NULL;
|
||||
stmt = last_stmt;
|
||||
stmt = *last_stmt;
|
||||
|
||||
if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt))
|
||||
if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
|
||||
{
|
||||
stmt = def_stmt;
|
||||
oprnd0 = gimple_assign_rhs1 (stmt);
|
||||
@ -247,7 +244,7 @@ vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
half_type = type;
|
||||
}
|
||||
|
||||
/* So far so good. Since last_stmt was detected as a (summation) reduction,
|
||||
/* So far so good. Since *last_stmt was detected as a (summation) reduction,
|
||||
we know that oprnd1 is the reduction variable (defined by a loop-header
|
||||
phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
|
||||
Left to check that oprnd0 is defined by a (widen_)mult_expr */
|
||||
@ -293,10 +290,10 @@ vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
|
||||
|| !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
|
||||
return NULL;
|
||||
if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt))
|
||||
if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
|
||||
return NULL;
|
||||
oprnd00 = gimple_assign_rhs1 (def_stmt);
|
||||
if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt))
|
||||
if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
|
||||
return NULL;
|
||||
oprnd01 = gimple_assign_rhs1 (def_stmt);
|
||||
if (!types_compatible_p (half_type0, half_type1))
|
||||
@ -322,7 +319,7 @@ vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
|
||||
/* We don't allow changing the order of the computation in the inner-loop
|
||||
when doing outer-loop vectorization. */
|
||||
gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
|
||||
gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
|
||||
|
||||
return pattern_stmt;
|
||||
}
|
||||
@ -342,24 +339,47 @@ vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
|
||||
where type 'TYPE' is at least double the size of type 'type'.
|
||||
|
||||
Also detect unsigned cases:
|
||||
|
||||
unsigned type a_t, b_t;
|
||||
unsigned TYPE u_prod_T;
|
||||
TYPE a_T, b_T, prod_T;
|
||||
|
||||
S1 a_t = ;
|
||||
S2 b_t = ;
|
||||
S3 a_T = (TYPE) a_t;
|
||||
S4 b_T = (TYPE) b_t;
|
||||
S5 prod_T = a_T * b_T;
|
||||
S6 u_prod_T = (unsigned TYPE) prod_T;
|
||||
|
||||
and multiplication by constants:
|
||||
|
||||
type a_t;
|
||||
TYPE a_T, prod_T;
|
||||
|
||||
S1 a_t = ;
|
||||
S3 a_T = (TYPE) a_t;
|
||||
S5 prod_T = a_T * CONST;
|
||||
|
||||
Input:
|
||||
|
||||
* LAST_STMT: A stmt from which the pattern search begins. In the example,
|
||||
when this function is called with S5, the pattern {S3,S4,S5} is be detected.
|
||||
* LAST_STMT: A stmt from which the pattern search begins. In the example,
|
||||
when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
|
||||
detected.
|
||||
|
||||
Output:
|
||||
|
||||
* TYPE_IN: The type of the input arguments to the pattern.
|
||||
|
||||
* TYPE_OUT: The type of the output of this pattern.
|
||||
* TYPE_OUT: The type of the output of this pattern.
|
||||
|
||||
* Return value: A new stmt that will be used to replace the sequence of
|
||||
stmts that constitute the pattern. In this case it will be:
|
||||
stmts that constitute the pattern. In this case it will be:
|
||||
WIDEN_MULT <a_t, b_t>
|
||||
*/
|
||||
|
||||
static gimple
|
||||
vect_recog_widen_mult_pattern (gimple last_stmt,
|
||||
vect_recog_widen_mult_pattern (gimple *last_stmt,
|
||||
tree *type_in,
|
||||
tree *type_out)
|
||||
{
|
||||
@ -367,39 +387,110 @@ vect_recog_widen_mult_pattern (gimple last_stmt,
|
||||
tree oprnd0, oprnd1;
|
||||
tree type, half_type0, half_type1;
|
||||
gimple pattern_stmt;
|
||||
tree vectype, vectype_out;
|
||||
tree vectype, vectype_out = NULL_TREE;
|
||||
tree dummy;
|
||||
tree var;
|
||||
enum tree_code dummy_code;
|
||||
int dummy_int;
|
||||
VEC (tree, heap) *dummy_vec;
|
||||
bool op0_ok, op1_ok;
|
||||
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
if (!is_gimple_assign (*last_stmt))
|
||||
return NULL;
|
||||
|
||||
type = gimple_expr_type (last_stmt);
|
||||
type = gimple_expr_type (*last_stmt);
|
||||
|
||||
/* Starting from LAST_STMT, follow the defs of its uses in search
|
||||
of the above pattern. */
|
||||
|
||||
if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
|
||||
if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR)
|
||||
return NULL;
|
||||
|
||||
oprnd0 = gimple_assign_rhs1 (last_stmt);
|
||||
oprnd1 = gimple_assign_rhs2 (last_stmt);
|
||||
oprnd0 = gimple_assign_rhs1 (*last_stmt);
|
||||
oprnd1 = gimple_assign_rhs2 (*last_stmt);
|
||||
if (!types_compatible_p (TREE_TYPE (oprnd0), type)
|
||||
|| !types_compatible_p (TREE_TYPE (oprnd1), type))
|
||||
return NULL;
|
||||
|
||||
/* Check argument 0 */
|
||||
if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0))
|
||||
return NULL;
|
||||
oprnd0 = gimple_assign_rhs1 (def_stmt0);
|
||||
/* Check argument 0. */
|
||||
op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false);
|
||||
/* Check argument 1. */
|
||||
op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false);
|
||||
|
||||
/* Check argument 1 */
|
||||
if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1))
|
||||
/* In case of multiplication by a constant one of the operands may not match
|
||||
the pattern, but not both. */
|
||||
if (!op0_ok && !op1_ok)
|
||||
return NULL;
|
||||
oprnd1 = gimple_assign_rhs1 (def_stmt1);
|
||||
|
||||
if (op0_ok && op1_ok)
|
||||
{
|
||||
oprnd0 = gimple_assign_rhs1 (def_stmt0);
|
||||
oprnd1 = gimple_assign_rhs1 (def_stmt1);
|
||||
}
|
||||
else if (!op0_ok)
|
||||
{
|
||||
if (CONSTANT_CLASS_P (oprnd0)
|
||||
&& TREE_CODE (half_type1) == INTEGER_TYPE
|
||||
&& tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1))
|
||||
&& tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0))
|
||||
{
|
||||
/* OPRND0 is a constant of HALF_TYPE1. */
|
||||
half_type0 = half_type1;
|
||||
oprnd1 = gimple_assign_rhs1 (def_stmt1);
|
||||
}
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
else if (!op1_ok)
|
||||
{
|
||||
if (CONSTANT_CLASS_P (oprnd1)
|
||||
&& TREE_CODE (half_type0) == INTEGER_TYPE
|
||||
&& tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0))
|
||||
&& tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1))
|
||||
{
|
||||
/* OPRND1 is a constant of HALF_TYPE0. */
|
||||
half_type1 = half_type0;
|
||||
oprnd0 = gimple_assign_rhs1 (def_stmt0);
|
||||
}
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Handle unsigned case. Look for
|
||||
S6 u_prod_T = (unsigned TYPE) prod_T;
|
||||
Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
|
||||
if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
|
||||
{
|
||||
tree lhs = gimple_assign_lhs (*last_stmt), use_lhs;
|
||||
imm_use_iterator imm_iter;
|
||||
use_operand_p use_p;
|
||||
int nuses = 0;
|
||||
gimple use_stmt = NULL;
|
||||
tree use_type;
|
||||
|
||||
if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
|
||||
return NULL;
|
||||
|
||||
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
|
||||
{
|
||||
use_stmt = USE_STMT (use_p);
|
||||
nuses++;
|
||||
}
|
||||
|
||||
if (nuses != 1 || !is_gimple_assign (use_stmt)
|
||||
|| gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
|
||||
return NULL;
|
||||
|
||||
use_lhs = gimple_assign_lhs (use_stmt);
|
||||
use_type = TREE_TYPE (use_lhs);
|
||||
if (!INTEGRAL_TYPE_P (use_type)
|
||||
|| (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
|
||||
|| (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
|
||||
return NULL;
|
||||
|
||||
type = use_type;
|
||||
*last_stmt = use_stmt;
|
||||
}
|
||||
|
||||
if (!types_compatible_p (half_type0, half_type1))
|
||||
return NULL;
|
||||
@ -413,7 +504,7 @@ vect_recog_widen_mult_pattern (gimple last_stmt,
|
||||
vectype_out = get_vectype_for_scalar_type (type);
|
||||
if (!vectype
|
||||
|| !vectype_out
|
||||
|| !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
|
||||
|| !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt,
|
||||
vectype_out, vectype,
|
||||
&dummy, &dummy, &dummy_code,
|
||||
&dummy_code, &dummy_int, &dummy_vec))
|
||||
@ -462,16 +553,16 @@ vect_recog_widen_mult_pattern (gimple last_stmt,
|
||||
*/
|
||||
|
||||
static gimple
|
||||
vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
|
||||
{
|
||||
tree fn, base, exp = NULL;
|
||||
gimple stmt;
|
||||
tree var;
|
||||
|
||||
if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
|
||||
if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL)
|
||||
return NULL;
|
||||
|
||||
fn = gimple_call_fndecl (last_stmt);
|
||||
fn = gimple_call_fndecl (*last_stmt);
|
||||
if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
|
||||
return NULL;
|
||||
|
||||
@ -481,8 +572,8 @@ vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
case BUILT_IN_POWI:
|
||||
case BUILT_IN_POWF:
|
||||
case BUILT_IN_POW:
|
||||
base = gimple_call_arg (last_stmt, 0);
|
||||
exp = gimple_call_arg (last_stmt, 1);
|
||||
base = gimple_call_arg (*last_stmt, 0);
|
||||
exp = gimple_call_arg (*last_stmt, 1);
|
||||
if (TREE_CODE (exp) != REAL_CST
|
||||
&& TREE_CODE (exp) != INTEGER_CST)
|
||||
return NULL;
|
||||
@ -574,21 +665,21 @@ vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
inner-loop nested in an outer-loop that is being vectorized). */
|
||||
|
||||
static gimple
|
||||
vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
|
||||
{
|
||||
gimple stmt;
|
||||
tree oprnd0, oprnd1;
|
||||
stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
|
||||
stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
|
||||
tree type, half_type;
|
||||
gimple pattern_stmt;
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
|
||||
tree var;
|
||||
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
if (!is_gimple_assign (*last_stmt))
|
||||
return NULL;
|
||||
|
||||
type = gimple_expr_type (last_stmt);
|
||||
type = gimple_expr_type (*last_stmt);
|
||||
|
||||
/* Look for the following pattern
|
||||
DX = (TYPE) X;
|
||||
@ -600,25 +691,25 @@ vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
/* Starting from LAST_STMT, follow the defs of its uses in search
|
||||
of the above pattern. */
|
||||
|
||||
if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
|
||||
if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
|
||||
return NULL;
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
|
||||
return NULL;
|
||||
|
||||
oprnd0 = gimple_assign_rhs1 (last_stmt);
|
||||
oprnd1 = gimple_assign_rhs2 (last_stmt);
|
||||
oprnd0 = gimple_assign_rhs1 (*last_stmt);
|
||||
oprnd1 = gimple_assign_rhs2 (*last_stmt);
|
||||
if (!types_compatible_p (TREE_TYPE (oprnd0), type)
|
||||
|| !types_compatible_p (TREE_TYPE (oprnd1), type))
|
||||
return NULL;
|
||||
|
||||
/* So far so good. Since last_stmt was detected as a (summation) reduction,
|
||||
/* So far so good. Since *last_stmt was detected as a (summation) reduction,
|
||||
we know that oprnd1 is the reduction variable (defined by a loop-header
|
||||
phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
|
||||
Left to check that oprnd0 is defined by a cast from type 'type' to type
|
||||
'TYPE'. */
|
||||
|
||||
if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt))
|
||||
if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true))
|
||||
return NULL;
|
||||
|
||||
oprnd0 = gimple_assign_rhs1 (stmt);
|
||||
@ -639,7 +730,7 @@ vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
|
||||
/* We don't allow changing the order of the computation in the inner-loop
|
||||
when doing outer-loop vectorization. */
|
||||
gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
|
||||
gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
|
||||
|
||||
return pattern_stmt;
|
||||
}
|
||||
@ -669,23 +760,27 @@ vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
|
||||
|
||||
static void
|
||||
vect_pattern_recog_1 (
|
||||
gimple (* vect_recog_func) (gimple, tree *, tree *),
|
||||
gimple (* vect_recog_func) (gimple *, tree *, tree *),
|
||||
gimple_stmt_iterator si)
|
||||
{
|
||||
gimple stmt = gsi_stmt (si), pattern_stmt;
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
stmt_vec_info stmt_info;
|
||||
stmt_vec_info pattern_stmt_info;
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
loop_vec_info loop_vinfo;
|
||||
tree pattern_vectype;
|
||||
tree type_in, type_out;
|
||||
enum tree_code code;
|
||||
int i;
|
||||
gimple next;
|
||||
|
||||
pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
|
||||
pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out);
|
||||
if (!pattern_stmt)
|
||||
return;
|
||||
|
||||
si = gsi_for_stmt (stmt);
|
||||
stmt_info = vinfo_for_stmt (stmt);
|
||||
loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
|
||||
if (VECTOR_MODE_P (TYPE_MODE (type_in)))
|
||||
{
|
||||
/* No need to check target support (already checked by the pattern
|
||||
@ -832,7 +927,7 @@ vect_pattern_recog (loop_vec_info loop_vinfo)
|
||||
unsigned int nbbs = loop->num_nodes;
|
||||
gimple_stmt_iterator si;
|
||||
unsigned int i, j;
|
||||
gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
|
||||
gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "=== vect_pattern_recog ===");
|
||||
|
@ -3232,6 +3232,33 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
fprintf (vect_dump, "use not simple.");
|
||||
return false;
|
||||
}
|
||||
|
||||
op_type = TREE_CODE_LENGTH (code);
|
||||
if (op_type == binary_op)
|
||||
{
|
||||
bool ok;
|
||||
|
||||
op1 = gimple_assign_rhs2 (stmt);
|
||||
if (code == WIDEN_MULT_EXPR)
|
||||
{
|
||||
/* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
|
||||
OP1. */
|
||||
if (CONSTANT_CLASS_P (op0))
|
||||
ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
|
||||
&def_stmt, &def, &dt[1], &vectype_in);
|
||||
else
|
||||
ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
|
||||
&dt[1]);
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "use not simple.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If op0 is an external or constant def use a vector type with
|
||||
the same size as the output vector type. */
|
||||
if (!vectype_in)
|
||||
@ -3264,18 +3291,6 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
|
||||
gcc_assert (ncopies >= 1);
|
||||
|
||||
op_type = TREE_CODE_LENGTH (code);
|
||||
if (op_type == binary_op)
|
||||
{
|
||||
op1 = gimple_assign_rhs2 (stmt);
|
||||
if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "use not simple.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Supportable by target? */
|
||||
if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
|
||||
&decl1, &decl2, &code1, &code2,
|
||||
@ -3301,6 +3316,14 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
|
||||
ncopies);
|
||||
|
||||
if (code == WIDEN_MULT_EXPR)
|
||||
{
|
||||
if (CONSTANT_CLASS_P (op0))
|
||||
op0 = fold_convert (TREE_TYPE (op1), op0);
|
||||
else if (CONSTANT_CLASS_P (op1))
|
||||
op1 = fold_convert (TREE_TYPE (op0), op1);
|
||||
}
|
||||
|
||||
/* Handle def. */
|
||||
/* In case of multi-step promotion, we first generate promotion operations
|
||||
to the intermediate types, and then from those types to the final one.
|
||||
|
@ -896,7 +896,7 @@ extern void vect_slp_transform_bb (basic_block);
|
||||
/* Pattern recognition functions.
|
||||
Additional pattern recognition functions can (and will) be added
|
||||
in the future. */
|
||||
typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
|
||||
typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
|
||||
#define NUM_PATTERNS 4
|
||||
void vect_pattern_recog (loop_vec_info);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user