Vectorise multiply high with scaling operations (PR 89386)
2019-09-12  Yuliang Wang  <yuliang.wang@arm.com>

gcc/
        PR tree-optimization/89386
        * config/aarch64/aarch64-sve2.md (<su>mull<bt><Vwide>)
        (<r>shrnb<mode>, <r>shrnt<mode>): New SVE2 patterns.
        (<su>mulh<r>s<mode>3): New pattern for MULHRS.
        * config/aarch64/iterators.md (UNSPEC_SMULLB, UNSPEC_SMULLT)
        (UNSPEC_UMULLB, UNSPEC_UMULLT, UNSPEC_SHRNB, UNSPEC_SHRNT)
        (UNSPEC_RSHRNB, UNSPEC_RSHRNT, UNSPEC_SMULHS, UNSPEC_SMULHRS)
        UNSPEC_UMULHS, UNSPEC_UMULHRS): New unspecs.
        (MULLBT, SHRNB, SHRNT, MULHRS): New int iterators.
        (su, r): Handle the unspecs above.
        (bt): New int attribute.
        * internal-fn.def (IFN_MULHS, IFN_MULHRS): New internal functions.
        * internal-fn.c (first_commutative_argument): Commutativity info for
        above.
        * optabs.def (smulhs_optab, smulhrs_optab, umulhs_optab)
        (umulhrs_optab): New optabs.
        * doc/md.texi (smulhs$var{m3}, umulhs$var{m3})
        (smulhrs$var{m3}, umulhrs$var{m3}): Documentation for the above.
        * tree-vect-patterns.c (vect_recog_mulhs_pattern): New pattern
        function.
        (vect_vect_recog_func_ptrs): Add it.
        * testsuite/gcc.target/aarch64/sve2/mulhrs_1.c: New test.
        * testsuite/gcc.dg/vect/vect-mulhrs-1.c: As above.
        * testsuite/gcc.dg/vect/vect-mulhrs-2.c: As above.
        * testsuite/gcc.dg/vect/vect-mulhrs-3.c: As above.
        * testsuite/gcc.dg/vect/vect-mulhrs-4.c: As above.
        * doc/sourcebuild.texi (vect_mulhrs_hi): Document new target selector.
        * testsuite/lib/target-supports.exp
        (check_effective_target_vect_mulhrs_hi): Return true for AArch64
        with SVE2.

From-SVN: r275682
parent 8c58d9d837
commit 58cc98767a
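
For orientation, the scalar idiom this change lets the vectoriser recognise looks like the following (a minimal sketch distilled from the vect-mulhrs tests added below; the function names are illustrative only). The dg-final scans in the new tests check that these loops become .MULHS / .MULHRS internal-function calls when the target supports them (vect_mulhrs_hi):

#include <stdint.h>

/* Multiply high with scaling: only the low 16 bits of the shifted
   32-bit product are kept, which is what vect_recog_mulhs_pattern
   looks for.  */
void
mulhs_q15 (int16_t *restrict a, const int16_t *restrict b,
           const int16_t *restrict c, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = ((int32_t) b[i] * (int32_t) c[i]) >> 15;                /* IFN_MULHS */
}

/* The rounding variant: shift by one less, add one, shift by one.  */
void
mulhrs_q15 (int16_t *restrict a, const int16_t *restrict b,
            const int16_t *restrict c, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = ((((int32_t) b[i] * (int32_t) c[i]) >> 14) + 1) >> 1;   /* IFN_MULHRS */
}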
gcc/ChangeLog
@@ -1,3 +1,36 @@
2019-09-12  Yuliang Wang  <yuliang.wang@arm.com>

        PR tree-optimization/89386
        * config/aarch64/aarch64-sve2.md (<su>mull<bt><Vwide>)
        (<r>shrnb<mode>, <r>shrnt<mode>): New SVE2 patterns.
        (<su>mulh<r>s<mode>3): New pattern for MULHRS.
        * config/aarch64/iterators.md (UNSPEC_SMULLB, UNSPEC_SMULLT)
        (UNSPEC_UMULLB, UNSPEC_UMULLT, UNSPEC_SHRNB, UNSPEC_SHRNT)
        (UNSPEC_RSHRNB, UNSPEC_RSHRNT, UNSPEC_SMULHS, UNSPEC_SMULHRS)
        UNSPEC_UMULHS, UNSPEC_UMULHRS): New unspecs.
        (MULLBT, SHRNB, SHRNT, MULHRS): New int iterators.
        (su, r): Handle the unspecs above.
        (bt): New int attribute.
        * internal-fn.def (IFN_MULHS, IFN_MULHRS): New internal functions.
        * internal-fn.c (first_commutative_argument): Commutativity info for
        above.
        * optabs.def (smulhs_optab, smulhrs_optab, umulhs_optab)
        (umulhrs_optab): New optabs.
        * doc/md.texi (smulhs$var{m3}, umulhs$var{m3})
        (smulhrs$var{m3}, umulhrs$var{m3}): Documentation for the above.
        * tree-vect-patterns.c (vect_recog_mulhs_pattern): New pattern
        function.
        (vect_vect_recog_func_ptrs): Add it.
        * testsuite/gcc.target/aarch64/sve2/mulhrs_1.c: New test.
        * testsuite/gcc.dg/vect/vect-mulhrs-1.c: As above.
        * testsuite/gcc.dg/vect/vect-mulhrs-2.c: As above.
        * testsuite/gcc.dg/vect/vect-mulhrs-3.c: As above.
        * testsuite/gcc.dg/vect/vect-mulhrs-4.c: As above.
        * doc/sourcebuild.texi (vect_mulhrs_hi): Document new target selector.
        * testsuite/lib/target-supports.exp
        (check_effective_target_vect_mulhrs_hi): Return true for AArch64
        with SVE2.

2019-09-11  Michael Meissner  <meissner@linux.ibm.com>

        * config/rs6000/predicates.md (non_add_cint_operand): Simplify the

gcc/config/aarch64/aarch64-sve2.md
@@ -63,3 +63,63 @@
   movprfx\t%0, %2\;<sur>h<addsub>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Multiply long top / bottom.
(define_insn "<su>mull<bt><Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")
                         (match_operand:SVE_BHSI 2 "register_operand" "w")]
                        MULLBT))]
  "TARGET_SVE2"
  "<su>mull<bt>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
)

;; (Rounding) Right shift narrow bottom.
(define_insn "<r>shrnb<mode>"
  [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
        (unspec:SVE_BHSI
          [(match_operand:<VWIDE> 1 "register_operand" "w")
           (match_operand 2 "aarch64_simd_shift_imm_offset_<Vel>" "")]
          SHRNB))]
  "TARGET_SVE2"
  "<r>shrnb\t%0.<Vetype>, %1.<Vewtype>, #%2"
)

;; (Rounding) Right shift narrow top.
(define_insn "<r>shrnt<mode>"
  [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
        (unspec:SVE_BHSI
          [(match_operand:SVE_BHSI 1 "register_operand" "0")
           (match_operand:<VWIDE> 2 "register_operand" "w")
           (match_operand 3 "aarch64_simd_shift_imm_offset_<Vel>" "i")]
          SHRNT))]
  "TARGET_SVE2"
  "<r>shrnt\t%0.<Vetype>, %2.<Vewtype>, #%3"
)

;; Unpredicated integer multiply-high-with-(round-and-)scale.
(define_expand "<su>mulh<r>s<mode>3"
  [(set (match_operand:SVE_BHSI 0 "register_operand")
        (unspec:SVE_BHSI
          [(match_dup 3)
           (unspec:SVE_BHSI [(match_operand:SVE_BHSI 1 "register_operand")
                             (match_operand:SVE_BHSI 2 "register_operand")]
                            MULHRS)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);

    rtx prod_b = gen_reg_rtx (<VWIDE>mode);
    rtx prod_t = gen_reg_rtx (<VWIDE>mode);
    emit_insn (gen_<su>mullb<Vwide> (prod_b, operands[1], operands[2]));
    emit_insn (gen_<su>mullt<Vwide> (prod_t, operands[1], operands[2]));

    rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
    emit_insn (gen_<r>shrnb<mode> (operands[0], prod_b, shift));
    emit_insn (gen_<r>shrnt<mode> (operands[0], operands[0], prod_t, shift));

    DONE;
  }
)

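A rough scalar model of what the <su>mulh<r>s<mode>3 expander above emits for 16-bit elements, assuming the usual SVE2 convention that the B (bottom) forms operate on even-numbered elements and the T (top) forms on odd-numbered ones; this is only an illustration of the lowering, not code from the patch:

#include <stdint.h>

/* Sketch of the rounding (smulhrs) expansion for 16-bit elements:
   two widening multiplies split over even/odd lanes, then two
   narrowing shifts by GET_MODE_UNIT_BITSIZE - 1 = 15 that fill the
   even/odd lanes of the result.  The rounding form adds 1 << 14
   before the shift, which matches ((x >> 14) + 1) >> 1.  */
static inline int16_t
rshrn15 (int32_t x)
{
  return (int16_t) ((x + (1 << 14)) >> 15);          /* RSHRNB / RSHRNT #15 */
}

void
smulhrs_model (int16_t *restrict z, const int16_t *restrict x,
               const int16_t *restrict y, int n)
{
  for (int i = 0; i + 1 < n; i += 2)
    {
      int32_t prod_b = (int32_t) x[i] * y[i];          /* SMULLB */
      int32_t prod_t = (int32_t) x[i + 1] * y[i + 1];  /* SMULLT */
      z[i] = rshrn15 (prod_b);                          /* RSHRNB */
      z[i + 1] = rshrn15 (prod_t);                      /* RSHRNT */
    }
}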
gcc/config/aarch64/iterators.md
@@ -378,6 +378,10 @@
    UNSPEC_RSUBHN2      ; Used in aarch64-simd.md.
    UNSPEC_SQDMULH      ; Used in aarch64-simd.md.
    UNSPEC_SQRDMULH     ; Used in aarch64-simd.md.
    UNSPEC_SMULLB       ; Used in aarch64-sve2.md.
    UNSPEC_SMULLT       ; Used in aarch64-sve2.md.
    UNSPEC_UMULLB       ; Used in aarch64-sve2.md.
    UNSPEC_UMULLT       ; Used in aarch64-sve2.md.
    UNSPEC_PMUL         ; Used in aarch64-simd.md.
    UNSPEC_FMULX        ; Used in aarch64-simd.md.
    UNSPEC_USQADD       ; Used in aarch64-simd.md.
@@ -400,6 +404,10 @@
    UNSPEC_UQSHRN       ; Used in aarch64-simd.md.
    UNSPEC_SQRSHRN      ; Used in aarch64-simd.md.
    UNSPEC_UQRSHRN      ; Used in aarch64-simd.md.
    UNSPEC_SHRNB        ; Used in aarch64-sve2.md.
    UNSPEC_SHRNT        ; Used in aarch64-sve2.md.
    UNSPEC_RSHRNB       ; Used in aarch64-sve2.md.
    UNSPEC_RSHRNT       ; Used in aarch64-sve2.md.
    UNSPEC_SSHL         ; Used in aarch64-simd.md.
    UNSPEC_USHL         ; Used in aarch64-simd.md.
    UNSPEC_SRSHL        ; Used in aarch64-simd.md.
@@ -523,6 +531,10 @@
    UNSPEC_FCMLA90      ; Used in aarch64-simd.md.
    UNSPEC_FCMLA180     ; Used in aarch64-simd.md.
    UNSPEC_FCMLA270     ; Used in aarch64-simd.md.
    UNSPEC_SMULHS       ; Used in aarch64-sve2.md.
    UNSPEC_SMULHRS      ; Used in aarch64-sve2.md.
    UNSPEC_UMULHS       ; Used in aarch64-sve2.md.
    UNSPEC_UMULHRS      ; Used in aarch64-sve2.md.
])

;; ------------------------------------------------------------------
@@ -1588,6 +1600,13 @@

(define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])

(define_int_iterator MULLBT [UNSPEC_SMULLB UNSPEC_UMULLB
                             UNSPEC_SMULLT UNSPEC_UMULLT])

(define_int_iterator SHRNB [UNSPEC_SHRNB UNSPEC_RSHRNB])

(define_int_iterator SHRNT [UNSPEC_SHRNT UNSPEC_RSHRNT])

(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])

(define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN
@@ -1607,6 +1626,9 @@

(define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])

(define_int_iterator MULHRS [UNSPEC_SMULHS UNSPEC_UMULHS
                             UNSPEC_SMULHRS UNSPEC_UMULHRS])

(define_int_iterator USSUQADD [UNSPEC_SUQADD UNSPEC_USQADD])

(define_int_iterator SUQMOVN [UNSPEC_SQXTN UNSPEC_UQXTN])
@@ -1872,7 +1894,11 @@
                      (UNSPEC_COND_FCVTZS "s")
                      (UNSPEC_COND_FCVTZU "u")
                      (UNSPEC_COND_SCVTF "s")
                      (UNSPEC_COND_UCVTF "u")])
                      (UNSPEC_COND_UCVTF "u")
                      (UNSPEC_SMULLB "s") (UNSPEC_UMULLB "u")
                      (UNSPEC_SMULLT "s") (UNSPEC_UMULLT "u")
                      (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u")
                      (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")])

(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
                      (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
@@ -1910,6 +1936,10 @@
                    (UNSPEC_SQRSHRN "r") (UNSPEC_UQRSHRN "r")
                    (UNSPEC_SQSHL "") (UNSPEC_UQSHL "")
                    (UNSPEC_SQRSHL "r")(UNSPEC_UQRSHL "r")
                    (UNSPEC_SHRNB "") (UNSPEC_SHRNT "")
                    (UNSPEC_RSHRNB "r") (UNSPEC_RSHRNT "r")
                    (UNSPEC_SMULHS "") (UNSPEC_UMULHS "")
                    (UNSPEC_SMULHRS "r") (UNSPEC_UMULHRS "r")
])

(define_int_attr lr [(UNSPEC_SSLI "l") (UNSPEC_USLI "l")
@@ -1922,6 +1952,9 @@
                     (UNSPEC_SHADD "") (UNSPEC_UHADD "u")
                     (UNSPEC_SRHADD "") (UNSPEC_URHADD "u")])

(define_int_attr bt [(UNSPEC_SMULLB "b") (UNSPEC_UMULLB "b")
                     (UNSPEC_SMULLT "t") (UNSPEC_UMULLT "t")])

(define_int_attr addsub [(UNSPEC_SHADD "add")
                         (UNSPEC_UHADD "add")
                         (UNSPEC_SRHADD "add")

gcc/doc/md.texi
@@ -5387,6 +5387,33 @@ operand 1. Add operand 1 to operand 2 and place the widened result in
operand 0. (This is used express accumulation of elements into an accumulator
of a wider mode.)

@cindex @code{smulhs@var{m3}} instruction pattern
@item @samp{smulhs@var{m3}}
@cindex @code{umulhs@var{m3}} instruction pattern
@itemx @samp{umulhs@var{m3}}
Signed/unsigned multiply high with scale. This is equivalent to the C code:
@smallexample
narrow op0, op1, op2;
@dots{}
op0 = (narrow) (((wide) op1 * (wide) op2) >> (N / 2 - 1));
@end smallexample
where the sign of @samp{narrow} determines whether this is a signed
or unsigned operation, and @var{N} is the size of @samp{wide} in bits.

@cindex @code{smulhrs@var{m3}} instruction pattern
@item @samp{smulhrs@var{m3}}
@cindex @code{umulhrs@var{m3}} instruction pattern
@itemx @samp{umulhrs@var{m3}}
Signed/unsigned multiply high with round and scale. This is
equivalent to the C code:
@smallexample
narrow op0, op1, op2;
@dots{}
op0 = (narrow) (((((wide) op1 * (wide) op2) >> (N / 2 - 2)) + 1) >> 1);
@end smallexample
where the sign of @samp{narrow} determines whether this is a signed
or unsigned operation, and @var{N} is the size of @samp{wide} in bits.

@cindex @code{vec_shl_insert_@var{m}} instruction pattern
@item @samp{vec_shl_insert_@var{m}}
Shift the elements in vector input operand 1 left one element (i.e.@:

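As a concrete reading of the two formulas documented above, take 16-bit narrow and 32-bit wide operands, so N = 32 and the shifts are by 15 and by 14 plus the rounding step. The values below are chosen so the truncating and rounding forms differ; this small self-contained check is an illustration, not part of the patch:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* narrow = int16_t, wide = int32_t, N = 32.  */
  int16_t op1 = 0x4000, op2 = 3;
  int32_t prod = (int32_t) op1 * op2;                 /* 49152, i.e. 1.5 * 2^15 */

  int16_t hs  = (int16_t) (prod >> 15);               /* smulhs:  1 (truncated) */
  int16_t hrs = (int16_t) (((prod >> 14) + 1) >> 1);  /* smulhrs: 2 (rounded)   */

  printf ("%d %d\n", hs, hrs);                        /* prints "1 2" */
  return 0;
}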
gcc/doc/sourcebuild.texi
@@ -1442,6 +1442,10 @@ vector alignment.
Target supports both signed and unsigned averaging operations on vectors
of bytes.

@item vect_mulhrs_hi
Target supports both signed and unsigned multiply-high-with-round-and-scale
operations on vectors of half-words.

@item vect_condition
Target supports vector conditional operations.

gcc/internal-fn.c
@@ -3210,6 +3210,8 @@ first_commutative_argument (internal_fn fn)
    case IFN_FNMS:
    case IFN_AVG_FLOOR:
    case IFN_AVG_CEIL:
    case IFN_MULHS:
    case IFN_MULHRS:
    case IFN_FMIN:
    case IFN_FMAX:
      return 0;

gcc/internal-fn.def
@@ -149,6 +149,11 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
                              savg_ceil, uavg_ceil, binary)

DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first,
                              smulhs, umulhs, binary)
DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
                              smulhrs, umulhrs, binary)

DEF_INTERNAL_OPTAB_FN (COND_ADD, ECF_CONST, cond_add, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_SUB, ECF_CONST, cond_sub, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_MUL, ECF_CONST, cond_smul, cond_binary)

gcc/optabs.def
@@ -343,6 +343,10 @@ OPTAB_D (udot_prod_optab, "udot_prod$I$a")
OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
OPTAB_D (usad_optab, "usad$I$a")
OPTAB_D (ssad_optab, "ssad$I$a")
OPTAB_D (smulhs_optab, "smulhs$a3")
OPTAB_D (smulhrs_optab, "smulhrs$a3")
OPTAB_D (umulhs_optab, "umulhs$a3")
OPTAB_D (umulhrs_optab, "umulhrs$a3")
OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a")
OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a")

gcc/testsuite/gcc.dg/vect/vect-mulhrs-1.c (new file)
@@ -0,0 +1,49 @@
/* { dg-require-effective-target vect_int } */

#include "tree-vect.h"
#ifndef SIGNEDNESS
#define SIGNEDNESS signed
#endif
#ifndef BIAS
#define BIAS 0
#endif

#define HRS(x) ((((x) >> (15 - BIAS)) + BIAS) >> BIAS)

void __attribute__ ((noipa))
f (SIGNEDNESS short *restrict a, SIGNEDNESS short *restrict b,
   SIGNEDNESS short *restrict c, __INTPTR_TYPE__ n)
{
  for (__INTPTR_TYPE__ i = 0; i < n; ++i)
    a[i] = HRS((SIGNEDNESS int) b[i] * (SIGNEDNESS int) c[i]);
}

#define N 50
#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
#define CONST1 0x01AB
#define CONST2 0x01CD

int
main (void)
{
  check_vect ();

  SIGNEDNESS short a[N], b[N], c[N];
  for (int i = 0; i < N; ++i)
    {
      b[i] = BASE1 + i * CONST1;
      c[i] = BASE2 + i * CONST2;
      asm volatile ("" ::: "memory");
    }
  f (a, b, c, N);
  for (int i = 0; i < N; ++i)
    if (a[i] != HRS(BASE1 * BASE2 + i * i * (CONST1 * CONST2)
                    + i * (BASE1 * CONST2 + BASE2 * CONST1)))
      __builtin_abort ();
  return 0;
}

/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.MULHS} "vect" { target vect_mulhrs_hi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */

gcc/testsuite/gcc.dg/vect/vect-mulhrs-2.c (new file)
@@ -0,0 +1,9 @@
/* { dg-require-effective-target vect_int } */

#define SIGNEDNESS unsigned

#include "vect-mulhrs-1.c"

/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.MULHS} "vect" { target vect_mulhrs_hi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */

gcc/testsuite/gcc.dg/vect/vect-mulhrs-3.c (new file)
@@ -0,0 +1,9 @@
/* { dg-require-effective-target vect_int } */

#define BIAS 1

#include "vect-mulhrs-1.c"

/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.MULHRS} "vect" { target vect_mulhrs_hi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */

gcc/testsuite/gcc.dg/vect/vect-mulhrs-4.c (new file)
@@ -0,0 +1,10 @@
/* { dg-require-effective-target vect_int } */

#define SIGNEDNESS unsigned
#define BIAS 1

#include "vect-mulhrs-1.c"

/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.MULHRS} "vect" { target vect_mulhrs_hi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */

gcc/testsuite/gcc.target/aarch64/sve2/mulhrs_1.c (new file)
@@ -0,0 +1,63 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */

#include <stdint.h>

#define MULTHI(TYPE, BIGGER, RND)                                \
TYPE __attribute__ ((noinline, noclone))                         \
mulhs_##TYPE##_##RND (TYPE *restrict x,                          \
                      TYPE *restrict y, TYPE *restrict z, int n) \
{                                                                \
  for (int i = 0; i < n; i++)                                    \
    {                                                            \
      z[i] = ((((BIGGER)x[i] * (BIGGER)y[i]) >>                  \
               (sizeof(BIGGER)*8/2-2)) + RND) >> 1;              \
    }                                                            \
}

MULTHI (int8_t, int16_t, 0)
MULTHI (int16_t, int32_t, 0)
MULTHI (int32_t, int64_t, 0)

MULTHI (uint8_t, uint16_t, 0)
MULTHI (uint16_t, uint32_t, 0)
MULTHI (uint32_t, uint64_t, 0)

MULTHI (int8_t, int16_t, 1)
MULTHI (int16_t, int32_t, 1)
MULTHI (int32_t, int64_t, 1)

MULTHI (uint8_t, uint16_t, 1)
MULTHI (uint16_t, uint32_t, 1)
MULTHI (uint32_t, uint64_t, 1)

/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 12 "vect" } } */

/* { dg-final { scan-assembler-times {\tsmullb\tz[0-9]+\.h, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsmullt\tz[0-9]+\.h, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsmullb\tz[0-9]+\.s, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsmullt\tz[0-9]+\.s, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsmullb\tz[0-9]+\.d, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsmullt\tz[0-9]+\.d, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */

/* { dg-final { scan-assembler-times {\tshrnb\tz[0-9]+\.b, z[0-9]+\.h, #7\n} 2 } } */
/* { dg-final { scan-assembler-times {\tshrnt\tz[0-9]+\.b, z[0-9]+\.h, #7\n} 2 } } */
/* { dg-final { scan-assembler-times {\tshrnb\tz[0-9]+\.h, z[0-9]+\.s, #15\n} 2 } } */
/* { dg-final { scan-assembler-times {\tshrnt\tz[0-9]+\.h, z[0-9]+\.s, #15\n} 2 } } */
/* { dg-final { scan-assembler-times {\tshrnb\tz[0-9]+\.s, z[0-9]+\.d, #31\n} 2 } } */
/* { dg-final { scan-assembler-times {\tshrnt\tz[0-9]+\.s, z[0-9]+\.d, #31\n} 2 } } */

/* { dg-final { scan-assembler-times {\tumullb\tz[0-9]+\.h, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
/* { dg-final { scan-assembler-times {\tumullt\tz[0-9]+\.h, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
/* { dg-final { scan-assembler-times {\tumullb\tz[0-9]+\.s, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
/* { dg-final { scan-assembler-times {\tumullt\tz[0-9]+\.s, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
/* { dg-final { scan-assembler-times {\tumullb\tz[0-9]+\.d, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tumullt\tz[0-9]+\.d, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */

/* { dg-final { scan-assembler-times {\trshrnb\tz[0-9]+\.b, z[0-9]+\.h, #7\n} 2 } } */
/* { dg-final { scan-assembler-times {\trshrnt\tz[0-9]+\.b, z[0-9]+\.h, #7\n} 2 } } */
/* { dg-final { scan-assembler-times {\trshrnb\tz[0-9]+\.h, z[0-9]+\.s, #15\n} 2 } } */
/* { dg-final { scan-assembler-times {\trshrnt\tz[0-9]+\.h, z[0-9]+\.s, #15\n} 2 } } */
/* { dg-final { scan-assembler-times {\trshrnb\tz[0-9]+\.s, z[0-9]+\.d, #31\n} 2 } } */
/* { dg-final { scan-assembler-times {\trshrnt\tz[0-9]+\.s, z[0-9]+\.d, #31\n} 2 } } */

gcc/testsuite/lib/target-supports.exp
@@ -6175,6 +6175,15 @@ proc check_effective_target_vect_avg_qi {} {
             && ![check_effective_target_aarch64_sve1_only] }]
}

# Return 1 if the target plus current options supports both signed
# and unsigned multiply-high-with-round-and-scale operations
# on vectors of half-words.

proc check_effective_target_vect_mulhrs_hi {} {
    return [expr { [istarget aarch64*-*-*]
                   && [check_effective_target_aarch64_sve2] }]
}

# Return 1 if the target plus current options supports a vector
# demotion (packing) of shorts (to chars) and ints (to shorts)
# using modulo arithmetic, 0 otherwise.

gcc/tree-vect-patterns.c
@@ -1723,6 +1723,175 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
  return pattern_stmt;
}

/* Recognize the following patterns:

     ATYPE a;  // narrower than TYPE
     BTYPE b;  // narrower than TYPE

   1) Multiply high with scaling
     TYPE res = ((TYPE) a * (TYPE) b) >> c;
   2) ... or also with rounding
     TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;

   where only the bottom half of res is used.  */

static gimple *
vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a right shift.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt
      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    return NULL;
  vec_info *vinfo = last_stmt_info->vinfo;

  /* Check that the shift result is wider than the users of the
     result need (i.e. that narrowing would be a natural choice).  */
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
  unsigned int target_precision
    = vect_element_precision (last_stmt_info->min_output_precision);
  if (!INTEGRAL_TYPE_P (lhs_type)
      || target_precision >= TYPE_PRECISION (lhs_type))
    return NULL;

  /* Look through any change in sign on the outer shift input.  */
  vect_unpromoted_value unprom_rshift_input;
  tree rshift_input = vect_look_through_possible_promotion
    (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
  if (!rshift_input
      || TYPE_PRECISION (TREE_TYPE (rshift_input))
           != TYPE_PRECISION (lhs_type))
    return NULL;

  /* Get the definition of the shift input.  */
  stmt_vec_info rshift_input_stmt_info
    = vect_get_internal_def (vinfo, rshift_input);
  if (!rshift_input_stmt_info)
    return NULL;
  gassign *rshift_input_stmt
    = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
  if (!rshift_input_stmt)
    return NULL;

  stmt_vec_info mulh_stmt_info;
  tree scale_term;
  internal_fn ifn;
  unsigned int expect_offset;

  /* Check for the presence of the rounding term.  */
  if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    {
      /* Check that the outer shift was by 1.  */
      if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
        return NULL;

      /* Check that the second operand of the PLUS_EXPR is 1.  */
      if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
        return NULL;

      /* Look through any change in sign on the addition input.  */
      vect_unpromoted_value unprom_plus_input;
      tree plus_input = vect_look_through_possible_promotion
        (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
      if (!plus_input
          || TYPE_PRECISION (TREE_TYPE (plus_input))
               != TYPE_PRECISION (TREE_TYPE (rshift_input)))
        return NULL;

      /* Get the definition of the multiply-high-scale part.  */
      stmt_vec_info plus_input_stmt_info
        = vect_get_internal_def (vinfo, plus_input);
      if (!plus_input_stmt_info)
        return NULL;
      gassign *plus_input_stmt
        = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
      if (!plus_input_stmt
          || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
        return NULL;

      /* Look through any change in sign on the scaling input.  */
      vect_unpromoted_value unprom_scale_input;
      tree scale_input = vect_look_through_possible_promotion
        (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
      if (!scale_input
          || TYPE_PRECISION (TREE_TYPE (scale_input))
               != TYPE_PRECISION (TREE_TYPE (plus_input)))
        return NULL;

      /* Get the definition of the multiply-high part.  */
      mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
      if (!mulh_stmt_info)
        return NULL;

      /* Get the scaling term.  */
      scale_term = gimple_assign_rhs2 (plus_input_stmt);

      expect_offset = target_precision + 2;
      ifn = IFN_MULHRS;
    }
  else
    {
      mulh_stmt_info = rshift_input_stmt_info;
      scale_term = gimple_assign_rhs2 (last_stmt);

      expect_offset = target_precision + 1;
      ifn = IFN_MULHS;
    }

  /* Check that the scaling factor is correct.  */
  if (TREE_CODE (scale_term) != INTEGER_CST
      || wi::to_widest (scale_term) + expect_offset
           != TYPE_PRECISION (lhs_type))
    return NULL;

  /* Check whether the scaling input term can be seen as two widened
     inputs multiplied together.  */
  vect_unpromoted_value unprom_mult[2];
  tree new_type;
  unsigned int nops
    = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
                            false, 2, unprom_mult, &new_type);
  if (nops != 2)
    return NULL;

  vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);

  /* Adjust output precision.  */
  if (TYPE_PRECISION (new_type) < target_precision)
    new_type = build_nonstandard_integer_type
      (target_precision, TYPE_UNSIGNED (new_type));

  /* Check for target support.  */
  tree new_vectype = get_vectype_for_scalar_type (new_type);
  if (!new_vectype
      || !direct_internal_fn_supported_p
            (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    return NULL;

  /* The IR requires a valid vector type for the cast result, even though
     it's likely to be discarded.  */
  *type_out = get_vectype_for_scalar_type (lhs_type);
  if (!*type_out)
    return NULL;

  /* Generate the IFN_MULHRS call.  */
  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
  tree new_ops[2];
  vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
                       unprom_mult, new_vectype);
  gcall *mulhrs_stmt
    = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
  gimple_call_set_lhs (mulhrs_stmt, new_var);
  gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created pattern stmt: %G", mulhrs_stmt);

  return vect_convert_output (last_stmt_info, lhs_type,
                              mulhrs_stmt, new_vectype);
}

/* Recognize the patterns:

     ATYPE a;  // narrower than TYPE

@@ -4713,6 +4882,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },