mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 17:21:07 +08:00
aarch64-simd.md (clrsb<mode>2, [...]): New patterns.
* config/aarch64/aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New patterns. * config/aarch64/aarch64-simd-builtins.def (clrsb, popcount): New builtins. * config/aarch64/arm_neon.h (vcls_s8, vcls_s16, vcls_s32, vclsq_s8, vclsq_s16, vclsq_s32, vcnt_p8, vcnt_s8, vcnt_u8, vcntq_p8, vcntq_s8, vcntq_u8): Rewrite using builtin functions. Co-Authored-By: Shanyao Chen <chenshanyao@huawei.com> From-SVN: r218464
This commit is contained in:
parent
07bdf21b53
commit
a5e69cad62
@ -1,3 +1,14 @@
|
||||
2014-12-07 Felix Yang <felix.yang@huawei.com>
|
||||
Shanyao Chen <chenshanyao@huawei.com>
|
||||
|
||||
* config/aarch64/aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New
|
||||
patterns.
|
||||
* config/aarch64/aarch64-simd-builtins.def (clrsb, popcount): New
|
||||
builtins.
|
||||
* config/aarch64/arm_neon.h (vcls_s8, vcls_s16, vcls_s32, vclsq_s8,
|
||||
vclsq_s16, vclsq_s32, vcnt_p8, vcnt_s8, vcnt_u8, vcntq_p8, vcntq_s8,
|
||||
vcntq_u8): Rewrite using builtin functions.
|
||||
|
||||
2014-12-07 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
* symtab.c (symtab_node::equal_address_to): New function.
|
||||
|
@ -44,8 +44,10 @@
|
||||
BUILTIN_VDQF (UNOP, sqrt, 2)
|
||||
BUILTIN_VD_BHSI (BINOP, addp, 0)
|
||||
VAR1 (UNOP, addp, 0, di)
|
||||
BUILTIN_VDQ_BHSI (UNOP, clrsb, 2)
|
||||
BUILTIN_VDQ_BHSI (UNOP, clz, 2)
|
||||
BUILTIN_VS (UNOP, ctz, 2)
|
||||
BUILTIN_VB (UNOP, popcount, 2)
|
||||
|
||||
/* be_checked_get_lane does its own lane swapping, so not a lane index. */
|
||||
BUILTIN_VALL (GETREG, be_checked_get_lane, 0)
|
||||
|
@ -1903,6 +1903,14 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "clrsb<mode>2"
|
||||
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
||||
(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
|
||||
"TARGET_SIMD"
|
||||
"cls\\t%0.<Vtype>, %1.<Vtype>"
|
||||
[(set_attr "type" "neon_cls<q>")]
|
||||
)
|
||||
|
||||
(define_insn "clz<mode>2"
|
||||
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
||||
(clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
|
||||
@ -1911,6 +1919,14 @@
|
||||
[(set_attr "type" "neon_cls<q>")]
|
||||
)
|
||||
|
||||
(define_insn "popcount<mode>2"
|
||||
[(set (match_operand:VB 0 "register_operand" "=w")
|
||||
(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
|
||||
"TARGET_SIMD"
|
||||
"cnt\\t%0.<Vbtype>, %1.<Vbtype>"
|
||||
[(set_attr "type" "neon_cnt<q>")]
|
||||
)
|
||||
|
||||
;; 'across lanes' max and min ops.
|
||||
|
||||
;; Template for outputting a scalar, so we can create __builtins which can be
|
||||
|
@ -5317,138 +5317,6 @@ vaddlvq_u32 (uint32x4_t a)
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vcls_s8 (int8x8_t a)
|
||||
{
|
||||
int8x8_t result;
|
||||
__asm__ ("cls %0.8b,%1.8b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
|
||||
vcls_s16 (int16x4_t a)
|
||||
{
|
||||
int16x4_t result;
|
||||
__asm__ ("cls %0.4h,%1.4h"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
|
||||
vcls_s32 (int32x2_t a)
|
||||
{
|
||||
int32x2_t result;
|
||||
__asm__ ("cls %0.2s,%1.2s"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
|
||||
vclsq_s8 (int8x16_t a)
|
||||
{
|
||||
int8x16_t result;
|
||||
__asm__ ("cls %0.16b,%1.16b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
|
||||
vclsq_s16 (int16x8_t a)
|
||||
{
|
||||
int16x8_t result;
|
||||
__asm__ ("cls %0.8h,%1.8h"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
|
||||
vclsq_s32 (int32x4_t a)
|
||||
{
|
||||
int32x4_t result;
|
||||
__asm__ ("cls %0.4s,%1.4s"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
|
||||
vcnt_p8 (poly8x8_t a)
|
||||
{
|
||||
poly8x8_t result;
|
||||
__asm__ ("cnt %0.8b,%1.8b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vcnt_s8 (int8x8_t a)
|
||||
{
|
||||
int8x8_t result;
|
||||
__asm__ ("cnt %0.8b,%1.8b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||||
vcnt_u8 (uint8x8_t a)
|
||||
{
|
||||
uint8x8_t result;
|
||||
__asm__ ("cnt %0.8b,%1.8b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
|
||||
vcntq_p8 (poly8x16_t a)
|
||||
{
|
||||
poly8x16_t result;
|
||||
__asm__ ("cnt %0.16b,%1.16b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
|
||||
vcntq_s8 (int8x16_t a)
|
||||
{
|
||||
int8x16_t result;
|
||||
__asm__ ("cnt %0.16b,%1.16b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
|
||||
vcntq_u8 (uint8x16_t a)
|
||||
{
|
||||
uint8x16_t result;
|
||||
__asm__ ("cnt %0.16b,%1.16b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
#define vcopyq_lane_f32(a, b, c, d) \
|
||||
__extension__ \
|
||||
({ \
|
||||
@ -14082,6 +13950,44 @@ vcltzd_f64 (float64_t __a)
|
||||
return __a < 0.0 ? -1ll : 0ll;
|
||||
}
|
||||
|
||||
/* vcls. */
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vcls_s8 (int8x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_clrsbv8qi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
|
||||
vcls_s16 (int16x4_t __a)
|
||||
{
|
||||
return __builtin_aarch64_clrsbv4hi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
|
||||
vcls_s32 (int32x2_t __a)
|
||||
{
|
||||
return __builtin_aarch64_clrsbv2si (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
|
||||
vclsq_s8 (int8x16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_clrsbv16qi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
|
||||
vclsq_s16 (int16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_clrsbv8hi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
|
||||
vclsq_s32 (int32x4_t __a)
|
||||
{
|
||||
return __builtin_aarch64_clrsbv4si (__a);
|
||||
}
|
||||
|
||||
/* vclz. */
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
@ -14156,6 +14062,44 @@ vclzq_u32 (uint32x4_t __a)
|
||||
return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
|
||||
}
|
||||
|
||||
/* vcnt. */
|
||||
|
||||
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
|
||||
vcnt_p8 (poly8x8_t __a)
|
||||
{
|
||||
return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vcnt_s8 (int8x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_popcountv8qi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||||
vcnt_u8 (uint8x8_t __a)
|
||||
{
|
||||
return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
|
||||
}
|
||||
|
||||
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
|
||||
vcntq_p8 (poly8x16_t __a)
|
||||
{
|
||||
return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
|
||||
vcntq_s8 (int8x16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_popcountv16qi (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
|
||||
vcntq_u8 (uint8x16_t __a)
|
||||
{
|
||||
return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
|
||||
}
|
||||
|
||||
/* vcvt (double -> float). */
|
||||
|
||||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||||
|
Loading…
x
Reference in New Issue
Block a user