aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New patterns.

* config/aarch64/aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New
        patterns.
        * config/aarch64/aarch64-simd-builtins.def (clrsb, popcount): New
        builtins.
        * config/aarch64/arm_neon.h (vcls_s8, vcls_s16, vcls_s32, vclsq_s8,
        vclsq_s16, vclsq_s32, vcnt_p8, vcnt_s8, vcnt_u8, vcntq_p8, vcntq_s8,
        vcntq_u8): Rewrite using builtin functions.

Co-Authored-By: Shanyao Chen <chenshanyao@huawei.com>

From-SVN: r218464
This commit is contained in:
Felix Yang 2014-12-07 15:01:23 +00:00 committed by Fei Yang
parent 07bdf21b53
commit a5e69cad62
4 changed files with 105 additions and 132 deletions

View File

@ -1,3 +1,14 @@
2014-12-07 Felix Yang <felix.yang@huawei.com>
Shanyao Chen <chenshanyao@huawei.com>
* config/aarch64/aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New
patterns.
* config/aarch64/aarch64-simd-builtins.def (clrsb, popcount): New
builtins.
* config/aarch64/arm_neon.h (vcls_s8, vcls_s16, vcls_s32, vclsq_s8,
vclsq_s16, vclsq_s32, vcnt_p8, vcnt_s8, vcnt_u8, vcntq_p8, vcntq_s8,
vcntq_u8): Rewrite using builtin functions.
2014-12-07 Jan Hubicka <hubicka@ucw.cz>
* symtab.c (symtab_node::equal_address_to): New function.

View File

@ -44,8 +44,10 @@
BUILTIN_VDQF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di)
/* Unary clrsb builtins over the VDQ_BHSI modes; arm_neon.h calls them as
   __builtin_aarch64_clrsb<mode> from the vcls/vclsq intrinsics.  */
BUILTIN_VDQ_BHSI (UNOP, clrsb, 2)
BUILTIN_VDQ_BHSI (UNOP, clz, 2)
BUILTIN_VS (UNOP, ctz, 2)
/* Unary popcount builtins over the VB modes; arm_neon.h calls them as
   __builtin_aarch64_popcount<mode> from the vcnt/vcntq intrinsics.  */
BUILTIN_VB (UNOP, popcount, 2)
/* be_checked_get_lane does its own lane swapping, so not a lane index. */
BUILTIN_VALL (GETREG, be_checked_get_lane, 0)

View File

@ -1903,6 +1903,14 @@
DONE;
})
;; Vector clrsb for every mode in the VDQ_BHSI iterator.  The pattern is
;; only available under TARGET_SIMD, keeps source and destination in
;; registers matching the "w" constraint, and is output as a single CLS
;; instruction using the <Vtype> arrangement of the mode.
(define_insn "clrsb<mode>2"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
"TARGET_SIMD"
"cls\\t%0.<Vtype>, %1.<Vtype>"
;; Scheduling type; <q> distinguishes the mode variants.
[(set_attr "type" "neon_cls<q>")]
)
(define_insn "clz<mode>2"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
@ -1911,6 +1919,14 @@
[(set_attr "type" "neon_cls<q>")]
)
;; Vector popcount for every mode in the VB iterator.  The pattern is only
;; available under TARGET_SIMD, keeps source and destination in registers
;; matching the "w" constraint, and is output as a single CNT instruction
;; using the <Vbtype> arrangement of the mode.
(define_insn "popcount<mode>2"
[(set (match_operand:VB 0 "register_operand" "=w")
(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
"TARGET_SIMD"
"cnt\\t%0.<Vbtype>, %1.<Vbtype>"
;; Scheduling type; <q> distinguishes the mode variants.
[(set_attr "type" "neon_cnt<q>")]
)
;; 'across lanes' max and min ops.
;; Template for outputting a scalar, so we can create __builtins which can be

View File

@ -5317,138 +5317,6 @@ vaddlvq_u32 (uint32x4_t a)
return result;
}
/* vcls_s8: run the AdvSIMD CLS instruction over the 8b arrangement of __a
   and return the resulting int8x8_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcls_s8 (int8x8_t __a)
{
  int8x8_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cls %0.8b,%1.8b"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vcls_s16: run the AdvSIMD CLS instruction over the 4h arrangement of __a
   and return the resulting int16x4_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcls_s16 (int16x4_t __a)
{
  int16x4_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cls %0.4h,%1.4h"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vcls_s32: run the AdvSIMD CLS instruction over the 2s arrangement of __a
   and return the resulting int32x2_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcls_s32 (int32x2_t __a)
{
  int32x2_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cls %0.2s,%1.2s"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vclsq_s8: run the AdvSIMD CLS instruction over the 16b arrangement of
   __a and return the resulting int8x16_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclsq_s8 (int8x16_t __a)
{
  int8x16_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cls %0.16b,%1.16b"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vclsq_s16: run the AdvSIMD CLS instruction over the 8h arrangement of
   __a and return the resulting int16x8_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclsq_s16 (int16x8_t __a)
{
  int16x8_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cls %0.8h,%1.8h"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vclsq_s32: run the AdvSIMD CLS instruction over the 4s arrangement of
   __a and return the resulting int32x4_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclsq_s32 (int32x4_t __a)
{
  int32x4_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cls %0.4s,%1.4s"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vcnt_p8: run the AdvSIMD CNT instruction over the 8b arrangement of __a
   and return the resulting poly8x8_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcnt_p8 (poly8x8_t __a)
{
  poly8x8_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vcnt_s8: run the AdvSIMD CNT instruction over the 8b arrangement of __a
   and return the resulting int8x8_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcnt_s8 (int8x8_t __a)
{
  int8x8_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vcnt_u8: run the AdvSIMD CNT instruction over the 8b arrangement of __a
   and return the resulting uint8x8_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcnt_u8 (uint8x8_t __a)
{
  uint8x8_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vcntq_p8: run the AdvSIMD CNT instruction over the 16b arrangement of
   __a and return the resulting poly8x16_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcntq_p8 (poly8x16_t __a)
{
  poly8x16_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vcntq_s8: run the AdvSIMD CNT instruction over the 16b arrangement of
   __a and return the resulting int8x16_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcntq_s8 (int8x16_t __a)
{
  int8x16_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
/* vcntq_u8: run the AdvSIMD CNT instruction over the 16b arrangement of
   __a and return the resulting uint8x16_t vector.
   The parameter and the local use reserved "__" names so that a user macro
   defined before this header is included cannot rewrite them.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcntq_u8 (uint8x16_t __a)
{
  uint8x16_t __result;
  /* Input and output both use the "w" register constraint.  */
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(__result)
           : "w"(__a)
           : /* No clobbers */);
  return __result;
}
#define vcopyq_lane_f32(a, b, c, d) \
__extension__ \
({ \
@ -14082,6 +13950,44 @@ vcltzd_f64 (float64_t __a)
return __a < 0.0 ? -1ll : 0ll;
}
/* vcls. */
/* Apply the V8QI clrsb builtin to __a and hand back its int8x8_t
   result.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcls_s8 (int8x8_t __a)
{
  int8x8_t __res = __builtin_aarch64_clrsbv8qi (__a);
  return __res;
}
/* Apply the V4HI clrsb builtin to __a and hand back its int16x4_t
   result.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcls_s16 (int16x4_t __a)
{
  int16x4_t __res = __builtin_aarch64_clrsbv4hi (__a);
  return __res;
}
/* Apply the V2SI clrsb builtin to __a and hand back its int32x2_t
   result.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcls_s32 (int32x2_t __a)
{
  int32x2_t __res = __builtin_aarch64_clrsbv2si (__a);
  return __res;
}
/* Apply the V16QI clrsb builtin to __a and hand back its int8x16_t
   result.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclsq_s8 (int8x16_t __a)
{
  int8x16_t __res = __builtin_aarch64_clrsbv16qi (__a);
  return __res;
}
/* Apply the V8HI clrsb builtin to __a and hand back its int16x8_t
   result.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclsq_s16 (int16x8_t __a)
{
  int16x8_t __res = __builtin_aarch64_clrsbv8hi (__a);
  return __res;
}
/* Apply the V4SI clrsb builtin to __a and hand back its int32x4_t
   result.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclsq_s32 (int32x4_t __a)
{
  int32x4_t __res = __builtin_aarch64_clrsbv4si (__a);
  return __res;
}
/* vclz. */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@ -14156,6 +14062,44 @@ vclzq_u32 (uint32x4_t __a)
return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
}
/* vcnt. */
/* Popcount of a poly8x8_t vector.  The V8QI popcount builtin works on
   int8x8_t, so __a is cast on the way in and the result is cast back on
   the way out.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcnt_p8 (poly8x8_t __a)
{
  int8x8_t __src = (int8x8_t) __a;
  int8x8_t __cnt = __builtin_aarch64_popcountv8qi (__src);
  return (poly8x8_t) __cnt;
}
/* Popcount of an int8x8_t vector through the V8QI popcount builtin; no
   casts are needed for the signed variant.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcnt_s8 (int8x8_t __a)
{
  int8x8_t __cnt = __builtin_aarch64_popcountv8qi (__a);
  return __cnt;
}
/* Popcount of a uint8x8_t vector.  The V8QI popcount builtin works on
   int8x8_t, so __a is cast on the way in and the result is cast back on
   the way out.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcnt_u8 (uint8x8_t __a)
{
  int8x8_t __src = (int8x8_t) __a;
  int8x8_t __cnt = __builtin_aarch64_popcountv8qi (__src);
  return (uint8x8_t) __cnt;
}
/* Popcount of a poly8x16_t vector.  The V16QI popcount builtin works on
   int8x16_t, so __a is cast on the way in and the result is cast back on
   the way out.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcntq_p8 (poly8x16_t __a)
{
  int8x16_t __src = (int8x16_t) __a;
  int8x16_t __cnt = __builtin_aarch64_popcountv16qi (__src);
  return (poly8x16_t) __cnt;
}
/* Popcount of an int8x16_t vector through the V16QI popcount builtin; no
   casts are needed for the signed variant.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcntq_s8 (int8x16_t __a)
{
  int8x16_t __cnt = __builtin_aarch64_popcountv16qi (__a);
  return __cnt;
}
/* Popcount of a uint8x16_t vector.  The V16QI popcount builtin works on
   int8x16_t, so __a is cast on the way in and the result is cast back on
   the way out.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcntq_u8 (uint8x16_t __a)
{
  int8x16_t __src = (int8x16_t) __a;
  int8x16_t __cnt = __builtin_aarch64_popcountv16qi (__src);
  return (uint8x16_t) __cnt;
}
/* vcvt (double -> float). */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))