mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-10 13:31:04 +08:00
IBM Z: vec_store_len_r/vec_load_len_r fix
This fixes a problem with the vec_store_len_r intrinsic. The macros mapping the intrinsic to a GCC builtin had the wrong signature. With the patch an immediate length operand of vlrl/vstrl is handled the same way as if it was passed in a register to vlrlr/vstrlr. Values bigger than 15 always load the full vector. If it can be recognized that it is in effect a full vector register load or store it is now implemented with vl/vst instead. gcc/ChangeLog: 2020-04-30 Andreas Krebbel <krebbel@linux.ibm.com> * config/s390/constraints.md ("j>f", "jb4"): New constraints. * config/s390/vecintrin.h (vec_load_len_r, vec_store_len_r): Fix macro definitions. * config/s390/vx-builtins.md ("vlrlrv16qi", "vstrlrv16qi"): Add a separate expander. ("*vlrlrv16qi", "*vstrlrv16qi"): Add alternative for vl/vst. Change constraint for vlrl/vstrl to jb4. gcc/testsuite/ChangeLog: 2020-04-30 Andreas Krebbel <krebbel@linux.ibm.com> * gcc.target/s390/zvector/vec_load_len_r.c: New test. * gcc.target/s390/zvector/vec_store_len_r.c: New test.
This commit is contained in:
parent
2786c0221b
commit
cd5fa73359
@ -1,3 +1,13 @@
|
||||
2020-04-30 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||
|
||||
* config/s390/constraints.md ("j>f", "jb4"): New constraints.
|
||||
* config/s390/vecintrin.h (vec_load_len_r, vec_store_len_r): Fix
|
||||
macro definitions.
|
||||
* config/s390/vx-builtins.md ("vlrlrv16qi", "vstrlrv16qi"): Add a
|
||||
separate expander.
|
||||
("*vlrlrv16qi", "*vstrlrv16qi"): Add alternative for vl/vst.
|
||||
Change constraint for vlrl/vstrl to jb4.
|
||||
|
||||
2020-04-30 Stefan Schulze Frielinghaus <stefansf@linux.ibm.com>
|
||||
|
||||
* var-tracking.c (vt_initialize): Move variables pre and post
|
||||
|
@ -38,6 +38,8 @@
|
||||
;; matching K constraint
|
||||
;; jm6: An integer operand with the lowest order 6 bits all ones.
|
||||
;; jdd: A constant operand that fits into the data section.
|
||||
;; j>f: An integer operand whose lower 32 bits are greater than or equal to 15
|
||||
;; jb4: An unsigned constant 4 bit operand.
|
||||
;; t -- Access registers 36 and 37.
|
||||
;; v -- Vector registers v0-v31.
|
||||
;; C -- A signed 8-bit constant (-128..127)
|
||||
@ -425,7 +427,7 @@
|
||||
|
||||
|
||||
;;
|
||||
;; Vector constraints follow.
|
||||
;; Vector and scalar constraints for constant values follow.
|
||||
;;
|
||||
|
||||
(define_constraint "j00"
|
||||
@ -462,6 +464,16 @@
|
||||
"@internal An integer operand with the lowest order 6 bits all ones."
|
||||
(match_operand 0 "const_int_6bitset_operand"))
|
||||
|
||||
(define_constraint "j>f"
|
||||
"@internal An integer operand whose lower 32 bits are greater than or equal to 15."
|
||||
(and (match_code "const_int")
|
||||
(match_test "(unsigned int)(ival & 0xffffffff) >= 15")))
|
||||
|
||||
(define_constraint "jb4"
|
||||
"@internal Constant unsigned integer 4 bit value"
|
||||
(and (match_code "const_int")
|
||||
(match_test "ival >= 0 && ival <= 15")))
|
||||
|
||||
;;
|
||||
;; Memory constraints follow.
|
||||
;;
|
||||
|
@ -111,8 +111,10 @@ __lcbb(const void *ptr, int bndry)
|
||||
#define vec_round(X) __builtin_s390_vfi((X), 4, 4)
|
||||
#define vec_doublee(X) __builtin_s390_vfll((X))
|
||||
#define vec_floate(X) __builtin_s390_vflr((X), 0, 0)
|
||||
#define vec_load_len_r(X,Y) __builtin_s390_vlrl((Y),(X))
|
||||
#define vec_store_len_r(X,Y) __builtin_s390_vstrl((Y),(X))
|
||||
#define vec_load_len_r(X,L) \
|
||||
(__vector unsigned char)__builtin_s390_vlrlr((L),(X))
|
||||
#define vec_store_len_r(X,Y,L) \
|
||||
__builtin_s390_vstrlr((__vector signed char)(X),(L),(Y))
|
||||
|
||||
#define vec_all_nan(a) \
|
||||
__extension__ ({ \
|
||||
|
@ -202,16 +202,34 @@
|
||||
"vlbb\t%v0,%1,%2"
|
||||
[(set_attr "op_type" "VRX")])
|
||||
|
||||
(define_insn "vlrlrv16qi"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=v,v")
|
||||
(unspec:V16QI [(match_operand:BLK 2 "memory_operand" "Q,Q")
|
||||
(match_operand:SI 1 "nonmemory_operand" "d,C")]
|
||||
; Vector load rightmost with length
|
||||
|
||||
(define_expand "vlrlrv16qi"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "")
|
||||
(unspec:V16QI [(match_operand:BLK 2 "memory_operand" "")
|
||||
(match_operand:SI 1 "nonmemory_operand" "")]
|
||||
UNSPEC_VEC_LOAD_LEN_R))]
|
||||
"TARGET_VXE"
|
||||
{
|
||||
/* vlrlr sets all length values beyond 15 to 15. Emulate the same
|
||||
behavior for immediate length operands. vlrl would trigger a
|
||||
SIGILL for too large immediate operands. */
|
||||
if (CONST_INT_P (operands[1])
|
||||
&& (UINTVAL (operands[1]) & 0xffffffff) > 15)
|
||||
operands[1] = GEN_INT (15);
|
||||
})
|
||||
|
||||
(define_insn "*vlrlrv16qi"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=v, v, v")
|
||||
(unspec:V16QI [(match_operand:BLK 2 "memory_operand" "Q, R, Q")
|
||||
(match_operand:SI 1 "nonmemory_operand" "d,j>f,jb4")]
|
||||
UNSPEC_VEC_LOAD_LEN_R))]
|
||||
"TARGET_VXE"
|
||||
"@
|
||||
vlrlr\t%v0,%1,%2
|
||||
vl\t%v0,%2%A2
|
||||
vlrl\t%v0,%2,%1"
|
||||
[(set_attr "op_type" "VRS,VSI")])
|
||||
[(set_attr "op_type" "VRS,VRX,VSI")])
|
||||
|
||||
|
||||
; FIXME: The following two patterns might be using vec_merge. But what is
|
||||
@ -545,16 +563,32 @@
|
||||
|
||||
; Vector store rightmost with length
|
||||
|
||||
(define_insn "vstrlrv16qi"
|
||||
[(set (match_operand:BLK 2 "memory_operand" "=Q,Q")
|
||||
(unspec:BLK [(match_operand:V16QI 0 "register_operand" "v,v")
|
||||
(match_operand:SI 1 "nonmemory_operand" "d,C")]
|
||||
(define_expand "vstrlrv16qi"
|
||||
[(set (match_operand:BLK 2 "memory_operand" "")
|
||||
(unspec:BLK [(match_operand:V16QI 0 "register_operand" "")
|
||||
(match_operand:SI 1 "nonmemory_operand" "")]
|
||||
UNSPEC_VEC_STORE_LEN_R))]
|
||||
"TARGET_VXE"
|
||||
{
|
||||
/* vstrlr sets all length values beyond 15 to 15. Emulate the same
|
||||
behavior for immediate length operands. vstrl would trigger a
|
||||
SIGILL for too large immediate operands. */
|
||||
if (CONST_INT_P (operands[1])
|
||||
&& (UINTVAL (operands[1]) & 0xffffffff) > 15)
|
||||
operands[1] = GEN_INT (15);
|
||||
})
|
||||
|
||||
(define_insn "*vstrlrv16qi"
|
||||
[(set (match_operand:BLK 2 "memory_operand" "=Q, R, Q")
|
||||
(unspec:BLK [(match_operand:V16QI 0 "register_operand" "v, v, v")
|
||||
(match_operand:SI 1 "nonmemory_operand" "d,j>f,jb4")]
|
||||
UNSPEC_VEC_STORE_LEN_R))]
|
||||
"TARGET_VXE"
|
||||
"@
|
||||
vstrlr\t%v0,%2,%1
|
||||
vstrl\t%v0,%1,%2"
|
||||
[(set_attr "op_type" "VRS,VSI")])
|
||||
vstrlr\t%v0,%1,%2
|
||||
vst\t%v0,%2%A2
|
||||
vstrl\t%v0,%2,%1"
|
||||
[(set_attr "op_type" "VRS,VRX,VSI")])
|
||||
|
||||
|
||||
|
||||
|
@ -1,3 +1,8 @@
|
||||
2020-04-30 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||
|
||||
* gcc.target/s390/zvector/vec_load_len_r.c: New test.
|
||||
* gcc.target/s390/zvector/vec_store_len_r.c: New test.
|
||||
|
||||
2020-04-29 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/94704
|
||||
|
94
gcc/testsuite/gcc.target/s390/zvector/vec_load_len_r.c
Normal file
94
gcc/testsuite/gcc.target/s390/zvector/vec_load_len_r.c
Normal file
@ -0,0 +1,94 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target s390_vxe2 } */
|
||||
/* { dg-options "-O3 -mzarch -march=arch13 -mzvector --save-temps" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <vecintrin.h>
|
||||
|
||||
typedef vector unsigned char uv16qi;
|
||||
|
||||
const unsigned char test_vec[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 };
|
||||
|
||||
#define NUM_TEST_LENGTHS 3
|
||||
|
||||
unsigned int test_len[NUM_TEST_LENGTHS] = { 0, 12, 18 };
|
||||
|
||||
|
||||
/* Proceeding from left to right, the specified number (LEN+1) of
|
||||
bytes from SOURCE are stored right-justified in TARGET. */
|
||||
void __attribute__((noinline, noclone, target ("arch=zEC12")))
|
||||
emul (const unsigned char *source, unsigned char *target, unsigned int len)
|
||||
{
|
||||
int start = 15 - len;
|
||||
if (start < 0)
|
||||
start = 0;
|
||||
for (int s = 0, t = start; t < 16; s++, t++)
|
||||
target[t] = source[s];
|
||||
}
|
||||
|
||||
/* Wrapper around vec_load_len_r that receives the length LEN as a
   plain function argument and returns the loaded vector.
   It is marked noinline/noclone, presumably so the compiler cannot see
   the constant lengths passed by main -- confirm.  The dg-final comment
   at the end of this file attributes one of the two expected "vlrlr"
   matches to this function.  */
uv16qi __attribute__((noinline, noclone))
|
||||
vec_load_len_r_reg (const unsigned char *s, unsigned int len)
|
||||
{
|
||||
return vec_load_len_r (s, len);
|
||||
}
|
||||
|
||||
/* Wrapper around vec_load_len_r that reads the length through the
   pointer LEN and writes the loaded vector through T.
   It is marked noinline/noclone, presumably so the length stays a
   runtime value -- confirm.  The dg-final comment at the end of this
   file attributes one of the two expected "vlrlr" matches to this
   function.  */
void __attribute__((noinline, noclone))
|
||||
vec_load_len_r_mem (const unsigned char *s, uv16qi *t, unsigned int *len)
|
||||
{
|
||||
*t = vec_load_len_r (s, *len);
|
||||
}
|
||||
|
||||
/* Defines a static inline function vec_load_len_r_const<CONST> that
   calls vec_load_len_r on S with the literal CONST as the length, so
   the length reaches the intrinsic as a compile-time constant.  CONST
   is pasted into the function name and must therefore be a single
   preprocessing token such as 0, 12 or 18.  */
#define GEN_CONST_FUNC(CONST) \
|
||||
static uv16qi inline \
|
||||
vec_load_len_r_const##CONST (const unsigned char *s) \
|
||||
{ \
|
||||
return vec_load_len_r (s, CONST); \
|
||||
}
|
||||
|
||||
/* Expands to the check for one constant length: zero exp_result, fill
   it with emul, load via vec_load_len_r_const<CONST> and abort when the
   16 bytes differ.  The expansion uses the locals exp_result and result
   of the enclosing function and ends in a complete statement, so it is
   invoked without a trailing semicolon.
   NOTE(review): the first assignment to result is overwritten by the
   assignment that follows it.  */
#define GEN_CONST_TEST(CONST) \
|
||||
memset (exp_result, 0, 16); \
|
||||
emul (test_vec, exp_result, CONST); \
|
||||
result = (uv16qi) { 0 }; \
|
||||
result = vec_load_len_r_const##CONST (test_vec); \
|
||||
if (memcmp ((char*)&result, exp_result, 16) != 0) \
|
||||
__builtin_abort ();
|
||||
|
||||
GEN_CONST_FUNC(0)
|
||||
GEN_CONST_FUNC(12)
|
||||
GEN_CONST_FUNC(18)
|
||||
|
||||
/* Test driver.  For every entry of test_len the expected bytes are
   built with emul and compared, via memcmp, against the vectors
   produced by vec_load_len_r_reg and vec_load_len_r_mem.  Afterwards
   the GEN_CONST_TEST expansions repeat the check for the literal
   lengths 0, 12 and 18.  Any mismatch calls __builtin_abort.  */
int
|
||||
main ()
|
||||
{
|
||||
unsigned char exp_result[16];
|
||||
uv16qi result;
|
||||
|
||||
for (int i = 0; i < NUM_TEST_LENGTHS; i++)
|
||||
{
|
||||
/* Expected value: start from all zeros, let emul fill the loaded bytes.  */
memset (exp_result, 0, 16);
|
||||
|
||||
emul (test_vec, exp_result, test_len[i]);
|
||||
|
||||
/* NOTE(review): this zeroing is overwritten by the next statement.  */
result = (uv16qi) { 0 };
|
||||
result = vec_load_len_r_reg (test_vec, test_len[i]);
|
||||
if (memcmp ((char*)&result, exp_result, 16) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
/* Reset result before vec_load_len_r_mem writes it through the pointer.  */
result = (uv16qi) { 0 };
|
||||
vec_load_len_r_mem (test_vec, &result, &test_len[i]);
|
||||
if (memcmp ((char*)&result, exp_result, 16) != 0)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
/* Each expansion ends in a complete statement; no semicolons needed.  */
GEN_CONST_TEST(0)
|
||||
GEN_CONST_TEST(12)
|
||||
GEN_CONST_TEST(18)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* vec_load_len_r_reg and vec_load_len_r_mem */
|
||||
/* { dg-final { scan-assembler-times "vlrlr\t" 2 } } */
|
||||
|
||||
/* For the 2 constants in range (0 and 12).  The third (18, i.e. > 15) should be implemented with vl.  */
|
||||
/* { dg-final { scan-assembler-times "vlrl\t" 2 } } */
|
94
gcc/testsuite/gcc.target/s390/zvector/vec_store_len_r.c
Normal file
94
gcc/testsuite/gcc.target/s390/zvector/vec_store_len_r.c
Normal file
@ -0,0 +1,94 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target s390_vxe2 } */
|
||||
/* { dg-options "-O3 -mzarch -march=arch13 -mzvector --save-temps" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <vecintrin.h>
|
||||
|
||||
typedef vector unsigned char uv16qi;
|
||||
|
||||
uv16qi test_vec = (uv16qi){ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 };
|
||||
|
||||
#define NUM_TEST_LENGTHS 3
|
||||
|
||||
unsigned int test_len[NUM_TEST_LENGTHS] = { 0, 12, 18 };
|
||||
|
||||
|
||||
/* Proceeding from left to right, the specified number (LEN+1) of
|
||||
rightmost bytes from SOURCE are stored in TARGET. */
|
||||
void __attribute__((noinline, noclone, target ("arch=zEC12")))
|
||||
emul (unsigned char *source, unsigned char *target, unsigned int len)
|
||||
{
|
||||
int start = 15 - len;
|
||||
if (start < 0)
|
||||
start = 0;
|
||||
for (int s = start, t = 0; s < 16; s++, t++)
|
||||
target[t] = source[s];
|
||||
}
|
||||
|
||||
/* Wrapper around vec_store_len_r that receives the vector S by value
   and the length LEN as a plain function argument, storing to T.
   It is marked noinline/noclone, presumably so the compiler cannot see
   the constant lengths passed by main -- confirm.  The dg-final comment
   at the end of this file attributes one of the two expected "vstrlr"
   matches to this function.  */
void __attribute__((noinline, noclone))
|
||||
vec_store_len_r_reg (uv16qi s, unsigned char *t, unsigned int len)
|
||||
{
|
||||
vec_store_len_r (s, t, len);
|
||||
}
|
||||
|
||||
/* Wrapper around vec_store_len_r that reads both the vector and the
   length through pointers (S and LEN) and stores to T.
   It is marked noinline/noclone, presumably so the length stays a
   runtime value -- confirm.  The dg-final comment at the end of this
   file attributes one of the two expected "vstrlr" matches to this
   function.  */
void __attribute__((noinline, noclone))
|
||||
vec_store_len_r_mem (uv16qi *s, unsigned char *t, unsigned int *len)
|
||||
{
|
||||
vec_store_len_r (*s, t, *len);
|
||||
}
|
||||
|
||||
/* Defines a static inline function vec_store_len_r_const<CONST> that
   calls vec_store_len_r on S and T with the literal CONST as the
   length, so the length reaches the intrinsic as a compile-time
   constant.  CONST is pasted into the function name and must therefore
   be a single preprocessing token such as 0, 12 or 18.  */
#define GEN_CONST_FUNC(CONST) \
|
||||
static void inline \
|
||||
vec_store_len_r_const##CONST (uv16qi s, unsigned char *t) \
|
||||
{ \
|
||||
vec_store_len_r (s, t, CONST); \
|
||||
}
|
||||
|
||||
/* Expands to the check for one constant length: zero exp_result, fill
   it with emul, zero result, store into it via
   vec_store_len_r_const<CONST> and abort when the 16 bytes differ.
   The expansion uses the locals exp_result and result of the enclosing
   function and ends in a complete statement, so it is invoked without
   a trailing semicolon.  */
#define GEN_CONST_TEST(CONST) \
|
||||
memset (exp_result, 0, 16); \
|
||||
emul ((unsigned char*)&test_vec, exp_result, CONST); \
|
||||
memset (result, 0, 16); \
|
||||
vec_store_len_r_const##CONST (test_vec, result); \
|
||||
if (memcmp (result, exp_result, 16) != 0) \
|
||||
__builtin_abort ();
|
||||
|
||||
GEN_CONST_FUNC(0)
|
||||
GEN_CONST_FUNC(12)
|
||||
GEN_CONST_FUNC(18)
|
||||
|
||||
/* Test driver.  For every entry of test_len the expected bytes are
   built with emul and compared, via memcmp, against the buffers
   written by vec_store_len_r_reg and vec_store_len_r_mem.  Afterwards
   the GEN_CONST_TEST expansions repeat the check for the literal
   lengths 0, 12 and 18.  Any mismatch calls __builtin_abort.  */
int
|
||||
main ()
|
||||
{
|
||||
unsigned char exp_result[16];
|
||||
unsigned char result[16];
|
||||
|
||||
for (int i = 0; i < NUM_TEST_LENGTHS; i++)
|
||||
{
|
||||
/* Expected value: start from all zeros, let emul fill the stored bytes.  */
memset (exp_result, 0, 16);
|
||||
|
||||
emul ((unsigned char*)&test_vec, exp_result, test_len[i]);
|
||||
|
||||
/* Zero result first: the comparison covers all 16 bytes, including
   those the store does not write.  */
memset (result, 0, 16);
|
||||
vec_store_len_r_reg (test_vec, result, test_len[i]);
|
||||
if (memcmp (result, exp_result, 16) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
memset (result, 0, 16);
|
||||
vec_store_len_r_mem (&test_vec, result, &test_len[i]);
|
||||
if (memcmp (result, exp_result, 16) != 0)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
/* Each expansion ends in a complete statement; no semicolons needed.  */
GEN_CONST_TEST(0)
|
||||
GEN_CONST_TEST(12)
|
||||
GEN_CONST_TEST(18)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* vec_store_len_r_reg and vec_store_len_r_mem */
|
||||
/* { dg-final { scan-assembler-times "vstrlr\t" 2 } } */
|
||||
|
||||
/* For the 2 constants in range (0 and 12).  The third (18, i.e. > 15) should be implemented with vst.  */
|
||||
/* { dg-final { scan-assembler-times "vstrl\t" 2 } } */
|
Loading…
x
Reference in New Issue
Block a user