diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b8ee4ed4bdb0..9e2b2c15dcf4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2019-08-14 Richard Sandiford + + * config/aarch64/aarch64-protos.h (aarch64_ptrue_all): Declare. + * config/aarch64/aarch64.c (aarch64_ptrue_all): New function. + * config/aarch64/aarch64.md (UNSPEC_PTEST_PTRUE): Delete. + (UNSPEC_PTEST): New unspec. + (SVE_MAYBE_NOT_PTRUE, SVE_KNOWN_PTRUE): New constants. + * config/aarch64/iterators.md (data_bytes): New mode attribute. + * config/aarch64/predicates.md (aarch64_sve_ptrue_flag): New predicate. + * config/aarch64/aarch64-sve.md: Add a new section describing the + handling of UNSPEC_PTEST. + (pred_<optab><mode>3): Rename to... + (@aarch64_pred_<optab><mode>_z): ...this. + (ptest_ptrue<mode>): Replace with... + (aarch64_ptest<mode>): ...this new pattern. + (cbranch<mode>4): Update after above changes. + (*<optab><mode>3_cc): Use UNSPEC_PTEST instead of + UNSPEC_PTEST_PTRUE. + (*cmp<cmp_op><mode>_cc): Likewise. + (*cmp<cmp_op><mode>_ptest): Likewise. + (*while_ult<GPI:mode><PRED_ALL:mode>_cc): Likewise. + 2019-08-14 Xiong Hu Luo PR lto/91287 diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 86d53c5ce1ea..22a5f9433db8 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -550,6 +550,7 @@ const char * aarch64_output_probe_stack_range (rtx, rtx); const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); void aarch64_err_no_fpadvsimd (machine_mode); void aarch64_expand_epilogue (bool); +rtx aarch64_ptrue_all (unsigned int); void aarch64_expand_mov_immediate (rtx, rtx); rtx aarch64_ptrue_reg (machine_mode); rtx aarch64_pfalse_reg (machine_mode); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 53d93a367dbd..f94ad5cdbf47 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -23,6 +23,7 @@ ;; ;; == General notes ;; ---- Note on the handling of big-endian SVE +;; ---- Description of UNSPEC_PTEST ;; ;; == Moves ;; ---- Moves of 
single vectors @@ -166,7 +167,67 @@ ;; the order of the bytes within the elements is different. We instead ;; access spill slots via LD1 and ST1, using secondary reloads to ;; reserve a predicate register. - +;; +;; ------------------------------------------------------------------------- +;; ---- Description of UNSPEC_PTEST +;; ------------------------------------------------------------------------- +;; +;; SVE provides a PTEST instruction for testing the active lanes of a +;; predicate and setting the flags based on the result. The associated +;; condition code tests are: +;; +;; - any (= ne): at least one active bit is set +;; - none (= eq): all active bits are clear (*) +;; - first (= mi): the first active bit is set +;; - nfrst (= pl): the first active bit is clear (*) +;; - last (= cc): the last active bit is set +;; - nlast (= cs): the last active bit is clear (*) +;; +;; where the conditions marked (*) are also true when there are no active +;; lanes (i.e. when the governing predicate is a PFALSE). The flags results +;; of a PTEST use the condition code mode CC_NZC. +;; +;; PTEST is always a .B operation (i.e. it always operates on VNx16BI). +;; This means that for other predicate modes, we need a governing predicate +;; in which all bits are defined. +;; +;; For example, most predicated .H operations ignore the odd bits of the +;; governing predicate, so that an active lane is represented by the +;; bits "1x" and an inactive lane by the bits "0x", where "x" can be +;; any value. To test a .H predicate, we instead need "10" and "00" +;; respectively, so that the condition only tests the even bits of the +;; predicate. +;; +;; Several instructions set the flags as a side-effect, in the same way +;; that a separate PTEST would. It's important for code quality that we +;; use these flags results as often as possible, particularly in the case +;; of WHILE* and RDFFR. 
+;; +;; Also, some of the instructions that set the flags are unpredicated +;; and instead implicitly test all .B, .H, .S or .D elements, as though +;; they were predicated on a PTRUE of that size. For example, a .S +;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE +;; would. +;; +;; We therefore need to represent PTEST operations in a way that +;; makes it easy to combine them with both predicated and unpredicated +;; operations, while using a VNx16BI governing predicate for all +;; predicate modes. We do this using: +;; +;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST) +;; +;; where: +;; +;; - GP is the real VNx16BI governing predicate +;; +;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting +;; GP to CAST_GP are guaranteed to be clear in GP. +;; +;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value +;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and +;; SVE_MAYBE_NOT_PTRUE otherwise. +;; +;; - OP is the predicate we want to test, of the same mode as CAST_GP. ;; ========================================================================= ;; == Moves @@ -2343,7 +2404,7 @@ ) ;; Predicated predicate AND, EOR and ORR. -(define_insn "pred_<optab><mode>3" +(define_insn "@aarch64_pred_<optab><mode>_z" [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") (and:PRED_ALL (LOGICAL:PRED_ALL @@ -2355,23 +2416,23 @@ ) ;; Perform a logical operation on operands 2 and 3, using operand 1 as -;; the GP (which is known to be a PTRUE). Store the result in operand 0 -;; and set the flags in the same way as for PTEST. The (and ...) in the -;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested -;; value is structurally equivalent to rhs of the second set. +;; the GP. Store the result in operand 0 and set the flags in the same +;; way as for PTEST. 
(define_insn "*<optab><mode>3_cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC - [(match_operand:PRED_ALL 1 "register_operand" "Upa") + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") (and:PRED_ALL (LOGICAL:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa") (match_operand:PRED_ALL 3 "register_operand" "Upa")) - (match_dup 1))] - UNSPEC_PTEST_PTRUE)) + (match_dup 4))] + UNSPEC_PTEST)) (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) - (match_dup 1)))] + (match_dup 4)))] "TARGET_SVE" "<logical>s\t%0.b, %1/z, %2.b, %3.b" ) @@ -2836,17 +2897,19 @@ (define_insn "*cmp<cmp_op><mode>_cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC - [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec:<VPRED> - [(match_dup 1) + [(match_dup 4) (SVE_INT_CMP:<VPRED> (match_operand:SVE_I 2 "register_operand" "w, w") (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] UNSPEC_MERGE_PTRUE)] - UNSPEC_PTEST_PTRUE)) + UNSPEC_PTEST)) (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") (unspec:<VPRED> - [(match_dup 1) + [(match_dup 4) (SVE_INT_CMP:<VPRED> (match_dup 2) (match_dup 3))] @@ -2862,14 +2925,16 @@ (define_insn "*cmp<cmp_op><mode>_ptest" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC - [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec:<VPRED> - [(match_dup 1) + [(match_dup 4) (SVE_INT_CMP:<VPRED> (match_operand:SVE_I 2 "register_operand" "w, w") (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] UNSPEC_MERGE_PTRUE)] - UNSPEC_PTEST_PTRUE)) + UNSPEC_PTEST)) (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] "TARGET_SVE" "@ @@ -2940,28 +3005,31 @@ ) ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP. 
-;; Handle the case in which both results are useful. The GP operand -;; to the PTEST isn't needed, so we allow it to be anything. +;; Handle the case in which both results are useful. The GP operands +;; to the PTEST aren't needed, so we allow them to be anything. (define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC - [(match_operand:PRED_ALL 1) + [(match_operand 3) + (match_operand 4) + (const_int SVE_KNOWN_PTRUE) (unspec:PRED_ALL - [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") - (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] + [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] UNSPEC_WHILE_LO)] - UNSPEC_PTEST_PTRUE)) + UNSPEC_PTEST)) (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (unspec:PRED_ALL [(match_dup 2) - (match_dup 3)] + (unspec:PRED_ALL [(match_dup 1) + (match_dup 2)] UNSPEC_WHILE_LO))] "TARGET_SVE" - "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3" + "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" ;; Force the compiler to drop the unused predicate operand, so that we ;; don't have an unnecessary PTRUE. 
- "&& !CONSTANT_P (operands[1])" + "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" { - operands[1] = CONSTM1_RTX (<MODE>mode); + operands[3] = CONSTM1_RTX (VNx16BImode); + operands[4] = CONSTM1_RTX (<MODE>mode); } ) @@ -3133,36 +3201,34 @@ (pc)))] "" { - rtx ptrue = aarch64_ptrue_reg (<MODE>mode); + rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>)); + rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue); + rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); rtx pred; if (operands[2] == CONST0_RTX (<MODE>mode)) pred = operands[1]; else { pred = gen_reg_rtx (<MODE>mode); - emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1], - operands[2])); + emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1], + operands[2])); } - emit_insn (gen_ptest_ptrue<mode> (ptrue, pred)); + emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred)); operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); operands[2] = const0_rtx; } ) -;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE. -;; -;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP -;; is a PTRUE even if the optimizers haven't yet been able to propagate -;; the constant. We would use a separate unspec code for PTESTs involving -;; GPs that might not be PTRUEs. -(define_insn "ptest_ptrue<mode>" +;; See "Description of UNSPEC_PTEST" above for details. 
+(define_insn "aarch64_ptest<mode>" [(set (reg:CC_NZC CC_REGNUM) - (unspec:CC_NZC - [(match_operand:PRED_ALL 0 "register_operand" "Upa") - (match_operand:PRED_ALL 1 "register_operand" "Upa")] - UNSPEC_PTEST_PTRUE))] + (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") + (match_operand 1) + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:PRED_ALL 3 "register_operand" "Upa")] + UNSPEC_PTEST))] "TARGET_SVE" - "ptest\t%0, %1.b" + "ptest\t%0, %3.b" ) ;; ========================================================================= diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 46c058c98579..b2d49da843b9 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2699,6 +2699,22 @@ aarch64_svpattern_for_vl (machine_mode pred_mode, int vl) return AARCH64_NUM_SVPATTERNS; } +/* Return a VNx16BImode constant in which every sequence of ELT_SIZE + bits has the lowest bit set and the upper bits clear. This is the + VNx16BImode equivalent of a PTRUE for controlling elements of + ELT_SIZE bytes. However, because the constant is VNx16BImode, + all bits are significant, even the upper zeros. */ + +rtx +aarch64_ptrue_all (unsigned int elt_size) +{ + rtx_vector_builder builder (VNx16BImode, elt_size, 1); + builder.quick_push (const1_rtx); + for (unsigned int i = 1; i < elt_size; ++i) + builder.quick_push (const0_rtx); + return builder.build (); +} + /* Return an all-true predicate register of mode MODE. 
*/ rtx diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index a85bdd13faee..3f802c79ee8c 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -220,7 +220,7 @@ UNSPEC_LD1_GATHER UNSPEC_ST1_SCATTER UNSPEC_MERGE_PTRUE - UNSPEC_PTEST_PTRUE + UNSPEC_PTEST UNSPEC_UNPACKSHI UNSPEC_UNPACKUHI UNSPEC_UNPACKSLO @@ -259,6 +259,15 @@ ] ) +;; These constants are used as a const_int in various SVE unspecs +;; to indicate whether the governing predicate is known to be a PTRUE. +(define_constants + [; Indicates that the predicate might not be a PTRUE. + (SVE_MAYBE_NOT_PTRUE 0) + + ; Indicates that the predicate is known to be a PTRUE. + (SVE_KNOWN_PTRUE 1)]) + ;; If further include files are added the defintion of MD_INCLUDES ;; must be updated. diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index f59052baf21f..f6ca6380b052 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1169,6 +1169,10 @@ (V4HF "[%4]") (V8HF "[%4]") ]) +;; The number of bytes controlled by a predicate +(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2") + (VNx4BI "4") (VNx2BI "8")]) + ;; ------------------------------------------------------------------- ;; Code Iterators ;; ------------------------------------------------------------------- diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 5d229f8cc0ee..9038dfb71a19 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -684,6 +684,11 @@ (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_constant_vector_operand"))) +(define_predicate "aarch64_sve_ptrue_flag" + (and (match_code "const_int") + (ior (match_test "INTVAL (op) == SVE_MAYBE_NOT_PTRUE") + (match_test "INTVAL (op) == SVE_KNOWN_PTRUE")))) + (define_predicate "aarch64_gather_scale_operand_w" (and (match_code "const_int") (match_test "INTVAL (op) == 1 || INTVAL (op) == 4")))