diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8382f2b1f6a8..83746347fc28 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2010-09-02  Julian Brown  <julian@codesourcery.com>
+
+	* config/arm/neon.md (UNSPEC_VCLE, UNSPEC_VCLT): New constants for
+	unspecs.
+	(vcond<mode>, vcondu<mode>): New expanders.
+	(neon_vceq<mode>, neon_vcge<mode>, neon_vcgt<mode>): Support
+	comparisons with zero.
+	(neon_vcle<mode>, neon_vclt<mode>): New patterns.
+	* config/arm/constraints.md (Dz): New constraint.
+
 2010-09-02  Anatoly Sokolov  <aesok@post.ru>
 
 	* target.def (class_likely_spilled_p): New hook.
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index fce8b5ea0bec..41a066379753 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -29,7 +29,7 @@
 ;; in Thumb-1 state: I, J, K, L, M, N, O
 
 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd
 ;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
 
@@ -199,6 +199,12 @@
   (and (match_code "const_double")
        (match_test "TARGET_32BIT && neg_const_double_rtx_ok_for_fpa (op)")))
 
+(define_constraint "Dz"
+ "@internal
+  In ARM/Thumb-2 state a vector of constant zeros."
+ (and (match_code "const_vector")
+      (match_test "TARGET_NEON && op == CONST0_RTX (mode)")))
+
 (define_constraint "Da"
  "@internal
   In ARM/Thumb-2 state a const_int, const_double or const_vector that can
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 96241b9c17f8..8e8fc01961b2 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -140,7 +140,9 @@
    (UNSPEC_VUZP1	201)
    (UNSPEC_VUZP2	202)
    (UNSPEC_VZIP1	203)
-   (UNSPEC_VZIP2	204)])
+   (UNSPEC_VZIP2	204)
+   (UNSPEC_VCLE		206)
+   (UNSPEC_VCLT		207)])
 
 ;; Attribute used to permit string comparisons against <VQH_mnem> in
 ;; neon_type attribute definitions.
@@ -1452,6 +1454,169 @@
   [(set_attr "neon_type" "neon_int_5")]
 )
 
+;; Conditional instructions.  These are comparisons with conditional moves for
+;; vectors.  They perform the assignment:
+;;
+;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
+;;
+;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
+;; element-wise.
+
+(define_expand "vcond<mode>"
+  [(set (match_operand:VDQW 0 "s_register_operand" "")
+	(if_then_else:VDQW
+	  (match_operator 3 "arm_comparison_operator"
+	    [(match_operand:VDQW 4 "s_register_operand" "")
+	     (match_operand:VDQW 5 "nonmemory_operand" "")])
+	  (match_operand:VDQW 1 "s_register_operand" "")
+	  (match_operand:VDQW 2 "s_register_operand" "")))]
+  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+  rtx mask;
+  int inverse = 0, immediate_zero = 0;
+  /* See the description of "magic" bits in the 'T' case of
+     arm_print_operand.  */
+  HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
+			     ? 3 : 1;
+  rtx magic_rtx = GEN_INT (magic_word);
+
+  mask = gen_reg_rtx (<V_cmp_result>mode);
+
+  if (operands[5] == CONST0_RTX (<MODE>mode))
+    immediate_zero = 1;
+  else if (!REG_P (operands[5]))
+    operands[5] = force_reg (<MODE>mode, operands[5]);
+
+  switch (GET_CODE (operands[3]))
+    {
+    case GE:
+      emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
+				      magic_rtx));
+      break;
+
+    case GT:
+      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
+				      magic_rtx));
+      break;
+
+    case EQ:
+      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+				      magic_rtx));
+      break;
+
+    case LE:
+      if (immediate_zero)
+	emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
+					magic_rtx));
+      else
+	emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
+					magic_rtx));
+      break;
+
+    case LT:
+      if (immediate_zero)
+	emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
+					magic_rtx));
+      else
+	emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
+					magic_rtx));
+      break;
+
+    case NE:
+      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+				      magic_rtx));
+      inverse = 1;
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  if (inverse)
+    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
+				    operands[1]));
+  else
+    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
+				    operands[2]));
+
+  DONE;
+})
+
+(define_expand "vcondu<mode>"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "")
+	(if_then_else:VDQIW
+	  (match_operator 3 "arm_comparison_operator"
+	    [(match_operand:VDQIW 4 "s_register_operand" "")
+	     (match_operand:VDQIW 5 "s_register_operand" "")])
+	  (match_operand:VDQIW 1 "s_register_operand" "")
+	  (match_operand:VDQIW 2 "s_register_operand" "")))]
+  "TARGET_NEON"
+{
+  rtx mask;
+  int inverse = 0, immediate_zero = 0;
+
+  mask = gen_reg_rtx (<V_cmp_result>mode);
+
+  if (operands[5] == CONST0_RTX (<MODE>mode))
+    immediate_zero = 1;
+  else if (!REG_P (operands[5]))
+    operands[5] = force_reg (<MODE>mode, operands[5]);
+
+  switch (GET_CODE (operands[3]))
+    {
+    case GEU:
+      emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
+				      const0_rtx));
+      break;
+
+    case GTU:
+      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
+				      const0_rtx));
+      break;
+
+    case EQ:
+      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+				      const0_rtx));
+      break;
+
+    case LEU:
+      if (immediate_zero)
+	emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
+					const0_rtx));
+      else
+	emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
+					const0_rtx));
+      break;
+
+    case LTU:
+      if (immediate_zero)
+	emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
+					const0_rtx));
+      else
+	emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
+					const0_rtx));
+      break;
+
+    case NE:
+      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+				      const0_rtx));
+      inverse = 1;
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  if (inverse)
+    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
+				    operands[1]));
+  else
+    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
+				    operands[2]));
+
+  DONE;
+})
+
 ;; Patterns for builtins.
 
 ; good for plain vadd, vaddq.
@@ -1863,13 +2028,16 @@
 )
 
 (define_insn "neon_vceq<mode>"
-  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
-        (unspec:<V_cmp_result> [(match_operand:VDQW 1 "s_register_operand" "w")
-		                (match_operand:VDQW 2 "s_register_operand" "w")
-                                (match_operand:SI 3 "immediate_operand" "i")]
-                               UNSPEC_VCEQ))]
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
+	(unspec:<V_cmp_result>
+	  [(match_operand:VDQW 1 "s_register_operand" "w,w")
+	   (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+	   (match_operand:SI 3 "immediate_operand" "i,i")]
+	  UNSPEC_VCEQ))]
   "TARGET_NEON"
-  "vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  "@
+  vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+  vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
   [(set (attr "neon_type")
      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
@@ -1879,13 +2047,16 @@
 )
 
 (define_insn "neon_vcge<mode>"
-  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
-        (unspec:<V_cmp_result> [(match_operand:VDQW 1 "s_register_operand" "w")
-		                (match_operand:VDQW 2 "s_register_operand" "w")
-                                (match_operand:SI 3 "immediate_operand" "i")]
-                               UNSPEC_VCGE))]
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
+	(unspec:<V_cmp_result>
+	  [(match_operand:VDQW 1 "s_register_operand" "w,w")
+	   (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+	   (match_operand:SI 3 "immediate_operand" "i,i")]
+	  UNSPEC_VCGE))]
   "TARGET_NEON"
-  "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  "@
+  vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+  vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
   [(set (attr "neon_type")
      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
@@ -1895,13 +2066,16 @@
 )
 
 (define_insn "neon_vcgt<mode>"
-  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
-        (unspec:<V_cmp_result> [(match_operand:VDQW 1 "s_register_operand" "w")
-		                (match_operand:VDQW 2 "s_register_operand" "w")
-                                (match_operand:SI 3 "immediate_operand" "i")]
-                               UNSPEC_VCGT))]
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
+	(unspec:<V_cmp_result>
+	  [(match_operand:VDQW 1 "s_register_operand" "w,w")
+	   (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+	   (match_operand:SI 3 "immediate_operand" "i,i")]
+	  UNSPEC_VCGT))]
   "TARGET_NEON"
-  "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  "@
+  vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+  vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
   [(set (attr "neon_type")
      (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
                    (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
@@ -1910,6 +2084,43 @@
                    (const_string "neon_int_5")))]
 )
 
+;; VCLE and VCLT only support comparisons with immediate zero (register
+;; variants are VCGE and VCGT with operands reversed).
+
+(define_insn "neon_vcle<mode>"
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+	(unspec:<V_cmp_result>
+	  [(match_operand:VDQW 1 "s_register_operand" "w")
+	   (match_operand:VDQW 2 "nonmemory_operand" "Dz")
+	   (match_operand:SI 3 "immediate_operand" "i")]
+	  UNSPEC_VCLE))]
+  "TARGET_NEON"
+  "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                 (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                 (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                   (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vclt<mode>"
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+	(unspec:<V_cmp_result>
+	  [(match_operand:VDQW 1 "s_register_operand" "w")
+	   (match_operand:VDQW 2 "nonmemory_operand" "Dz")
+	   (match_operand:SI 3 "immediate_operand" "i")]
+	  UNSPEC_VCLT))]
+  "TARGET_NEON"
+  "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
+  [(set (attr "neon_type")
+     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                   (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                 (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                 (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                   (const_string "neon_int_5")))]
+)
+
 (define_insn "neon_vcage<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
 	(unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")