From 8acfdd43da580ac54e7c86334d24d2b3e8b2585e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 5 Feb 2003 01:40:57 -0800 Subject: [PATCH] i386.md (UNSPEC_BSF): Remove. * config/i386/i386.md (UNSPEC_BSF): Remove. (ffssi2): Split into cmove and no_cmove insns and splitters; lose pentium float trick for now. (ffssi_1): Add * to name; use CTZ instead of UNSPEC. (ctzsi2, clzsi2, bsr): New. From-SVN: r62434 --- gcc/ChangeLog | 8 ++ gcc/config/i386/i386.md | 167 +++++++++++++++++++--------------------- 2 files changed, 88 insertions(+), 87 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b5c6eb1174a..ee7f50da2cc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2003-02-04 Richard Henderson + + * config/i386/i386.md (UNSPEC_BSF): Remove. + (ffssi2): Split into cmove and no_cmove insns and splitters; + lose pentium float trick for now. + (ffssi_1): Add * to name; use CTZ instead of UNSPEC. + (ctzsi2, clzsi2, bsr): New. + 2003-02-04 Richard Henderson * config/ia64/ia64.c (rtx_needs_barrier): Handle POPCOUNT, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 45bd3d65aee..ffb2153bf3b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -80,7 +80,6 @@ (UNSPEC_SCAS 20) (UNSPEC_SIN 21) (UNSPEC_COS 22) - (UNSPEC_BSF 23) (UNSPEC_FNSTSW 24) (UNSPEC_SAHF 25) (UNSPEC_FSTCW 26) @@ -14110,104 +14109,98 @@ [(set_attr "type" "leave")]) (define_expand "ffssi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_scratch:SI 2 "")) + (clobber (reg:CC 17))])] "" + "") + +(define_insn_and_split "*ffs_cmove" + [(set (match_operand:SI 0 "register_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&r")) + (clobber (reg:CC 17))] + "TARGET_CMOVE" + "#" + "&& 
reload_completed" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (match_dup 0) (if_then_else:SI + (eq (reg:CCZ 17) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC 17))])] + "") + +(define_insn_and_split "*ffs_no_cmove" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&q")) + (clobber (reg:CC 17))] + "" + "#" + "reload_completed" + [(parallel [(set (match_dup 2) (const_int 0)) + (clobber (reg:CC 17))]) + (parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (strict_low_part (match_dup 3)) + (eq:QI (reg:CCZ 17) (const_int 0))) + (parallel [(set (match_dup 2) (neg:SI (match_dup 2))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC 17))])] { - rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode); - rtx in = operands[1]; - - if (TARGET_CMOVE) - { - emit_move_insn (tmp, constm1_rtx); - emit_insn (gen_ffssi_1 (out, in)); - emit_insn (gen_rtx_SET (VOIDmode, out, - gen_rtx_IF_THEN_ELSE (SImode, - gen_rtx_EQ (VOIDmode, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx), - tmp, - out))); - emit_insn (gen_addsi3 (out, out, const1_rtx)); - emit_move_insn (operands[0], out); - } - - /* Pentium bsf instruction is extremely slow. 
The following code is - recommended by the Intel Optimizing Manual as a reasonable replacement: - TEST EAX,EAX - JZ SHORT BS2 - XOR ECX,ECX - MOV DWORD PTR [TEMP+4],ECX - SUB ECX,EAX - AND EAX,ECX - MOV DWORD PTR [TEMP],EAX - FILD QWORD PTR [TEMP] - FSTP QWORD PTR [TEMP] - WAIT ; WAIT only needed for compatibility with - ; earlier processors - MOV ECX, DWORD PTR [TEMP+4] - SHR ECX,20 - SUB ECX,3FFH - TEST EAX,EAX ; clear zero flag - BS2: - Following piece of code expand ffs to similar beast. - */ - - else if (TARGET_PENTIUM && !optimize_size && TARGET_80387) - { - rtx label = gen_label_rtx (); - rtx lo, hi; - rtx mem = assign_386_stack_local (DImode, 0); - rtx fptmp = gen_reg_rtx (DFmode); - split_di (&mem, 1, &lo, &hi); - - emit_move_insn (out, const0_rtx); - - emit_cmp_and_jump_insns (in, const0_rtx, EQ, 0, SImode, 1, label); - - emit_move_insn (hi, out); - emit_insn (gen_subsi3 (out, out, in)); - emit_insn (gen_andsi3 (out, out, in)); - emit_move_insn (lo, out); - emit_insn (gen_floatdidf2 (fptmp,mem)); - emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp); - emit_move_insn (out, hi); - emit_insn (gen_lshrsi3 (out, out, GEN_INT (20))); - emit_insn (gen_subsi3 (out, out, GEN_INT (0x3ff - 1))); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operands[0], out); - } - else - { - emit_move_insn (tmp, const0_rtx); - emit_insn (gen_ffssi_1 (out, in)); - emit_insn (gen_rtx_SET (VOIDmode, - gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (QImode, tmp)), - gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx))); - emit_insn (gen_negsi2 (tmp, tmp)); - emit_insn (gen_iorsi3 (out, out, tmp)); - emit_insn (gen_addsi3 (out, out, const1_rtx)); - emit_move_insn (operands[0], out); - } - DONE; + operands[3] = gen_lowpart (QImode, operands[2]); }) -(define_insn "ffssi_1" +(define_insn "*ffssi_1" [(set (reg:CCZ 17) - (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") + (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" 
"rm") (const_int 0))) (set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_dup 1)] UNSPEC_BSF))] + (ctz:SI (match_dup 1)))] "" "bsf{l}\t{%1, %0|%0, %1}" [(set_attr "prefix_0f" "1") (set_attr "ppro_uops" "few")]) -;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger -;; and slower than the two-byte movzx insn needed to do the work in SImode. +(define_insn "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC 17))] + "" + "bsf{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "ppro_uops" "few")]) + +(define_expand "clzsi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC 17))]) + (parallel + [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31))) + (clobber (reg:CC 17))])] + "" + "") + +(define_insn "*bsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC 17))] + "" + "bsr{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "ppro_uops" "few")]) ;; Thread-local storage patterns for ELF. ;;