mirror of
git://gcc.gnu.org/git/gcc.git
synced 2024-12-28 16:25:47 +08:00
i386.md (UNSPEC_BSF): Remove.
* config/i386/i386.md (UNSPEC_BSF): Remove. (ffssi2): Split into cmove and no_cmove insns and splitters; lose pentium float trick for now. (ffssi_1): Add * to name; use CTZ instead of UNSPEC. (ctzsi2, clzsi2, bsr): New. From-SVN: r62434
This commit is contained in:
parent
c407570a72
commit
8acfdd43da
@ -1,3 +1,11 @@
|
||||
2003-02-04 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386.md (UNSPEC_BSF): Remove.
|
||||
(ffssi2): Split into cmove and no_cmove insns and splitters;
|
||||
lose pentium float trick for now.
|
||||
(ffssi_1): Add * to name; use CTZ instead of UNSPEC.
|
||||
(ctzsi2, clzsi2, bsr): New.
|
||||
|
||||
2003-02-04 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/ia64/ia64.c (rtx_needs_barrier): Handle POPCOUNT,
|
||||
|
@ -80,7 +80,6 @@
|
||||
(UNSPEC_SCAS 20)
|
||||
(UNSPEC_SIN 21)
|
||||
(UNSPEC_COS 22)
|
||||
(UNSPEC_BSF 23)
|
||||
(UNSPEC_FNSTSW 24)
|
||||
(UNSPEC_SAHF 25)
|
||||
(UNSPEC_FSTCW 26)
|
||||
@ -14110,104 +14109,98 @@
|
||||
[(set_attr "type" "leave")])
|
||||
|
||||
(define_expand "ffssi2"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "")
|
||||
(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
|
||||
[(parallel
|
||||
[(set (match_operand:SI 0 "register_operand" "")
|
||||
(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))
|
||||
(clobber (match_scratch:SI 2 ""))
|
||||
(clobber (reg:CC 17))])]
|
||||
""
|
||||
"")
|
||||
|
||||
;; SImode ffs (find first set) for targets where TARGET_CMOVE holds.
;; The insn itself is emitted as "#" and is split once reload has
;; completed into four steps:
;;   1. load -1 into the scratch register (operand 2);
;;   2. set operand 0 to ctz (operand 1) while comparing operand 1 with
;;      zero in the flags register (reg 17) -- the same shape as the
;;      flag-setting "*ffssi_1" bsf pattern;
;;   3. when that comparison was "equal" (operand 1 is zero), replace
;;      operand 0 with the scratch value -1, otherwise keep it;
;;   4. add 1, so the result is 0 for a zero input and ctz + 1 otherwise.
;; The scratch uses an earlyclobber constraint ("=&r").
(define_insn_and_split "*ffs_cmove"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
|
||||
(clobber (match_scratch:SI 2 "=&r"))
|
||||
(clobber (reg:CC 17))]
|
||||
"TARGET_CMOVE"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(set (match_dup 2) (const_int -1))
|
||||
(parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0)))
|
||||
(set (match_dup 0) (ctz:SI (match_dup 1)))])
|
||||
(set (match_dup 0) (if_then_else:SI
|
||||
(eq (reg:CCZ 17) (const_int 0))
|
||||
(match_dup 2)
|
||||
(match_dup 0)))
|
||||
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
|
||||
(clobber (reg:CC 17))])]
|
||||
"")
|
||||
|
||||
(define_insn_and_split "*ffs_no_cmove"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "=r")
|
||||
(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
|
||||
(clobber (match_scratch:SI 2 "=&r"))
|
||||
(clobber (reg:CC 17))]
|
||||
""
|
||||
"#"
|
||||
"reload_completed"
|
||||
[(parallel [(set (match_dup 2) (const_int 0))
|
||||
(clobber (reg:CC 17))])
|
||||
(parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0)))
|
||||
(set (match_dup 0) (ctz:SI (match_dup 1)))])
|
||||
(set (strict_low_part (match_dup 3))
|
||||
(eq:QI (reg:CCZ 17) (const_int 0)))
|
||||
(parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
|
||||
(clobber (reg:CC 17))])
|
||||
(parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
|
||||
(clobber (reg:CC 17))])
|
||||
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
|
||||
(clobber (reg:CC 17))])]
|
||||
{
|
||||
rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode);
|
||||
rtx in = operands[1];
|
||||
|
||||
if (TARGET_CMOVE)
|
||||
{
|
||||
emit_move_insn (tmp, constm1_rtx);
|
||||
emit_insn (gen_ffssi_1 (out, in));
|
||||
emit_insn (gen_rtx_SET (VOIDmode, out,
|
||||
gen_rtx_IF_THEN_ELSE (SImode,
|
||||
gen_rtx_EQ (VOIDmode, gen_rtx_REG (CCZmode, FLAGS_REG),
|
||||
const0_rtx),
|
||||
tmp,
|
||||
out)));
|
||||
emit_insn (gen_addsi3 (out, out, const1_rtx));
|
||||
emit_move_insn (operands[0], out);
|
||||
}
|
||||
|
||||
/* Pentium bsf instruction is extremely slow. The following code is
|
||||
recommended by the Intel Optimizing Manual as a reasonable replacement:
|
||||
TEST EAX,EAX
|
||||
JZ SHORT BS2
|
||||
XOR ECX,ECX
|
||||
MOV DWORD PTR [TEMP+4],ECX
|
||||
SUB ECX,EAX
|
||||
AND EAX,ECX
|
||||
MOV DWORD PTR [TEMP],EAX
|
||||
FILD QWORD PTR [TEMP]
|
||||
FSTP QWORD PTR [TEMP]
|
||||
WAIT ; WAIT only needed for compatibility with
|
||||
; earlier processors
|
||||
MOV ECX, DWORD PTR [TEMP+4]
|
||||
SHR ECX,20
|
||||
SUB ECX,3FFH
|
||||
TEST EAX,EAX ; clear zero flag
|
||||
BS2:
|
||||
The following piece of code expands ffs into a similar beast.
|
||||
*/
|
||||
|
||||
else if (TARGET_PENTIUM && !optimize_size && TARGET_80387)
|
||||
{
|
||||
rtx label = gen_label_rtx ();
|
||||
rtx lo, hi;
|
||||
rtx mem = assign_386_stack_local (DImode, 0);
|
||||
rtx fptmp = gen_reg_rtx (DFmode);
|
||||
split_di (&mem, 1, &lo, &hi);
|
||||
|
||||
emit_move_insn (out, const0_rtx);
|
||||
|
||||
emit_cmp_and_jump_insns (in, const0_rtx, EQ, 0, SImode, 1, label);
|
||||
|
||||
emit_move_insn (hi, out);
|
||||
emit_insn (gen_subsi3 (out, out, in));
|
||||
emit_insn (gen_andsi3 (out, out, in));
|
||||
emit_move_insn (lo, out);
|
||||
emit_insn (gen_floatdidf2 (fptmp,mem));
|
||||
emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp);
|
||||
emit_move_insn (out, hi);
|
||||
emit_insn (gen_lshrsi3 (out, out, GEN_INT (20)));
|
||||
emit_insn (gen_subsi3 (out, out, GEN_INT (0x3ff - 1)));
|
||||
|
||||
emit_label (label);
|
||||
LABEL_NUSES (label) = 1;
|
||||
|
||||
emit_move_insn (operands[0], out);
|
||||
}
|
||||
else
|
||||
{
|
||||
emit_move_insn (tmp, const0_rtx);
|
||||
emit_insn (gen_ffssi_1 (out, in));
|
||||
emit_insn (gen_rtx_SET (VOIDmode,
|
||||
gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (QImode, tmp)),
|
||||
gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG),
|
||||
const0_rtx)));
|
||||
emit_insn (gen_negsi2 (tmp, tmp));
|
||||
emit_insn (gen_iorsi3 (out, out, tmp));
|
||||
emit_insn (gen_addsi3 (out, out, const1_rtx));
|
||||
emit_move_insn (operands[0], out);
|
||||
}
|
||||
DONE;
|
||||
operands[3] = gen_lowpart (QImode, operands[2]);
|
||||
})
|
||||
|
||||
(define_insn "ffssi_1"
|
||||
(define_insn "*ffssi_1"
|
||||
[(set (reg:CCZ 17)
|
||||
(compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
|
||||
(compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
|
||||
(const_int 0)))
|
||||
(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(unspec:SI [(match_dup 1)] UNSPEC_BSF))]
|
||||
(ctz:SI (match_dup 1)))]
|
||||
""
|
||||
"bsf{l}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_0f" "1")
|
||||
(set_attr "ppro_uops" "few")])
|
||||
|
||||
;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger
|
||||
;; and slower than the two-byte movzx insn needed to do the work in SImode.
|
||||
;; Count trailing zeros of an SImode register-or-memory operand with a
;; single bsf, writing the result to a register.  The flags register
;; (reg 17) is only clobbered by this pattern, not set to a described
;; value, so nothing may rely on the flags afterwards.
;; NOTE(review): the result for a zero input is not addressed by this
;; pattern -- confirm what callers may assume.
(define_insn "ctzsi2"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
|
||||
(clobber (reg:CC 17))]
|
||||
""
|
||||
"bsf{l}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_0f" "1")
|
||||
(set_attr "ppro_uops" "few")])
|
||||
|
||||
;; Count leading zeros of an SImode value, expanded as two insns:
;;   1. operand 0 = 31 - clz (operand 1), which is the form matched by
;;      the "*bsr" pattern;
;;   2. operand 0 = operand 0 xor 31, which converts 31 - clz back into
;;      clz (for a value x in 0..31, x xor 31 equals 31 - x).
;; Both steps clobber the flags register (reg 17).
;; NOTE(review): a zero input is not handled specially here -- confirm
;; what callers may assume in that case.
(define_expand "clzsi2"
|
||||
[(parallel
|
||||
[(set (match_operand:SI 0 "register_operand" "")
|
||||
(minus:SI (const_int 31)
|
||||
(clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))))
|
||||
(clobber (reg:CC 17))])
|
||||
(parallel
|
||||
[(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
|
||||
(clobber (reg:CC 17))])]
|
||||
""
|
||||
"")
|
||||
|
||||
;; bsr on an SImode register-or-memory operand.  The RTL describes the
;; result as 31 - clz (operand 1), i.e. the bit index of the most
;; significant set bit rather than the leading-zero count itself; the
;; "clzsi2" expander produces exactly this form as its first step.
;; The flags register (reg 17) is clobbered.
(define_insn "*bsr"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(minus:SI (const_int 31)
|
||||
(clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
|
||||
(clobber (reg:CC 17))]
|
||||
""
|
||||
"bsr{l}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_0f" "1")
|
||||
(set_attr "ppro_uops" "few")])
|
||||
|
||||
;; Thread-local storage patterns for ELF.
|
||||
;;
|
||||
|
Loading…
Reference in New Issue
Block a user