mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-02 08:40:26 +08:00
re PR target/62011 (False Data Dependency in popcnt instruction)
PR target/62011 * config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): New tune flag. * config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_BMI): New define. * config/i386/i386.md (unspec) <UNSPEC_INSN_FALSE_DEP>: New unspec. (ffs<mode>2): Do not expand with tzcnt for TARGET_AVOID_FALSE_DEP_FOR_BMI. (ffssi2_no_cmove): Ditto. (*tzcnt<mode>_1): Disable for TARGET_AVOID_FALSE_DEP_FOR_BMI. (ctz<mode>2): New expander. (*ctz<mode>2_falsedep_1): New insn_and_split pattern. (*ctz<mode>2_falsedep): New insn. (*ctz<mode>2): Rename from ctz<mode>2. (clz<mode>2_lzcnt): New expander. (*clz<mode>2_lzcnt_falsedep_1): New insn_and_split pattern. (*clz<mode>2_lzcnt_falsedep): New insn. (*clz<mode>2): Rename from ctz<mode>2. (popcount<mode>2): New expander. (*popcount<mode>2_falsedep_1): New insn_and_split pattern. (*popcount<mode>2_falsedep): New insn. (*popcount<mode>2): Rename from ctz<mode>2. (*popcount<mode>2_cmp): Remove. (*popcountsi2_cmp_zext): Ditto. From-SVN: r214112
This commit is contained in:
parent
d6b0bb1c4e
commit
374f5bf801
@ -1,3 +1,29 @@
|
||||
2014-08-18 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/62011
|
||||
* config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI):
|
||||
New tune flag.
|
||||
* config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_BMI): New define.
|
||||
* config/i386/i386.md (unspec) <UNSPEC_INSN_FALSE_DEP>: New unspec.
|
||||
(ffs<mode>2): Do not expand with tzcnt for
|
||||
TARGET_AVOID_FALSE_DEP_FOR_BMI.
|
||||
(ffssi2_no_cmove): Ditto.
|
||||
(*tzcnt<mode>_1): Disable for TARGET_AVOID_FALSE_DEP_FOR_BMI.
|
||||
(ctz<mode>2): New expander.
|
||||
(*ctz<mode>2_falsedep_1): New insn_and_split pattern.
|
||||
(*ctz<mode>2_falsedep): New insn.
|
||||
(*ctz<mode>2): Rename from ctz<mode>2.
|
||||
(clz<mode>2_lzcnt): New expander.
|
||||
(*clz<mode>2_lzcnt_falsedep_1): New insn_and_split pattern.
|
||||
(*clz<mode>2_lzcnt_falsedep): New insn.
|
||||
(*clz<mode>2): Rename from ctz<mode>2.
|
||||
(popcount<mode>2): New expander.
|
||||
(*popcount<mode>2_falsedep_1): New insn_and_split pattern.
|
||||
(*popcount<mode>2_falsedep): New insn.
|
||||
(*popcount<mode>2): Rename from ctz<mode>2.
|
||||
(*popcount<mode>2_cmp): Remove.
|
||||
(*popcountsi2_cmp_zext): Ditto.
|
||||
|
||||
2014-08-18 Ajit Agarwal <ajitkum@xilinx.com>
|
||||
|
||||
* config/microblaze/microblaze.c (microblaze_elf_asm_cdtor): New.
|
||||
|
@ -473,6 +473,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
|
||||
ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
|
||||
#define TARGET_ADJUST_UNROLL \
|
||||
ix86_tune_features[X86_TUNE_ADJUST_UNROLL]
|
||||
#define TARGET_AVOID_FALSE_DEP_FOR_BMI \
|
||||
ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
|
||||
|
||||
/* Feature tests against the various architecture variations. */
|
||||
enum ix86_arch_indices {
|
||||
|
@ -112,6 +112,7 @@
|
||||
UNSPEC_XBEGIN_ABORT
|
||||
UNSPEC_STOS
|
||||
UNSPEC_PEEPSIB
|
||||
UNSPEC_INSN_FALSE_DEP
|
||||
|
||||
;; For SSE/MMX support:
|
||||
UNSPEC_FIX_NOTRUNC
|
||||
@ -12197,7 +12198,8 @@
|
||||
DONE;
|
||||
}
|
||||
|
||||
flags_mode = TARGET_BMI ? CCCmode : CCZmode;
|
||||
flags_mode
|
||||
= (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? CCCmode : CCZmode;
|
||||
|
||||
operands[2] = gen_reg_rtx (<MODE>mode);
|
||||
operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG);
|
||||
@ -12223,7 +12225,8 @@
|
||||
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
{
|
||||
enum machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;
|
||||
enum machine_mode flags_mode
|
||||
= (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? CCCmode : CCZmode;
|
||||
|
||||
operands[3] = gen_lowpart (QImode, operands[2]);
|
||||
operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
|
||||
@ -12238,7 +12241,7 @@
|
||||
(const_int 0)))
|
||||
(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(ctz:SWI48 (match_dup 1)))]
|
||||
"TARGET_BMI"
|
||||
"TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI"
|
||||
"tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "alu1")
|
||||
(set_attr "prefix_0f" "1")
|
||||
@ -12259,7 +12262,52 @@
|
||||
(set_attr "btver2_decode" "double")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "ctz<mode>2"
|
||||
(define_expand "ctz<mode>2"
|
||||
[(parallel
|
||||
[(set (match_operand:SWI248 0 "register_operand")
|
||||
(ctz:SWI248
|
||||
(match_operand:SWI248 1 "nonimmediate_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))])])
|
||||
|
||||
(define_insn_and_split "*ctz<mode>2_falsedep_1"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=&r")
|
||||
(ctz:SWI48
|
||||
(match_operand:SWI48 1 "nonimmediate_operand" "rm")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"(TARGET_BMI || TARGET_GENERIC)
|
||||
&& TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(ctz:SWI48 (match_dup 1)))
|
||||
(unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"ix86_expand_clear (operands[0]);")
|
||||
|
||||
(define_insn "*ctz<mode>2_falsedep"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(ctz:SWI48
|
||||
(match_operand:SWI48 1 "nonimmediate_operand" "rm")))
|
||||
(unspec [(match_operand:SWI48 2 "register_operand" "0")]
|
||||
UNSPEC_INSN_FALSE_DEP)
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
""
|
||||
{
|
||||
if (TARGET_BMI)
|
||||
return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
|
||||
else if (TARGET_GENERIC)
|
||||
/* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
|
||||
return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
[(set_attr "type" "alu1")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*ctz<mode>2"
|
||||
[(set (match_operand:SWI248 0 "register_operand" "=r")
|
||||
(ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
@ -12306,7 +12354,44 @@
|
||||
operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
|
||||
})
|
||||
|
||||
(define_insn "clz<mode>2_lzcnt"
|
||||
(define_expand "clz<mode>2_lzcnt"
|
||||
[(parallel
|
||||
[(set (match_operand:SWI248 0 "register_operand")
|
||||
(clz:SWI248
|
||||
(match_operand:SWI248 1 "nonimmediate_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_LZCNT")
|
||||
|
||||
(define_insn_and_split "*clz<mode>2_lzcnt_falsedep_1"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=&r")
|
||||
(clz:SWI48
|
||||
(match_operand:SWI48 1 "nonimmediate_operand" "rm")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_LZCNT
|
||||
&& TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(clz:SWI48 (match_dup 1)))
|
||||
(unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"ix86_expand_clear (operands[0]);")
|
||||
|
||||
(define_insn "*clz<mode>2_lzcnt_falsedep"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(clz:SWI48
|
||||
(match_operand:SWI48 1 "nonimmediate_operand" "rm")))
|
||||
(unspec [(match_operand:SWI48 2 "register_operand" "0")]
|
||||
UNSPEC_INSN_FALSE_DEP)
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_LZCNT"
|
||||
"lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*clz<mode>2_lzcnt"
|
||||
[(set (match_operand:SWI248 0 "register_operand" "=r")
|
||||
(clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
@ -12589,7 +12674,50 @@
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
(define_insn "popcount<mode>2"
|
||||
(define_expand "popcount<mode>2"
|
||||
[(parallel
|
||||
[(set (match_operand:SWI248 0 "register_operand")
|
||||
(popcount:SWI248
|
||||
(match_operand:SWI248 1 "nonimmediate_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_POPCNT")
|
||||
|
||||
(define_insn_and_split "*popcount<mode>2_falsedep_1"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=&r")
|
||||
(popcount:SWI48
|
||||
(match_operand:SWI48 1 "nonimmediate_operand" "rm")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_POPCNT
|
||||
&& TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(popcount:SWI48 (match_dup 1)))
|
||||
(unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"ix86_expand_clear (operands[0]);")
|
||||
|
||||
(define_insn "*popcount<mode>2_falsedep"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(popcount:SWI48
|
||||
(match_operand:SWI48 1 "nonimmediate_operand" "rm")))
|
||||
(unspec [(match_operand:SWI48 2 "register_operand" "0")]
|
||||
UNSPEC_INSN_FALSE_DEP)
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_POPCNT"
|
||||
{
|
||||
#if TARGET_MACHO
|
||||
return "popcnt\t{%1, %0|%0, %1}";
|
||||
#else
|
||||
return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
|
||||
#endif
|
||||
}
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*popcount<mode>2"
|
||||
[(set (match_operand:SWI248 0 "register_operand" "=r")
|
||||
(popcount:SWI248
|
||||
(match_operand:SWI248 1 "nonimmediate_operand" "rm")))
|
||||
@ -12606,45 +12734,6 @@
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*popcount<mode>2_cmp"
|
||||
[(set (reg FLAGS_REG)
|
||||
(compare
|
||||
(popcount:SWI248
|
||||
(match_operand:SWI248 1 "nonimmediate_operand" "rm"))
|
||||
(const_int 0)))
|
||||
(set (match_operand:SWI248 0 "register_operand" "=r")
|
||||
(popcount:SWI248 (match_dup 1)))]
|
||||
"TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
|
||||
{
|
||||
#if TARGET_MACHO
|
||||
return "popcnt\t{%1, %0|%0, %1}";
|
||||
#else
|
||||
return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
|
||||
#endif
|
||||
}
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*popcountsi2_cmp_zext"
|
||||
[(set (reg FLAGS_REG)
|
||||
(compare
|
||||
(popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
|
||||
(const_int 0)))
|
||||
(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI(popcount:SI (match_dup 1))))]
|
||||
"TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
|
||||
{
|
||||
#if TARGET_MACHO
|
||||
return "popcnt\t{%1, %0|%0, %1}";
|
||||
#else
|
||||
return "popcnt{l}\t{%1, %0|%0, %1}";
|
||||
#endif
|
||||
}
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_expand "bswapdi2"
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
|
||||
|
@ -509,6 +509,11 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
|
||||
DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
|
||||
m_K8)
|
||||
|
||||
/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
|
||||
for bit-manipulation instructions. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
|
||||
m_SANDYBRIDGE | m_HASWELL | m_GENERIC)
|
||||
|
||||
/*****************************************************************************/
|
||||
/* This never worked well before. */
|
||||
/*****************************************************************************/
|
||||
|
Loading…
x
Reference in New Issue
Block a user