From 374f5bf801f8487193ddd1e73027af1df8221f3e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 18 Aug 2014 20:00:52 +0200 Subject: [PATCH] re PR target/62011 (False Data Dependency in popcnt instruction) PR target/62011 * config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): New tune flag. * config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_BMI): New define. * config/i386/i386.md (unspec) <UNSPEC_INSN_FALSE_DEP>: New unspec. (ffs2): Do not expand with tzcnt for TARGET_AVOID_FALSE_DEP_FOR_BMI. (ffssi2_no_cmove): Ditto. (*tzcnt_1): Disable for TARGET_AVOID_FALSE_DEP_FOR_BMI. (ctz2): New expander. (*ctz2_falsedep_1): New insn_and_split pattern. (*ctz2_falsedep): New insn. (*ctz2): Rename from ctz2. (clz2_lzcnt): New expander. (*clz2_lzcnt_falsedep_1): New insn_and_split pattern. (*clz2_lzcnt_falsedep): New insn. (*clz2_lzcnt): Rename from clz2_lzcnt. (popcount2): New expander. (*popcount2_falsedep_1): New insn_and_split pattern. (*popcount2_falsedep): New insn. (*popcount2): Rename from popcount2. (*popcount2_cmp): Remove. (*popcountsi2_cmp_zext): Ditto. From-SVN: r214112 --- gcc/ChangeLog | 26 +++++ gcc/config/i386/i386.h | 2 + gcc/config/i386/i386.md | 179 ++++++++++++++++++++++++++--------- gcc/config/i386/x86-tune.def | 5 + 4 files changed, 167 insertions(+), 45 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 629ae5f5aa24..8674f949b4b2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2014-08-18 Uros Bizjak + + PR target/62011 + * config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): + New tune flag. + * config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_BMI): New define. + * config/i386/i386.md (unspec) <UNSPEC_INSN_FALSE_DEP>: New unspec. + (ffs2): Do not expand with tzcnt for + TARGET_AVOID_FALSE_DEP_FOR_BMI. + (ffssi2_no_cmove): Ditto. + (*tzcnt_1): Disable for TARGET_AVOID_FALSE_DEP_FOR_BMI. + (ctz2): New expander. + (*ctz2_falsedep_1): New insn_and_split pattern. + (*ctz2_falsedep): New insn. + (*ctz2): Rename from ctz2. + (clz2_lzcnt): New expander. 
+ (*clz2_lzcnt_falsedep_1): New insn_and_split pattern. + (*clz2_lzcnt_falsedep): New insn. + (*clz2_lzcnt): Rename from clz2_lzcnt. + (popcount2): New expander. + (*popcount2_falsedep_1): New insn_and_split pattern. + (*popcount2_falsedep): New insn. + (*popcount2): Rename from popcount2. + (*popcount2_cmp): Remove. + (*popcountsi2_cmp_zext): Ditto. + 2014-08-18 Ajit Agarwal * config/microblaze/microblaze.c (microblaze_elf_asm_cdtor): New. diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index c2f0ceed45f4..ec6ed2535e91 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -473,6 +473,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS] #define TARGET_ADJUST_UNROLL \ ix86_tune_features[X86_TUNE_ADJUST_UNROLL] +#define TARGET_AVOID_FALSE_DEP_FOR_BMI \ + ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9bb7e164e281..4749b74294e8 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -112,6 +112,7 @@ UNSPEC_XBEGIN_ABORT UNSPEC_STOS UNSPEC_PEEPSIB + UNSPEC_INSN_FALSE_DEP ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -12197,7 +12198,8 @@ DONE; } - flags_mode = TARGET_BMI ? CCCmode : CCZmode; + flags_mode + = (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? CCCmode : CCZmode; operands[2] = gen_reg_rtx (mode); operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG); @@ -12223,7 +12225,8 @@ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) (clobber (reg:CC FLAGS_REG))])] { - enum machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode; + enum machine_mode flags_mode + = (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? 
CCCmode : CCZmode; operands[3] = gen_lowpart (QImode, operands[2]); operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG); @@ -12238,7 +12241,7 @@ (const_int 0))) (set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_dup 1)))] - "TARGET_BMI" + "TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI" "tzcnt{}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") @@ -12259,7 +12262,52 @@ (set_attr "btver2_decode" "double") (set_attr "mode" "")]) -(define_insn "ctz2" +(define_expand "ctz2" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (ctz:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_insn_and_split "*ctz2_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=&r") + (ctz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_BMI || TARGET_GENERIC) + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (ctz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] + "ix86_expand_clear (operands[0]);") + +(define_insn "*ctz2_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ctz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "" +{ + if (TARGET_BMI) + return "tzcnt{}\t{%1, %0|%0, %1}"; + else if (TARGET_GENERIC) + /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. 
*/ + return "rep%; bsf{}\t{%1, %0|%0, %1}"; + else + gcc_unreachable (); +} + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "")]) + +(define_insn "*ctz2" [(set (match_operand:SWI248 0 "register_operand" "=r") (ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] @@ -12306,7 +12354,44 @@ operands[2] = GEN_INT (GET_MODE_BITSIZE (mode)-1); }) -(define_insn "clz2_lzcnt" +(define_expand "clz2_lzcnt" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (clz:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_LZCNT") + +(define_insn_and_split "*clz2_lzcnt_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=&r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (clz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] + "ix86_expand_clear (operands[0]);") + +(define_insn "*clz2_lzcnt_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT" + "lzcnt{}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*clz2_lzcnt" [(set (match_operand:SWI248 0 "register_operand" "=r") (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] @@ -12589,7 +12674,50 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "HI")]) -(define_insn "popcount2" +(define_expand "popcount2" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + 
(popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_POPCNT") + +(define_insn_and_split "*popcount2_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=&r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (popcount:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] + "ix86_expand_clear (operands[0]);") + +(define_insn "*popcount2_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*popcount2" [(set (match_operand:SWI248 0 "register_operand" "=r") (popcount:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) @@ -12606,45 +12734,6 @@ (set_attr "type" "bitmanip") (set_attr "mode" "")]) -(define_insn "*popcount2_cmp" - [(set (reg FLAGS_REG) - (compare - (popcount:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "rm")) - (const_int 0))) - (set (match_operand:SWI248 0 "register_operand" "=r") - (popcount:SWI248 (match_dup 1)))] - "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" -{ -#if TARGET_MACHO - return "popcnt\t{%1, %0|%0, %1}"; -#else - return "popcnt{}\t{%1, %0|%0, %1}"; -#endif -} - [(set_attr "prefix_rep" "1") - (set_attr "type" "bitmanip") - (set_attr "mode" "")]) - -(define_insn "*popcountsi2_cmp_zext" - [(set (reg FLAGS_REG) - (compare - (popcount:SI (match_operand:SI 1 
"nonimmediate_operand" "rm")) - (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI(popcount:SI (match_dup 1))))] - "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" -{ -#if TARGET_MACHO - return "popcnt\t{%1, %0|%0, %1}"; -#else - return "popcnt{l}\t{%1, %0|%0, %1}"; -#endif -} - [(set_attr "prefix_rep" "1") - (set_attr "type" "bitmanip") - (set_attr "mode" "SI")]) - (define_expand "bswapdi2" [(set (match_operand:DI 0 "register_operand") (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index cb44dc3120cc..215c63c3957d 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -509,6 +509,11 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6) DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode", m_K8) +/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency + for bit-manipulation instructions. */ +DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi", + m_SANDYBRIDGE | m_HASWELL | m_GENERIC) + /*****************************************************************************/ /* This never worked well before. */ /*****************************************************************************/