From 26cd9add744f5649af087552808b388c3109b53a Mon Sep 17 00:00:00 2001 From: Enkovich Ilya Date: Wed, 21 Dec 2011 11:52:27 +0000 Subject: [PATCH] re PR target/50038 (redundant zero extensions) gcc/ 2011-12-21 Enkovich Ilya PR target/50038 * implicit-zee.c: Delete. * ree.c: New file. * Makefile.in: Replace implicit-zee.c with ree.c. * config/i386/i386.c (ix86_option_override_internal): Rename flag_zee to flag_ree. * common.opt (fzee): Ignored. (free): New. * passes.c (init_optimization_passes): Replace pass_implicit_zee with pass_ree. * tree-pass.h (pass_implicit_zee): Delete. (pass_ree): New. * timevar.def (TV_ZEE): Delete. (TV_REE): New. * doc/invoke.texi: Add -free description. gcc/testsuite/ 2011-12-21 Enkovich Ilya PR target/50038 From-SVN: r182574 --- gcc/ChangeLog | 18 ++ gcc/Makefile.in | 4 +- gcc/common.opt | 8 +- gcc/config/i386/i386.c | 4 +- gcc/doc/invoke.texi | 10 +- gcc/passes.c | 2 +- gcc/{implicit-zee.c => ree.c} | 504 ++++++++++++++++++--------------- gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.dg/pr50038.c | 20 ++ gcc/timevar.def | 2 +- gcc/tree-pass.h | 2 +- 11 files changed, 337 insertions(+), 242 deletions(-) rename gcc/{implicit-zee.c => ree.c} (59%) create mode 100644 gcc/testsuite/gcc.dg/pr50038.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8cded1cf1787..e8f974abb9fa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2011-12-21 Enkovich Ilya + + PR target/50038 + * implicit-zee.c: Delete. + * ree.c: New file. + * Makefile.in: Replace implicit-zee.c with ree.c. + * config/i386/i386.c (ix86_option_override_internal): Rename + flag_zee to flag_ree. + * common.opt (fzee): Ignored. + (free): New. + * passes.c (init_optimization_passes): Replace pass_implicit_zee + with pass_ree. + * tree-pass.h (pass_implicit_zee): Delete. + (pass_ree): New. + * timevar.def (TV_ZEE): Delete. + (TV_REE): New. + * doc/invoke.texi: Add -free description. + 2011-12-21 Tristan Gingold * config/vms/vms-protos.h (vms_function_section): New prototype. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 57e155dc5c6e..33e156962933 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1251,7 +1251,7 @@ OBJS = \ hw-doloop.o \ hwint.o \ ifcvt.o \ - implicit-zee.o \ + ree.o \ incpath.o \ init-regs.o \ integrate.o \ @@ -3028,7 +3028,7 @@ fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ web.o : web.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ hard-reg-set.h $(FLAGS_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h $(DIAGNOSTIC_CORE_H) \ insn-config.h $(RECOG_H) $(DF_H) $(OBSTACK_H) $(TIMEVAR_H) $(TREE_PASS_H) -implicit-zee.o : implicit-zee.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ +ree.o : ree.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ hard-reg-set.h $(FLAGS_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h \ $(DF_H) $(TIMEVAR_H) tree-pass.h $(RECOG_H) $(EXPR_H) \ $(REGS_H) $(TREE_H) $(TM_P_H) insn-config.h $(INSN_ATTR_H) $(DIAGNOSTIC_CORE_H) \ diff --git a/gcc/common.opt b/gcc/common.opt index 55d3f2d44d12..2a2c679cc130 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1781,8 +1781,12 @@ Common Ignore Does nothing. Preserved for backward compatibility. fzee -Common Report Var(flag_zee) Init(0) -Eliminate redundant zero extensions on targets that support implicit extensions. +Common Ignore +Does nothing. Preserved for backward compatibility. + +free +Common Report Var(flag_ree) Init(0) +Turn on Redundant Extensions Elimination pass. fshow-column Common Report Var(flag_show_column) Init(1) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index f2ab36377b4b..c913c13d479f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3449,8 +3449,8 @@ ix86_option_override_internal (bool main_args_p) in case they weren't overwritten by command line options. */ if (TARGET_64BIT) { - if (optimize > 1 && !global_options_set.x_flag_zee) - flag_zee = 1; + if (optimize > 1 && !global_options_set.x_flag_ree) + flag_ree = 1; if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer) flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER; if (flag_asynchronous_unwind_tables == 2) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 36c37e081ee5..99427553edcc 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -385,7 +385,7 @@ Objective-C and Objective-C++ Dialects}. -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol -fprofile-generate=@var{path} @gol -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol --freciprocal-math -fregmove -frename-registers -freorder-blocks @gol +-freciprocal-math -free -fregmove -frename-registers -freorder-blocks @gol -freorder-blocks-and-partition -freorder-functions @gol -frerun-cse-after-loop -freschedule-modulo-scheduled-loops @gol -frounding-math -fsched2-use-superblocks -fsched-pressure @gol @@ -6709,6 +6709,14 @@ Perform a number of minor optimizations that are relatively expensive. Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}. +@item -free +@opindex free +Attempt to remove redundant extension instructions. This is especially +helpful for the x86-64 architecture which implicitly zero-extends in 64-bit +registers after writing to their lower 32-bit half. + +Enabled for x86 at levels @option{-O2}, @option{-O3}. + @item -foptimize-register-move @itemx -fregmove @opindex foptimize-register-move diff --git a/gcc/passes.c b/gcc/passes.c index d060bb4ae3d9..1125cfe76fcf 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -1490,7 +1490,7 @@ init_optimization_passes (void) NEXT_PASS (pass_postreload_cse); NEXT_PASS (pass_gcse2); NEXT_PASS (pass_split_after_reload); - NEXT_PASS (pass_implicit_zee); + NEXT_PASS (pass_ree); NEXT_PASS (pass_compare_elim_after_reload); NEXT_PASS (pass_branch_target_load_optimize1); NEXT_PASS (pass_thread_prologue_and_epilogue); diff --git a/gcc/implicit-zee.c b/gcc/ree.c similarity index 59% rename from gcc/implicit-zee.c rename to gcc/ree.c index 0795c29ff7b9..ce714dcacc67 100644 --- a/gcc/implicit-zee.c +++ b/gcc/ree.c @@ -1,8 +1,9 @@ -/* Redundant Zero-extension elimination for targets that implicitly - zero-extend writes to the lower 32-bit portion of 64-bit registers. - Copyright (C) 2010 Free Software Foundation, Inc. - Contributed by Sriraman Tallam (tmsriram@google.com) and - Silvius Rus (rus@google.com) +/* Redundant Extension Elimination pass for the GNU compiler. + Copyright (C) 2010-2011 Free Software Foundation, Inc. + Contributed by Ilya Enkovich (ilya.enkovich@intel.com) + + Based on the Redundant Zero-extension elimination pass contributed by + Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com). This file is part of GCC. @@ -23,63 +24,63 @@ along with GCC; see the file COPYING3. If not see /* Problem Description : -------------------- - This pass is intended to be applicable only to targets that implicitly - zero-extend 64-bit registers after writing to their lower 32-bit half. - For instance, x86_64 zero-extends the upper bits of a register - implicitly whenever an instruction writes to its lower 32-bit half. - For example, the instruction *add edi,eax* also zero-extends the upper - 32-bits of rax after doing the addition. These zero extensions come - for free and GCC does not always exploit this well. That is, it has - been observed that there are plenty of cases where GCC explicitly - zero-extends registers for x86_64 that are actually useless because - these registers were already implicitly zero-extended in a prior - instruction. This pass tries to eliminate such useless zero extension - instructions. + This pass is intended to remove redundant extension instructions. + Such instructions appear for different reasons. We expect some of + them due to implicit zero-extension in 64-bit registers after writing + to their lower 32-bit half (e.g. for the x86-64 architecture). + Another possible reason is a type cast which follows a load (for + instance a register restore) and which can be combined into a single + instruction, and for which earlier local passes, e.g. the combiner, + weren't able to optimize. How does this pass work ? -------------------------- This pass is run after register allocation. Hence, all registers that - this pass deals with are hard registers. This pass first looks for a - zero-extension instruction that could possibly be redundant. Such zero - extension instructions show up in RTL with the pattern : - (set (reg:DI x) (zero_extend:DI (reg:SI x))). - where x can be any one of the 64-bit hard registers. + this pass deals with are hard registers. This pass first looks for an + extension instruction that could possibly be redundant. Such extension + instructions show up in RTL with the pattern : + (set (reg: x) (any_extend: (reg: x))), + where x can be any hard register. Now, this pass tries to eliminate this instruction by merging the - zero-extension with the definitions of register x. For instance, if + extension with the definitions of register x. For instance, if one of the definitions of register x was : (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))), + followed by extension : + (set (reg:DI x) (zero_extend:DI (reg:SI x))) then the combination converts this into : (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))). If all the merged definitions are recognizable assembly instructions, - the zero-extension is effectively eliminated. For example, in x86_64, - implicit zero-extensions are captured with appropriate patterns in the - i386.md file. Hence, these merged definition can be matched to a single - assembly instruction. The original zero-extension instruction is then - deleted if all the definitions can be merged. + the extension is effectively eliminated. + + For example, for the x86-64 architecture, implicit zero-extensions + are captured with appropriate patterns in the i386.md file. Hence, + these merged definition can be matched to a single assembly instruction. + The original extension instruction is then deleted if all the + definitions can be merged. However, there are cases where the definition instruction cannot be - merged with a zero-extend. Examples are CALL instructions. In such - cases, the original zero extension is not redundant and this pass does + merged with an extension. Examples are CALL instructions. In such + cases, the original extension is not redundant and this pass does not delete it. Handling conditional moves : ---------------------------- - Architectures like x86_64 support conditional moves whose semantics for - zero-extension differ from the other instructions. For instance, the + Architectures like x86-64 support conditional moves whose semantics for + extension differ from the other instructions. For instance, the instruction *cmov ebx, eax* zero-extends eax onto rax only when the move from ebx to eax happens. - Otherwise, eax may not be zero-extended. Conditional moves appear as + Otherwise, eax may not be zero-extended. Consider conditional move as RTL instructions of the form (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))). - This pass tries to merge a zero-extension with a conditional move by - actually merging the defintions of y and z with a zero-extend and then + This pass tries to merge an extension with a conditional move by + actually merging the defintions of y and z with an extension and then converting the conditional move into : (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))). - Since registers y and z are zero-extended, register x will also be - zero-extended after the conditional move. Note that this step has to - be done transitively since the definition of a conditional copy can be + Since registers y and z are extended, register x will also be extended + after the conditional move. Note that this step has to be done + transitively since the definition of a conditional copy can be another conditional copy. Motivating Example I : @@ -100,7 +101,7 @@ along with GCC; see the file COPYING3. If not see } ********************************************** - $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination) + $ gcc -O2 bad_code.c ........ 400315: b8 4e 00 00 00 mov $0x4e,%eax 40031a: 0f af f8 imul %eax,%edi @@ -114,7 +115,7 @@ along with GCC; see the file COPYING3. If not see 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax 400341: c3 retq - $ gcc -O2 -fzee bad_code.c + $ gcc -O2 -free bad_code.c ...... 400315: 6b ff 4e imul $0x4e,%edi,%edi 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax @@ -141,7 +142,7 @@ along with GCC; see the file COPYING3. If not see return (unsigned long long)(z); } - $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination) + $ gcc -O2 bad_code.c ............ 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx 400363: 89 f8 mov %edi,%eax @@ -151,7 +152,7 @@ along with GCC; see the file COPYING3. If not see 40036d: 89 c0 mov %eax,%eax --> Useless extend 40036f: c3 retq - $ gcc -O2 -fzee bad_code.c + $ gcc -O2 -free bad_code.c ............. 400360: 89 fa mov %edi,%edx 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax @@ -161,17 +162,57 @@ along with GCC; see the file COPYING3. If not see 40036c: 48 0f 42 c6 cmovb %rsi,%rax 400370: c3 retq + Motivating Example III : + --------------------- + + Here is an example with a type cast. + + For this program : + ********************************************** + + void test(int size, unsigned char *in, unsigned char *out) + { + int i; + unsigned char xr, xg, xy=0; + + for (i = 0; i < size; i++) { + xr = *in++; + xg = *in++; + xy = (unsigned char) ((19595*xr + 38470*xg) >> 16); + *out++ = xy; + } + } + + $ gcc -O2 bad_code.c + ............ + 10: 0f b6 0e movzbl (%rsi),%ecx + 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax + 17: 48 83 c6 02 add $0x2,%rsi + 1b: 0f b6 c9 movzbl %cl,%ecx --> Useless extend + 1e: 0f b6 c0 movzbl %al,%eax --> Useless extend + 21: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx + 27: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax + + $ gcc -O2 -free bad_code.c + ............. + 10: 0f b6 0e movzbl (%rsi),%ecx + 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax + 17: 48 83 c6 02 add $0x2,%rsi + 1b: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx + 21: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax Usefulness : ---------- - This pass reduces the dynamic instruction count of a compression benchmark - by 2.8% and improves its run time by about 1%. The compression benchmark - had the following code sequence in a very hot region of code before ZEE - optimized it : + The original redundant zero-extension elimination pass reported reduction + of the dynamic instruction count of a compression benchmark by 2.8% and + improvement of its run time by about 1%. - shr $0x5, %edx - mov %edx, %edx --> Useless zero-extend */ + The additional performance gain with the enhanced pass is mostly expected + on in-order architectures where redundancy cannot be compensated by out of + order execution. Measurements showed up to 10% performance gain (reduced + run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance + gain 1%. */ #include "config.h" @@ -205,7 +246,7 @@ along with GCC; see the file COPYING3. If not see #include "cgraph.h" /* This says if a register is newly created for the purpose of - zero-extension. */ + extension. */ enum insn_merge_code { @@ -213,13 +254,26 @@ enum insn_merge_code MERGE_SUCCESS }; +/* This structure is used to hold data about candidate for + elimination. */ + +typedef struct GTY(()) ext_cand +{ + rtx insn; + const_rtx expr; + enum machine_mode src_mode; +} ext_cand, *ext_cand_ref; + +DEF_VEC_O(ext_cand); +DEF_VEC_ALLOC_O(ext_cand, heap); + /* This says if a INSN UID or its definition has already been merged - with a zero-extend or not. */ + with a extension or not. */ static enum insn_merge_code *is_insn_merge_attempted; static int max_insn_uid; -/* Returns the merge code status for INSN. */ +/* Return the merge code status for INSN. */ static enum insn_merge_code get_insn_status (rtx insn) @@ -228,7 +282,7 @@ get_insn_status (rtx insn) return is_insn_merge_attempted[INSN_UID (insn)]; } -/* Sets the merge code status of INSN to CODE. */ +/* Set the merge code status of INSN to CODE. */ static void set_insn_status (rtx insn, enum insn_merge_code code) @@ -237,78 +291,70 @@ set_insn_status (rtx insn, enum insn_merge_code code) is_insn_merge_attempted[INSN_UID (insn)] = code; } -/* Given a insn (CURR_INSN) and a pointer to the SET rtx (ORIG_SET) - that needs to be modified, this code modifies the SET rtx to a - new SET rtx that zero_extends the right hand expression into a DImode - register (NEWREG) on the left hand side. Note that multiple - assumptions are made about the nature of the set that needs - to be true for this to work and is called from merge_def_and_ze. +/* Given a insn (CURR_INSN), an extension candidate for removal (CAND) + and a pointer to the SET rtx (ORIG_SET) that needs to be modified, + this code modifies the SET rtx to a new SET rtx that extends the + right hand expression into a register on the left hand side. Note + that multiple assumptions are made about the nature of the set that + needs to be true for this to work and is called from merge_def_and_ext. Original : - (set (reg:SI a) (expression)) + (set (reg a) (expression)) Transform : - (set (reg:DI a) (zero_extend (expression))) + (set (reg a) (extend (expression))) Special Cases : - If the expression is a constant or another zero_extend directly - assign it to the DI mode register. */ + If the expression is a constant or another extend directly + assign it to the register. */ static bool -combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg) +combine_set_extend (ext_cand_ref cand, rtx curr_insn, rtx *orig_set) { rtx temp_extension, simplified_temp_extension, new_set, new_const_int; - rtx orig_src; - HOST_WIDE_INT val; - unsigned int mask, delta_width; + rtx orig_src, cand_src; + rtx newreg; + enum machine_mode dst_mode = GET_MODE (SET_DEST (cand->expr)); /* Change the SET rtx and validate it. */ orig_src = SET_SRC (*orig_set); + cand_src = SET_SRC (cand->expr); new_set = NULL_RTX; - /* The right hand side can also be VOIDmode. These cases have to be - handled differently. */ + newreg = gen_rtx_REG (dst_mode, REGNO (SET_DEST (*orig_set))); - if (GET_MODE (orig_src) != SImode) + /* Merge constants by directly moving the constant into the + register under some conditions. */ + + if (GET_CODE (orig_src) == CONST_INT + && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (dst_mode)) { - /* Merge constants by directly moving the constant into the - DImode register under some conditions. */ - - if (GET_CODE (orig_src) == CONST_INT - && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (SImode)) - { - if (INTVAL (orig_src) >= 0) - new_set = gen_rtx_SET (VOIDmode, newreg, orig_src); - else if (INTVAL (orig_src) < 0) - { - /* Zero-extending a negative SImode integer into DImode - makes it a positive integer. Convert the given negative - integer into the appropriate integer when zero-extended. */ - - delta_width = HOST_BITS_PER_WIDE_INT - GET_MODE_BITSIZE (SImode); - mask = (~(unsigned HOST_WIDE_INT) 0) >> delta_width; - val = INTVAL (orig_src); - val = val & mask; - new_const_int = gen_rtx_CONST_INT (VOIDmode, val); - new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int); - } - else - return false; - } + if (INTVAL (orig_src) >= 0 || GET_CODE (cand_src) == SIGN_EXTEND) + new_set = gen_rtx_SET (VOIDmode, newreg, orig_src); else - { - /* This is mostly due to a call insn that should not be - optimized. */ - - return false; - } + { + /* Zero-extend the negative constant by masking out the bits outside + the source mode. */ + enum machine_mode src_mode = GET_MODE (SET_DEST (*orig_set)); + new_const_int + = GEN_INT (INTVAL (orig_src) & GET_MODE_MASK (src_mode)); + new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int); + } } - else if (GET_CODE (orig_src) == ZERO_EXTEND) + else if (GET_MODE (orig_src) == VOIDmode) { - /* Here a zero-extend is used to get to SI. Why not make it - all the way till DI. */ + /* This is mostly due to a call insn that should not be + optimized. */ - temp_extension = gen_rtx_ZERO_EXTEND (DImode, XEXP (orig_src, 0)); + return false; + } + else if (GET_CODE (orig_src) == GET_CODE (cand_src)) + { + /* Here is a sequence of two extensions. Try to merge them into a + single one. */ + + temp_extension + = gen_rtx_fmt_e (GET_CODE (orig_src), dst_mode, XEXP (orig_src, 0)); simplified_temp_extension = simplify_rtx (temp_extension); if (simplified_temp_extension) temp_extension = simplified_temp_extension; @@ -316,7 +362,7 @@ combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg) } else if (GET_CODE (orig_src) == IF_THEN_ELSE) { - /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise, + /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise, in general, IF_THEN_ELSE should not be combined. */ return false; @@ -325,7 +371,8 @@ combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg) { /* This is the normal case we expect. */ - temp_extension = gen_rtx_ZERO_EXTEND (DImode, orig_src); + temp_extension + = gen_rtx_fmt_e (GET_CODE (cand_src), dst_mode, orig_src); simplified_temp_extension = simplify_rtx (temp_extension); if (simplified_temp_extension) temp_extension = simplified_temp_extension; @@ -334,14 +381,14 @@ combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg) gcc_assert (new_set != NULL_RTX); - /* This change is a part of a group of changes. Hence, + /* This change is a part of a group of changes. Hence, validate_change will not try to commit the change. */ if (validate_change (curr_insn, orig_set, new_set, true)) { if (dump_file) { - fprintf (dump_file, "Merged Instruction with ZERO_EXTEND:\n"); + fprintf (dump_file, "Merged Instruction with EXTEND:\n"); print_rtl_single (dump_file, curr_insn); } return true; @@ -349,20 +396,8 @@ combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg) return false; } -/* This returns the DI mode for the SI register REG_SI. */ - -static rtx -get_reg_di (rtx reg_si) -{ - rtx newreg; - - newreg = gen_rtx_REG (DImode, REGNO (reg_si)); - gcc_assert (newreg); - return newreg; -} - /* Treat if_then_else insns, where the operands of both branches - are registers, as copies. For instance, + are registers, as copies. For instance, Original : (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c))) Transformed : @@ -370,7 +405,7 @@ get_reg_di (rtx reg_si) DEF_INSN is the if_then_else insn. */ static bool -transform_ifelse (rtx def_insn) +transform_ifelse (ext_cand_ref cand, rtx def_insn) { rtx set_insn = PATTERN (def_insn); rtx srcreg, dstreg, srcreg2; @@ -378,16 +413,17 @@ transform_ifelse (rtx def_insn) rtx ifexpr; rtx cond; rtx new_set; + enum machine_mode dst_mode = GET_MODE (SET_DEST (cand->expr)); gcc_assert (GET_CODE (set_insn) == SET); cond = XEXP (SET_SRC (set_insn), 0); dstreg = SET_DEST (set_insn); srcreg = XEXP (SET_SRC (set_insn), 1); srcreg2 = XEXP (SET_SRC (set_insn), 2); - map_srcreg = get_reg_di (srcreg); - map_srcreg2 = get_reg_di (srcreg2); - map_dstreg = get_reg_di (dstreg); - ifexpr = gen_rtx_IF_THEN_ELSE (DImode, cond, map_srcreg, map_srcreg2); + map_srcreg = gen_rtx_REG (dst_mode, REGNO (srcreg)); + map_srcreg2 = gen_rtx_REG (dst_mode, REGNO (srcreg2)); + map_dstreg = gen_rtx_REG (dst_mode, REGNO (dstreg)); + ifexpr = gen_rtx_IF_THEN_ELSE (dst_mode, cond, map_srcreg, map_srcreg2); new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr); if (validate_change (def_insn, &PATTERN (def_insn), new_set, true)) @@ -458,7 +494,7 @@ get_defs (rtx curr_insn, rtx which_reg, VEC (rtx,heap) **dest) } /* rtx function to check if this SET insn, EXPR, is a conditional copy insn : - (set (reg:SI a ) (IF_THEN_ELSE (cond) (reg:SI b) (reg:SI c))) + (set (reg a ) (IF_THEN_ELSE (cond) (reg b) (reg c))) Called from is_insn_cond_copy. DATA stores the two registers on each side of the condition. */ @@ -469,12 +505,9 @@ is_this_a_cmove (rtx expr, void *data) if (GET_CODE (expr) == SET && GET_CODE (SET_DEST (expr)) == REG - && GET_MODE (SET_DEST (expr)) == SImode && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG - && GET_MODE (XEXP (SET_SRC (expr), 1)) == SImode - && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG - && GET_MODE (XEXP (SET_SRC (expr), 2)) == SImode) + && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG) { ((rtx *)data)[0] = XEXP (SET_SRC (expr), 1); ((rtx *)data)[1] = XEXP (SET_SRC (expr), 2); @@ -484,7 +517,7 @@ is_this_a_cmove (rtx expr, void *data) } /* This returns 1 if it found - (SET (reg:SI REGNO (def_reg)) (if_then_else (cond) (REG:SI x1) (REG:SI x2))) + (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2))) in the DEF_INSN pattern. It stores the x1 and x2 in COPY_REG_1 and COPY_REG_2. */ @@ -515,22 +548,22 @@ is_insn_cond_copy (rtx def_insn, rtx *copy_reg_1, rtx *copy_reg_2) return 0; } -/* Reaching Definitions of the zero-extended register could be conditional - copies or regular definitions. This function separates the two types into - two lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a - reaching definition is a conditional copy, combining the zero_extend with - this definition is wrong. Conditional copies are merged by transitively - merging its definitions. The defs_list is populated with all the reaching - definitions of the zero-extension instruction (ZERO_EXTEND_INSN) which must - be merged with a zero_extend. The copies_list contains all the conditional - moves that will later be extended into a DI mode conditonal move if all the - merges are successful. The function returns false when there is a failure - in getting some definitions, like that of parameters. It returns 1 upon - success, 0 upon failure and 2 when all definitions of the ZERO_EXTEND_INSN - were merged previously. */ +/* Reaching Definitions of the extended register could be conditional copies + or regular definitions. This function separates the two types into two + lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a reaching + definition is a conditional copy, combining the extend with this definition + is wrong. Conditional copies are merged by transitively merging its + definitions. The defs_list is populated with all the reaching definitions + of the extension instruction (EXTEND_INSN) which must be merged with an + extension. The copies_list contains all the conditional moves that will + later be extended into a wider mode conditonal move if all the merges are + successful. The function returns false when there is a failure in getting + some definitions, like that of parameters. It returns 1 upon success, 0 + upon failure and 2 when all definitions of the EXTEND_INSN were merged + previously. */ static int -make_defs_and_copies_lists (rtx zero_extend_insn, rtx set_pat, +make_defs_and_copies_lists (rtx extend_insn, rtx set_pat, VEC (rtx,heap) **defs_list, VEC (rtx,heap) **copies_list) { @@ -547,7 +580,7 @@ make_defs_and_copies_lists (rtx zero_extend_insn, rtx set_pat, work_list = VEC_alloc (rtx, heap, 8); /* Initialize the Work List */ - n_worklist = get_defs (zero_extend_insn, srcreg, &work_list); + n_worklist = get_defs (extend_insn, srcreg, &work_list); if (n_worklist == 0) { @@ -619,18 +652,19 @@ make_defs_and_copies_lists (rtx zero_extend_insn, rtx set_pat, return 1; } -/* Merge the DEF_INSN with a zero-extend. Calls combine_set_zero_extend +/* Merge the DEF_INSN with an extension. Calls combine_set_extend on the SET pattern. */ static bool -merge_def_and_ze (rtx def_insn) +merge_def_and_ext (ext_cand_ref cand, rtx def_insn) { + enum machine_mode ext_src_mode; enum rtx_code code; - rtx setreg; rtx *sub_rtx; rtx s_expr; int i; + ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0)); code = GET_CODE (PATTERN (def_insn)); sub_rtx = NULL; @@ -662,27 +696,25 @@ merge_def_and_ze (rtx def_insn) gcc_assert (sub_rtx != NULL); if (GET_CODE (SET_DEST (*sub_rtx)) == REG - && GET_MODE (SET_DEST (*sub_rtx)) == SImode) + && GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode) { - setreg = get_reg_di (SET_DEST (*sub_rtx)); - return combine_set_zero_extend (def_insn, sub_rtx, setreg); + return combine_set_extend (cand, def_insn, sub_rtx); } - else - return false; - return true; + + return false; } /* This function goes through all reaching defs of the source - of the zero extension instruction (ZERO_EXTEND_INSN) and - tries to combine the zero extension with the definition - instruction. The changes are made as a group so that even - if one definition cannot be merged, all reaching definitions - end up not being merged. When a conditional copy is encountered, - merging is attempted transitively on its definitions. It returns - true upon success and false upon failure. */ + of the candidate for elimination (CAND) and tries to combine + the extension with the definition instruction. The changes + are made as a group so that even if one definition cannot be + merged, all reaching definitions end up not being merged. + When a conditional copy is encountered, merging is attempted + transitively on its definitions. It returns true upon success + and false upon failure. */ static bool -combine_reaching_defs (rtx zero_extend_insn, rtx set_pat) +combine_reaching_defs (ext_cand_ref cand, rtx set_pat) { rtx def_insn; bool merge_successful = true; @@ -698,11 +730,11 @@ combine_reaching_defs (rtx zero_extend_insn, rtx set_pat) defs_list = VEC_alloc (rtx, heap, 8); copies_list = VEC_alloc (rtx, heap, 8); - outcome = make_defs_and_copies_lists (zero_extend_insn, + outcome = make_defs_and_copies_lists (cand->insn, set_pat, &defs_list, &copies_list); - /* outcome == 2 implies that all the definitions for this zero_extend were - merged while previously when handling other zero_extends. */ + /* outcome == 2 implies that all the definitions for this extension were + merged while previously when handling other extension. */ if (outcome == 2) { @@ -731,7 +763,7 @@ combine_reaching_defs (rtx zero_extend_insn, rtx set_pat) merge_code = get_insn_status (def_insn); gcc_assert (merge_code == MERGE_NOT_ATTEMPTED); - if (merge_def_and_ze (def_insn)) + if (merge_def_and_ext (cand, def_insn)) VEC_safe_push (rtx, heap, vec, def_insn); else { @@ -747,7 +779,7 @@ combine_reaching_defs (rtx zero_extend_insn, rtx set_pat) { FOR_EACH_VEC_ELT (rtx, copies_list, i, def_insn) { - if (transform_ifelse (def_insn)) + if (transform_ifelse (cand, def_insn)) { VEC_safe_push (rtx, heap, vec, def_insn); } @@ -763,7 +795,7 @@ combine_reaching_defs (rtx zero_extend_insn, rtx set_pat) { /* Commit the changes here if possible */ /* XXX : Now, it is an all or nothing scenario. Even if one definition - cannot be merged we totally bail. In future, allow zero-extensions to + cannot be merged we totally fail. In future, allow extensions to be partially eliminated along those paths where the definitions could be merged. */ @@ -786,7 +818,7 @@ combine_reaching_defs (rtx zero_extend_insn, rtx set_pat) { /* Changes need not be cancelled explicitly as apply_change_group does it. Print list of definitions in the dump_file for debug - purposes. This zero-extension cannot be deleted. */ + purposes. This extension cannot be deleted. */ if (dump_file) { @@ -810,27 +842,36 @@ combine_reaching_defs (rtx zero_extend_insn, rtx set_pat) return false; } -/* Carry information about zero-extensions while walking the RTL. */ +/* Carry information about extensions while walking the RTL. */ -struct zero_extend_info +struct extend_info { - /* The insn where the zero-extension is. */ + /* The insn where the extension is. */ rtx insn; /* The list of candidates. */ - VEC (rtx, heap) *insn_list; + VEC (ext_cand, heap) *insn_list; }; -/* Add a zero-extend pattern that could be eliminated. This is called via - note_stores from find_removable_zero_extends. */ +static void +add_ext_candidate (VEC (ext_cand, heap) **exts, + rtx insn, const_rtx expr) +{ + ext_cand_ref ec = VEC_safe_push (ext_cand, heap, *exts, NULL); + ec->insn = insn; + ec->expr = expr; +} + +/* Add an extension pattern that could be eliminated. This is called via + note_stores from find_removable_extensions. */ static void -add_removable_zero_extend (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data) +add_removable_extension (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data) { - struct zero_extend_info *zei = (struct zero_extend_info *)data; + struct extend_info *rei = (struct extend_info *)data; rtx src, dest; - /* We are looking for SET (REG:DI N) (ZERO_EXTEND (REG:SI N)). */ + /* We are looking for SET (REG N) (EXTEND (REG N)). */ if (GET_CODE (expr) != SET) return; @@ -838,34 +879,32 @@ add_removable_zero_extend (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data) dest = SET_DEST (expr); if (REG_P (dest) - && GET_MODE (dest) == DImode - && GET_CODE (src) == ZERO_EXTEND + && (GET_CODE (src) == ZERO_EXTEND || GET_CODE (src) == SIGN_EXTEND) && REG_P (XEXP (src, 0)) - && GET_MODE (XEXP (src, 0)) == SImode && REGNO (dest) == REGNO (XEXP (src, 0))) { - if (get_defs (zei->insn, XEXP (src, 0), NULL)) - VEC_safe_push (rtx, heap, zei->insn_list, zei->insn); + if (get_defs (rei->insn, XEXP (src, 0), NULL)) + add_ext_candidate (&rei->insn_list, rei->insn, expr); else if (dump_file) { - fprintf (dump_file, "Cannot eliminate zero-extension: \n"); - print_rtl_single (dump_file, zei->insn); + fprintf (dump_file, "Cannot eliminate extension: \n"); + print_rtl_single (dump_file, rei->insn); fprintf (dump_file, "No defs. Could be extending parameters.\n"); } } } -/* Traverse the instruction stream looking for zero-extends and return the +/* Traverse the instruction stream looking for extensions and return the list of candidates. */ -static VEC (rtx,heap)* -find_removable_zero_extends (void) +static VEC (ext_cand, heap)* +find_removable_extensions (void) { - struct zero_extend_info zei; + struct extend_info rei; basic_block bb; rtx insn; - zei.insn_list = VEC_alloc (rtx, heap, 8); + rei.insn_list = VEC_alloc (ext_cand, heap, 8); FOR_EACH_BB (bb) FOR_BB_INSNS (bb, insn) @@ -873,29 +912,30 @@ find_removable_zero_extends (void) if (!NONDEBUG_INSN_P (insn)) continue; - zei.insn = insn; - note_stores (PATTERN (insn), add_removable_zero_extend, &zei); + rei.insn = insn; + note_stores (PATTERN (insn), add_removable_extension, &rei); } - return zei.insn_list; + return rei.insn_list; } /* This is the main function that checks the insn stream for redundant - zero extensions and tries to remove them if possible. */ + extensions and tries to remove them if possible. */ static unsigned int -find_and_remove_ze (void) +find_and_remove_re (void) { + ext_cand_ref curr_cand; rtx curr_insn = NULL_RTX; int i; int ix; long num_realized = 0; - long num_ze_opportunities = 0; - VEC (rtx, heap) *zeinsn_list; - VEC (rtx, heap) *zeinsn_del_list; + long num_re_opportunities = 0; + VEC (ext_cand, heap) *reinsn_list; + VEC (rtx, heap) *reinsn_del_list; /* Construct DU chain to get all reaching definitions of each - zero-extension instruction. */ + extension instruction. */ df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN); df_analyze (); @@ -909,80 +949,80 @@ find_and_remove_ze (void) for (i = 0; i < max_insn_uid; i++) is_insn_merge_attempted[i] = MERGE_NOT_ATTEMPTED; - num_ze_opportunities = num_realized = 0; + num_re_opportunities = num_realized = 0; - zeinsn_del_list = VEC_alloc (rtx, heap, 4); + reinsn_del_list = VEC_alloc (rtx, heap, 4); - zeinsn_list = find_removable_zero_extends (); + reinsn_list = find_removable_extensions (); - FOR_EACH_VEC_ELT (rtx, zeinsn_list, ix, curr_insn) + FOR_EACH_VEC_ELT (ext_cand, reinsn_list, ix, curr_cand) { - num_ze_opportunities++; - /* Try to combine the zero-extends with the definition here. */ + num_re_opportunities++; + /* Try to combine the extension with the definition here. */ if (dump_file) { - fprintf (dump_file, "Trying to eliminate zero extension : \n"); + fprintf (dump_file, "Trying to eliminate extension : \n"); print_rtl_single (dump_file, curr_insn); } - if (combine_reaching_defs (curr_insn, PATTERN (curr_insn))) + if (combine_reaching_defs (curr_cand, PATTERN (curr_cand->insn))) { if (dump_file) - fprintf (dump_file, "Eliminated the zero extension...\n"); + fprintf (dump_file, "Eliminated the extension...\n"); num_realized++; - VEC_safe_push (rtx, heap, zeinsn_del_list, curr_insn); + VEC_safe_push (rtx, heap, reinsn_del_list, curr_cand->insn); } } - /* Delete all useless zero extensions here in one sweep. */ - FOR_EACH_VEC_ELT (rtx, zeinsn_del_list, ix, curr_insn) + /* Delete all useless extensions here in one sweep. */ + FOR_EACH_VEC_ELT (rtx, reinsn_del_list, ix, curr_insn) delete_insn (curr_insn); free (is_insn_merge_attempted); - VEC_free (rtx, heap, zeinsn_list); - VEC_free (rtx, heap, zeinsn_del_list); + VEC_free (ext_cand, heap, reinsn_list); + VEC_free (rtx, heap, reinsn_del_list); - if (dump_file && num_ze_opportunities > 0) - fprintf (dump_file, "\n %s : num_zee_opportunities = %ld" - " num_realized = %ld\n", - current_function_name (), num_ze_opportunities, - num_realized); + if (dump_file && num_re_opportunities > 0) + fprintf (dump_file, "\n %s : num_re_opportunities = %ld " + "num_realized = %ld \n", + current_function_name (), + num_re_opportunities, num_realized); df_finish_pass (false); return 0; } -/* Find and remove redundant zero extensions. */ +/* Find and remove redundant extensions. */ static unsigned int -rest_of_handle_zee (void) +rest_of_handle_ree (void) { - timevar_push (TV_ZEE); - find_and_remove_ze (); - timevar_pop (TV_ZEE); + timevar_push (TV_REE); + find_and_remove_re (); + timevar_pop (TV_REE); return 0; } -/* Run zee pass when flag_zee is set at optimization level > 0. */ +/* Run REE pass when flag_ree is set at optimization level > 0. */ static bool -gate_handle_zee (void) +gate_handle_ree (void) { - return (optimize > 0 && flag_zee); + return (optimize > 0 && flag_ree); } -struct rtl_opt_pass pass_implicit_zee = +struct rtl_opt_pass pass_ree = { { RTL_PASS, - "zee", /* name */ - gate_handle_zee, /* gate */ - rest_of_handle_zee, /* execute */ + "ree", /* name */ + gate_handle_ree, /* gate */ + rest_of_handle_ree, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ - TV_ZEE, /* tv_id */ + TV_REE, /* tv_id */ 0, /* properties_required */ 0, /* properties_provided */ 0, /* properties_destroyed */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 629c263a1f41..713b5609ff17 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2011-12-21 Enkovich Ilya + + PR target/50038 + * gcc.dg/pr50038.c: New test. + 2011-12-20 Paolo Carlini PR c++/51621 diff --git a/gcc/testsuite/gcc.dg/pr50038.c b/gcc/testsuite/gcc.dg/pr50038.c new file mode 100644 index 000000000000..4d1c8f71ffd7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr50038.c @@ -0,0 +1,20 @@ +/* PR target/50038 */ +/* { dg-do compile { target x86_64-*-* } } */ +/* { dg-options "-O2" } */ + +void pr50038(int len, unsigned char *in, unsigned char *out) +{ + int i; + unsigned char xr, xg; + unsigned char xy=0; + for (i = 0; i < len; i++) + { + xr = *in++; + xg = *in++; + xy = (unsigned char) ((19595*xr + 38470*xg) >> 16); + + *out++ = xy; + } +} + +/* { dg-final { scan-assembler-times "movzbl" 2 } } */ diff --git a/gcc/timevar.def b/gcc/timevar.def index d4a60fceefad..1f5ea2a81bd1 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -230,7 +230,7 @@ DEFTIMEVAR (TV_RELOAD , "reload") DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs") DEFTIMEVAR (TV_SEQABSTR , "sequence abstraction") DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload") -DEFTIMEVAR (TV_ZEE , "zee") +DEFTIMEVAR (TV_REE , "ree") DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue") DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2") DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index cc8847e8dff3..984df376e041 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -530,7 +530,7 @@ extern struct rtl_opt_pass pass_stack_ptr_mod; extern struct rtl_opt_pass pass_initialize_regs; extern struct rtl_opt_pass pass_combine; extern struct rtl_opt_pass pass_if_after_combine; -extern struct rtl_opt_pass pass_implicit_zee; +extern struct rtl_opt_pass pass_ree; extern struct rtl_opt_pass pass_partition_blocks; extern struct rtl_opt_pass pass_match_asm_constraints; extern struct rtl_opt_pass pass_regmove;