re PR rtl-optimization/67609 (Generates wrong code for SSE2 _mm_load_pd)

PR rtl-opt/67609
* config/i386/i386.c (ix86_cannot_change_mode_class): Disallow
narrowing subregs on SSE and MMX registers.
* doc/tm.texi.in (CANNOT_CHANGE_MODE_CLASS): Clarify when subregs that
appear to be sub-words of multi-register pseudos must be rejected.
* doc/tm.texi: Regenerate.

testsuite/
* gcc.target/i386/pr67609-2.c: New test.

From-SVN: r229458
2025-04-19 18:10:54 +08:00 · 2015-10-27 12:59:41 -07:00 · 2015-10-27 12:59:41 -07:00 · a271b387e6
commit a271b387e6
parent 7263fa9ff1
6 changed files with 84 additions and 12 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2015-10-27  Richard Henderson  <rth@redhat.com>
+
+	PR rtl-opt/67609
+	* config/i386/i386.c (ix86_cannot_change_mode_class): Disallow
+	narrowing subregs on SSE and MMX registers.
+	* doc/tm.texi.in (CANNOT_CHANGE_MODE_CLASS): Clarify when subregs that
+	appear to be sub-words of multi-register pseudos must be rejected.
+	* doc/tm.texi: Regenerate.
+
 2015-10-27  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

 	PR target/68102
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -43031,15 +43031,22 @@ ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

+  /* Vector registers do not support QI or HImode loads.  If we don't
+     disallow a change to these modes, reload will assume it's ok to
+     drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
+     the vec_dupv4hi pattern.
+
+     Further, we cannot allow word_mode subregs of full vector modes.
+     Otherwise the middle-end will assume it's ok to store to
+     (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
+     of the 128-bit register.  However, after reload the subreg will
+     be dropped leaving a plain DImode store.  This is indistinguishable
+     from a "normal" DImode move, and so we're justified to use movsd,
+     which modifies the entire 128-bit register.
+
+     Combining these two conditions, disallow all narrowing mode changes.  */
  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
-    {
-      /* Vector registers do not support QI or HImode loads.  If we don't
-	 disallow a change to these modes, reload will assume it's ok to
-	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
-	 the vec_dupv4hi pattern.  */
-      if (GET_MODE_SIZE (from) < 4)
-	return true;
-    }
+    return GET_MODE_SIZE (to) < GET_MODE_SIZE (from);

  return false;
 }
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -2823,8 +2823,8 @@ in the reload pass.
 If defined, a C expression that returns nonzero for a @var{class} for which
 a change from mode @var{from} to mode @var{to} is invalid.

-For the example, loading 32-bit integer or floating-point objects into
-floating-point registers on the Alpha extends them to 64 bits.
+For example, loading 32-bit integer or floating-point objects into
+floating-point registers on Alpha extends them to 64 bits.
 Therefore loading a 64-bit object and then storing it as a 32-bit object
 does not store the low-order 32 bits, as would be the case for a normal
 register.  Therefore, @file{alpha.h} defines @code{CANNOT_CHANGE_MODE_CLASS}
@@ -2835,6 +2835,17 @@ as below:
  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
   ? reg_classes_intersect_p (FLOAT_REGS, (CLASS)) : 0)
@end smallexample
+
+Even if storing from a register in mode @var{to} would be valid,
+if both @var{from} and @code{raw_reg_mode} for @var{class} are wider
+than @code{word_mode}, then we must prevent @var{to} narrowing the
+mode.  This happens when the middle-end assumes that it can load
+or store pieces of an @var{N}-word pseudo, and that the pseudo will
+eventually be allocated to @var{N} @code{word_mode} hard registers.
+Failure to prevent this kind of mode change will result in the
+entire @code{raw_reg_mode} being modified instead of the partial
+value that the middle-end intended.
+
@end defmac

@deftypefn {Target Hook} reg_class_t TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS (int, @var{reg_class_t})
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -2461,8 +2461,8 @@ in the reload pass.
 If defined, a C expression that returns nonzero for a @var{class} for which
 a change from mode @var{from} to mode @var{to} is invalid.

-For the example, loading 32-bit integer or floating-point objects into
-floating-point registers on the Alpha extends them to 64 bits.
+For example, loading 32-bit integer or floating-point objects into
+floating-point registers on Alpha extends them to 64 bits.
 Therefore loading a 64-bit object and then storing it as a 32-bit object
 does not store the low-order 32 bits, as would be the case for a normal
 register.  Therefore, @file{alpha.h} defines @code{CANNOT_CHANGE_MODE_CLASS}
@@ -2473,6 +2473,17 @@ as below:
  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
   ? reg_classes_intersect_p (FLOAT_REGS, (CLASS)) : 0)
@end smallexample
+
+Even if storing from a register in mode @var{to} would be valid,
+if both @var{from} and @code{raw_reg_mode} for @var{class} are wider
+than @code{word_mode}, then we must prevent @var{to} narrowing the
+mode.  This happens when the middle-end assumes that it can load
+or store pieces of an @var{N}-word pseudo, and that the pseudo will
+eventually be allocated to @var{N} @code{word_mode} hard registers.
+Failure to prevent this kind of mode change will result in the
+entire @code{raw_reg_mode} being modified instead of the partial
+value that the middle-end intended.
+
@end defmac

@hook TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2015-10-27  Richard Henderson  <rth@redhat.com>
+
+	PR rtl-opt/67609
+	* gcc.target/i386/pr67609-2.c: New test.
+
 2015-10-27  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

 	PR target/68102
--- a/gcc/testsuite/gcc.target/i386/pr67609-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr67609-2.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include <stdlib.h>
+#include <emmintrin.h>
+
+__m128d reg = { 2.0, 4.0 };
+
+void
+__attribute__((noinline))
+set_lower (double b)
+{
+  double v[2];
+  _mm_store_pd(v, reg);
+  v[0] = b;
+  reg = _mm_load_pd(v);
+}
+
+int
+main ()
+{
+  set_lower (6.0);
+
+  if (reg[1] != 4.0)
+    abort ();
+
+  return 0;
+}