From fe94440235cfaa57ee1c18abfde29c20fa3ff863 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <uros@gcc.gnu.org>
Date: Wed, 18 Oct 2017 22:19:05 +0200
Subject: [PATCH] re PR target/82580 (Optimize comparisons for __int128 on
 x86-64)

	PR target/82580
	* config/i386/i386-modes.def (CCGZ): New CC mode.
	* config/i386/i386.md (sub<mode>3_carry_ccgz): New insn pattern.
	* config/i386/predicates.md (ix86_comparison_operator):
	Handle CCGZmode.
	* config/i386/i386.c (ix86_expand_branch) <case E_TImode>:
	Emulate LE, LEU, GT, GTU, LT, LTU, GE and GEU double-word comparisons
	with double-word subtraction.
	(put_condition_code): Handle CCGZmode.

testsuite/ChangeLog:

	PR target/82580
	* gcc.target/i386/pr82580.c: New test.

From-SVN: r253867
---
 gcc/ChangeLog                           | 16 ++++++-
 gcc/config/i386/i386-modes.def          | 10 +++--
 gcc/config/i386/i386.c                  | 58 +++++++++++++++++++++++--
 gcc/config/i386/i386.md                 | 13 ++++++
 gcc/config/i386/predicates.md           | 14 ++++--
 gcc/testsuite/ChangeLog                 | 10 ++++-
 gcc/testsuite/gcc.target/i386/pr82580.c | 38 ++++++++++++++++
 7 files changed, 145 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr82580.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index da95f722c8e2..9ddb3fc5724a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2017-10-18  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/82580
+	* config/i386/i386-modes.def (CCGZ): New CC mode.
+	* config/i386/i386.md (sub<mode>3_carry_ccgz): New insn pattern.
+	* config/i386/predicates.md (ix86_comparison_operator):
+	Handle CCGZmode.
+	* config/i386/i386.c (ix86_expand_branch) <case E_TImode>:
+	Emulate LE, LEU, GT, GTU, LT, LTU, GE and GEU double-word comparisons
+	with double-word subtraction.
+	(put_condition_code): Handle CCGZmode.
+
 2017-10-18  Aldy Hernandez  <aldyh@redhat.com>
 
 	* wide-int.cc (debug) [const wide_int &]: New.
@@ -195,8 +207,8 @@
 2017-10-17  Qing Zhao <qing.zhao@oracle.com>
 	    Wilco Dijkstra <wilco.dijkstra@arm.com>
 
-        * builtins.c (expand_builtin_update_setjmp_buf): Add a
-        converstion to Pmode from the buf_addr.
+	* builtins.c (expand_builtin_update_setjmp_buf): Add a
+	conversion to Pmode from the buf_addr.
 
 2017-10-17  Richard Biener  <rguenther@suse.de>
 
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 83216e38758d..16bc1d8b71a8 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -39,19 +39,22 @@ ADJUST_ALIGNMENT (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 4);
    For the i386, we need separate modes when floating-point
    equality comparisons are being done.
 
-   Add CCNO to indicate comparisons against zero that requires
+   Add CCNO to indicate comparisons against zero that require
    Overflow flag to be unset.  Sign bit test is used instead and
    thus can be used to form "a&b>0" type of tests.
 
-   Add CCGC to indicate comparisons against zero that allows
+   Add CCGC to indicate comparisons against zero that allow
    unspecified garbage in the Carry flag.  This mode is used
    by inc/dec instructions.
 
-   Add CCGOC to indicate comparisons against zero that allows
+   Add CCGOC to indicate comparisons against zero that allow
    unspecified garbage in the Carry and Overflow flag. This
    mode is used to simulate comparisons of (a-b) and (a+b)
    against zero using sub/cmp/add operations.
 
+   Add CCGZ to indicate comparisons that allow unspecified garbage
+   in the Zero flag.  This mode is used in double-word comparisons.
+
    Add CCA to indicate that only the Above flag is valid.
    Add CCC to indicate that only the Carry flag is valid.
    Add CCO to indicate that only the Overflow flag is valid.
@@ -62,6 +65,7 @@ ADJUST_ALIGNMENT (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 4);
 CC_MODE (CCGC);
 CC_MODE (CCGOC);
 CC_MODE (CCNO);
+CC_MODE (CCGZ);
 CC_MODE (CCA);
 CC_MODE (CCC);
 CC_MODE (CCO);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 79eb4b5ea2c5..16cc7dbaba9d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -16732,6 +16732,7 @@ put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
   switch (code)
     {
     case EQ:
+      gcc_assert (mode != CCGZmode);
       switch (mode)
 	{
 	case E_CCAmode:
@@ -16755,6 +16756,7 @@ put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
 	}
       break;
     case NE:
+      gcc_assert (mode != CCGZmode);
       switch (mode)
 	{
 	case E_CCAmode:
@@ -16799,6 +16801,7 @@ put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
 
 	case E_CCmode:
 	case E_CCGCmode:
+	case E_CCGZmode:
 	  suffix = "l";
 	  break;
 
@@ -16807,7 +16810,7 @@ put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
 	}
       break;
     case LTU:
-      if (mode == CCmode)
+      if (mode == CCmode || mode == CCGZmode)
 	suffix = "b";
       else if (mode == CCCmode)
 	suffix = fp ? "b" : "c";
@@ -16824,6 +16827,7 @@ put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
 
 	case E_CCmode:
 	case E_CCGCmode:
+	case E_CCGZmode:
 	  suffix = "ge";
 	  break;
 
@@ -16832,7 +16836,7 @@ put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
 	}
       break;
     case GEU:
-      if (mode == CCmode)
+      if (mode == CCmode || mode == CCGZmode)
 	suffix = "nb";
       else if (mode == CCCmode)
 	suffix = fp ? "nb" : "nc";
@@ -18887,7 +18891,7 @@ output_fp_compare (rtx_insn *insn, rtx *operands,
 
   static char buf[40];
   const char *p, *r;
- 
+
   gcc_assert (STACK_TOP_P (xops[0]));
 
   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
@@ -21469,6 +21473,8 @@ ix86_match_ccmode (rtx insn, machine_mode req_mode)
     case E_CCZmode:
       break;
 
+    case E_CCGZmode:
+
     case E_CCAmode:
     case E_CCCmode:
     case E_CCOmode:
@@ -22177,6 +22183,52 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
 	      break;
 	    }
 
+	/* Emulate comparisons that do not depend on Zero flag with
+	   double-word subtraction.  Note that only Overflow, Sign
+	   and Carry flags are valid, so swap arguments and condition
+	   of comparisons that would otherwise test Zero flag.  */
+
+	switch (code)
+	  {
+	  case LE: case LEU: case GT: case GTU:
+	    std::swap (lo[0], lo[1]);
+	    std::swap (hi[0], hi[1]);
+	    code = swap_condition (code);
+	    /* FALLTHRU */
+
+	  case LT: case LTU: case GE: case GEU:
+	    {
+	      rtx (*cmp_insn) (rtx, rtx);
+	      rtx (*sbb_insn) (rtx, rtx, rtx);
+
+	      if (TARGET_64BIT)
+		cmp_insn = gen_cmpdi_1, sbb_insn = gen_subdi3_carry_ccgz;
+	      else
+		cmp_insn = gen_cmpsi_1, sbb_insn = gen_subsi3_carry_ccgz;
+
+	      if (!nonimmediate_operand (lo[0], submode))
+		lo[0] = force_reg (submode, lo[0]);
+	      if (!x86_64_general_operand (lo[1], submode))
+		lo[1] = force_reg (submode, lo[1]);
+
+	      if (!register_operand (hi[0], submode))
+		hi[0] = force_reg (submode, hi[0]);
+	      if (!x86_64_general_operand (hi[1], submode))
+		hi[1] = force_reg (submode, hi[1]);
+
+	      emit_insn (cmp_insn (lo[0], lo[1]));
+	      emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
+
+	      tmp = gen_rtx_REG (CCGZmode, FLAGS_REG);
+
+	      ix86_expand_branch (code, tmp, const0_rtx, label);
+	      return;
+	    }
+
+	  default:
+	    break;
+	  }
+
 	/* Otherwise, we need two or three jumps.  */
 
 	label2 = gen_label_rtx ();
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 512bd64c3c30..57a90dbe041d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6871,6 +6871,19 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
+(define_insn "sub<mode>3_carry_ccgz"
+  [(set (reg:CCGZ FLAGS_REG)
+	(compare:CCGZ
+	  (match_operand:DWIH 1 "register_operand" "0")
+	  (plus:DWIH
+	    (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+	    (match_operand:DWIH 2 "x86_64_general_operand" "rme"))))
+   (clobber (match_scratch:DWIH 0 "=r"))]
+  ""
+  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "subborrow<mode>"
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 0917fad15d41..4f3f1560f458 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1329,14 +1329,20 @@
   switch (code)
     {
     case EQ: case NE:
+      if (inmode == CCGZmode)
+	return false;
       return true;
-    case LT: case GE:
+    case GE: case LT:
       if (inmode == CCmode || inmode == CCGCmode
-	  || inmode == CCGOCmode || inmode == CCNOmode)
+	  || inmode == CCGOCmode || inmode == CCNOmode || inmode == CCGZmode)
 	return true;
       return false;
-    case LTU: case GTU: case LEU: case GEU:
-      if (inmode == CCmode || inmode == CCCmode)
+    case GEU: case LTU:
+      if (inmode == CCGZmode)
+	return true;
+      /* FALLTHRU */
+    case GTU: case LEU: /* These test ZF, which is garbage in CCGZmode.  */
+      if (inmode == CCmode || inmode == CCCmode)
 	return true;
       return false;
     case ORDERED: case UNORDERED:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7009460ec649..c44b420782f1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2017-10-18  Uros Bizjak  <ubizjak@gmail.com>
+	    Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/82580
+	* gcc.target/i386/pr82580.c: New test.
+
 2017-10-18  Thomas Koenig  <tkoenig@gcc.gnu.org>
 
 	PR libfortran/82233
@@ -91,8 +97,8 @@
 2017-10-17  Qing Zhao <qing.zhao@oracle.com>
 	    Wilco Dijkstra <wilco.dijkstra@arm.com>
 
-        PR middle-end/80295
-        * gcc.target/aarch64/pr80295.c: New test.
+	PR middle-end/80295
+	* gcc.target/aarch64/pr80295.c: New test.
 
 2017-10-17  Richard Biener  <rguenther@suse.de>
 
diff --git a/gcc/testsuite/gcc.target/i386/pr82580.c b/gcc/testsuite/gcc.target/i386/pr82580.c
new file mode 100644
index 000000000000..ce4bf9230a8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82580.c
@@ -0,0 +1,38 @@
+/* PR target/82580 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#ifdef __SIZEOF_INT128__
+typedef unsigned __int128 U;
+typedef signed __int128 S;
+#else
+typedef unsigned long long U;
+typedef signed long long S;
+#endif
+void bar (void);
+int f0 (U x, U y) { return x == y; }
+int f1 (U x, U y) { return x != y; }
+int f2 (U x, U y) { return x > y; }
+int f3 (U x, U y) { return x >= y; }
+int f4 (U x, U y) { return x < y; }
+int f5 (U x, U y) { return x <= y; }
+int f6 (S x, S y) { return x == y; }
+int f7 (S x, S y) { return x != y; }
+int f8 (S x, S y) { return x > y; }
+int f9 (S x, S y) { return x >= y; }
+int f10 (S x, S y) { return x < y; }
+int f11 (S x, S y) { return x <= y; }
+void f12 (U x, U y) { if (x == y) bar (); }
+void f13 (U x, U y) { if (x != y) bar (); }
+void f14 (U x, U y) { if (x > y) bar (); }
+void f15 (U x, U y) { if (x >= y) bar (); }
+void f16 (U x, U y) { if (x < y) bar (); }
+void f17 (U x, U y) { if (x <= y) bar (); }
+void f18 (S x, S y) { if (x == y) bar (); }
+void f19 (S x, S y) { if (x != y) bar (); }
+void f20 (S x, S y) { if (x > y) bar (); }
+void f21 (S x, S y) { if (x >= y) bar (); }
+void f22 (S x, S y) { if (x < y) bar (); }
+void f23 (S x, S y) { if (x <= y) bar (); }
+
+/* { dg-final { scan-assembler-times "sbb" 16 } } */