S/390 Add vector scalar instruction support.

With this patch GCC makes use of the vector instructions which are available in single element mode. By using these instructions scalar double operations can use 32 registers. gcc/ * config/s390/s390-modes.def: Add new modes CCVEQ, CCVFH, and CCVFHE. * config/s390/s390.c (s390_match_ccmode_set): Handle new modes. (s390_select_ccmode): Likewise. (s390_canonicalize_comparison): Swap operands if necessary. (s390_expand_vec_compare_scalar): Expand DFmode compare using single element vector instructions. (s390_emit_compare): Call s390_expand_vec_compare_scalar. (s390_branch_condition_mask): Generate CC masks for the new modes. * config/s390/s390.md (v0, vf, vd): New mode attributes. (VFCMP, asm_fcmp, insn_cmp): New mode iterator and attributes. (*vec_cmp<insn_cmp>df_cconly, *fixuns_truncdfdi2_z13) (*fix_trunc<BFP:mode><GPR:mode>2_bfp, *floatunsdidf2_z13) (*floatuns<GPR:mode><FP:mode>2, *extendsfdf2_z13) (*extend<DSF:mode><BFP:mode>2): New insn definition. (fix_trunc<BFP:mode><GPR:mode>2_bfp, floatuns<GPR:mode><FP:mode>2) (extend<DSF:mode><BFP:mode>2): Turn into expander. (floatdi<mode>2, truncdfsf2, add<mode>3, sub<mode>3, mul<mode>3) (div<mode>3, *neg<mode>2, *abs<mode>2, *negabs<mode>2) (sqrt<mode>2): Add vector instruction. gcc/testsuite/ * gcc.target/s390/vector/vec-scalar-cmp-1.c: New test. From-SVN: r223397
2025-04-19 06:20:27 +08:00 · 2015-05-19 17:30:25 +00:00 · 2015-05-19 17:30:25 +00:00 · 6e5b5de88b
commit 6e5b5de88b
parent 91b019a388
6 changed files with 430 additions and 92 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,26 @@
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390-modes.def: Add new modes CCVEQ, CCVFH, and
+	CCVFHE.
+	* config/s390/s390.c (s390_match_ccmode_set): Handle new modes.
+	(s390_select_ccmode): Likewise.
+	(s390_canonicalize_comparison): Swap operands if necessary.
+	(s390_expand_vec_compare_scalar): Expand DFmode compare using
+	single element vector instructions.
+	(s390_emit_compare): Call s390_expand_vec_compare_scalar.
+	(s390_branch_condition_mask): Generate CC masks for the new modes.
+	* config/s390/s390.md (v0, vf, vd): New mode attributes.
+	(VFCMP, asm_fcmp, insn_cmp): New mode iterator and attributes.
+	(*vec_cmp<insn_cmp>df_cconly, *fixuns_truncdfdi2_z13)
+	(*fix_trunc<BFP:mode><GPR:mode>2_bfp, *floatunsdidf2_z13)
+	(*floatuns<GPR:mode><FP:mode>2, *extendsfdf2_z13)
+	(*extend<DSF:mode><BFP:mode>2): New insn definition.
+	(fix_trunc<BFP:mode><GPR:mode>2_bfp, floatuns<GPR:mode><FP:mode>2)
+	(extend<DSF:mode><BFP:mode>2): Turn into expander.
+	(floatdi<mode>2, truncdfsf2, add<mode>3, sub<mode>3, mul<mode>3)
+	(div<mode>3, *neg<mode>2, *abs<mode>2, *negabs<mode>2)
+	(sqrt<mode>2): Add vector instruction.
+
 2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

 	* config/s390/constraints.md (j00, jm1, jxx, jyy, v): New
--- a/gcc/config/s390/s390-modes.def
+++ b/gcc/config/s390/s390-modes.def
@ -84,7 +84,12 @@ Requested mode            -> Destination CC register mode
 CCS, CCU, CCT, CCSR, CCUR -> CCZ
 CCA                       -> CCAP, CCAN

+Vector comparison modes

+CCVEQ  	  EQ	  - 	       - 	   NE	      (VCEQ)
+
+CCVFH	  GT	  -   	       -   	   UNLE	      (VFCH)
+CCVFHE	  GE	  -   	       -   	   UNLT	      (VFCHE)
 *** Comments ***

 CCAP, CCAN
@ -182,6 +187,11 @@ CC_MODE (CCT2);
 CC_MODE (CCT3);
 CC_MODE (CCRAW);

+CC_MODE (CCVEQ);
+CC_MODE (CCVFH);
+CC_MODE (CCVFHE);
+
+
 /* Vector modes.  */

 VECTOR_MODES (INT, 2);        /*                 V2QI */
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@ -681,6 +681,9 @@ s390_match_ccmode_set (rtx set, machine_mode req_mode)
    case CCT1mode:
    case CCT2mode:
    case CCT3mode:
+    case CCVEQmode:
+    case CCVFHmode:
+    case CCVFHEmode:
      if (req_mode != set_mode)
        return 0;
      break;
@ -781,6 +784,29 @@ s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
 machine_mode
 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
 {
+  if (TARGET_VX
+      && register_operand (op0, DFmode)
+      && register_operand (op1, DFmode))
+    {
+      /* LT, LE, UNGT, UNGE require swapping OP0 and OP1.  Either
+	 s390_emit_compare or s390_canonicalize_comparison will take
+	 care of it.  */
+      switch (code)
+	{
+	case EQ:
+	case NE:
+	  return CCVEQmode;
+	case GT:
+	case UNLE:
+	  return CCVFHmode;
+	case GE:
+	case UNLT:
+	  return CCVFHEmode;
+	default:
+	  ;
+	}
+    }
+
  switch (code)
    {
      case EQ:
@ -1058,8 +1084,73 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }
+
+  /* The single element vector FP compares only provide EQ, GT and GE
+     plus the inverses NE, UNLE and UNLT.  Rewrite LT, LE, UNGT and UNGE
+     to the code obtained by exchanging the operands (a UNGT b is
+     b UNLT a, a UNGE b is b UNLE a) and swap OP0 and OP1.  */
+  if (TARGET_VX
+      && register_operand (*op0, DFmode)
+      && register_operand (*op1, DFmode)
+      && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
+    {
+      rtx tmp;
+
+      switch (*code)
+	{
+	case LT:   *code = GT; break;
+	case LE:   *code = GE; break;
+	case UNGT: *code = UNLT; break;
+	case UNGE: *code = UNLE; break;
+	default: ;
+	}
+      tmp = *op0; *op0 = *op1; *op1 = tmp;
+    }
 }

+/* Helper function for s390_emit_compare.  If possible emit a 64 bit
+   FP compare using the single element variant of vector instructions.
+   Replace CODE with the comparison code to be used in the CC reg
+   compare and return the condition code register RTX in CC.
+
+   EQ, NE, GT, GE, UNLE and UNLT are emitted as is.  LT, LE, UNGE and
+   UNGT are rewritten to GT, GE, UNLE and UNLT with CMP1 and CMP2
+   exchanged.  The emitted insn is a PARALLEL of the CC set and a
+   clobber of a V2DImode scratch.
+
+   Return true if a compare was emitted.  For any other CODE return
+   false without emitting anything and without touching CODE or CC.  */
+
+static bool
+s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
+				rtx *cc)
+{
+  machine_mode cmp_mode;
+  bool swap_p = false;	/* Set when CMP1 and CMP2 must be exchanged.  */
+
+  switch (*code)
+    {
+    case EQ:   cmp_mode = CCVEQmode;  break;
+    case NE:   cmp_mode = CCVEQmode;  break;
+    case GT:   cmp_mode = CCVFHmode;  break;
+    case GE:   cmp_mode = CCVFHEmode; break;
+    case UNLE: cmp_mode = CCVFHmode;  break;
+    case UNLT: cmp_mode = CCVFHEmode; break;
+    case LT:   cmp_mode = CCVFHmode;  *code = GT;   swap_p = true; break;
+    case LE:   cmp_mode = CCVFHEmode; *code = GE;   swap_p = true; break;
+    case UNGE: cmp_mode = CCVFHmode;  *code = UNLE; swap_p = true; break;
+    case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
+    default: return false;	/* CODE is not handled here.  */
+    }
+
+  if (swap_p)
+    {
+      rtx tmp = cmp2;
+      cmp2 = cmp1;
+      cmp1 = tmp;
+    }
+  *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
+  emit_insn (gen_rtx_PARALLEL (VOIDmode,
+	       gen_rtvec (2,
+			  gen_rtx_SET (*cc,
+				       gen_rtx_COMPARE (cmp_mode, cmp1,
+							cmp2)),
+			  gen_rtx_CLOBBER (VOIDmode,
+					   gen_rtx_SCRATCH (V2DImode)))));
+  return true;
+}
+
+
 /* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */
@ -1070,10 +1161,18 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

-  /* Do not output a redundant compare instruction if a compare_and_swap
-     pattern already computed the result and the machine modes are compatible.  */
-  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+  if (TARGET_VX
+      && register_operand (op0, DFmode)
+      && register_operand (op1, DFmode)
+      && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
    {
+      /* Work has been done by s390_expand_vec_compare_scalar already.  */
+    }
+  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+    {
+      /* Do not output a redundant compare instruction if a
+	 compare_and_swap pattern already computed the result and the
+	 machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
 		  == GET_MODE (op0));
      cc = op0;
@ -1308,6 +1407,31 @@ s390_branch_condition_mask (rtx code)
        }
      break;

+      /* Vector comparison modes.  */
+
+    case CCVEQmode:
+      switch (GET_CODE (code))
+	{
+	case EQ:        return CC0;
+	case NE:        return CC3;
+	default:        return -1;
+	}
+      /* FP vector compare modes.  */
+
+    case CCVFHmode:
+      switch (GET_CODE (code))
+	{
+	case GT:        return CC0;
+	case UNLE:      return CC3;
+	default:        return -1;
+	}
+    case CCVFHEmode:
+      switch (GET_CODE (code))
+	{
+	case GE:        return CC0;
+	case UNLT:      return CC3;
+	default:        return -1;
+	}
    case CCRAWmode:
      switch (GET_CODE (code))
 	{
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@ -524,6 +524,14 @@
 ;; first and the second operand match for bfp modes.
 (define_mode_attr f0 [(TF "0") (DF "0") (SF "0") (TD "f") (DD "f") (DD "f")])

+;; This attribute is used to merge the scalar vector instructions into
+;; the FP patterns.  For non-supported modes (all but DF) it expands
+;; to constraints which are supposed to be matched by an earlier
+;; variant.
+(define_mode_attr v0      [(TF "0") (DF "v") (SF "0") (TD "0") (DD "0") (DD "0") (TI "0") (DI "v") (SI "0")])
+(define_mode_attr vf      [(TF "f") (DF "v") (SF "f") (TD "f") (DD "f") (DD "f") (TI "f") (DI "v") (SI "f")])
+(define_mode_attr vd      [(TF "d") (DF "v") (SF "d") (TD "d") (DD "d") (DD "d") (TI "d") (DI "v") (SI "d")])
+
 ;; This attribute is used in the operand list of the instruction to have an
 ;; additional operand for the dfp instructions.
 (define_mode_attr op1 [(TF "") (DF "") (SF "")
@ -635,6 +643,17 @@
 ;; Allow return and simple_return to be defined from a single template.
 (define_code_iterator ANY_RETURN [return simple_return])

+
+
+; Condition code modes generated by vector fp comparisons.  These will
+; be used also in single element mode.
+(define_mode_iterator VFCMP [CCVEQ CCVFH CCVFHE])
+; Used with VFCMP to expand part of the mnemonic
+; For fp we have a mismatch: eq in the insn name - e in asm
+(define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")])
+(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVFH "h") (CCVFHE "he")])
+
+
 (include "vector.md")

 ;;
@ -1144,6 +1163,15 @@
   [(set_attr "op_type" "RRE,RXE")
    (set_attr "type"  "fsimp<mode>")])

+; wfcedbs, wfchdbs, wfchedbs
+(define_insn "*vec_cmp<insn_cmp>df_cconly"
+  [(set (reg:VFCMP CC_REGNUM)
+	(compare:VFCMP (match_operand:DF 0 "register_operand" "v")
+		       (match_operand:DF 1 "register_operand" "v")))
+   (clobber (match_scratch:V2DI 2 "=v"))]
+  "TARGET_Z13 && TARGET_HARD_FLOAT"
+  "wfc<asm_fcmp>dbs\t%v2,%v0,%v1"
+  [(set_attr "op_type" "VRR")])

 ; Compare and Branch instructions

@ -4360,14 +4388,27 @@

 ; fixuns_trunc(tf|df|sf|td|dd)(di|si)2 instruction patterns.

+(define_insn "*fixuns_truncdfdi2_z13"
+  [(set (match_operand:DI                  0 "register_operand" "=d,v")
+	(unsigned_fix:DI (match_operand:DF 1 "register_operand"  "f,v")))
+   (unspec:DI [(match_operand:DI           2 "immediate_operand" "K,K")] UNSPEC_ROUND)
+   (clobber (reg:CC CC_REGNUM))]
+   "TARGET_Z13 && TARGET_HARD_FLOAT"
+   "@
+    clgdbr\t%0,%h2,%1,0
+    wclgdb\t%v0,%v1,0,%h2"
+   [(set_attr "op_type" "RRF,VRR")
+    (set_attr "type"    "ftoi")])
+
 ; clfebr, clfdbr, clfxbr, clgebr, clgdbr, clgxbr
 ;         clfdtr, clfxtr,         clgdtr, clgxtr
 (define_insn "*fixuns_trunc<FP:mode><GPR:mode>2_z196"
-  [(set (match_operand:GPR 0 "register_operand" "=r")
-	(unsigned_fix:GPR (match_operand:FP 1 "register_operand" "f")))
-   (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
+  [(set (match_operand:GPR                  0 "register_operand" "=d")
+	(unsigned_fix:GPR (match_operand:FP 1 "register_operand"  "f")))
+   (unspec:GPR [(match_operand:GPR          2 "immediate_operand" "K")] UNSPEC_ROUND)
   (clobber (reg:CC CC_REGNUM))]
-   "TARGET_Z196"
+   "TARGET_Z196 && TARGET_HARD_FLOAT
+    && (!TARGET_Z13 || <GPR:MODE>mode != DImode || <FP:MODE>mode != DFmode)"
   "cl<GPR:gf><FP:xde><FP:bt>r\t%0,%h2,%1,0"
   [(set_attr "op_type" "RRF")
    (set_attr "type"    "ftoi")])
@ -4382,18 +4423,37 @@
  DONE;
 })

-; cgxbr, cgdbr, cgebr, cfxbr, cfdbr, cfebr
-(define_insn "fix_trunc<BFP:mode><GPR:mode>2_bfp"
-  [(set (match_operand:GPR 0 "register_operand" "=d")
-        (fix:GPR (match_operand:BFP 1 "register_operand" "f")))
-   (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
+(define_insn "*fix_truncdfdi2_bfp_z13"
+  [(set (match_operand:DI         0 "register_operand" "=d,v")
+        (fix:DI (match_operand:DF 1 "register_operand"  "f,v")))
+   (unspec:DI [(match_operand:DI  2 "immediate_operand" "K,K")] UNSPEC_ROUND)
   (clobber (reg:CC CC_REGNUM))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_Z13 && TARGET_HARD_FLOAT"
+  "@
+   cgdbr\t%0,%h2,%1
+   wcgdb\t%v0,%v1,0,%h2"
+  [(set_attr "op_type" "RRE,VRR")
+   (set_attr "type"    "ftoi")])
+
+; cgxbr, cgdbr, cgebr, cfxbr, cfdbr, cfebr
+(define_insn "*fix_trunc<BFP:mode><GPR:mode>2_bfp"
+  [(set (match_operand:GPR          0 "register_operand" "=d")
+        (fix:GPR (match_operand:BFP 1 "register_operand"  "f")))
+   (unspec:GPR [(match_operand:GPR  2 "immediate_operand" "K")] UNSPEC_ROUND)
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_HARD_FLOAT
+    && (!TARGET_VX || <GPR:MODE>mode != DImode || <BFP:MODE>mode != DFmode)"
  "c<GPR:gf><BFP:xde>br\t%0,%h2,%1"
  [(set_attr "op_type" "RRE")
   (set_attr "type"    "ftoi")])

-
+(define_expand "fix_trunc<BFP:mode><GPR:mode>2_bfp"
+  [(parallel
+    [(set (match_operand:GPR          0 "register_operand" "=d")
+	  (fix:GPR (match_operand:BFP 1 "register_operand"  "f")))
+     (unspec:GPR [(match_operand:GPR  2 "immediate_operand" "K")] UNSPEC_ROUND)
+     (clobber (reg:CC CC_REGNUM))])]
+  "TARGET_HARD_FLOAT")
 ;
 ; fix_trunc(td|dd)di2 instruction pattern(s).
 ;
@ -4440,12 +4500,15 @@

 ; cxgbr, cdgbr, cegbr, cxgtr, cdgtr
 (define_insn "floatdi<mode>2"
-  [(set (match_operand:FP 0 "register_operand" "=f")
-        (float:FP (match_operand:DI 1 "register_operand" "d")))]
+  [(set (match_operand:FP           0 "register_operand" "=f,<vf>")
+        (float:FP (match_operand:DI 1 "register_operand"  "d,<vd>")))]
  "TARGET_ZARCH && TARGET_HARD_FLOAT"
-  "c<xde>g<bt>r\t%0,%1"
-  [(set_attr "op_type" "RRE")
-   (set_attr "type"    "itof<mode>" )])
+  "@
+   c<xde>g<bt>r\t%0,%1
+   wcdgb\t%v0,%v1,0,0"
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "type"         "itof<mode>" )
+   (set_attr "cpu_facility" "*,vec")])

 ; cxfbr, cdfbr, cefbr
 (define_insn "floatsi<mode>2"
@ -4469,27 +4532,47 @@
 ; floatuns(si|di)(tf|df|sf|td|dd)2 instruction pattern(s).
 ;

+(define_insn "*floatunsdidf2_z13"
+  [(set (match_operand:DF                    0 "register_operand" "=f,v")
+        (unsigned_float:DF (match_operand:DI 1 "register_operand"  "d,v")))]
+  "TARGET_Z13 && TARGET_HARD_FLOAT"
+  "@
+   cdlgbr\t%0,0,%1,0
+   wcdlgb\t%v0,%v1,0,0"
+  [(set_attr "op_type" "RRE,VRR")
+   (set_attr "type"    "itofdf")])
+
 ; cxlgbr, cdlgbr, celgbr, cxlgtr, cdlgtr
 ; cxlfbr, cdlfbr, celfbr, cxlftr, cdlftr
-(define_insn "floatuns<GPR:mode><FP:mode>2"
-  [(set (match_operand:FP 0 "register_operand" "=f")
-        (unsigned_float:FP (match_operand:GPR 1 "register_operand" "d")))]
-  "TARGET_Z196 && TARGET_HARD_FLOAT"
+(define_insn "*floatuns<GPR:mode><FP:mode>2"
+  [(set (match_operand:FP                     0 "register_operand" "=f")
+        (unsigned_float:FP (match_operand:GPR 1 "register_operand"  "d")))]
+  "TARGET_Z196 && TARGET_HARD_FLOAT
+   && (!TARGET_VX || <FP:MODE>mode != DFmode || <GPR:MODE>mode != DImode)"
  "c<FP:xde>l<GPR:gf><FP:bt>r\t%0,0,%1,0"
  [(set_attr "op_type" "RRE")
-   (set_attr "type"    "itof<FP:mode>" )])
+   (set_attr "type"    "itof<FP:mode>")])
+
+(define_expand "floatuns<GPR:mode><FP:mode>2"
+  [(set (match_operand:FP                     0 "register_operand" "")
+        (unsigned_float:FP (match_operand:GPR 1 "register_operand" "")))]
+  "TARGET_Z196 && TARGET_HARD_FLOAT")

 ;
 ; truncdfsf2 instruction pattern(s).
 ;

 (define_insn "truncdfsf2"
-  [(set (match_operand:SF 0 "register_operand" "=f")
-        (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
+  [(set (match_operand:SF                    0 "register_operand" "=f,v")
+        (float_truncate:SF (match_operand:DF 1 "register_operand"  "f,v")))]
  "TARGET_HARD_FLOAT"
-  "ledbr\t%0,%1"
-  [(set_attr "op_type"  "RRE")
-   (set_attr "type"   "ftruncdf")])
+  "@
+   ledbr\t%0,%1
+   wledb\t%v0,%v1,0,0" ; IEEE inexact exception not suppressed
+                       ; According to BFP rounding mode
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "type"         "ftruncdf")
+   (set_attr "cpu_facility" "*,vec")])

 ;
 ; trunctf(df|sf)2 instruction pattern(s).
@ -4542,17 +4625,35 @@
 ; extend(sf|df)(df|tf)2 instruction pattern(s).
 ;

+(define_insn "*extendsfdf2_z13"
+  [(set (match_operand:DF                  0 "register_operand"     "=f,f,v")
+        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand"  "f,R,v")))]
+  "TARGET_Z13 && TARGET_HARD_FLOAT"
+  "@
+   ldebr\t%0,%1
+   ldeb\t%0,%1
+   wldeb\t%v0,%v1"
+  [(set_attr "op_type" "RRE,RXE,VRR")
+   (set_attr "type"    "fsimpdf, floaddf,fsimpdf")])
+
 ; ldebr, ldeb, lxdbr, lxdb, lxebr, lxeb
-(define_insn "extend<DSF:mode><BFP:mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f,f")
+(define_insn "*extend<DSF:mode><BFP:mode>2"
+  [(set (match_operand:BFP                   0 "register_operand"     "=f,f")
        (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand"  "f,R")))]
  "TARGET_HARD_FLOAT
-   && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)"
+   && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)
+   && (!TARGET_VX || <BFP:MODE>mode != DFmode || <DSF:MODE>mode != SFmode)"
  "@
   l<BFP:xde><DSF:xde>br\t%0,%1
   l<BFP:xde><DSF:xde>b\t%0,%1"
-  [(set_attr "op_type"  "RRE,RXE")
-   (set_attr "type"   "fsimp<BFP:mode>, fload<BFP:mode>")])
+  [(set_attr "op_type" "RRE,RXE")
+   (set_attr "type"    "fsimp<BFP:mode>, fload<BFP:mode>")])
+
+(define_expand "extend<DSF:mode><BFP:mode>2"
+  [(set (match_operand:BFP                   0 "register_operand"     "")
+        (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand" "")))]
+  "TARGET_HARD_FLOAT
+   && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)")

 ;
 ; extendddtd2 and extendsddd2 instruction pattern(s).
@ -5156,17 +5257,20 @@
 ;

 ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr
+; FIXME: wfadb does not clobber cc
 (define_insn "add<mode>3"
-  [(set (match_operand:FP 0 "register_operand"              "=f,   f")
-        (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
-		 (match_operand:FP 2 "general_operand"      " f,<Rf>")))
+  [(set (match_operand:FP 0 "register_operand"                 "=f,   f,<vf>")
+        (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,   0,<v0>")
+		 (match_operand:FP 2 "general_operand"          "f,<Rf>,<vf>")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_HARD_FLOAT"
  "@
   a<xde><bt>r\t%0,<op1>%2
-   a<xde>b\t%0,%2"
-  [(set_attr "op_type"  "<RRer>,RXE")
-   (set_attr "type"     "fsimp<mode>")])
+   a<xde>b\t%0,%2
+   wfadb\t%v0,%v1,%v2"
+  [(set_attr "op_type"      "<RRer>,RXE,VRR")
+   (set_attr "type"         "fsimp<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])

 ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr
 (define_insn "*add<mode>3_cc"
@ -5579,16 +5683,18 @@

 ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr
 (define_insn "sub<mode>3"
-  [(set (match_operand:FP 0 "register_operand"            "=f,  f")
-        (minus:FP (match_operand:FP 1 "register_operand" "<f0>,0")
-                  (match_operand:FP 2 "general_operand"  "f,<Rf>")))
+  [(set (match_operand:FP           0 "register_operand"   "=f,   f,<vf>")
+        (minus:FP (match_operand:FP 1 "register_operand" "<f0>,   0,<v0>")
+                  (match_operand:FP 2 "general_operand"     "f,<Rf>,<vf>")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_HARD_FLOAT"
  "@
   s<xde><bt>r\t%0,<op1>%2
-   s<xde>b\t%0,%2"
-  [(set_attr "op_type"  "<RRer>,RXE")
-   (set_attr "type"     "fsimp<mode>")])
+   s<xde>b\t%0,%2
+   wfsdb\t%v0,%v1,%v2"
+  [(set_attr "op_type"      "<RRer>,RXE,VRR")
+   (set_attr "type"         "fsimp<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])

 ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr
 (define_insn "*sub<mode>3_cc"
@ -5994,41 +6100,47 @@

 ; mxbr, mdbr, meebr, mxb, mxb, meeb, mdtr, mxtr
 (define_insn "mul<mode>3"
-  [(set (match_operand:FP 0 "register_operand"              "=f,f")
-        (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
-                 (match_operand:FP 2 "general_operand"      "f,<Rf>")))]
+  [(set (match_operand:FP          0 "register_operand"        "=f,   f,<vf>")
+        (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,   0,<v0>")
+                 (match_operand:FP 2 "general_operand"          "f,<Rf>,<vf>")))]
  "TARGET_HARD_FLOAT"
  "@
   m<xdee><bt>r\t%0,<op1>%2
-   m<xdee>b\t%0,%2"
-  [(set_attr "op_type"  "<RRer>,RXE")
-   (set_attr "type"     "fmul<mode>")])
+   m<xdee>b\t%0,%2
+   wfmdb\t%v0,%v1,%v2"
+  [(set_attr "op_type"      "<RRer>,RXE,VRR")
+   (set_attr "type"         "fmul<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])

 ; madbr, maebr, maxb, madb, maeb
 (define_insn "fma<mode>4"
-  [(set (match_operand:DSF 0 "register_operand" "=f,f")
-	(fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f")
-		 (match_operand:DSF 2 "nonimmediate_operand" "f,R")
-		 (match_operand:DSF 3 "register_operand" "0,0")))]
+  [(set (match_operand:DSF          0 "register_operand"     "=f,f,<vf>")
+	(fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f,<vf>")
+		 (match_operand:DSF 2 "nonimmediate_operand"  "f,R,<vf>")
+		 (match_operand:DSF 3 "register_operand"      "0,0,<v0>")))]
  "TARGET_HARD_FLOAT"
  "@
   ma<xde>br\t%0,%1,%2
-   ma<xde>b\t%0,%1,%2"
-  [(set_attr "op_type"  "RRE,RXE")
-   (set_attr "type"     "fmadd<mode>")])
+   ma<xde>b\t%0,%1,%2
+   wfmadb\t%v0,%v1,%v2,%v3"
+  [(set_attr "op_type"      "RRE,RXE,VRR")
+   (set_attr "type"         "fmadd<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])

 ; msxbr, msdbr, msebr, msxb, msdb, mseb
 (define_insn "fms<mode>4"
-  [(set (match_operand:DSF 0 "register_operand" "=f,f")
-	(fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f")
-		 (match_operand:DSF 2 "nonimmediate_operand" "f,R")
-		 (neg:DSF (match_operand:DSF 3 "register_operand" "0,0"))))]
+  [(set (match_operand:DSF                   0 "register_operand"     "=f,f,<vf>")
+	(fma:DSF (match_operand:DSF          1 "nonimmediate_operand" "%f,f,<vf>")
+		 (match_operand:DSF          2 "nonimmediate_operand"  "f,R,<vf>")
+		 (neg:DSF (match_operand:DSF 3 "register_operand"      "0,0,<v0>"))))]
  "TARGET_HARD_FLOAT"
  "@
   ms<xde>br\t%0,%1,%2
-   ms<xde>b\t%0,%1,%2"
-  [(set_attr "op_type"  "RRE,RXE")
-   (set_attr "type"     "fmadd<mode>")])
+   ms<xde>b\t%0,%1,%2
+   wfmsdb\t%v0,%v1,%v2,%v3"
+  [(set_attr "op_type"      "RRE,RXE,VRR")
+   (set_attr "type"         "fmadd<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])

 ;;
 ;;- Divide and modulo instructions.
@ -6454,15 +6566,17 @@

 ; dxbr, ddbr, debr, dxb, ddb, deb, ddtr, dxtr
 (define_insn "div<mode>3"
-  [(set (match_operand:FP 0 "register_operand"          "=f,f")
-        (div:FP (match_operand:FP 1 "register_operand" "<f0>,0")
-                 (match_operand:FP 2 "general_operand"  "f,<Rf>")))]
+  [(set (match_operand:FP         0 "register_operand"   "=f,   f,<vf>")
+        (div:FP (match_operand:FP 1 "register_operand" "<f0>,   0,<v0>")
+		(match_operand:FP 2 "general_operand"     "f,<Rf>,<vf>")))]
  "TARGET_HARD_FLOAT"
  "@
   d<xde><bt>r\t%0,<op1>%2
-   d<xde>b\t%0,%2"
-  [(set_attr "op_type"  "<RRer>,RXE")
-   (set_attr "type"     "fdiv<mode>")])
+   d<xde>b\t%0,%2
+   wfddb\t%v0,%v1,%v2"
+  [(set_attr "op_type"      "<RRer>,RXE,VRR")
+   (set_attr "type"         "fdiv<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])


 ;;
@ -7671,14 +7785,18 @@
   (set_attr "type"     "fsimp<mode>")])

 ; lcxbr, lcdbr, lcebr
+; FIXME: wflcdb does not clobber cc
 (define_insn "*neg<mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f")
-        (neg:BFP (match_operand:BFP 1 "register_operand" "f")))
+  [(set (match_operand:BFP          0 "register_operand" "=f,<vf>")
+        (neg:BFP (match_operand:BFP 1 "register_operand"  "f,<vf>")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_HARD_FLOAT"
-  "lc<xde>br\t%0,%1"
-  [(set_attr "op_type"  "RRE")
-   (set_attr "type"     "fsimp<mode>")])
+  "@
+   lc<xde>br\t%0,%1
+   wflcdb\t%0,%1"
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "cpu_facility" "*,vec")
+   (set_attr "type"         "fsimp<mode>,*")])


 ;;
@ -7789,14 +7907,18 @@
   (set_attr "type"     "fsimp<mode>")])

 ; lpxbr, lpdbr, lpebr
+; FIXME: wflpdb does not clobber cc
 (define_insn "*abs<mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f")
-        (abs:BFP (match_operand:BFP 1 "register_operand" "f")))
+  [(set (match_operand:BFP          0 "register_operand" "=f,<vf>")
+        (abs:BFP (match_operand:BFP 1 "register_operand"  "f,<vf>")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_HARD_FLOAT"
-  "lp<xde>br\t%0,%1"
-  [(set_attr "op_type"  "RRE")
-   (set_attr "type"     "fsimp<mode>")])
+  "@
+    lp<xde>br\t%0,%1
+    wflpdb\t%0,%1"
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "cpu_facility" "*,vec")
+   (set_attr "type"         "fsimp<mode>,*")])


 ;;
@ -7900,14 +8022,18 @@
   (set_attr "type"     "fsimp<mode>")])

 ; lnxbr, lndbr, lnebr
+; FIXME: wflndb does not clobber cc
 (define_insn "*negabs<mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f")
-        (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f"))))
+  [(set (match_operand:BFP                   0 "register_operand" "=f,<vf>")
+        (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand"  "f,<vf>"))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_HARD_FLOAT"
-  "ln<xde>br\t%0,%1"
-  [(set_attr "op_type"  "RRE")
-   (set_attr "type"     "fsimp<mode>")])
+  "@
+   ln<xde>br\t%0,%1
+   wflndb\t%0,%1"
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "cpu_facility" "*,vec")
+   (set_attr "type"         "fsimp<mode>,*")])

 ;;
 ;;- Square root instructions.
@ -7919,14 +8045,16 @@

 ; sqxbr, sqdbr, sqebr, sqdb, sqeb
 (define_insn "sqrt<mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f,f")
-	(sqrt:BFP (match_operand:BFP 1 "general_operand" "f,<Rf>")))]
+  [(set (match_operand:BFP           0 "register_operand" "=f,   f,<vf>")
+	(sqrt:BFP (match_operand:BFP 1 "general_operand"   "f,<Rf>,<vf>")))]
  "TARGET_HARD_FLOAT"
  "@
   sq<xde>br\t%0,%1
-   sq<xde>b\t%0,%1"
-  [(set_attr "op_type" "RRE,RXE")
-   (set_attr "type" "fsqrt<mode>")])
+   sq<xde>b\t%0,%1
+   wfsqdb\t%v0,%v1"
+  [(set_attr "op_type"      "RRE,RXE,VRR")
+   (set_attr "type"         "fsqrt<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])


 ;;
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,7 @@
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* gcc.target/s390/vector/vec-scalar-cmp-1.c: New test.
+
 2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

 	* gcc.target/s390/s390.exp
--- a/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
@ -0,0 +1,49 @@
+/* Check that we use the scalar variants of vector compares.
+
+   NOTE(review): the "wfchedbs ... %v2,%v0" directive below appears
+   twice with the same pattern and count, so the second copy checks
+   nothing new.  Presumably one of them was meant to match the
+   non-swapped operand order (%v0,%v2) - confirm against the
+   generated assembly.  */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* { dg-final { scan-assembler-times "wfcedbs\t%v\[0-9\]*,%v0,%v2" 2 } } */
+/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v0,%v2" 1 } } */
+/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */
+/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v2,%v0" 1 } } */
+/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */
+/* { dg-final { scan-assembler-times "locrne" 5 } } */
+/* { dg-final { scan-assembler-times "locrno" 1 } } */
+
+
+int
+eq (double a, double b)
+{
+  return a == b;
+}
+
+int
+ne (double a, double b)
+{
+  return a != b;
+}
+
+int
+gt (double a, double b)
+{
+  return a > b;
+}
+
+int
+ge (double a, double b)
+{
+  return a >= b;
+}
+
+int
+lt (double a, double b)
+{
+  return a < b;
+}
+
+int
+le (double a, double b)
+{
+  return a <= b;
+}