From d2ba6dcff5dc590c7cc7f230d2826fde0c005d79 Mon Sep 17 00:00:00 2001
From: Jim Wilson <wilson@cygnus.com>
Date: Thu, 11 May 2000 04:38:29 +0000
Subject: [PATCH] Add performance related explanatory comment.

	* config/ia64/ia64.md (insv): Add comment.

From-SVN: r33836
---
 gcc/ChangeLog           |  4 ++++
 gcc/config/ia64/ia64.md | 15 +++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f133e5fe633..a17ca24d092 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+Wed May 10 21:31:44 2000  Jim Wilson  <wilson@cygnus.com>
+
+	* config/ia64/ia64.md (insv): Add comment.
+
 2000-05-10  Richard Henderson  <rth@cygnus.com>
 
 	* Makefile.in (libgcc.a, stmp-multilib): Depend on FPBIT and DPBIT.
diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md
index 7ccf89dd110..4a55b030a91 100644
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -684,6 +684,21 @@
 	  DONE;
 	}
 
+      /* We could handle remaining cases by emitting multiple dep
+	 instructions.
+
+	 If we need more than two dep instructions then we lose.  A 6
+	 insn sequence mov mask1,mov mask2,shl;;and,and;;or is better than
+	 mov;;dep,shr;;dep,shr;;dep.  The former can be executed in 3 cycles,
+	 the latter is 6 cycles on an Itanium (TM) processor, because there is
+	 only one function unit that can execute dep and shr immed.
+
+	 If we only need two dep instructions, then we still lose.
+	 mov;;dep,shr;;dep is still 4 cycles.  Even if we optimize away
+	 the unnecessary mov, this is still undesirable because it will be
+	 hard to optimize, and it creates unnecessary pressure on the I0
+	 function unit.  */
+
       FAIL;
 
 #if 0