Faster __mulsi3 routine

From-SVN: r38736
2025-03-08 07:56:45 +08:00 · 2001-01-06 00:46:33 +00:00 · 2001-01-06 00:46:33 +00:00 · 0426e04997
commit 0426e04997
parent f7114e1750
2 changed files with 53 additions and 45 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+Fri Jan  5 16:34:18 2001  Nick Clifton  <nickc@redhat.com>
+
+	* config/v850/lib1funcs.asm: Replace __mulsi3 routine with faster
+	version supplied by Matteo Frigo.
+
 2001-01-05  Neil Booth  <neil@daikokuya.demon.co.uk>

        * cpp.texi: Update for -MQ.
--- a/gcc/config/v850/lib1funcs.asm
+++ b/gcc/config/v850/lib1funcs.asm
@@ -33,57 +33,60 @@ Boston, MA 02111-1307, USA.  */
 	.type  ___mulsi3,@function

 /*
- * In order to not deal with negative numbers (mulh is a signed multiply
- * and we want an unsigned multiply, code the multiplication as a series
- * of 7 bit multiplies).
+ * #define SHIFT 12
+ * #define MASK ((1 << SHIFT) - 1)
+ *  
+ * #define STEP(i, j)                             \
+ * ({                                             \
+ *     short a_part = (a >> (i)) & MASK;          \
+ *     short b_part = (b >> (j)) & MASK;          \
+ *     int res = (((int)a_part) * ((int)b_part)); \
+ *     res;                                       \
+ * })
 *
- * int __mulsi3 (unsigned a, unsigned b)
+ * int
+ * __mulsi3 (unsigned a, unsigned b)
 * {
- *   int i, j;
- *   int ret = 0;
- *
- *   for (i = 0; i < 32; i += 7)
- *     {
- *       short a_part = a & 0x7f;
- *       unsigned b_tmp = b;
- *       a >>= 7;
- * 
- *       for (j = 0; (i+j) < 32; j += 7)
- * 	{
- * 	  short b_part = b_tmp & 0x7f;
- * 	  ret += (((int)a_part) * ((int)b_part)) << (i+j);
- * 	  b_tmp >>= 7;
- * 	}
- *    }
- *
- *   return ret;
+ *    return STEP (0, 0) +
+ *        ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) +
+ *        ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0))
+ *         << (2 * SHIFT));
 * }
 */

 ___mulsi3:
-	mov 0,r10			/* total */
-	mov 0,r14			/* i = 0, index for multiply a's part */
-	movea lo(31),r0,r16		/* upper bounds for loop */
-.L5:
-	mov r7,r13			/* b_tmp = b */
-	andi 0x7f,r6,r15		/* a_part = (a & 127) */
-	shr 7,r6			/* a >>= 7 */
-	mov r14,r12			/* i+j = i */
-.L9:
-	andi 0x7f,r13,r11		/* b_part = (b_tmp & 127) */
-	mulh r15,r11			/* ((int)a_part) * ((int)b_part) */
-	shr 7,r13			/* b_tmp >>= 7 */
-	shl r12,r11			/* (((int)a_part) * ((int)b_part)) << (i+j) */
-	add r11,r10			/* ret += (((int)a_part) * ((int)b_part)) << (i+j) */
-	add 7,r12			/* i+j += 7 */
-	cmp r16,r12			/* i+j < 32 */
-	ble .L9
-
-	add 7,r14			/* i += 7 */
-	cmp r16,r14			/* i < 32 */
-	ble .L5
-
-	jmp [r31]			/* return */
+        mov r6,r13			/* r13 = a (r6 = a, r7 = b; result is built in r10) */
+        movea lo(4095),r0,r16		/* r16 = MASK = (1 << 12) - 1 */
+        and r16,r13			/* r13 = a0 = a & MASK */
+        mov r7,r15			/* r15 = b */
+        and r16,r15			/* r15 = b0 = b & MASK */
+        mov r13,r10			/* r10 = a0 (mulh overwrites its second operand) */
+        mulh r15,r10			/* r10 = a0 * b0 = STEP (0, 0); 12-bit parts stay non-negative for the signed mulh */
+        shr 12,r6			/* r6 = a >> SHIFT */
+        mov r6,r14			/* r14 = a >> SHIFT */
+        and r16,r14			/* r14 = a1 = (a >> SHIFT) & MASK */
+        mov r14,r11			/* r11 = a1; r14 keeps a1 for the later a1 * b1 */
+        mulh r15,r11			/* r11 = a1 * b0 = STEP (SHIFT, 0) */
+        shr 12,r7			/* r7 = b >> SHIFT */
+        mov r7,r12			/* r12 = b >> SHIFT */
+        and r16,r12			/* r12 = b1 = (b >> SHIFT) & MASK */
+        shr 12,r7			/* r7 = b >> (2 * SHIFT) */
+        and r16,r7			/* r7 = b2 = (b >> (2 * SHIFT)) & MASK */
+        mulh r13,r7			/* r7 = a0 * b2 = STEP (0, 2 * SHIFT) */
+        shr 12,r6			/* r6 = a >> (2 * SHIFT) */
+        mulh r12,r13			/* r13 = a0 * b1 = STEP (0, SHIFT); a0 is not needed after this */
+        and r16,r6			/* r6 = a2 = (a >> (2 * SHIFT)) & MASK */
+        add r13,r11			/* r11 = STEP (SHIFT, 0) + STEP (0, SHIFT) */
+        shl 12,r11			/* r11 <<= SHIFT */
+        add r11,r10			/* r10 = STEP (0, 0) + (middle terms << SHIFT) */
+        mov r14,r11			/* r11 = a1 */
+        mulh r12,r11			/* r11 = a1 * b1 = STEP (SHIFT, SHIFT) */
+        mulh r15,r6			/* r6 = a2 * b0 = STEP (2 * SHIFT, 0) */
+        add r11,r7			/* r7 = STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) */
+        add r6,r7			/* r7 += STEP (2 * SHIFT, 0) */
+        shl 24,r7			/* r7 <<= 2 * SHIFT */
+        add r7,r10			/* r10 += high terms; r10 now holds the full sum */
+        jmp [r31]			/* return, result in r10 */
 	.size ___mulsi3,.-___mulsi3
 #endif