From 6d87c306c6a44f8db81a9a9f2f5de4d943d5e57f Mon Sep 17 00:00:00 2001
From: Bernd Schmidt <bernds@redhat.com>
Date: Tue, 16 Feb 2016 14:42:59 +0000
Subject: [PATCH] Fix bswap optimization on big-endian (PR69714, 67781).

	PR tree-optimization/69714
	* tree-ssa-math-opts.c (find_bswap_or_nop): Revert previous change.
	Return NULL if we have irrelevant high bytes on BIG_ENDIAN.

testsuite/
	PR tree-optimization/69714
	* gcc.dg/torture/pr69714.c: New test.

From-SVN: r233452
---
 gcc/ChangeLog                          |   6 +
 gcc/testsuite/ChangeLog                |   7 +-
 gcc/testsuite/gcc.dg/torture/pr69714.c | 156 +++++++++++++++++++++++++
 gcc/tree-ssa-math-opts.c               |  46 +++-----
 4 files changed, 185 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr69714.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d41547a2da02..741fbd095c60 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-02-16  Bernd Schmidt  <bschmidt@redhat.com>
+
+	PR tree-optimization/69714
+	* tree-ssa-math-opts.c (find_bswap_or_nop): Revert previous change.
+	Return NULL if we have irrelevant high bytes on BIG_ENDIAN.
+
 2016-02-16  Claudiu Zissulescu  <claziss@synopsys.com>
 
 	* config/arc/arc-modes.def (CC_FPU, CC_FPU_UNEQ): New modes.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4d6884ed5c49..38751605e80a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-02-16  Bernd Schmidt  <bschmidt@redhat.com>
+
+	PR tree-optimization/69714
+	* gcc.dg/torture/pr69714.c: New test.
+
 2016-02-16  Jakub Jelinek  <jakub@redhat.com>
 
 	PR tree-optimization/69802
@@ -152,7 +157,7 @@
 2016-02-12  Bernd Schmidt  <bschmidt@redhat.com>
 
 	PR c/69522
-	gcc.dg/pr69522.c: New test.
+	* gcc.dg/pr69522.c: New test.
 
 2016-02-12  Patrick Palka  <ppalka@gcc.gnu.org>
 
diff --git a/gcc/testsuite/gcc.dg/torture/pr69714.c b/gcc/testsuite/gcc.dg/torture/pr69714.c
new file mode 100644
index 000000000000..229b7ad58a11
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr69714.c
@@ -0,0 +1,156 @@
+/* { dg-do run } */
+/* { dg-options "-fno-strict-aliasing" } */
+
+#include <stdint.h>
+#include <stdio.h>
+/* av_le2ne32: convert a little-endian 32-bit value to native byte order. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define av_le2ne32(x) (x)
+#else /* not little-endian: the bytes have to be swapped */
+#define av_le2ne32(x) av_bswap32(x)
+#endif
+/* Reverse the byte order of the 32-bit value x using only shifts and masks. */
+static __attribute__((always_inline)) inline __attribute__((const)) uint32_t av_bswap32(uint32_t x)
+{ /* Low 16 bits are byte-swapped into the top half, high 16 bits into the bottom half. */
+    return ((((x) << 8 & 0xff00) | ((x) >> 8 & 0x00ff)) << 16 | ((((x) >> 16) << 8 & 0xff00) | (((x) >> 16) >> 8 & 0x00ff)));
+}
+/* Element type of the CRC lookup tables. */
+typedef uint32_t AVCRC;
+/* Selects one of the predefined CRC variants; used as the first index of av_crc_table. */
+typedef enum {
+    AV_CRC_8_ATM,
+    AV_CRC_16_ANSI,
+    AV_CRC_16_CCITT,
+    AV_CRC_32_IEEE,
+    AV_CRC_32_IEEE_LE,
+    AV_CRC_16_ANSI_LE,
+    AV_CRC_24_IEEE = 12,
+    AV_CRC_MAX, /* one past AV_CRC_24_IEEE (13); sizes the parameter and lookup tables */
+} AVCRCId;
+/* Fill the table ctx for the CRC described by le, bits and poly; returns 0 or -1. */
+int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size);
+
+
+
+
+
+/* Compute the CRC of length bytes at buffer, starting from crc, using the table ctx. */
+uint32_t av_crc(const AVCRC *ctx, uint32_t crc,
+                const uint8_t *buffer, size_t length) __attribute__((pure));
+static struct { /* Arguments handed to av_crc_init for each AVCRCId. */
+    uint8_t le; /* nonzero selects the right-shifting table construction in av_crc_init */
+    uint8_t bits; /* width of the polynomial in bits; av_crc_init accepts 8..32 */
+    uint32_t poly; /* CRC polynomial; must be below 2^bits */
+} av_crc_table_params[AV_CRC_MAX] = {
+    [AV_CRC_8_ATM] = { 0, 8, 0x07 },
+    [AV_CRC_16_ANSI] = { 0, 16, 0x8005 },
+    [AV_CRC_16_CCITT] = { 0, 16, 0x1021 },
+    [AV_CRC_24_IEEE] = { 0, 24, 0x864CFB },
+    [AV_CRC_32_IEEE] = { 0, 32, 0x04C11DB7 },
+    [AV_CRC_32_IEEE_LE] = { 1, 32, 0xEDB88320 },
+    [AV_CRC_16_ANSI_LE] = { 1, 16, 0xA001 },
+};
+static AVCRC av_crc_table[AV_CRC_MAX][1024]; /* starts zeroed; rows are filled on demand by av_crc_get_table */
+
+/* Fill ctx with the lookup table for the CRC given by le, bits and poly. */
+int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size)
+{ /* Returns 0 on success, -1 if bits, poly or ctx_size is rejected. */
+    unsigned i, j;
+    uint32_t c;
+    /* Accept bits in 8..32 with poly below 2^bits, and a ctx_size of exactly 257 or 1024 entries. */
+    if (bits < 8 || bits > 32 || poly >= (1LL << bits))
+        return -1;
+    if (ctx_size != sizeof(AVCRC) * 257 && ctx_size != sizeof(AVCRC) * 1024)
+        return -1;
+    /* First 256 entries: run every byte value through eight single-bit steps. */
+    for (i = 0; i < 256; i++) {
+        if (le) { /* right-shifting form, stored as computed */
+            for (c = i, j = 0; j < 8; j++)
+                c = (c >> 1) ^ (poly & (-(c & 1)));
+            ctx[i] = c;
+        } else { /* left-shifting form starting from the top byte, stored byte-swapped */
+            for (c = i << 24, j = 0; j < 8; j++)
+                c = (c << 1) ^ ((poly << (32 - bits)) & (((int32_t) c) >> 31));
+            ctx[i] = av_bswap32(c);
+        }
+    }
+    ctx[256] = 1; /* stays 1 only for a 257-entry table; the loop below rewrites it to 0 (ctx[0] is 0) */
+    /* 1024-entry table: derive three more 256-entry slices, each from the previous slice and the first. */
+    if (ctx_size >= sizeof(AVCRC) * 1024)
+        for (i = 0; i < 256; i++)
+            for (j = 0; j < 3; j++)
+                ctx[256 *(j + 1) + i] =
+                    (ctx[256 * j + i] >> 8) ^ ctx[ctx[256 * j + i] & 0xFF];
+
+
+    return 0;
+}
+/* Return the table for crc_id, building it on first use; a null pointer if av_crc_init fails. */
+const AVCRC *av_crc_get_table(AVCRCId crc_id)
+{ /* A zero last entry is taken to mean the row has not been initialized yet. */
+    if (!av_crc_table[crc_id][(sizeof(av_crc_table[crc_id]) / sizeof((av_crc_table[crc_id])[0])) - 1])
+        if (av_crc_init(av_crc_table[crc_id],
+                        av_crc_table_params[crc_id].le,
+                        av_crc_table_params[crc_id].bits,
+                        av_crc_table_params[crc_id].poly,
+                        sizeof(av_crc_table[crc_id])) < 0)
+            return ((void *)0);
+
+    return av_crc_table[crc_id];
+}
+/* Update crc over length bytes at buffer using the table ctx and return the new value. */
+uint32_t av_crc(const AVCRC *ctx, uint32_t crc,
+                const uint8_t *buffer, size_t length)
+{
+    const uint8_t *end = buffer + length;
+
+    /* A zero ctx[256] selects the path that indexes all four 256-entry slices of ctx. */
+    if (!ctx[256]) {
+        while (((intptr_t) buffer & 3) && buffer < end) /* bytewise until buffer is 4-byte aligned */
+            crc = ctx[((uint8_t) crc) ^ *buffer++] ^ (crc >> 8);
+        /* NOTE(review): on big-endian this is a 32-bit load feeding av_bswap32 - presumably the case PR69714 is about; confirm. */
+        while (buffer < end - 3) { /* four input bytes per iteration */
+            crc ^= av_le2ne32(*(const uint32_t *) buffer); buffer += 4;
+            crc = ctx[3 * 256 + ( crc & 0xFF)] ^
+                  ctx[2 * 256 + ((crc >> 8 ) & 0xFF)] ^
+                  ctx[1 * 256 + ((crc >> 16) & 0xFF)] ^
+                  ctx[0 * 256 + ((crc >> 24) )];
+        }
+    }
+    /* Remaining bytes (all of them when ctx[256] is nonzero) are processed one at a time. */
+    while (buffer < end)
+        crc = ctx[((uint8_t) crc) ^ *buffer++] ^ (crc >> 8);
+
+    return crc;
+}
+
+/* Compute each listed CRC over a fixed buffer and abort if any result differs from the expected value. */
+int main(void)
+{ /* The checks are compiled only for little-endian and big-endian targets. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    uint8_t buf[1999];
+    int i;
+    unsigned /* each row: { AVCRCId, polynomial (not read below), expected CRC } */
+      p[6][3] = { { AV_CRC_32_IEEE_LE, 0xEDB88320, 0x3D5CDD04 },
+		  { AV_CRC_32_IEEE , 0x04C11DB7, 0xE0BAF5C0 },
+		  { AV_CRC_24_IEEE , 0x864CFB , 0x326039 },
+		  { AV_CRC_16_ANSI_LE, 0xA001 , 0xBFD8 },
+		  { AV_CRC_16_ANSI , 0x8005 , 0xBB1F },
+		  { AV_CRC_8_ATM , 0x07 , 0xE3 }
+    };
+    const AVCRC *ctx;
+    /* Deterministic input: byte i holds the low 8 bits of i + i*i. */
+    for (i = 0; i < sizeof(buf); i++)
+        buf[i] = i + i * i;
+
+    for (i = 0; i < 6; i++) {
+        int id = p[i][0];
+	uint32_t result;
+        ctx = av_crc_get_table (id);
+	result = av_crc(ctx, 0, buf, sizeof(buf));
+	if (result != p[i][2])
+	  __builtin_abort ();
+    }
+#endif
+    return 0;
+}
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 3aa4a07435f9..2215b4dc7092 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2449,11 +2449,9 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
 static gimple *
 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
 {
-  unsigned rsize;
-  uint64_t tmpn, mask;
-/* The number which the find_bswap_or_nop_1 result should match in order
-   to have a full byte swap.  The number is shifted to the right
-   according to the size of the symbolic number before using it.  */
+  /* The number which the find_bswap_or_nop_1 result should match in order
+     to have a full byte swap.  The number is shifted to the right
+     according to the size of the symbolic number before using it.  */
   uint64_t cmpxchg = CMPXCHG;
   uint64_t cmpnop = CMPNOP;
 
@@ -2474,38 +2472,28 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
 
   /* Find real size of result (highest non-zero byte).  */
   if (n->base_addr)
-    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
-  else
-    rsize = n->range;
+    {
+      unsigned HOST_WIDE_INT rsize;
+      uint64_t tmpn;
 
-  /* Zero out the bits corresponding to untouched bytes in original gimple
-     expression.  */
+      for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
+      if (BYTES_BIG_ENDIAN && n->range != rsize)
+	/* This implies an offset, which is currently not handled by
+	   bswap_replace.  */
+	return NULL;
+      n->range = rsize;
+    }
+
+  /* Zero out the extra bits of N and CMP*.  */
   if (n->range < (int) sizeof (int64_t))
     {
+      uint64_t mask;
+
       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
       cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
       cmpnop &= mask;
     }
 
-  /* Zero out the bits corresponding to unused bytes in the result of the
-     gimple expression.  */
-  if (rsize < n->range)
-    {
-      if (BYTES_BIG_ENDIAN)
-	{
-	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
-	  cmpxchg &= mask;
-	  cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
-	}
-      else
-	{
-	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
-	  cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
-	  cmpnop &= mask;
-	}
-      n->range = rsize;
-    }
-
   /* A complete byte swap should make the symbolic number to start with
      the largest digit in the highest order byte. Unchanged symbolic
      number indicates a read with same endianness as target architecture.  */