From 6d87c306c6a44f8db81a9a9f2f5de4d943d5e57f Mon Sep 17 00:00:00 2001 From: Bernd Schmidt <bernds@redhat.com> Date: Tue, 16 Feb 2016 14:42:59 +0000 Subject: [PATCH] Fix bswap optimization on big-endian (PR69714, 67781). PR tree-optimization/69714 * tree-ssa-math-opts.c (find_bswap_or_nop): Revert previous change. Return NULL if we have irrelevant high bytes on BIG_ENDIAN. testsuite/ PR tree-optimization/69714 * gcc.dg/torture/pr69714.c: New test. From-SVN: r233452 --- gcc/ChangeLog | 6 + gcc/testsuite/ChangeLog | 7 +- gcc/testsuite/gcc.dg/torture/pr69714.c | 156 +++++++++++++++++++++++++ gcc/tree-ssa-math-opts.c | 46 +++----- 4 files changed, 185 insertions(+), 30 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr69714.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d41547a2da02..741fbd095c60 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2016-02-16 Bernd Schmidt <bschmidt@redhat.com> + + PR tree-optimization/69714 + * tree-ssa-math-opts.c (find_bswap_or_nop): Revert previous change. + Return NULL if we have irrelevant high bytes on BIG_ENDIAN. + 2016-02-16 Claudiu Zissulescu <claziss@synopsys.com> * config/arc/arc-modes.def (CC_FPU, CC_FPU_UNEQ): New modes. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4d6884ed5c49..38751605e80a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-02-16 Bernd Schmidt <bschmidt@redhat.com> + + PR tree-optimization/69714 + * gcc.dg/torture/pr69714.c: New test. + 2016-02-16 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/69802 @@ -152,7 +157,7 @@ 2016-02-12 Bernd Schmidt <bschmidt@redhat.com> PR c/69522 - gcc.dg/pr69522.c: New test. + * gcc.dg/pr69522.c: New test. 2016-02-12 Patrick Palka <ppalka@gcc.gnu.org> diff --git a/gcc/testsuite/gcc.dg/torture/pr69714.c b/gcc/testsuite/gcc.dg/torture/pr69714.c new file mode 100644 index 000000000000..229b7ad58a11 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr69714.c @@ -0,0 +1,156 @@ +/* { dg-do run } */ +/* { dg-options "-fno-strict-aliasing" } */ + +#include <stdint.h> +#include <stdio.h> + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define av_le2ne32(x) (x) +#else +#define av_le2ne32(x) av_bswap32(x) +#endif + +static __attribute__((always_inline)) inline __attribute__((const)) uint32_t av_bswap32(uint32_t x) +{ + return ((((x) << 8 & 0xff00) | ((x) >> 8 & 0x00ff)) << 16 | ((((x) >> 16) << 8 & 0xff00) | (((x) >> 16) >> 8 & 0x00ff))); +} + +typedef uint32_t AVCRC; + +typedef enum { + AV_CRC_8_ATM, + AV_CRC_16_ANSI, + AV_CRC_16_CCITT, + AV_CRC_32_IEEE, + AV_CRC_32_IEEE_LE, + AV_CRC_16_ANSI_LE, + AV_CRC_24_IEEE = 12, + AV_CRC_MAX, +} AVCRCId; + +int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size); + + + + + + +uint32_t av_crc(const AVCRC *ctx, uint32_t crc, + const uint8_t *buffer, size_t length) __attribute__((pure)); +static struct { + uint8_t le; + uint8_t bits; + uint32_t poly; +} av_crc_table_params[AV_CRC_MAX] = { + [AV_CRC_8_ATM] = { 0, 8, 0x07 }, + [AV_CRC_16_ANSI] = { 0, 16, 0x8005 }, + [AV_CRC_16_CCITT] = { 0, 16, 0x1021 }, + [AV_CRC_24_IEEE] = { 0, 24, 0x864CFB }, + [AV_CRC_32_IEEE] = { 0, 32, 0x04C11DB7 }, + [AV_CRC_32_IEEE_LE] = { 1, 32, 0xEDB88320 }, + [AV_CRC_16_ANSI_LE] = { 1, 16, 0xA001 }, +}; +static AVCRC av_crc_table[AV_CRC_MAX][1024]; + + +int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size) +{ + unsigned i, j; + uint32_t c; + + if (bits < 8 || bits > 32 || poly >= (1LL << bits)) + return -1; + if (ctx_size != sizeof(AVCRC) * 257 && ctx_size != sizeof(AVCRC) * 1024) + return -1; + + for (i = 0; i < 256; i++) { + if (le) { + for (c = i, j = 0; j < 8; j++) + c = (c >> 1) ^ (poly & (-(c & 1))); + ctx[i] = c; + } else { + for (c = i << 24, j = 0; j < 8; j++) + c = (c << 1) ^ ((poly << (32 - bits)) & (((int32_t) c) >> 31)); + ctx[i] = av_bswap32(c); + } + } + ctx[256] = 1; + + if (ctx_size >= sizeof(AVCRC) * 1024) + for (i = 0; i < 256; i++) + for (j = 0; j < 3; j++) + ctx[256 *(j + 1) + i] = + (ctx[256 * j + i] >> 8) ^ ctx[ctx[256 * j + i] & 0xFF]; + + + return 0; +} + +const AVCRC *av_crc_get_table(AVCRCId crc_id) +{ + if (!av_crc_table[crc_id][(sizeof(av_crc_table[crc_id]) / sizeof((av_crc_table[crc_id])[0])) - 1]) + if (av_crc_init(av_crc_table[crc_id], + av_crc_table_params[crc_id].le, + av_crc_table_params[crc_id].bits, + av_crc_table_params[crc_id].poly, + sizeof(av_crc_table[crc_id])) < 0) + return ((void *)0); + + return av_crc_table[crc_id]; +} + +uint32_t av_crc(const AVCRC *ctx, uint32_t crc, + const uint8_t *buffer, size_t length) +{ + const uint8_t *end = buffer + length; + + + if (!ctx[256]) { + while (((intptr_t) buffer & 3) && buffer < end) + crc = ctx[((uint8_t) crc) ^ *buffer++] ^ (crc >> 8); + + while (buffer < end - 3) { + crc ^= av_le2ne32(*(const uint32_t *) buffer); buffer += 4; + crc = ctx[3 * 256 + ( crc & 0xFF)] ^ + ctx[2 * 256 + ((crc >> 8 ) & 0xFF)] ^ + ctx[1 * 256 + ((crc >> 16) & 0xFF)] ^ + ctx[0 * 256 + ((crc >> 24) )]; + } + } + + while (buffer < end) + crc = ctx[((uint8_t) crc) ^ *buffer++] ^ (crc >> 8); + + return crc; +} + + +int main(void) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + uint8_t buf[1999]; + int i; + unsigned + p[6][3] = { { AV_CRC_32_IEEE_LE, 0xEDB88320, 0x3D5CDD04 }, + { AV_CRC_32_IEEE , 0x04C11DB7, 0xE0BAF5C0 }, + { AV_CRC_24_IEEE , 0x864CFB , 0x326039 }, + { AV_CRC_16_ANSI_LE, 0xA001 , 0xBFD8 }, + { AV_CRC_16_ANSI , 0x8005 , 0xBB1F }, + { AV_CRC_8_ATM , 0x07 , 0xE3 } + }; + const AVCRC *ctx; + + for (i = 0; i < sizeof(buf); i++) + buf[i] = i + i * i; + + for (i = 0; i < 6; i++) { + int id = p[i][0]; + uint32_t result; + ctx = av_crc_get_table (id); + result = av_crc(ctx, 0, buf, sizeof(buf)); + if (result != p[i][2]) + __builtin_abort (); + } +#endif + return 0; +} diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 3aa4a07435f9..2215b4dc7092 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -2449,11 +2449,9 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) static gimple * find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) { - unsigned rsize; - uint64_t tmpn, mask; -/* The number which the find_bswap_or_nop_1 result should match in order - to have a full byte swap. The number is shifted to the right - according to the size of the symbolic number before using it. */ + /* The number which the find_bswap_or_nop_1 result should match in order + to have a full byte swap. The number is shifted to the right + according to the size of the symbolic number before using it. */ uint64_t cmpxchg = CMPXCHG; uint64_t cmpnop = CMPNOP; @@ -2474,38 +2472,28 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) /* Find real size of result (highest non-zero byte). */ if (n->base_addr) - for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++); - else - rsize = n->range; + { + unsigned HOST_WIDE_INT rsize; + uint64_t tmpn; - /* Zero out the bits corresponding to untouched bytes in original gimple - expression. */ + for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++); + if (BYTES_BIG_ENDIAN && n->range != rsize) + /* This implies an offset, which is currently not handled by + bswap_replace. */ + return NULL; + n->range = rsize; + } + + /* Zero out the extra bits of N and CMP*. */ if (n->range < (int) sizeof (int64_t)) { + uint64_t mask; + mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1; cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; cmpnop &= mask; } - /* Zero out the bits corresponding to unused bytes in the result of the - gimple expression. */ - if (rsize < n->range) - { - if (BYTES_BIG_ENDIAN) - { - mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; - cmpxchg &= mask; - cmpnop >>= (n->range - rsize) * BITS_PER_MARKER; - } - else - { - mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; - cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER; - cmpnop &= mask; - } - n->range = rsize; - } - /* A complete byte swap should make the symbolic number to start with the largest digit in the highest order byte. Unchanged symbolic number indicates a read with same endianness as target architecture. */