mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-12 18:34:36 +08:00
Use Intel SSE 4.2 CRC instructions where available.
Modern x86 and x86-64 processors with SSE 4.2 support have special instructions, crc32b and crc32q, for calculating CRC-32C. They greatly speed up CRC calculation. Whether the instructions can be used or not depends on the compiler and the target architecture. If generation of SSE 4.2 instructions is allowed for the target (-msse4.2 flag on gcc and clang), use them. If they are not allowed by default, but the compiler supports the -msse4.2 flag to enable them, compile just the CRC-32C function with -msse4.2 flag, and check at runtime whether the processor we're running on supports it. If it doesn't, fall back to the slicing-by-8 algorithm. (With the common defaults on current operating systems, the runtime-check variant is what you get in practice.) Abhijit Menon-Sen, heavily modified by me, reviewed by Andres Freund.
This commit is contained in:
parent
4f700bcd20
commit
3dc2d62d04
@ -473,3 +473,30 @@ AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT64_CAS],
|
||||
if test x"$pgac_cv_gcc_atomic_int64_cas" = x"yes"; then
|
||||
AC_DEFINE(HAVE_GCC__ATOMIC_INT64_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int64 *, int *, int64).])
|
||||
fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
|
||||
|
||||
# PGAC_SSE42_CRC32_INTRINSICS
|
||||
# -----------------------
|
||||
# Check if the compiler supports _mm_crc32_u8 and _mm_crc32_u64 intrinsics.
|
||||
# An optional compiler flag can be passed as argument (e.g. -msse4.2). If the
|
||||
# intrinsics are supported, sets pgac_sse42_crc32_intrinsics, and CFLAGS_SSE42.
|
||||
AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS],
|
||||
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics_$1])])dnl
|
||||
AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=$1], [Ac_cachevar],
|
||||
[pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS $1"
|
||||
ac_save_c_werror_flag=$ac_c_werror_flag
|
||||
ac_c_werror_flag=yes
|
||||
AC_TRY_LINK([#include <nmmintrin.h>],
|
||||
[unsigned int crc = 0;
|
||||
crc = _mm_crc32_u8(crc, 0);
|
||||
crc = (unsigned int) _mm_crc32_u64(crc, 0);],
|
||||
[Ac_cachevar=yes],
|
||||
[Ac_cachevar=no])
|
||||
ac_c_werror_flag=$ac_save_c_werror_flag
|
||||
CFLAGS="$pgac_save_CFLAGS"])
|
||||
if test x"$Ac_cachevar" = x"yes"; then
|
||||
CFLAGS_SSE42="$1"
|
||||
pgac_sse42_crc32_intrinsics=yes
|
||||
fi
|
||||
undefine([Ac_cachevar])dnl
|
||||
])# PGAC_SSE42_CRC32_INTRINSICS
|
||||
|
212
configure
vendored
212
configure
vendored
@ -650,6 +650,8 @@ MSGMERGE
|
||||
MSGFMT_FLAGS
|
||||
MSGFMT
|
||||
HAVE_POSIX_SIGNALS
|
||||
PG_CRC32C_OBJS
|
||||
CFLAGS_SSE42
|
||||
LDAP_LIBS_BE
|
||||
LDAP_LIBS_FE
|
||||
PTHREAD_CFLAGS
|
||||
@ -14095,6 +14097,216 @@ $as_echo "#define HAVE_GCC__ATOMIC_INT64_CAS 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
|
||||
# Check for x86 cpuid instruction
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid" >&5
|
||||
$as_echo_n "checking for __get_cpuid... " >&6; }
|
||||
if ${pgac_cv__get_cpuid+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
#include <cpuid.h>
|
||||
int
|
||||
main ()
|
||||
{
|
||||
unsigned int exx[4] = {0, 0, 0, 0};
|
||||
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
pgac_cv__get_cpuid="yes"
|
||||
else
|
||||
pgac_cv__get_cpuid="no"
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__get_cpuid" >&5
|
||||
$as_echo "$pgac_cv__get_cpuid" >&6; }
|
||||
if test x"$pgac_cv__get_cpuid" = x"yes"; then
|
||||
|
||||
$as_echo "#define HAVE__GET_CPUID 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
# Probe for the __cpuid intrinsic declared in <intrin.h>.  The test program
# must call __cpuid itself, in the same form pg_crc32c_choose.c uses
# (array first, then the CPUID leaf); probing any other name would make
# HAVE__CPUID meaningless.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuid" >&5
$as_echo_n "checking for __cpuid... " >&6; }
if ${pgac_cv__cpuid+:} false; then :
  $as_echo_n "(cached) " >&6
else
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <intrin.h>
int
main ()
{
unsigned int exx[4] = {0, 0, 0, 0};
__cpuid(exx, 1);

;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
  pgac_cv__cpuid="yes"
else
  pgac_cv__cpuid="no"
fi
rm -f core conftest.err conftest.$ac_objext \
    conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuid" >&5
$as_echo "$pgac_cv__cpuid" >&6; }
if test x"$pgac_cv__cpuid" = x"yes"; then

$as_echo "#define HAVE__CPUID 1" >>confdefs.h

fi
|
||||
|
||||
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
|
||||
#
|
||||
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
|
||||
# with the default compiler flags. If not, check if adding the -msse4.2
|
||||
# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required.
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=" >&5
|
||||
$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=... " >&6; }
|
||||
if ${pgac_cv_sse42_crc32_intrinsics_+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS "
|
||||
ac_save_c_werror_flag=$ac_c_werror_flag
|
||||
ac_c_werror_flag=yes
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
#include <nmmintrin.h>
|
||||
int
|
||||
main ()
|
||||
{
|
||||
unsigned int crc = 0;
|
||||
crc = _mm_crc32_u8(crc, 0);
|
||||
crc = (unsigned int) _mm_crc32_u64(crc, 0);
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
pgac_cv_sse42_crc32_intrinsics_=yes
|
||||
else
|
||||
pgac_cv_sse42_crc32_intrinsics_=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
ac_c_werror_flag=$ac_save_c_werror_flag
|
||||
CFLAGS="$pgac_save_CFLAGS"
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics_" >&5
|
||||
$as_echo "$pgac_cv_sse42_crc32_intrinsics_" >&6; }
|
||||
if test x"$pgac_cv_sse42_crc32_intrinsics_" = x"yes"; then
|
||||
CFLAGS_SSE42=""
|
||||
pgac_sse42_crc32_intrinsics=yes
|
||||
fi
|
||||
|
||||
if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=-msse4.2" >&5
|
||||
$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=-msse4.2... " >&6; }
|
||||
if ${pgac_cv_sse42_crc32_intrinsics__msse4_2+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS -msse4.2"
|
||||
ac_save_c_werror_flag=$ac_c_werror_flag
|
||||
ac_c_werror_flag=yes
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
#include <nmmintrin.h>
|
||||
int
|
||||
main ()
|
||||
{
|
||||
unsigned int crc = 0;
|
||||
crc = _mm_crc32_u8(crc, 0);
|
||||
crc = (unsigned int) _mm_crc32_u64(crc, 0);
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
pgac_cv_sse42_crc32_intrinsics__msse4_2=yes
|
||||
else
|
||||
pgac_cv_sse42_crc32_intrinsics__msse4_2=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
ac_c_werror_flag=$ac_save_c_werror_flag
|
||||
CFLAGS="$pgac_save_CFLAGS"
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics__msse4_2" >&5
|
||||
$as_echo "$pgac_cv_sse42_crc32_intrinsics__msse4_2" >&6; }
|
||||
if test x"$pgac_cv_sse42_crc32_intrinsics__msse4_2" = x"yes"; then
|
||||
CFLAGS_SSE42="-msse4.2"
|
||||
pgac_sse42_crc32_intrinsics=yes
|
||||
fi
|
||||
|
||||
fi
|
||||
|
||||
|
||||
# Select CRC-32C implementation.
|
||||
#
|
||||
# If the SSE 4.2 intrinsics are available without extra CFLAGS, then use them
|
||||
# always. If they require extra CFLAGS, compile both implementations and
|
||||
# select which one to use at runtime, depending on whether SSE 4.2 is
|
||||
# supported by the processor we're running on.
|
||||
#
|
||||
# You can override this logic by setting the appropriate USE_*_CRC32C flag to 1
|
||||
# in the template or configure command line.
|
||||
if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then
|
||||
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x"" ; then
|
||||
USE_SSE42_CRC32C=1
|
||||
else
|
||||
# the CPUID instruction is needed for the runtime check.
|
||||
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
|
||||
USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
|
||||
else
|
||||
USE_SLICING_BY_8_CRC32C=1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Set PG_CRC32C_OBJS appropriately depending on the selected implementation.
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking which CRC-32C implementation to use" >&5
|
||||
$as_echo_n "checking which CRC-32C implementation to use... " >&6; }
|
||||
if test x"$USE_SSE42_CRC32C" = x"1"; then
|
||||
|
||||
$as_echo "#define USE_SSE42_CRC32C 1" >>confdefs.h
|
||||
|
||||
PG_CRC32C_OBJS="pg_crc32c_sse42.o"
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5
|
||||
$as_echo "SSE 4.2" >&6; }
|
||||
else
|
||||
if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
|
||||
|
||||
$as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h
|
||||
|
||||
PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5
|
||||
$as_echo "SSE 4.2 with runtime check" >&6; }
|
||||
else
|
||||
|
||||
$as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h
|
||||
|
||||
PG_CRC32C_OBJS="pg_crc32c_sb8.o"
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5
|
||||
$as_echo "slicing-by-8" >&6; }
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# Check that POSIX signals are available if thread safety is enabled.
|
||||
if test "$PORTNAME" != "win32"
|
||||
then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for POSIX signal interface" >&5
|
||||
|
78
configure.in
78
configure.in
@ -1790,6 +1790,84 @@ PGAC_HAVE_GCC__SYNC_INT64_CAS
|
||||
PGAC_HAVE_GCC__ATOMIC_INT32_CAS
|
||||
PGAC_HAVE_GCC__ATOMIC_INT64_CAS
|
||||
|
||||
|
||||
# Check for x86 cpuid instruction
|
||||
AC_CACHE_CHECK([for __get_cpuid], [pgac_cv__get_cpuid],
|
||||
[AC_TRY_LINK([#include <cpuid.h>],
|
||||
[unsigned int exx[4] = {0, 0, 0, 0};
|
||||
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||
],
|
||||
[pgac_cv__get_cpuid="yes"],
|
||||
[pgac_cv__get_cpuid="no"])])
|
||||
if test x"$pgac_cv__get_cpuid" = x"yes"; then
|
||||
AC_DEFINE(HAVE__GET_CPUID, 1, [Define to 1 if you have __get_cpuid.])
|
||||
fi
|
||||
|
||||
# Probe for the __cpuid intrinsic declared in <intrin.h>.  The test program
# calls __cpuid in the same form pg_crc32c_choose.c uses, array first and
# then the CPUID leaf, so that HAVE__CPUID reflects the function named here.
AC_CACHE_CHECK([for __cpuid], [pgac_cv__cpuid],
[AC_TRY_LINK([#include <intrin.h>],
  [unsigned int exx[4] = {0, 0, 0, 0};
  __cpuid(exx, 1);
  ],
  [pgac_cv__cpuid="yes"],
  [pgac_cv__cpuid="no"])])
if test x"$pgac_cv__cpuid" = x"yes"; then
  AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.])
fi
|
||||
|
||||
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
|
||||
#
|
||||
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
|
||||
# with the default compiler flags. If not, check if adding the -msse4.2
|
||||
# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required.
|
||||
PGAC_SSE42_CRC32_INTRINSICS([])
|
||||
if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then
|
||||
PGAC_SSE42_CRC32_INTRINSICS([-msse4.2])
|
||||
fi
|
||||
AC_SUBST(CFLAGS_SSE42)
|
||||
|
||||
# Select CRC-32C implementation.
|
||||
#
|
||||
# If the SSE 4.2 intrinsics are available without extra CFLAGS, then use them
|
||||
# always. If they require extra CFLAGS, compile both implementations and
|
||||
# select which one to use at runtime, depending on whether SSE 4.2 is
|
||||
# supported by the processor we're running on.
|
||||
#
|
||||
# You can override this logic by setting the appropriate USE_*_CRC32C flag to 1
|
||||
# in the template or configure command line.
|
||||
if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then
|
||||
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x"" ; then
|
||||
USE_SSE42_CRC32C=1
|
||||
else
|
||||
# the CPUID instruction is needed for the runtime check.
|
||||
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
|
||||
USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
|
||||
else
|
||||
USE_SLICING_BY_8_CRC32C=1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Set PG_CRC32C_OBJS appropriately depending on the selected implementation.
|
||||
AC_MSG_CHECKING([which CRC-32C implementation to use])
|
||||
if test x"$USE_SSE42_CRC32C" = x"1"; then
|
||||
AC_DEFINE(USE_SSE42_CRC32C, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions.])
|
||||
PG_CRC32C_OBJS="pg_crc32c_sse42.o"
|
||||
AC_MSG_RESULT(SSE 4.2)
|
||||
else
|
||||
if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
|
||||
AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.])
|
||||
PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
|
||||
AC_MSG_RESULT(SSE 4.2 with runtime check)
|
||||
else
|
||||
AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use software CRC-32C implementation (slicing-by-8).])
|
||||
PG_CRC32C_OBJS="pg_crc32c_sb8.o"
|
||||
AC_MSG_RESULT(slicing-by-8)
|
||||
fi
|
||||
fi
|
||||
AC_SUBST(PG_CRC32C_OBJS)
|
||||
|
||||
|
||||
# Check that POSIX signals are available if thread safety is enabled.
|
||||
if test "$PORTNAME" != "win32"
|
||||
then
|
||||
PGAC_FUNC_POSIX_SIGNALS
|
||||
|
@ -225,6 +225,7 @@ GCC = @GCC@
|
||||
SUN_STUDIO_CC = @SUN_STUDIO_CC@
|
||||
CFLAGS = @CFLAGS@
|
||||
CFLAGS_VECTOR = @CFLAGS_VECTOR@
|
||||
CFLAGS_SSE42 = @CFLAGS_SSE42@
|
||||
|
||||
# Kind-of compilers
|
||||
|
||||
@ -548,6 +549,9 @@ endif
|
||||
|
||||
LIBOBJS = @LIBOBJS@
|
||||
|
||||
# files needed for the chosen CRC-32C implementation
|
||||
PG_CRC32C_OBJS = @PG_CRC32C_OBJS@
|
||||
|
||||
LIBS := -lpgcommon -lpgport $(LIBS)
|
||||
|
||||
# to make ws2_32.lib the last library
|
||||
|
@ -675,6 +675,12 @@
|
||||
/* Define to 1 if your compiler understands __builtin_unreachable. */
|
||||
#undef HAVE__BUILTIN_UNREACHABLE
|
||||
|
||||
/* Define to 1 if you have __cpuid. */
|
||||
#undef HAVE__CPUID
|
||||
|
||||
/* Define to 1 if you have __get_cpuid. */
|
||||
#undef HAVE__GET_CPUID
|
||||
|
||||
/* Define to 1 if your compiler understands _Static_assert. */
|
||||
#undef HAVE__STATIC_ASSERT
|
||||
|
||||
@ -818,6 +824,15 @@
|
||||
/* Use replacement snprintf() functions. */
|
||||
#undef USE_REPL_SNPRINTF
|
||||
|
||||
/* Define to 1 to use software CRC-32C implementation (slicing-by-8). */
|
||||
#undef USE_SLICING_BY_8_CRC32C
|
||||
|
||||
/* Define to 1 to use Intel SSE 4.2 CRC instructions. */
|
||||
#undef USE_SSE42_CRC32C
|
||||
|
||||
/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */
|
||||
#undef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
|
||||
|
||||
/* Define to select SysV-style semaphores. */
|
||||
#undef USE_SYSV_SEMAPHORES
|
||||
|
||||
|
@ -6,8 +6,8 @@
|
||||
*
|
||||
* HAVE_CBRT, HAVE_FUNCNAME_FUNC, HAVE_GETOPT, HAVE_GETOPT_H, HAVE_INTTYPES_H,
|
||||
* HAVE_GETOPT_LONG, HAVE_LOCALE_T, HAVE_RINT, HAVE_STRINGS_H, HAVE_STRTOLL,
|
||||
* HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY,
|
||||
* PG_USE_INLINE, inline
|
||||
* HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY, PG_USE_INLINE,
|
||||
* inline, USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
|
||||
*/
|
||||
|
||||
/* Define to the type of arg 1 of 'accept' */
|
||||
@ -529,6 +529,12 @@
|
||||
/* Define to 1 if your compiler understands __builtin_unreachable. */
|
||||
/* #undef HAVE__BUILTIN_UNREACHABLE */
|
||||
|
||||
/* Define to 1 if you have __cpuid. */
|
||||
#define HAVE__CPUID 1
|
||||
|
||||
/* Define to 1 if you have __get_cpuid. */
|
||||
#undef HAVE__GET_CPUID
|
||||
|
||||
/* Define to 1 if your compiler understands _Static_assert. */
|
||||
/* #undef HAVE__STATIC_ASSERT */
|
||||
|
||||
@ -639,6 +645,19 @@
|
||||
/* Use replacement snprintf() functions. */
|
||||
#define USE_REPL_SNPRINTF 1
|
||||
|
||||
/*
 * Define to 1 to use the software CRC-32C implementation (slicing-by-8).
 * Selected when _MSC_VER is below 1500; the complementary _MSC_VER >= 1500
 * case is handled by USE_SSE42_CRC32C_WITH_RUNTIME_CHECK in this file.
 */
#if (_MSC_VER < 1500)
#define USE_SLICING_BY_8_CRC32C 1
#endif
|
||||
|
||||
/* Define to 1 to use Intel SSE 4.2 CRC instructions. */
|
||||
/* #undef USE_SSE42_CRC32C */
|
||||
|
||||
/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */
|
||||
#if (_MSC_VER >= 1500)
|
||||
#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
|
||||
#endif
|
||||
|
||||
/* Define to select SysV-style semaphores. */
|
||||
/* #undef USE_SYSV_SEMAPHORES */
|
||||
|
||||
|
@ -3,6 +3,25 @@
|
||||
* pg_crc32c.h
|
||||
* Routines for computing CRC-32C checksums.
|
||||
*
|
||||
* The speed of CRC-32C calculation has a big impact on performance, so we
|
||||
* jump through some hoops to get the best implementation for each
|
||||
* platform. Some CPU architectures have special instructions for speeding
|
||||
* up CRC calculations (e.g. Intel SSE 4.2), on other platforms we use the
|
||||
* Slicing-by-8 algorithm which uses lookup tables.
|
||||
*
|
||||
* The public interface consists of four macros:
|
||||
*
|
||||
* INIT_CRC32C(crc)
|
||||
* Initialize a CRC accumulator
|
||||
*
|
||||
* COMP_CRC32C(crc, data, len)
|
||||
* Accumulate some (more) bytes into a CRC
|
||||
*
|
||||
* FIN_CRC32C(crc)
|
||||
* Finish a CRC calculation
|
||||
*
|
||||
* EQ_CRC32C(c1, c2)
|
||||
* Check for equality of two CRCs.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
@ -16,9 +35,32 @@
|
||||
|
||||
typedef uint32 pg_crc32c;
|
||||
|
||||
/* The INIT and EQ macros are the same for all implementations. */
|
||||
#define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF)
|
||||
#define EQ_CRC32C(c1, c2) ((c1) == (c2))
|
||||
|
||||
#if defined(USE_SSE42_CRC32C)
|
||||
/* Use SSE4.2 instructions. */
|
||||
#define COMP_CRC32C(crc, data, len) \
|
||||
((crc) = pg_comp_crc32c_sse42((crc), (data), (len)))
|
||||
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
|
||||
|
||||
extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
|
||||
|
||||
#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
|
||||
/*
|
||||
* Use SSE4.2 instructions, but perform a runtime check first to check that
|
||||
* they are available.
|
||||
*/
|
||||
#define COMP_CRC32C(crc, data, len) \
|
||||
((crc) = pg_comp_crc32c((crc), (data), (len)))
|
||||
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
|
||||
|
||||
extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
|
||||
extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
|
||||
extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len);
|
||||
|
||||
#else
|
||||
/*
|
||||
* Use slicing-by-8 algorithm.
|
||||
*
|
||||
@ -46,4 +88,6 @@ typedef uint32 pg_crc32c;
|
||||
|
||||
extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* PG_CRC32C_H */
|
||||
|
@ -30,10 +30,10 @@ include $(top_builddir)/src/Makefile.global
|
||||
override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS)
|
||||
LIBS += $(PTHREAD_LIBS)
|
||||
|
||||
OBJS = $(LIBOBJS) chklocale.o erand48.o inet_net_ntop.o \
|
||||
OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
|
||||
noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \
|
||||
pgstrcasecmp.o pqsignal.o \
|
||||
qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o pg_crc32c_sb8.o
|
||||
qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o
|
||||
|
||||
# foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND
|
||||
OBJS_SRV = $(OBJS:%.o=%_srv.o)
|
||||
@ -57,6 +57,10 @@ libpgport.a: $(OBJS)
|
||||
# thread.o needs PTHREAD_CFLAGS (but thread_srv.o does not)
|
||||
thread.o: CFLAGS+=$(PTHREAD_CFLAGS)
|
||||
|
||||
# pg_crc32c_sse42.o and its _srv.o version need CFLAGS_SSE42
|
||||
pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42)
|
||||
pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42)
|
||||
|
||||
#
|
||||
# Server versions of object files
|
||||
#
|
||||
|
63
src/port/pg_crc32c_choose.c
Normal file
63
src/port/pg_crc32c_choose.c
Normal file
@ -0,0 +1,63 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_crc32c_choose.c
|
||||
* Choose which CRC-32C implementation to use, at runtime.
|
||||
*
|
||||
* Try to use the special CRC instructions introduced in Intel SSE 4.2,
|
||||
* if available on the platform we're running on, but fall back to the
|
||||
* slicing-by-8 implementation otherwise.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/port/pg_crc32c_choose.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "c.h"
|
||||
|
||||
#ifdef HAVE__GET_CPUID
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE__CPUID
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_crc32c.h"
|
||||
|
||||
/*
 * Report whether the processor we are running on supports SSE 4.2.
 *
 * Issues CPUID leaf 1 through whichever wrapper configure found
 * (__get_cpuid or __cpuid) and tests bit 20 of the third result word,
 * which is the SSE 4.2 feature flag.  The result array starts out zeroed,
 * so if the wrapper leaves it untouched the answer is "not available".
 */
static bool
pg_crc32c_sse42_available(void)
{
	unsigned int regs[4] = {0, 0, 0, 0};
	unsigned int feature_word;

#if defined(HAVE__GET_CPUID)
	__get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
#elif defined(HAVE__CPUID)
	__cpuid(regs, 1);
#else
#error cpuid instruction not available
#endif

	/* third word returned for leaf 1; bit 20 flags SSE 4.2 */
	feature_word = regs[2];

	return (feature_word & (1 << 20)) != 0;
}
|
||||
|
||||
/*
|
||||
* This gets called on the first call. It replaces the function pointer
|
||||
* so that subsequent calls are routed directly to the chosen implementation.
|
||||
*/
|
||||
static pg_crc32c
|
||||
pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
|
||||
{
|
||||
if (pg_crc32c_sse42_available())
|
||||
pg_comp_crc32c = pg_comp_crc32c_sse42;
|
||||
else
|
||||
pg_comp_crc32c = pg_comp_crc32c_sb8;
|
||||
|
||||
return pg_comp_crc32c(crc, data, len);
|
||||
}
|
||||
|
||||
pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
|
52
src/port/pg_crc32c_sse42.c
Normal file
52
src/port/pg_crc32c_sse42.c
Normal file
@ -0,0 +1,52 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_crc32c_sse42.c
|
||||
* Compute CRC-32C checksum using Intel SSE 4.2 instructions.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/port/pg_crc32c_sse42.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "c.h"
|
||||
|
||||
#include "port/pg_crc32c.h"
|
||||
|
||||
#include <nmmintrin.h>
|
||||
|
||||
pg_crc32c
|
||||
pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
|
||||
{
|
||||
const unsigned char *p = data;
|
||||
const uint64 *p8;
|
||||
|
||||
/*
|
||||
* Process eight bytes of data at a time.
|
||||
*
|
||||
* NB: We do unaligned 8-byte accesses here. The Intel architecture
|
||||
* allows that, and performance testing didn't show any performance
|
||||
* gain from aligning the beginning address.
|
||||
*/
|
||||
p8 = (const uint64 *) p;
|
||||
while (len >= 8)
|
||||
{
|
||||
crc = (uint32) _mm_crc32_u64(crc, *p8++);
|
||||
len -= 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle any remaining bytes one at a time.
|
||||
*/
|
||||
p = (const unsigned char *) p8;
|
||||
while (len > 0)
|
||||
{
|
||||
crc = _mm_crc32_u8(crc, *p++);
|
||||
len--;
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
@ -92,10 +92,21 @@ sub mkvcbuild
|
||||
pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c pqsignal.c
|
||||
mkdtemp.c qsort.c qsort_arg.c quotes.c system.c
|
||||
sprompt.c tar.c thread.c getopt.c getopt_long.c dirent.c
|
||||
win32env.c win32error.c win32setlocale.c pg_crc32c_sb8.c);
|
||||
win32env.c win32error.c win32setlocale.c);
|
||||
|
||||
push(@pgportfiles, 'rint.c') if ($vsVersion < '12.00');
|
||||
|
||||
if ($vsVersion >= '9.00')
|
||||
{
|
||||
push(@pgportfiles, 'pg_crc32c_choose.c');
|
||||
push(@pgportfiles, 'pg_crc32c_sse42.c');
|
||||
push(@pgportfiles, 'pg_crc32c_sb8.c');
|
||||
}
|
||||
else
|
||||
{
|
||||
push(@pgportfiles, 'pg_crc32c_sb8.c')
|
||||
}
|
||||
|
||||
our @pgcommonallfiles = qw(
|
||||
exec.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
|
||||
string.c username.c wait_error.c);
|
||||
|
Loading…
Reference in New Issue
Block a user