mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-11-27 07:21:09 +08:00
Add basic support for using the POPCNT and SSE4.2s LZCNT opcodes
These opcodes have been available on AMD processors since 2007 and on Intel processors since 2008. GCC and Clang expose them through __builtin functions. The opcodes may be unavailable at runtime, in which case we fall back on a C-based implementation of the code. In order to get the POPCNT instruction we must pass the -mpopcnt option to the compiler; we do this only for the pg_bitutils.c file. David Rowley (with fragments taken from a patch by Thomas Munro). Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
This commit is contained in:
parent
754ca99314
commit
711bab1e4d
@ -378,6 +378,122 @@ fi])# PGAC_C_BUILTIN_OP_OVERFLOW
|
||||
|
||||
|
||||
|
||||
# PGAC_C_BUILTIN_POPCOUNT
|
||||
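# -------------------------
# Check if the C compiler understands __builtin_popcount() when given
# -mpopcnt; if so, define HAVE__BUILTIN_POPCOUNT and set CFLAGS_POPCNT.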
|
||||
AC_DEFUN([PGAC_C_BUILTIN_POPCOUNT],
|
||||
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcount])])dnl
|
||||
AC_CACHE_CHECK([for __builtin_popcount], [Ac_cachevar],
|
||||
[pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS -mpopcnt"
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
||||
[static int x = __builtin_popcount(255);])],
|
||||
[Ac_cachevar=yes],
|
||||
[Ac_cachevar=no])
|
||||
CFLAGS="$pgac_save_CFLAGS"])
|
||||
if test x"$Ac_cachevar" = x"yes"; then
|
||||
CFLAGS_POPCNT="-mpopcnt"
|
||||
AC_DEFINE(HAVE__BUILTIN_POPCOUNT, 1,
|
||||
[Define to 1 if your compiler understands __builtin_popcount.])
|
||||
fi
|
||||
undefine([Ac_cachevar])dnl
|
||||
])# PGAC_C_BUILTIN_POPCOUNT
|
||||
|
||||
|
||||
|
||||
# PGAC_C_BUILTIN_POPCOUNTL
|
||||
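# -------------------------
# Check if the C compiler understands __builtin_popcountl() when given
# -mpopcnt; if so, define HAVE__BUILTIN_POPCOUNTL and set CFLAGS_POPCNT.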
|
||||
AC_DEFUN([PGAC_C_BUILTIN_POPCOUNTL],
|
||||
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcountl])])dnl
|
||||
AC_CACHE_CHECK([for __builtin_popcountl], [Ac_cachevar],
|
||||
[pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS -mpopcnt"
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
||||
[static int x = __builtin_popcountl(255);])],
|
||||
[Ac_cachevar=yes],
|
||||
[Ac_cachevar=no])
|
||||
CFLAGS="$pgac_save_CFLAGS"])
|
||||
if test x"$Ac_cachevar" = x"yes"; then
|
||||
CFLAGS_POPCNT="-mpopcnt"
|
||||
AC_DEFINE(HAVE__BUILTIN_POPCOUNTL, 1,
|
||||
[Define to 1 if your compiler understands __builtin_popcountl.])
|
||||
fi
|
||||
undefine([Ac_cachevar])dnl
|
||||
])# PGAC_C_BUILTIN_POPCOUNTL
|
||||
|
||||
|
||||
|
||||
# PGAC_C_BUILTIN_CTZ
|
||||
# -------------------------
|
||||
# Check if the C compiler understands __builtin_ctz(),
|
||||
# and define HAVE__BUILTIN_CTZ if so.
|
||||
AC_DEFUN([PGAC_C_BUILTIN_CTZ],
|
||||
[AC_CACHE_CHECK(for __builtin_ctz, pgac_cv__builtin_ctz,
|
||||
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
||||
[static int x = __builtin_ctz(256);]
|
||||
)],
|
||||
[pgac_cv__builtin_ctz=yes],
|
||||
[pgac_cv__builtin_ctz=no])])
|
||||
if test x"$pgac_cv__builtin_ctz" = xyes ; then
|
||||
AC_DEFINE(HAVE__BUILTIN_CTZ, 1,
|
||||
[Define to 1 if your compiler understands __builtin_ctz.])
|
||||
fi])# PGAC_C_BUILTIN_CTZ
|
||||
|
||||
|
||||
|
||||
# PGAC_C_BUILTIN_CTZL
|
||||
# -------------------------
|
||||
# Check if the C compiler understands __builtin_ctzl(),
|
||||
# and define HAVE__BUILTIN_CTZL if so.
|
||||
AC_DEFUN([PGAC_C_BUILTIN_CTZL],
|
||||
[AC_CACHE_CHECK(for __builtin_ctzl, pgac_cv__builtin_ctzl,
|
||||
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
||||
[static int x = __builtin_ctzl(256);]
|
||||
)],
|
||||
[pgac_cv__builtin_ctzl=yes],
|
||||
[pgac_cv__builtin_ctzl=no])])
|
||||
if test x"$pgac_cv__builtin_ctzl" = xyes ; then
|
||||
AC_DEFINE(HAVE__BUILTIN_CTZL, 1,
|
||||
[Define to 1 if your compiler understands __builtin_ctzl.])
|
||||
fi])# PGAC_C_BUILTIN_CTZL
|
||||
|
||||
|
||||
|
||||
# PGAC_C_BUILTIN_CLZ
|
||||
# -------------------------
|
||||
# Check if the C compiler understands __builtin_clz(),
|
||||
# and define HAVE__BUILTIN_CLZ if so.
|
||||
AC_DEFUN([PGAC_C_BUILTIN_CLZ],
|
||||
[AC_CACHE_CHECK(for __builtin_clz, pgac_cv__builtin_clz,
|
||||
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
||||
[static int x = __builtin_clz(256);]
|
||||
)],
|
||||
[pgac_cv__builtin_clz=yes],
|
||||
[pgac_cv__builtin_clz=no])])
|
||||
if test x"$pgac_cv__builtin_clz" = xyes ; then
|
||||
AC_DEFINE(HAVE__BUILTIN_CLZ, 1,
|
||||
[Define to 1 if your compiler understands __builtin_clz.])
|
||||
fi])# PGAC_C_BUILTIN_CLZ
|
||||
|
||||
|
||||
|
||||
# PGAC_C_BUILTIN_CLZL
|
||||
# -------------------------
|
||||
# Check if the C compiler understands __builtin_clzl(),
|
||||
# and define HAVE__BUILTIN_CLZL if so.
|
||||
AC_DEFUN([PGAC_C_BUILTIN_CLZL],
|
||||
[AC_CACHE_CHECK(for __builtin_clzl, pgac_cv__builtin_clzl,
|
||||
[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
||||
[static int x = __builtin_clzl(256);]
|
||||
)],
|
||||
[pgac_cv__builtin_clzl=yes],
|
||||
[pgac_cv__builtin_clzl=no])])
|
||||
if test x"$pgac_cv__builtin_clzl" = xyes ; then
|
||||
AC_DEFINE(HAVE__BUILTIN_CLZL, 1,
|
||||
[Define to 1 if your compiler understands __builtin_clzl.])
|
||||
fi])# PGAC_C_BUILTIN_CLZL
|
||||
|
||||
|
||||
|
||||
# PGAC_C_BUILTIN_UNREACHABLE
|
||||
# --------------------------
|
||||
# Check if the C compiler understands __builtin_unreachable(),
|
||||
|
155
configure
vendored
155
configure
vendored
@ -651,6 +651,7 @@ CFLAGS_ARMV8_CRC32C
|
||||
CFLAGS_SSE42
|
||||
have_win32_dbghelp
|
||||
LIBOBJS
|
||||
CFLAGS_POPCNT
|
||||
UUID_LIBS
|
||||
LDAP_LIBS_BE
|
||||
LDAP_LIBS_FE
|
||||
@ -14059,6 +14060,158 @@ if test x"$pgac_cv__builtin_constant_p" = xyes ; then
|
||||
|
||||
$as_echo "#define HAVE__BUILTIN_CONSTANT_P 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
|
||||
$as_echo_n "checking for __builtin_popcount... " >&6; }
|
||||
if ${pgac_cv_popcount+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS -mpopcnt"
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
static int x = __builtin_popcount(255);
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
pgac_cv_popcount=yes
|
||||
else
|
||||
pgac_cv_popcount=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
CFLAGS="$pgac_save_CFLAGS"
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcount" >&5
|
||||
$as_echo "$pgac_cv_popcount" >&6; }
|
||||
if test x"$pgac_cv_popcount" = x"yes"; then
|
||||
CFLAGS_POPCNT="-mpopcnt"
|
||||
|
||||
$as_echo "#define HAVE__BUILTIN_POPCOUNT 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcountl" >&5
|
||||
$as_echo_n "checking for __builtin_popcountl... " >&6; }
|
||||
if ${pgac_cv_popcountl+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
pgac_save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$pgac_save_CFLAGS -mpopcnt"
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
static int x = __builtin_popcountl(255);
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
pgac_cv_popcountl=yes
|
||||
else
|
||||
pgac_cv_popcountl=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
CFLAGS="$pgac_save_CFLAGS"
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcountl" >&5
|
||||
$as_echo "$pgac_cv_popcountl" >&6; }
|
||||
if test x"$pgac_cv_popcountl" = x"yes"; then
|
||||
CFLAGS_POPCNT="-mpopcnt"
|
||||
|
||||
$as_echo "#define HAVE__BUILTIN_POPCOUNTL 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctz" >&5
|
||||
$as_echo_n "checking for __builtin_ctz... " >&6; }
|
||||
if ${pgac_cv__builtin_ctz+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
static int x = __builtin_ctz(256);
|
||||
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
pgac_cv__builtin_ctz=yes
|
||||
else
|
||||
pgac_cv__builtin_ctz=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctz" >&5
|
||||
$as_echo "$pgac_cv__builtin_ctz" >&6; }
|
||||
if test x"$pgac_cv__builtin_ctz" = xyes ; then
|
||||
|
||||
$as_echo "#define HAVE__BUILTIN_CTZ 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctzl" >&5
|
||||
$as_echo_n "checking for __builtin_ctzl... " >&6; }
|
||||
if ${pgac_cv__builtin_ctzl+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
static int x = __builtin_ctzl(256);
|
||||
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
pgac_cv__builtin_ctzl=yes
|
||||
else
|
||||
pgac_cv__builtin_ctzl=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctzl" >&5
|
||||
$as_echo "$pgac_cv__builtin_ctzl" >&6; }
|
||||
if test x"$pgac_cv__builtin_ctzl" = xyes ; then
|
||||
|
||||
$as_echo "#define HAVE__BUILTIN_CTZL 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
|
||||
$as_echo_n "checking for __builtin_clz... " >&6; }
|
||||
if ${pgac_cv__builtin_clz+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
static int x = __builtin_clz(256);
|
||||
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
pgac_cv__builtin_clz=yes
|
||||
else
|
||||
pgac_cv__builtin_clz=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clz" >&5
|
||||
$as_echo "$pgac_cv__builtin_clz" >&6; }
|
||||
if test x"$pgac_cv__builtin_clz" = xyes ; then
|
||||
|
||||
$as_echo "#define HAVE__BUILTIN_CLZ 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzl" >&5
|
||||
$as_echo_n "checking for __builtin_clzl... " >&6; }
|
||||
if ${pgac_cv__builtin_clzl+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
static int x = __builtin_clzl(256);
|
||||
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
pgac_cv__builtin_clzl=yes
|
||||
else
|
||||
pgac_cv__builtin_clzl=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clzl" >&5
|
||||
$as_echo "$pgac_cv__builtin_clzl" >&6; }
|
||||
if test x"$pgac_cv__builtin_clzl" = xyes ; then
|
||||
|
||||
$as_echo "#define HAVE__BUILTIN_CLZL 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5
|
||||
$as_echo_n "checking for __builtin_unreachable... " >&6; }
|
||||
@ -14577,6 +14730,8 @@ $as_echo "#define LOCALE_T_IN_XLOCALE 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# MSVC doesn't cope well with defining restrict to __restrict, the
|
||||
# spelling it understands, because it conflicts with
|
||||
# __declspec(restrict). Therefore we define pg_restrict to the
|
||||
|
@ -1489,6 +1489,12 @@ PGAC_C_BUILTIN_BSWAP16
|
||||
PGAC_C_BUILTIN_BSWAP32
|
||||
PGAC_C_BUILTIN_BSWAP64
|
||||
PGAC_C_BUILTIN_CONSTANT_P
|
||||
PGAC_C_BUILTIN_POPCOUNT
|
||||
PGAC_C_BUILTIN_POPCOUNTL
|
||||
PGAC_C_BUILTIN_CTZ
|
||||
PGAC_C_BUILTIN_CTZL
|
||||
PGAC_C_BUILTIN_CLZ
|
||||
PGAC_C_BUILTIN_CLZL
|
||||
PGAC_C_BUILTIN_UNREACHABLE
|
||||
PGAC_C_COMPUTED_GOTO
|
||||
PGAC_STRUCT_TIMEZONE
|
||||
@ -1503,6 +1509,8 @@ AC_TYPE_LONG_LONG_INT
|
||||
|
||||
PGAC_TYPE_LOCALE_T
|
||||
|
||||
AC_SUBST(CFLAGS_POPCNT)
|
||||
|
||||
# MSVC doesn't cope well with defining restrict to __restrict, the
|
||||
# spelling it understands, because it conflicts with
|
||||
# __declspec(restrict). Therefore we define pg_restrict to the
|
||||
|
@ -260,6 +260,7 @@ CXX = @CXX@
|
||||
CFLAGS = @CFLAGS@
|
||||
CFLAGS_VECTOR = @CFLAGS_VECTOR@
|
||||
CFLAGS_SSE42 = @CFLAGS_SSE42@
|
||||
CFLAGS_POPCNT = @CFLAGS_POPCNT@
|
||||
CFLAGS_ARMV8_CRC32C = @CFLAGS_ARMV8_CRC32C@
|
||||
PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@
|
||||
CXXFLAGS = @CXXFLAGS@
|
||||
|
@ -89,12 +89,12 @@
|
||||
#include "access/visibilitymap.h"
|
||||
#include "access/xlog.h"
|
||||
#include "miscadmin.h"
|
||||
#include "port/pg_bitutils.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/inval.h"
|
||||
|
||||
|
||||
/*#define TRACE_VISIBILITYMAP */
|
||||
|
||||
/*
|
||||
@ -115,43 +115,9 @@
|
||||
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
|
||||
#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
|
||||
|
||||
/* tables for fast counting of set bits for visible and frozen */
|
||||
static const uint8 number_of_ones_for_visible[256] = {
|
||||
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
|
||||
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
|
||||
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
|
||||
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
|
||||
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
|
||||
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
|
||||
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
|
||||
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
|
||||
};
|
||||
static const uint8 number_of_ones_for_frozen[256] = {
|
||||
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
|
||||
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
|
||||
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
|
||||
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
|
||||
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
|
||||
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
|
||||
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
|
||||
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
|
||||
};
|
||||
/* Masks for counting bits in the visibility map. */
|
||||
#define VISIBLE_MASK64 0x5555555555555555 /* The lower bit of each bit pair */
|
||||
#define FROZEN_MASK64 0xaaaaaaaaaaaaaaaa /* The upper bit of each bit pair */
|
||||
|
||||
/* prototypes for internal routines */
|
||||
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
|
||||
@ -408,18 +374,16 @@ void
|
||||
visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
|
||||
{
|
||||
BlockNumber mapBlock;
|
||||
BlockNumber nvisible = 0;
|
||||
BlockNumber nfrozen = 0;
|
||||
|
||||
/* all_visible must be specified */
|
||||
Assert(all_visible);
|
||||
|
||||
*all_visible = 0;
|
||||
if (all_frozen)
|
||||
*all_frozen = 0;
|
||||
|
||||
for (mapBlock = 0;; mapBlock++)
|
||||
{
|
||||
Buffer mapBuffer;
|
||||
unsigned char *map;
|
||||
uint64 *map;
|
||||
int i;
|
||||
|
||||
/*
|
||||
@ -436,17 +400,30 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro
|
||||
* immediately stale anyway if anyone is concurrently setting or
|
||||
* clearing bits, and we only really need an approximate value.
|
||||
*/
|
||||
map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));
|
||||
map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer));
|
||||
|
||||
for (i = 0; i < MAPSIZE; i++)
|
||||
StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0,
|
||||
"unsupported MAPSIZE");
|
||||
if (all_frozen == NULL)
|
||||
{
|
||||
*all_visible += number_of_ones_for_visible[map[i]];
|
||||
if (all_frozen)
|
||||
*all_frozen += number_of_ones_for_frozen[map[i]];
|
||||
for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
|
||||
nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
|
||||
{
|
||||
nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
|
||||
nfrozen += pg_popcount64(map[i] & FROZEN_MASK64);
|
||||
}
|
||||
}
|
||||
|
||||
ReleaseBuffer(mapBuffer);
|
||||
}
|
||||
|
||||
*all_visible = nvisible;
|
||||
if (all_frozen)
|
||||
*all_frozen = nfrozen;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -37,6 +37,7 @@
|
||||
|
||||
#include "access/hash.h"
|
||||
#include "lib/bloomfilter.h"
|
||||
#include "port/pg_bitutils.h"
|
||||
|
||||
#define MAX_HASH_FUNCS 10
|
||||
|
||||
@ -187,19 +188,7 @@ double
|
||||
bloom_prop_bits_set(bloom_filter *filter)
|
||||
{
|
||||
int bitset_bytes = filter->m / BITS_PER_BYTE;
|
||||
uint64 bits_set = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bitset_bytes; i++)
|
||||
{
|
||||
unsigned char byte = filter->bitset[i];
|
||||
|
||||
while (byte)
|
||||
{
|
||||
bits_set++;
|
||||
byte &= (byte - 1);
|
||||
}
|
||||
}
|
||||
uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
|
||||
|
||||
return bits_set / (double) filter->m;
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
#include "access/hash.h"
|
||||
#include "nodes/pg_list.h"
|
||||
#include "port/pg_bitutils.h"
|
||||
|
||||
|
||||
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
|
||||
@ -51,79 +52,23 @@
|
||||
|
||||
#define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
|
||||
|
||||
/* Set the bitwise macro version we must use based on the bitmapword size */
|
||||
#if BITS_PER_BITMAPWORD == 32
|
||||
|
||||
/*
|
||||
* Lookup tables to avoid need for bit-by-bit groveling
|
||||
*
|
||||
* rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
|
||||
* in a nonzero byte value x. The entry for x=0 is never used.
|
||||
*
|
||||
* leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a
|
||||
* nonzero byte value x. The entry for x=0 is never used.
|
||||
*
|
||||
* number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
|
||||
*
|
||||
* We could make these tables larger and reduce the number of iterations
|
||||
* in the functions that use them, but bytewise shifts and masks are
|
||||
* especially fast on many machines, so working a byte at a time seems best.
|
||||
*/
|
||||
#define bmw_popcount(w) pg_popcount32(w)
|
||||
#define bmw_rightmost_one(w) pg_rightmost_one32(w)
|
||||
#define bmw_leftmost_one(w) pg_leftmost_one32(w)
|
||||
|
||||
static const uint8 rightmost_one_pos[256] = {
|
||||
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
|
||||
};
|
||||
#elif BITS_PER_BITMAPWORD == 64
|
||||
|
||||
static const uint8 leftmost_one_pos[256] = {
|
||||
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
|
||||
};
|
||||
#define bmw_popcount(w) pg_popcount64(w)
|
||||
#define bmw_rightmost_one(w) pg_rightmost_one64(w)
|
||||
#define bmw_leftmost_one(w) pg_leftmost_one64(w)
|
||||
|
||||
#else
|
||||
#error "invalid BITS_PER_BITMAPWORD"
|
||||
#endif
|
||||
|
||||
static const uint8 number_of_ones[256] = {
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
@ -607,12 +552,7 @@ bms_singleton_member(const Bitmapset *a)
|
||||
if (result >= 0 || HAS_MULTIPLE_ONES(w))
|
||||
elog(ERROR, "bitmapset has multiple members");
|
||||
result = wordnum * BITS_PER_BITMAPWORD;
|
||||
while ((w & 255) == 0)
|
||||
{
|
||||
w >>= 8;
|
||||
result += 8;
|
||||
}
|
||||
result += rightmost_one_pos[w & 255];
|
||||
result += bmw_rightmost_one(w);
|
||||
}
|
||||
}
|
||||
if (result < 0)
|
||||
@ -650,12 +590,7 @@ bms_get_singleton_member(const Bitmapset *a, int *member)
|
||||
if (result >= 0 || HAS_MULTIPLE_ONES(w))
|
||||
return false;
|
||||
result = wordnum * BITS_PER_BITMAPWORD;
|
||||
while ((w & 255) == 0)
|
||||
{
|
||||
w >>= 8;
|
||||
result += 8;
|
||||
}
|
||||
result += rightmost_one_pos[w & 255];
|
||||
result += bmw_rightmost_one(w);
|
||||
}
|
||||
}
|
||||
if (result < 0)
|
||||
@ -681,12 +616,9 @@ bms_num_members(const Bitmapset *a)
|
||||
{
|
||||
bitmapword w = a->words[wordnum];
|
||||
|
||||
/* we assume here that bitmapword is an unsigned type */
|
||||
while (w != 0)
|
||||
{
|
||||
result += number_of_ones[w & 255];
|
||||
w >>= 8;
|
||||
}
|
||||
/* No need to count the bits in a zero word */
|
||||
if (w != 0)
|
||||
result += bmw_popcount(w);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -1041,12 +973,7 @@ bms_first_member(Bitmapset *a)
|
||||
a->words[wordnum] &= ~w;
|
||||
|
||||
result = wordnum * BITS_PER_BITMAPWORD;
|
||||
while ((w & 255) == 0)
|
||||
{
|
||||
w >>= 8;
|
||||
result += 8;
|
||||
}
|
||||
result += rightmost_one_pos[w & 255];
|
||||
result += bmw_rightmost_one(w);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@ -1096,12 +1023,7 @@ bms_next_member(const Bitmapset *a, int prevbit)
|
||||
int result;
|
||||
|
||||
result = wordnum * BITS_PER_BITMAPWORD;
|
||||
while ((w & 255) == 0)
|
||||
{
|
||||
w >>= 8;
|
||||
result += 8;
|
||||
}
|
||||
result += rightmost_one_pos[w & 255];
|
||||
result += bmw_rightmost_one(w);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -1168,14 +1090,9 @@ bms_prev_member(const Bitmapset *a, int prevbit)
|
||||
if (w != 0)
|
||||
{
|
||||
int result;
|
||||
int shift = BITS_PER_BITMAPWORD - 8;
|
||||
|
||||
result = wordnum * BITS_PER_BITMAPWORD;
|
||||
|
||||
while ((w >> shift) == 0)
|
||||
shift -= 8;
|
||||
|
||||
result += shift + leftmost_one_pos[(w >> shift) & 255];
|
||||
result += bmw_leftmost_one(w);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -754,6 +754,24 @@
|
||||
/* Define to 1 if your compiler understands __builtin_$op_overflow. */
|
||||
#undef HAVE__BUILTIN_OP_OVERFLOW
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_popcount. */
|
||||
#undef HAVE__BUILTIN_POPCOUNT
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_popcountl. */
|
||||
#undef HAVE__BUILTIN_POPCOUNTL
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_ctz. */
|
||||
#undef HAVE__BUILTIN_CTZ
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_ctzl. */
|
||||
#undef HAVE__BUILTIN_CTZL
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_clz. */
|
||||
#undef HAVE__BUILTIN_CLZ
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_clzl. */
|
||||
#undef HAVE__BUILTIN_CLZL
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
|
||||
#undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
|
||||
|
||||
|
@ -593,6 +593,24 @@
|
||||
/* Define to 1 if your compiler understands __builtin_$op_overflow. */
|
||||
/* #undef HAVE__BUILTIN_OP_OVERFLOW */
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_popcount. */
|
||||
/* #undef HAVE__BUILTIN_POPCOUNT */
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_popcountl. */
|
||||
/* #undef HAVE__BUILTIN_POPCOUNTL */
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_ctz. */
|
||||
/* #undef HAVE__BUILTIN_CTZ */
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_ctzl. */
|
||||
/* #undef HAVE__BUILTIN_CTZL */
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_clz. */
|
||||
/* #undef HAVE__BUILTIN_CLZ */
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_clzl. */
|
||||
/* #undef HAVE__BUILTIN_CLZL */
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
|
||||
/* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */
|
||||
|
||||
|
26
src/include/port/pg_bitutils.h
Normal file
26
src/include/port/pg_bitutils.h
Normal file
@ -0,0 +1,26 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_bitutils.h
|
||||
* miscellaneous functions for bit-wise operations.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 2019, PostgreSQL Global Development Group
|
||||
*
|
||||
* src/include/port/pg_bitutils.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PG_BITUTILS_H
|
||||
#define PG_BITUTILS_H
|
||||
|
||||
extern int (*pg_popcount32) (uint32 word);
|
||||
extern int (*pg_popcount64) (uint64 word);
|
||||
extern int (*pg_rightmost_one32) (uint32 word);
|
||||
extern int (*pg_rightmost_one64) (uint64 word);
|
||||
extern int (*pg_leftmost_one32) (uint32 word);
|
||||
extern int (*pg_leftmost_one64) (uint64 word);
|
||||
|
||||
extern uint64 pg_popcount(const char *buf, int bytes);
|
||||
|
||||
#endif /* PG_BITUTILS_H */
|
@ -36,7 +36,7 @@ override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS)
|
||||
LIBS += $(PTHREAD_LIBS)
|
||||
|
||||
OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
|
||||
noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \
|
||||
noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o \
|
||||
pg_strong_random.o pgstrcasecmp.o pgstrsignal.o pqsignal.o \
|
||||
qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \
|
||||
tar.o thread.o
|
||||
@ -78,6 +78,9 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
|
||||
pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
|
||||
pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
|
||||
|
||||
# pg_bitutils.c needs CFLAGS_POPCNT
|
||||
pg_bitutils.o: CFLAGS+=$(CFLAGS_POPCNT)
|
||||
|
||||
#
|
||||
# Shared library versions of object files
|
||||
#
|
||||
|
516
src/port/pg_bitutils.c
Normal file
516
src/port/pg_bitutils.c
Normal file
@ -0,0 +1,516 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_bitutils.c
|
||||
* miscellaneous functions for bit-wise operations.
|
||||
*
|
||||
* Portions Copyright (c) 2019, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/port/pg_bitutils.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#ifdef HAVE__GET_CPUID
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE__CPUID
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_bitutils.h"
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_POPCOUNT) || defined(HAVE__BUILTIN_POPCOUNTL))
|
||||
static bool pg_popcount_available(void);
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID)
|
||||
static int pg_popcount32_choose(uint32 word);
|
||||
static int pg_popcount32_sse42(uint32 word);
|
||||
#endif
|
||||
static int pg_popcount32_slow(uint32 word);
|
||||
|
||||
#if defined(HAVE__BUILTIN_POPCOUNTL) && defined(HAVE__GET_CPUID)
|
||||
static int pg_popcount64_choose(uint64 word);
|
||||
static int pg_popcount64_sse42(uint64 word);
|
||||
#endif
|
||||
static int pg_popcount64_slow(uint64 word);
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CTZL) || defined(HAVE__BUILTIN_CLZ) || defined(HAVE__BUILTIN_CLZL))
|
||||
static bool pg_lzcnt_available(void);
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID)
|
||||
static int pg_rightmost_one32_choose(uint32 word);
|
||||
static int pg_rightmost_one32_abm(uint32 word);
|
||||
#endif
|
||||
static int pg_rightmost_one32_slow(uint32 word);
|
||||
|
||||
#if defined(HAVE__BUILTIN_CTZL) && defined(HAVE__GET_CPUID)
|
||||
static int pg_rightmost_one64_choose(uint64 word);
|
||||
static int pg_rightmost_one64_abm(uint64 word);
|
||||
#endif
|
||||
static int pg_rightmost_one64_slow(uint64 word);
|
||||
|
||||
#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID)
|
||||
static int pg_leftmost_one32_choose(uint32 word);
|
||||
static int pg_leftmost_one32_abm(uint32 word);
|
||||
#endif
|
||||
static int pg_leftmost_one32_slow(uint32 word);
|
||||
|
||||
#if defined(HAVE__BUILTIN_CLZL) && defined(HAVE__GET_CPUID)
|
||||
static int pg_leftmost_one64_choose(uint64 word);
|
||||
static int pg_leftmost_one64_abm(uint64 word);
|
||||
#endif
|
||||
static int pg_leftmost_one64_slow(uint64 word);
|
||||
|
||||
#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID)
|
||||
int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
|
||||
#else
|
||||
int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__BUILTIN_POPCOUNTL) && defined(HAVE__GET_CPUID)
|
||||
int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
|
||||
#else
|
||||
int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID)
|
||||
int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_choose;
|
||||
#else
|
||||
int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_slow;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__BUILTIN_CTZL) && defined(HAVE__GET_CPUID)
|
||||
int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_choose;
|
||||
#else
|
||||
int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_slow;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID)
|
||||
int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_choose;
|
||||
#else
|
||||
int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_slow;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__BUILTIN_CLZL) && defined(HAVE__GET_CPUID)
|
||||
int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_choose;
|
||||
#else
|
||||
int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_slow;
|
||||
#endif
|
||||
|
||||
|
||||
/* Array marking the number of 1-bits for each value of 0-255. */
|
||||
static const uint8 number_of_ones[256] = {
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
||||
};
|
||||
|
||||
/*
|
||||
* Array marking the position of the right-most set bit for each value of
|
||||
* 1-255. We count the right-most position as the 0th bit, and the
|
||||
* left-most the 7th bit. The 0th index of the array must not be used.
|
||||
*/
|
||||
static const uint8 rightmost_one_pos[256] = {
|
||||
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
|
||||
};
|
||||
|
||||
/*
|
||||
* Array marking the position of the left-most set bit for each value of
|
||||
* 1-255. We count the right-most position as the 0th bit, and the
|
||||
* left-most the 7th bit. The 0th index of the array must not be used.
|
||||
*/
|
||||
static const uint8 leftmost_one_pos[256] = {
|
||||
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
|
||||
};
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_POPCOUNT) || defined(HAVE__BUILTIN_POPCOUNTL))
|
||||
|
||||
/*
 * pg_popcount_available
 *		Report whether the CPU advertises the POPCNT instruction.
 *
 * Queries CPUID leaf 1 and tests bit 23 of the third returned register
 * (ECX).  The register array starts out zeroed, so the answer is false if
 * the query leaves it untouched.
 *
 * NOTE(review): the #if enclosing this function already requires
 * HAVE__GET_CPUID, so the __cpuid branch below looks unreachable as
 * written -- confirm whether that is intended.
 */
static bool
pg_popcount_available(void)
{
	unsigned int regs[4] = {0, 0, 0, 0};

#if defined(HAVE__GET_CPUID)
	__get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
#elif defined(HAVE__CPUID)
	__cpuid(regs, 1);
#else
#error cpuid instruction not available
#endif

	/* ECX bit 23 is the POPCNT feature flag */
	if ((regs[2] & (1 << 23)) != 0)
		return true;

	return false;
}
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT)
|
||||
|
||||
/*
|
||||
* This gets called on the first call. It replaces the function pointer
|
||||
* so that subsequent calls are routed directly to the chosen implementation.
|
||||
*/
|
||||
static int
|
||||
pg_popcount32_choose(uint32 word)
|
||||
{
|
||||
if (pg_popcount_available())
|
||||
pg_popcount32 = pg_popcount32_sse42;
|
||||
else
|
||||
pg_popcount32 = pg_popcount32_slow;
|
||||
|
||||
return pg_popcount32(word);
|
||||
}
|
||||
|
||||
static int
|
||||
pg_popcount32_sse42(uint32 word)
|
||||
{
|
||||
return __builtin_popcount(word);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* pg_popcount32_slow
|
||||
* Return the number of 1 bits set in word
|
||||
*/
|
||||
static int
|
||||
pg_popcount32_slow(uint32 word)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
while (word != 0)
|
||||
{
|
||||
result += number_of_ones[word & 255];
|
||||
word >>= 8;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* pg_popcount
|
||||
* Returns the number of 1-bits in buf
|
||||
*/
|
||||
uint64
|
||||
pg_popcount(const char *buf, int bytes)
|
||||
{
|
||||
uint64 popcnt = 0;
|
||||
|
||||
#if SIZEOF_VOID_P >= 8
|
||||
/* Process in 64-bit chunks if the buffer is aligned. */
|
||||
if (buf == (char *) TYPEALIGN(8, buf))
|
||||
{
|
||||
uint64 *words = (uint64 *) buf;
|
||||
|
||||
while (bytes >= 8)
|
||||
{
|
||||
popcnt += pg_popcount64(*words++);
|
||||
bytes -= 8;
|
||||
}
|
||||
|
||||
buf = (char *) words;
|
||||
}
|
||||
#else
|
||||
/* Process in 32-bit chunks if the buffer is aligned. */
|
||||
if (buf == (char *) TYPEALIGN(4, buf))
|
||||
{
|
||||
uint32 *words = (uint32 *) buf;
|
||||
|
||||
while (bytes >= 4)
|
||||
{
|
||||
popcnt += pg_popcount32(*words++);
|
||||
bytes -= 4;
|
||||
}
|
||||
|
||||
buf = (char *) words;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Process any remaining bytes */
|
||||
while (bytes--)
|
||||
popcnt += number_of_ones[(unsigned char) *buf++];
|
||||
|
||||
return popcnt;
|
||||
}
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNTL)
|
||||
|
||||
/*
|
||||
* This gets called on the first call. It replaces the function pointer
|
||||
* so that subsequent calls are routed directly to the chosen implementation.
|
||||
*/
|
||||
static int
|
||||
pg_popcount64_choose(uint64 word)
|
||||
{
|
||||
if (pg_popcount_available())
|
||||
pg_popcount64 = pg_popcount64_sse42;
|
||||
else
|
||||
pg_popcount64 = pg_popcount64_slow;
|
||||
|
||||
return pg_popcount64(word);
|
||||
}
|
||||
|
||||
static int
|
||||
pg_popcount64_sse42(uint64 word)
|
||||
{
|
||||
return __builtin_popcountl(word);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* pg_popcount64_slow
|
||||
* Return the number of 1 bits set in word
|
||||
*/
|
||||
static int
|
||||
pg_popcount64_slow(uint64 word)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
while (word != 0)
|
||||
{
|
||||
result += number_of_ones[word & 255];
|
||||
word >>= 8;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CTZL) || defined(HAVE__BUILTIN_CLZ) || defined(HAVE__BUILTIN_CLZL))
|
||||
|
||||
/*
 * pg_lzcnt_available
 *		Report whether the CPU advertises the LZCNT instruction.
 *
 * Queries CPUID leaf 0x80000001 and tests bit 5 of the third returned
 * register (ECX).  The register array starts out zeroed, so the answer is
 * false if the query leaves it untouched.
 *
 * NOTE(review): the #if enclosing this function already requires
 * HAVE__GET_CPUID, so the __cpuid branch below looks unreachable as
 * written -- confirm whether that is intended.
 */
static bool
pg_lzcnt_available(void)
{
	unsigned int regs[4] = {0, 0, 0, 0};

#if defined(HAVE__GET_CPUID)
	__get_cpuid(0x80000001, &regs[0], &regs[1], &regs[2], &regs[3]);
#elif defined(HAVE__CPUID)
	__cpuid(regs, 0x80000001);
#else
#error cpuid instruction not available
#endif

	/* ECX bit 5 is the LZCNT feature flag */
	if ((regs[2] & (1 << 5)) != 0)
		return true;

	return false;
}
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ)
|
||||
/*
|
||||
* This gets called on the first call. It replaces the function pointer
|
||||
* so that subsequent calls are routed directly to the chosen implementation.
|
||||
*/
|
||||
static int
|
||||
pg_rightmost_one32_choose(uint32 word)
|
||||
{
|
||||
if (pg_lzcnt_available())
|
||||
pg_rightmost_one32 = pg_rightmost_one32_abm;
|
||||
else
|
||||
pg_rightmost_one32 = pg_rightmost_one32_slow;
|
||||
|
||||
return pg_rightmost_one32(word);
|
||||
}
|
||||
|
||||
static int
|
||||
pg_rightmost_one32_abm(uint32 word)
|
||||
{
|
||||
return __builtin_ctz(word);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* pg_rightmost_one32_slow
|
||||
* Returns the number of trailing 0-bits in word, starting at the least
|
||||
* significant bit position. word must not be 0.
|
||||
*/
|
||||
static int
|
||||
pg_rightmost_one32_slow(uint32 word)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
Assert(word != 0);
|
||||
|
||||
while ((word & 255) == 0)
|
||||
{
|
||||
word >>= 8;
|
||||
result += 8;
|
||||
}
|
||||
result += rightmost_one_pos[word & 255];
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZL)
|
||||
/*
|
||||
* This gets called on the first call. It replaces the function pointer
|
||||
* so that subsequent calls are routed directly to the chosen implementation.
|
||||
*/
|
||||
static int
|
||||
pg_rightmost_one64_choose(uint64 word)
|
||||
{
|
||||
if (pg_lzcnt_available())
|
||||
pg_rightmost_one64 = pg_rightmost_one64_abm;
|
||||
else
|
||||
pg_rightmost_one64 = pg_rightmost_one64_slow;
|
||||
|
||||
return pg_rightmost_one64(word);
|
||||
}
|
||||
|
||||
static int
|
||||
pg_rightmost_one64_abm(uint64 word)
|
||||
{
|
||||
return __builtin_ctzl(word);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* pg_rightmost_one64_slow
|
||||
* Returns the number of trailing 0-bits in word, starting at the least
|
||||
* significant bit position. word must not be 0.
|
||||
*/
|
||||
static int
|
||||
pg_rightmost_one64_slow(uint64 word)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
Assert(word != 0);
|
||||
|
||||
while ((word & 255) == 0)
|
||||
{
|
||||
word >>= 8;
|
||||
result += 8;
|
||||
}
|
||||
result += rightmost_one_pos[word & 255];
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ)
|
||||
/*
|
||||
* This gets called on the first call. It replaces the function pointer
|
||||
* so that subsequent calls are routed directly to the chosen implementation.
|
||||
*/
|
||||
static int
|
||||
pg_leftmost_one32_choose(uint32 word)
|
||||
{
|
||||
if (pg_lzcnt_available())
|
||||
pg_leftmost_one32 = pg_leftmost_one32_abm;
|
||||
else
|
||||
pg_leftmost_one32 = pg_leftmost_one32_slow;
|
||||
|
||||
return pg_leftmost_one32(word);
|
||||
}
|
||||
|
||||
static int
|
||||
pg_leftmost_one32_abm(uint32 word)
|
||||
{
|
||||
return 31 - __builtin_clz(word);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* pg_leftmost_one32_slow
|
||||
* Returns the 0-based position of the most significant set bit in word
|
||||
* measured from the least significant bit. word must not be 0.
|
||||
*/
|
||||
static int
|
||||
pg_leftmost_one32_slow(uint32 word)
|
||||
{
|
||||
int shift = 32 - 8;
|
||||
|
||||
Assert(word != 0);
|
||||
|
||||
while ((word >> shift) == 0)
|
||||
shift -= 8;
|
||||
|
||||
return shift + leftmost_one_pos[(word >> shift) & 255];
|
||||
}
|
||||
|
||||
#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZL)
|
||||
/*
|
||||
* This gets called on the first call. It replaces the function pointer
|
||||
* so that subsequent calls are routed directly to the chosen implementation.
|
||||
*/
|
||||
static int
|
||||
pg_leftmost_one64_choose(uint64 word)
|
||||
{
|
||||
if (pg_lzcnt_available())
|
||||
pg_leftmost_one64 = pg_leftmost_one64_abm;
|
||||
else
|
||||
pg_leftmost_one64 = pg_leftmost_one64_slow;
|
||||
|
||||
return pg_leftmost_one64(word);
|
||||
}
|
||||
|
||||
static int
|
||||
pg_leftmost_one64_abm(uint64 word)
|
||||
{
|
||||
return 63 - __builtin_clzl(word);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* pg_leftmost_one64_slow
|
||||
* Returns the 0-based position of the most significant set bit in word
|
||||
* measured from the least significant bit. word must not be 0.
|
||||
*/
|
||||
static int
|
||||
pg_leftmost_one64_slow(uint64 word)
|
||||
{
|
||||
int shift = 64 - 8;
|
||||
|
||||
Assert(word != 0);
|
||||
|
||||
while ((word >> shift) == 0)
|
||||
shift -= 8;
|
||||
|
||||
return shift + leftmost_one_pos[(word >> shift) & 255];
|
||||
}
|
@ -112,6 +112,7 @@ sub mkvcbuild
|
||||
push(@pgportfiles, 'pg_crc32c_sse42_choose.c');
|
||||
push(@pgportfiles, 'pg_crc32c_sse42.c');
|
||||
push(@pgportfiles, 'pg_crc32c_sb8.c');
|
||||
push(@pgportfiles, 'pg_bitutils.c');
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user