diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index af2dea1c2a..7cdcaee0b2 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -378,6 +378,122 @@ fi])# PGAC_C_BUILTIN_OP_OVERFLOW +# PGAC_C_BUILTIN_POPCOUNT +# ------------------------- +AC_DEFUN([PGAC_C_BUILTIN_POPCOUNT], +[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcount])])dnl +AC_CACHE_CHECK([for __builtin_popcount], [Ac_cachevar], +[pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS -mpopcnt" +AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_popcount(255);])], +[Ac_cachevar=yes], +[Ac_cachevar=no]) +CFLAGS="$pgac_save_CFLAGS"]) +if test x"$Ac_cachevar" = x"yes"; then + CFLAGS_POPCNT="-mpopcnt" +AC_DEFINE(HAVE__BUILTIN_POPCOUNT, 1, + [Define to 1 if your compiler understands __builtin_popcount.]) +fi +undefine([Ac_cachevar])dnl +])# PGAC_C_BUILTIN_POPCOUNT + + + +# PGAC_C_BUILTIN_POPCOUNTL +# ------------------------- +AC_DEFUN([PGAC_C_BUILTIN_POPCOUNTL], +[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcountl])])dnl +AC_CACHE_CHECK([for __builtin_popcountl], [Ac_cachevar], +[pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS -mpopcnt" +AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_popcountl(255);])], +[Ac_cachevar=yes], +[Ac_cachevar=no]) +CFLAGS="$pgac_save_CFLAGS"]) +if test x"$Ac_cachevar" = x"yes"; then + CFLAGS_POPCNT="-mpopcnt" +AC_DEFINE(HAVE__BUILTIN_POPCOUNTL, 1, + [Define to 1 if your compiler understands __builtin_popcountl.]) +fi +undefine([Ac_cachevar])dnl +])# PGAC_C_BUILTIN_POPCOUNTL + + + +# PGAC_C_BUILTIN_CTZ +# ------------------------- +# Check if the C compiler understands __builtin_ctz(), +# and define HAVE__BUILTIN_CTZ if so. 
+AC_DEFUN([PGAC_C_BUILTIN_CTZ], +[AC_CACHE_CHECK(for __builtin_ctz, pgac_cv__builtin_ctz, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_ctz(256);] +)], +[pgac_cv__builtin_ctz=yes], +[pgac_cv__builtin_ctz=no])]) +if test x"$pgac_cv__builtin_ctz" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_CTZ, 1, + [Define to 1 if your compiler understands __builtin_ctz.]) +fi])# PGAC_C_BUILTIN_CTZ + + + +# PGAC_C_BUILTIN_CTZL +# ------------------------- +# Check if the C compiler understands __builtin_ctzl(), +# and define HAVE__BUILTIN_CTZL if so. +AC_DEFUN([PGAC_C_BUILTIN_CTZL], +[AC_CACHE_CHECK(for __builtin_ctzl, pgac_cv__builtin_ctzl, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_ctzl(256);] +)], +[pgac_cv__builtin_ctzl=yes], +[pgac_cv__builtin_ctzl=no])]) +if test x"$pgac_cv__builtin_ctzl" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_CTZL, 1, + [Define to 1 if your compiler understands __builtin_ctzl.]) +fi])# PGAC_C_BUILTIN_CTZL + + + +# PGAC_C_BUILTIN_CLZ +# ------------------------- +# Check if the C compiler understands __builtin_clz(), +# and define HAVE__BUILTIN_CLZ if so. +AC_DEFUN([PGAC_C_BUILTIN_CLZ], +[AC_CACHE_CHECK(for __builtin_clz, pgac_cv__builtin_clz, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_clz(256);] +)], +[pgac_cv__builtin_clz=yes], +[pgac_cv__builtin_clz=no])]) +if test x"$pgac_cv__builtin_clz" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_CLZ, 1, + [Define to 1 if your compiler understands __builtin_clz.]) +fi])# PGAC_C_BUILTIN_CLZ + + + +# PGAC_C_BUILTIN_CLZL +# ------------------------- +# Check if the C compiler understands __builtin_clzl(), +# and define HAVE__BUILTIN_CLZL if so. 
+AC_DEFUN([PGAC_C_BUILTIN_CLZL], +[AC_CACHE_CHECK(for __builtin_clzl, pgac_cv__builtin_clzl, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_clzl(256);] +)], +[pgac_cv__builtin_clzl=yes], +[pgac_cv__builtin_clzl=no])]) +if test x"$pgac_cv__builtin_clzl" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_CLZL, 1, + [Define to 1 if your compiler understands __builtin_clzl.]) +fi])# PGAC_C_BUILTIN_CLZL + + + # PGAC_C_BUILTIN_UNREACHABLE # -------------------------- # Check if the C compiler understands __builtin_unreachable(), diff --git a/configure b/configure index 7291311ae3..c26b109df8 100755 --- a/configure +++ b/configure @@ -651,6 +651,7 @@ CFLAGS_ARMV8_CRC32C CFLAGS_SSE42 have_win32_dbghelp LIBOBJS +CFLAGS_POPCNT UUID_LIBS LDAP_LIBS_BE LDAP_LIBS_FE @@ -14059,6 +14060,158 @@ if test x"$pgac_cv__builtin_constant_p" = xyes ; then $as_echo "#define HAVE__BUILTIN_CONSTANT_P 1" >>confdefs.h +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5 +$as_echo_n "checking for __builtin_popcount... " >&6; } +if ${pgac_cv_popcount+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS -mpopcnt" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_popcount(255); +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv_popcount=yes +else + pgac_cv_popcount=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +CFLAGS="$pgac_save_CFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcount" >&5 +$as_echo "$pgac_cv_popcount" >&6; } +if test x"$pgac_cv_popcount" = x"yes"; then + CFLAGS_POPCNT="-mpopcnt" + +$as_echo "#define HAVE__BUILTIN_POPCOUNT 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcountl" >&5 +$as_echo_n "checking for __builtin_popcountl... 
" >&6; } +if ${pgac_cv_popcountl+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS -mpopcnt" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_popcountl(255); +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv_popcountl=yes +else + pgac_cv_popcountl=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +CFLAGS="$pgac_save_CFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcountl" >&5 +$as_echo "$pgac_cv_popcountl" >&6; } +if test x"$pgac_cv_popcountl" = x"yes"; then + CFLAGS_POPCNT="-mpopcnt" + +$as_echo "#define HAVE__BUILTIN_POPCOUNTL 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctz" >&5 +$as_echo_n "checking for __builtin_ctz... " >&6; } +if ${pgac_cv__builtin_ctz+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_ctz(256); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_ctz=yes +else + pgac_cv__builtin_ctz=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctz" >&5 +$as_echo "$pgac_cv__builtin_ctz" >&6; } +if test x"$pgac_cv__builtin_ctz" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_CTZ 1" >>confdefs.h + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctzl" >&5 +$as_echo_n "checking for __builtin_ctzl... " >&6; } +if ${pgac_cv__builtin_ctzl+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +static int x = __builtin_ctzl(256); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_ctzl=yes +else + pgac_cv__builtin_ctzl=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctzl" >&5 +$as_echo "$pgac_cv__builtin_ctzl" >&6; } +if test x"$pgac_cv__builtin_ctzl" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_CTZL 1" >>confdefs.h + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5 +$as_echo_n "checking for __builtin_clz... " >&6; } +if ${pgac_cv__builtin_clz+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_clz(256); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_clz=yes +else + pgac_cv__builtin_clz=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clz" >&5 +$as_echo "$pgac_cv__builtin_clz" >&6; } +if test x"$pgac_cv__builtin_clz" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_CLZ 1" >>confdefs.h + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzl" >&5 +$as_echo_n "checking for __builtin_clzl... " >&6; } +if ${pgac_cv__builtin_clzl+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +static int x = __builtin_clzl(256); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_clzl=yes +else + pgac_cv__builtin_clzl=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clzl" >&5 +$as_echo "$pgac_cv__builtin_clzl" >&6; } +if test x"$pgac_cv__builtin_clzl" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_CLZL 1" >>confdefs.h + fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5 $as_echo_n "checking for __builtin_unreachable... " >&6; } @@ -14577,6 +14730,8 @@ $as_echo "#define LOCALE_T_IN_XLOCALE 1" >>confdefs.h fi + + # MSVC doesn't cope well with defining restrict to __restrict, the # spelling it understands, because it conflicts with # __declspec(restrict). Therefore we define pg_restrict to the diff --git a/configure.in b/configure.in index 8a55e8e8a5..702dfed024 100644 --- a/configure.in +++ b/configure.in @@ -1489,6 +1489,12 @@ PGAC_C_BUILTIN_BSWAP16 PGAC_C_BUILTIN_BSWAP32 PGAC_C_BUILTIN_BSWAP64 PGAC_C_BUILTIN_CONSTANT_P +PGAC_C_BUILTIN_POPCOUNT +PGAC_C_BUILTIN_POPCOUNTL +PGAC_C_BUILTIN_CTZ +PGAC_C_BUILTIN_CTZL +PGAC_C_BUILTIN_CLZ +PGAC_C_BUILTIN_CLZL PGAC_C_BUILTIN_UNREACHABLE PGAC_C_COMPUTED_GOTO PGAC_STRUCT_TIMEZONE @@ -1503,6 +1509,8 @@ AC_TYPE_LONG_LONG_INT PGAC_TYPE_LOCALE_T +AC_SUBST(CFLAGS_POPCNT) + # MSVC doesn't cope well with defining restrict to __restrict, the # spelling it understands, because it conflicts with # __declspec(restrict). 
Therefore we define pg_restrict to the diff --git a/src/Makefile.global.in b/src/Makefile.global.in index c118f64040..aa16da3e0f 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -260,6 +260,7 @@ CXX = @CXX@ CFLAGS = @CFLAGS@ CFLAGS_VECTOR = @CFLAGS_VECTOR@ CFLAGS_SSE42 = @CFLAGS_SSE42@ +CFLAGS_POPCNT = @CFLAGS_POPCNT@ CFLAGS_ARMV8_CRC32C = @CFLAGS_ARMV8_CRC32C@ PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@ CXXFLAGS = @CXXFLAGS@ diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 931ae81fd6..9657cd0a63 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -89,12 +89,12 @@ #include "access/visibilitymap.h" #include "access/xlog.h" #include "miscadmin.h" +#include "port/pg_bitutils.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "storage/smgr.h" #include "utils/inval.h" - /*#define TRACE_VISIBILITYMAP */ /* @@ -115,43 +115,9 @@ #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) #define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK) -/* tables for fast counting of set bits for visible and frozen */ -static const uint8 number_of_ones_for_visible[256] = { - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, - 1, 2, 1, 2, 
2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4 -}; -static const uint8 number_of_ones_for_frozen[256] = { - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4 -}; +/* Masks for bit counting bits in the visibility map. */ +#define VISIBLE_MASK64 0x5555555555555555 /* The lower bit of each bit pair */ +#define FROZEN_MASK64 0xaaaaaaaaaaaaaaaa /* The upper bit of each bit pair */ /* prototypes for internal routines */ static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend); @@ -408,18 +374,16 @@ void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen) { BlockNumber mapBlock; + BlockNumber nvisible = 0; + BlockNumber nfrozen = 0; /* all_visible must be specified */ Assert(all_visible); - *all_visible = 0; - if (all_frozen) - *all_frozen = 0; - for (mapBlock = 0;; mapBlock++) { Buffer mapBuffer; - unsigned char *map; + uint64 *map; int i; /* @@ -436,17 +400,30 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro * immediately stale anyway if anyone is concurrently setting or * clearing bits, and we only really need an approximate value. 
*/ - map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer)); + map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer)); - for (i = 0; i < MAPSIZE; i++) + StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0, + "unsupported MAPSIZE"); + if (all_frozen == NULL) { - *all_visible += number_of_ones_for_visible[map[i]]; - if (all_frozen) - *all_frozen += number_of_ones_for_frozen[map[i]]; + for (i = 0; i < MAPSIZE / sizeof(uint64); i++) + nvisible += pg_popcount64(map[i] & VISIBLE_MASK64); + } + else + { + for (i = 0; i < MAPSIZE / sizeof(uint64); i++) + { + nvisible += pg_popcount64(map[i] & VISIBLE_MASK64); + nfrozen += pg_popcount64(map[i] & FROZEN_MASK64); + } } ReleaseBuffer(mapBuffer); } + + *all_visible = nvisible; + if (all_frozen) + *all_frozen = nfrozen; } /* diff --git a/src/backend/lib/bloomfilter.c b/src/backend/lib/bloomfilter.c index 1e907cabc6..e2c1276f21 100644 --- a/src/backend/lib/bloomfilter.c +++ b/src/backend/lib/bloomfilter.c @@ -37,6 +37,7 @@ #include "access/hash.h" #include "lib/bloomfilter.h" +#include "port/pg_bitutils.h" #define MAX_HASH_FUNCS 10 @@ -187,19 +188,7 @@ double bloom_prop_bits_set(bloom_filter *filter) { int bitset_bytes = filter->m / BITS_PER_BYTE; - uint64 bits_set = 0; - int i; - - for (i = 0; i < bitset_bytes; i++) - { - unsigned char byte = filter->bitset[i]; - - while (byte) - { - bits_set++; - byte &= (byte - 1); - } - } + uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes); return bits_set / (double) filter->m; } diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 62cd00903c..d0380abf3e 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -22,6 +22,7 @@ #include "access/hash.h" #include "nodes/pg_list.h" +#include "port/pg_bitutils.h" #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) @@ -51,79 +52,23 @@ #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x)) +/* Set the bitwise macro version we must use based on the bitmapword size 
*/ +#if BITS_PER_BITMAPWORD == 32 -/* - * Lookup tables to avoid need for bit-by-bit groveling - * - * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit - * in a nonzero byte value x. The entry for x=0 is never used. - * - * leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a - * nonzero byte value x. The entry for x=0 is never used. - * - * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x. - * - * We could make these tables larger and reduce the number of iterations - * in the functions that use them, but bytewise shifts and masks are - * especially fast on many machines, so working a byte at a time seems best. - */ +#define bmw_popcount(w) pg_popcount32(w) +#define bmw_rightmost_one(w) pg_rightmost_one32(w) +#define bmw_leftmost_one(w) pg_leftmost_one32(w) -static const uint8 rightmost_one_pos[256] = { - 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -}; +#elif BITS_PER_BITMAPWORD == 64 -static const uint8 leftmost_one_pos[256] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -}; +#define bmw_popcount(w) pg_popcount64(w) +#define bmw_rightmost_one(w) pg_rightmost_one64(w) +#define bmw_leftmost_one(w) pg_leftmost_one64(w) + +#else +#error "invalid BITS_PER_BITMAPWORD" +#endif -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; /* @@ -607,12 +552,7 @@ bms_singleton_member(const Bitmapset *a) if (result >= 0 || HAS_MULTIPLE_ONES(w)) elog(ERROR, "bitmapset has multiple members"); result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += bmw_rightmost_one(w); } } if (result < 0) @@ -650,12 +590,7 @@ 
bms_get_singleton_member(const Bitmapset *a, int *member) if (result >= 0 || HAS_MULTIPLE_ONES(w)) return false; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += bmw_rightmost_one(w); } } if (result < 0) @@ -681,12 +616,9 @@ bms_num_members(const Bitmapset *a) { bitmapword w = a->words[wordnum]; - /* we assume here that bitmapword is an unsigned type */ - while (w != 0) - { - result += number_of_ones[w & 255]; - w >>= 8; - } + /* No need to count the bits in a zero word */ + if (w != 0) + result += bmw_popcount(w); } return result; } @@ -1041,12 +973,7 @@ bms_first_member(Bitmapset *a) a->words[wordnum] &= ~w; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += bmw_rightmost_one(w); return result; } } @@ -1096,12 +1023,7 @@ bms_next_member(const Bitmapset *a, int prevbit) int result; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += bmw_rightmost_one(w); return result; } @@ -1168,14 +1090,9 @@ bms_prev_member(const Bitmapset *a, int prevbit) if (w != 0) { int result; - int shift = BITS_PER_BITMAPWORD - 8; result = wordnum * BITS_PER_BITMAPWORD; - - while ((w >> shift) == 0) - shift -= 8; - - result += shift + leftmost_one_pos[(w >> shift) & 255]; + result += bmw_leftmost_one(w); return result; } diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index b38b0ae189..53a11f2ab8 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -754,6 +754,24 @@ /* Define to 1 if your compiler understands __builtin_$op_overflow. */ #undef HAVE__BUILTIN_OP_OVERFLOW +/* Define to 1 if your compiler understands __builtin_popcount. */ +#undef HAVE__BUILTIN_POPCOUNT + +/* Define to 1 if your compiler understands __builtin_popcountl. 
*/ +#undef HAVE__BUILTIN_POPCOUNTL + +/* Define to 1 if your compiler understands __builtin_ctz. */ +#undef HAVE__BUILTIN_CTZ + +/* Define to 1 if your compiler understands __builtin_ctzl. */ +#undef HAVE__BUILTIN_CTZL + +/* Define to 1 if your compiler understands __builtin_clz. */ +#undef HAVE__BUILTIN_CLZ + +/* Define to 1 if your compiler understands __builtin_clzl. */ +#undef HAVE__BUILTIN_CLZL + /* Define to 1 if your compiler understands __builtin_types_compatible_p. */ #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index 160fa1279e..e45db7a12e 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -593,6 +593,24 @@ /* Define to 1 if your compiler understands __builtin_$op_overflow. */ /* #undef HAVE__BUILTIN_OP_OVERFLOW */ +/* Define to 1 if your compiler understands __builtin_popcount. */ +/* #undef HAVE__BUILTIN_POPCOUNT */ + +/* Define to 1 if your compiler understands __builtin_popcountl. */ +/* #undef HAVE__BUILTIN_POPCOUNTL */ + +/* Define to 1 if your compiler understands __builtin_ctz. */ +/* #undef HAVE__BUILTIN_CTZ */ + +/* Define to 1 if your compiler understands __builtin_ctzl. */ +/* #undef HAVE__BUILTIN_CTZL */ + +/* Define to 1 if your compiler understands __builtin_clz. */ +/* #undef HAVE__BUILTIN_CLZ */ + +/* Define to 1 if your compiler understands __builtin_clzl. */ +/* #undef HAVE__BUILTIN_CLZL */ + /* Define to 1 if your compiler understands __builtin_types_compatible_p. */ /* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */ diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h new file mode 100644 index 0000000000..148c555057 --- /dev/null +++ b/src/include/port/pg_bitutils.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------ - + * + * pg_bitutils.h + * miscellaneous functions for bit-wise operations. 
+ * + * + * Portions Copyright(c) 2019, PostgreSQL Global Development Group + * + * src/include/port/pg_bitutils.h + * + *------------------------------------------------------------------------ - + */ + +#ifndef PG_BITUTILS_H +#define PG_BITUTILS_H + +extern int (*pg_popcount32) (uint32 word); +extern int (*pg_popcount64) (uint64 word); +extern int (*pg_rightmost_one32) (uint32 word); +extern int (*pg_rightmost_one64) (uint64 word); +extern int (*pg_leftmost_one32) (uint32 word); +extern int (*pg_leftmost_one64) (uint64 word); + +extern uint64 pg_popcount(const char *buf, int bytes); + +#endif /* PG_BITUTILS_H */ diff --git a/src/port/Makefile b/src/port/Makefile index 9cfc0f9279..94ccece371 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -36,7 +36,7 @@ override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS) LIBS += $(PTHREAD_LIBS) OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \ - noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \ + noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o \ pg_strong_random.o pgstrcasecmp.o pgstrsignal.o pqsignal.o \ qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \ tar.o thread.o @@ -78,6 +78,9 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) +# pg_bitutils.c needs CFLAGS_POPCNT +pg_bitutils.o: CFLAGS+=$(CFLAGS_POPCNT) + # # Shared library versions of object files # diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c new file mode 100644 index 0000000000..97422e0504 --- /dev/null +++ b/src/port/pg_bitutils.c @@ -0,0 +1,516 @@ +/*------------------------------------------------------------------------- + * + * pg_bitutils.c + * miscellaneous functions for bit-wise operations. 
+ * + * Portions Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/port/pg_bitutils.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#ifdef HAVE__GET_CPUID +#include +#endif + +#ifdef HAVE__CPUID +#include +#endif + +#include "port/pg_bitutils.h" + +#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_POPCOUNT) || defined(HAVE__BUILTIN_POPCOUNTL)) +static bool pg_popcount_available(void); +#endif + +#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) +static int pg_popcount32_choose(uint32 word); +static int pg_popcount32_sse42(uint32 word); +#endif +static int pg_popcount32_slow(uint32 word); + +#if defined(HAVE__BUILTIN_POPCOUNTL) && defined(HAVE__GET_CPUID) +static int pg_popcount64_choose(uint64 word); +static int pg_popcount64_sse42(uint64 word); +#endif +static int pg_popcount64_slow(uint64 word); + +#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CTZL) || defined(HAVE__BUILTIN_CLZ) || defined(HAVE__BUILTIN_CLZL)) +static bool pg_lzcnt_available(void); +#endif + +#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) +static int pg_rightmost_one32_choose(uint32 word); +static int pg_rightmost_one32_abm(uint32 word); +#endif +static int pg_rightmost_one32_slow(uint32 word); + +#if defined(HAVE__BUILTIN_CTZL) && defined(HAVE__GET_CPUID) +static int pg_rightmost_one64_choose(uint64 word); +static int pg_rightmost_one64_abm(uint64 word); +#endif +static int pg_rightmost_one64_slow(uint64 word); + +#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) +static int pg_leftmost_one32_choose(uint32 word); +static int pg_leftmost_one32_abm(uint32 word); +#endif +static int pg_leftmost_one32_slow(uint32 word); + +#if defined(HAVE__BUILTIN_CLZL) && defined(HAVE__GET_CPUID) +static int pg_leftmost_one64_choose(uint64 word); +static int pg_leftmost_one64_abm(uint64 word); +#endif +static int 
pg_leftmost_one64_slow(uint64 word); + +#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) +int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; +#else +int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; +#endif + +#if defined(HAVE__BUILTIN_POPCOUNTL) && defined(HAVE__GET_CPUID) +int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; +#else +int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; +#endif + +#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) +int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_choose; +#else +int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_slow; +#endif + +#if defined(HAVE__BUILTIN_CTZL) && defined(HAVE__GET_CPUID) +int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_choose; +#else +int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_slow; +#endif + +#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) +int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_choose; +#else +int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_slow; +#endif + +#if defined(HAVE__BUILTIN_CLZL) && defined(HAVE__GET_CPUID) +int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_choose; +#else +int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_slow; +#endif + + +/* Array marking the number of 1-bits for each value of 0-255. 
*/ +static const uint8 number_of_ones[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +/* + * Array marking the position of the right-most set bit for each value of + * 1-255. We count the right-most position as the 0th bit, and the + * left-most the 7th bit. The 0th index of the array must not be used. 
+ */ +static const uint8 rightmost_one_pos[256] = { + 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + +/* + * Array marking the position of the left-most set bit for each value of + * 1-255. We count the right-most position as the 0th bit, and the + * left-most the 7th bit. The 0th index of the array must not be used. 
+ */ +static const uint8 leftmost_one_pos[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; + +#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_POPCOUNT) || defined(HAVE__BUILTIN_POPCOUNTL)) + +static bool +pg_popcount_available(void) +{ + unsigned int exx[4] = { 0, 0, 0, 0 }; + +#if defined(HAVE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); +#else +#error cpuid instruction not available +#endif + + return (exx[2] & (1 << 23)) != 0; /* POPCNT */ +} +#endif + +#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) + +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. 
+ */
+static int
+pg_popcount32_choose(uint32 word)
+{
+	if (pg_popcount_available())
+		pg_popcount32 = pg_popcount32_sse42;
+	else
+		pg_popcount32 = pg_popcount32_slow;
+
+	return pg_popcount32(word);
+}
+
+/*
+ * pg_popcount32_sse42
+ *		Return the number of 1 bits set in word, using the compiler builtin.
+ */
+static int
+pg_popcount32_sse42(uint32 word)
+{
+	return __builtin_popcount(word);
+}
+#endif
+
+/*
+ * pg_popcount32_slow
+ *		Return the number of 1 bits set in word
+ *
+ * Portable fallback: consumes the word one byte at a time, looking each
+ * byte up in number_of_ones[].
+ */
+static int
+pg_popcount32_slow(uint32 word)
+{
+	int			result = 0;
+
+	while (word != 0)
+	{
+		result += number_of_ones[word & 255];
+		word >>= 8;
+	}
+
+	return result;
+}
+
+/*
+ * pg_popcount
+ *		Returns the number of 1-bits in buf
+ *
+ * buf is read only, never written.  When buf is suitably aligned the bulk
+ * of it is counted a word at a time (64-bit words if SIZEOF_VOID_P >= 8,
+ * otherwise 32-bit words); whatever is left over, or the whole buffer if
+ * it is unaligned, is counted byte by byte.
+ *
+ * A "bytes" value of zero or less yields 0.
+ */
+uint64
+pg_popcount(const char *buf, int bytes)
+{
+	uint64		popcnt = 0;
+
+#if SIZEOF_VOID_P >= 8
+	/* Process in 64-bit chunks if the buffer is aligned. */
+	if (buf == (const char *) TYPEALIGN(8, buf))
+	{
+		/* keep the const qualifier: we only ever read through this pointer */
+		const uint64 *words = (const uint64 *) buf;
+
+		while (bytes >= 8)
+		{
+			popcnt += pg_popcount64(*words++);
+			bytes -= 8;
+		}
+
+		buf = (const char *) words;
+	}
+#else
+	/* Process in 32-bit chunks if the buffer is aligned. */
+	if (buf == (const char *) TYPEALIGN(4, buf))
+	{
+		/* keep the const qualifier: we only ever read through this pointer */
+		const uint32 *words = (const uint32 *) buf;
+
+		while (bytes >= 4)
+		{
+			popcnt += pg_popcount32(*words++);
+			bytes -= 4;
+		}
+
+		buf = (const char *) words;
+	}
+#endif
+
+	/*
+	 * Process any remaining bytes.  Test "> 0" rather than "!= 0" so that a
+	 * negative length cannot send us marching past the end of the buffer.
+	 */
+	while (bytes-- > 0)
+		popcnt += number_of_ones[(unsigned char) *buf++];
+
+	return popcnt;
+}
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNTL)
+
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ */
+static int
+pg_popcount64_choose(uint64 word)
+{
+	if (pg_popcount_available())
+		pg_popcount64 = pg_popcount64_sse42;
+	else
+		pg_popcount64 = pg_popcount64_slow;
+
+	return pg_popcount64(word);
+}
+
+/*
+ * pg_popcount64_sse42
+ *		Return the number of 1 bits set in word, using the compiler builtin.
+ *
+ * __builtin_popcountl() takes an unsigned long, which is narrower than 64
+ * bits on some platforms.  Passing a uint64 straight through would then
+ * silently drop the upper half, so in that case count the two 32-bit
+ * halves separately.  The sizeof test is a compile-time constant.
+ */
+static int
+pg_popcount64_sse42(uint64 word)
+{
+	if (sizeof(unsigned long) >= sizeof(uint64))
+		return __builtin_popcountl(word);
+
+	return __builtin_popcountl((unsigned long) (word >> 32)) +
+		__builtin_popcountl((unsigned long) word);
+}
+
+#endif
+
+/*
+ * pg_popcount64_slow
+ *		Return the number of 1 bits set in word
+ *
+ * Portable fallback: consumes the word one byte at a time, looking each
+ * byte up in number_of_ones[].
+ */
+static int
+pg_popcount64_slow(uint64 word)
+{
+	int			result = 0;
+
+	while (word != 0)
+	{
+		result += number_of_ones[word & 255];
+		word >>= 8;
+	}
+
+	return result;
+}
+
+#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CTZL) || defined(HAVE__BUILTIN_CLZ) || defined(HAVE__BUILTIN_CLZL))
+
+/*
+ * pg_lzcnt_available
+ *		Report whether the CPU advertises the LZCNT instruction.
+ *
+ * Queries CPUID leaf 0x80000001 and tests bit 5 of the third output
+ * register.  The output array starts out zeroed, so the answer is "false"
+ * whenever __get_cpuid() leaves it untouched.
+ *
+ * This function is only compiled when HAVE__GET_CPUID is defined (see the
+ * enclosing #if), so no alternative cpuid mechanism is selected here; a
+ * fallback would be unreachable unless that guard is widened as well.
+ */
+static bool
+pg_lzcnt_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+	__get_cpuid(0x80000001, &exx[0], &exx[1], &exx[2], &exx[3]);
+
+	return (exx[2] & (1 << 5)) != 0;	/* LZCNT */
+}
+#endif
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ)
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ *
+ * NOTE(review): the selection is keyed on the LZCNT flag, which describes
+ * the leading-zero-count instruction; confirm that it is the intended gate
+ * for the trailing-zero builtin used below.
+ */
+static int
+pg_rightmost_one32_choose(uint32 word)
+{
+	if (pg_lzcnt_available())
+		pg_rightmost_one32 = pg_rightmost_one32_abm;
+	else
+		pg_rightmost_one32 = pg_rightmost_one32_slow;
+
+	return pg_rightmost_one32(word);
+}
+
+/*
+ * pg_rightmost_one32_abm
+ *		Returns the number of trailing 0-bits in word, using the compiler
+ *		builtin. word must not be 0 (the builtin's result is undefined then).
+ */
+static int
+pg_rightmost_one32_abm(uint32 word)
+{
+	return __builtin_ctz(word);
+}
+
+#endif
+
+/*
+ * pg_rightmost_one32_slow
+ *		Returns the number of trailing 0-bits in word, starting at the least
+ *		significant bit position. word must not be 0.
+ */
+static int
+pg_rightmost_one32_slow(uint32 word)
+{
+	int			result = 0;
+
+	Assert(word != 0);
+
+	/* skip whole zero bytes, then look up the first non-zero byte */
+	while ((word & 255) == 0)
+	{
+		word >>= 8;
+		result += 8;
+	}
+	result += rightmost_one_pos[word & 255];
+
+	return result;
+}
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZL)
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ */
+static int
+pg_rightmost_one64_choose(uint64 word)
+{
+	if (pg_lzcnt_available())
+		pg_rightmost_one64 = pg_rightmost_one64_abm;
+	else
+		pg_rightmost_one64 = pg_rightmost_one64_slow;
+
+	return pg_rightmost_one64(word);
+}
+
+/*
+ * pg_rightmost_one64_abm
+ *		Returns the number of trailing 0-bits in word, using the compiler
+ *		builtin. word must not be 0.
+ *
+ * __builtin_ctzl() takes an unsigned long, which is narrower than 64 bits
+ * on some platforms.  Passing a uint64 straight through would then look at
+ * the low half only (and hand the builtin a zero, whose result is
+ * undefined, whenever the only set bits are in the high half).  In that
+ * case examine the two 32-bit halves separately.  The sizeof test is a
+ * compile-time constant.
+ */
+static int
+pg_rightmost_one64_abm(uint64 word)
+{
+	unsigned long low;
+
+	if (sizeof(unsigned long) >= sizeof(uint64))
+		return __builtin_ctzl(word);
+
+	low = (unsigned long) word;
+	if (low != 0)
+		return __builtin_ctzl(low);
+
+	return 32 + __builtin_ctzl((unsigned long) (word >> 32));
+}
+#endif
+
+/*
+ * pg_rightmost_one64_slow
+ *		Returns the number of trailing 0-bits in word, starting at the least
+ *		significant bit position. word must not be 0.
+ */
+static int
+pg_rightmost_one64_slow(uint64 word)
+{
+	int			result = 0;
+
+	Assert(word != 0);
+
+	/* skip whole zero bytes, then look up the first non-zero byte */
+	while ((word & 255) == 0)
+	{
+		word >>= 8;
+		result += 8;
+	}
+	result += rightmost_one_pos[word & 255];
+
+	return result;
+}
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ)
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ */
+static int
+pg_leftmost_one32_choose(uint32 word)
+{
+	if (pg_lzcnt_available())
+		pg_leftmost_one32 = pg_leftmost_one32_abm;
+	else
+		pg_leftmost_one32 = pg_leftmost_one32_slow;
+
+	return pg_leftmost_one32(word);
+}
+
+/*
+ * pg_leftmost_one32_abm
+ *		Returns the 0-based position of the most significant set bit in word,
+ *		computed as 31 minus the builtin's leading-zero count. word must not
+ *		be 0.
+ */
+static int
+pg_leftmost_one32_abm(uint32 word)
+{
+	return 31 - __builtin_clz(word);
+}
+#endif
+
+/*
+ * pg_leftmost_one32_slow
+ *		Returns the 0-based position of the most significant set bit in word
+ *		measured from the least significant bit. word must not be 0.
+ */
+static int
+pg_leftmost_one32_slow(uint32 word)
+{
+	int			shift = 32 - 8;
+
+	Assert(word != 0);
+
+	/* find the highest non-zero byte, then look up its top set bit */
+	while ((word >> shift) == 0)
+		shift -= 8;
+
+	return shift + leftmost_one_pos[(word >> shift) & 255];
+}
+
+#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZL)
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ */
+static int
+pg_leftmost_one64_choose(uint64 word)
+{
+	if (pg_lzcnt_available())
+		pg_leftmost_one64 = pg_leftmost_one64_abm;
+	else
+		pg_leftmost_one64 = pg_leftmost_one64_slow;
+
+	return pg_leftmost_one64(word);
+}
+
+/*
+ * pg_leftmost_one64_abm
+ *		Returns the 0-based position of the most significant set bit in word,
+ *		using the compiler builtin. word must not be 0.
+ *
+ * __builtin_clzl() takes an unsigned long, which is narrower than 64 bits
+ * on some platforms.  There, "63 - __builtin_clzl(word)" would look at the
+ * low half only and use the wrong bias.  In that case examine the two
+ * 32-bit halves separately.  The sizeof test is a compile-time constant.
+ */
+static int
+pg_leftmost_one64_abm(uint64 word)
+{
+	unsigned long high;
+
+	if (sizeof(unsigned long) >= sizeof(uint64))
+		return 63 - __builtin_clzl(word);
+
+	high = (unsigned long) (word >> 32);
+	if (high != 0)
+		return 63 - __builtin_clzl(high);	/* 32 + (31 - clz) */
+
+	return 31 - __builtin_clzl((unsigned long) word);
+}
+#endif
+
+/*
+ * pg_leftmost_one64_slow
+ *		Returns the 0-based position of the most significant set bit in word
+ *		measured from the least significant bit. word must not be 0.
+ */
+static int
+pg_leftmost_one64_slow(uint64 word)
+{
+	int			shift = 64 - 8;
+
+	Assert(word != 0);
+
+	/* find the highest non-zero byte, then look up its top set bit */
+	while ((word >> shift) == 0)
+		shift -= 8;
+
+	return shift + leftmost_one_pos[(word >> shift) & 255];
+}
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 5251a21d34..f5effdb029 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -112,6 +112,7 @@ sub mkvcbuild push(@pgportfiles, 'pg_crc32c_sse42_choose.c'); push(@pgportfiles, 'pg_crc32c_sse42.c'); push(@pgportfiles, 'pg_crc32c_sb8.c'); + push(@pgportfiles, 'pg_bitutils.c'); } else {