Use __attribute__((target(...))) for AVX-512 support.

Presently, we check for compiler support for the required
intrinsics both with and without extra compiler flags (e.g.,
-mxsave), and then depending on the results of those checks, we
pick which files to compile with which flags.  This is tedious and
complicated, and it results in unsustainable coding patterns such
as separate files for each portion of code may need to be built
with different compiler flags.

This commit introduces support for __attribute__((target(...))) and
uses it for the AVX-512 code.  This simplifies both the
configure-time checks and the build scripts, and it allows us to
place the functions that use the intrinsics in files that we
otherwise do not want to build with special CPU instructions.  We
are careful to avoid using __attribute__((target(...))) on
compilers that do not understand it, but we still perform the
configure-time checks in case the compiler allows using the
intrinsics without it (e.g., MSVC).

A similar change could likely be made for some of the CRC-32C code,
but that is left as a future exercise.

Suggested-by: Andres Freund
Reviewed-by: Raghuveer Devulapalli, Andres Freund
Discussion: https://postgr.es/m/20240731205254.vfpap7uxwmebqeaf%40awork3.anarazel.de
This commit is contained in:
Nathan Bossart 2024-11-07 13:58:43 -06:00
parent f56a01ebdb
commit f78667bd91
11 changed files with 185 additions and 312 deletions

View File

@ -700,20 +700,22 @@ undefine([Ac_cachevar])dnl
# Check if the compiler supports the XSAVE instructions using the _xgetbv
# intrinsic function.
#
# An optional compiler flag can be passed as argument (e.g., -mxsave). If the
# intrinsic is supported, sets pgac_xsave_intrinsics and CFLAGS_XSAVE.
# If the intrinsics are supported, sets pgac_xsave_intrinsics.
AC_DEFUN([PGAC_XSAVE_INTRINSICS],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_xsave_intrinsics_$1])])dnl
AC_CACHE_CHECK([for _xgetbv with CFLAGS=$1], [Ac_cachevar],
[pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS $1"
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>],
[return _xgetbv(0) & 0xe0;])],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_xsave_intrinsics])])dnl
AC_CACHE_CHECK([for _xgetbv], [Ac_cachevar],
[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
#if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("xsave")))
#endif
static int xsave_test(void)
{
return _xgetbv(0) & 0xe0;
}],
[return xsave_test();])],
[Ac_cachevar=yes],
[Ac_cachevar=no])
CFLAGS="$pgac_save_CFLAGS"])
[Ac_cachevar=no])])
if test x"$Ac_cachevar" = x"yes"; then
CFLAGS_XSAVE="$1"
pgac_xsave_intrinsics=yes
fi
undefine([Ac_cachevar])dnl
@ -725,29 +727,29 @@ undefine([Ac_cachevar])dnl
# _mm512_setzero_si512, _mm512_maskz_loadu_epi8, _mm512_popcnt_epi64,
# _mm512_add_epi64, and _mm512_reduce_add_epi64 intrinsic functions.
#
# Optional compiler flags can be passed as argument (e.g., -mavx512vpopcntdq
# -mavx512bw). If the intrinsics are supported, sets
# pgac_avx512_popcnt_intrinsics and CFLAGS_POPCNT.
# If the intrinsics are supported, sets pgac_avx512_popcnt_intrinsics.
AC_DEFUN([PGAC_AVX512_POPCNT_INTRINSICS],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics_$1])])dnl
AC_CACHE_CHECK([for _mm512_popcnt_epi64 with CFLAGS=$1], [Ac_cachevar],
[pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS $1"
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>],
[const char buf@<:@sizeof(__m512i)@:>@;
PG_INT64_TYPE popcnt = 0;
__m512i accum = _mm512_setzero_si512();
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
const __m512i cnt = _mm512_popcnt_epi64(val);
accum = _mm512_add_epi64(accum, cnt);
popcnt = _mm512_reduce_add_epi64(accum);
/* return computed value, to prevent the above being optimized away */
return popcnt == 0;])],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics])])dnl
AC_CACHE_CHECK([for _mm512_popcnt_epi64], [Ac_cachevar],
[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
#if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("avx512vpopcntdq","avx512bw")))
#endif
static int popcount_test(void)
{
const char buf@<:@sizeof(__m512i)@:>@;
PG_INT64_TYPE popcnt = 0;
__m512i accum = _mm512_setzero_si512();
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
const __m512i cnt = _mm512_popcnt_epi64(val);
accum = _mm512_add_epi64(accum, cnt);
popcnt = _mm512_reduce_add_epi64(accum);
return (int) popcnt;
}],
[return popcount_test();])],
[Ac_cachevar=yes],
[Ac_cachevar=no])
CFLAGS="$pgac_save_CFLAGS"])
[Ac_cachevar=no])])
if test x"$Ac_cachevar" = x"yes"; then
CFLAGS_POPCNT="$1"
pgac_avx512_popcnt_intrinsics=yes
fi
undefine([Ac_cachevar])dnl

167
configure vendored
View File

@ -647,9 +647,6 @@ MSGFMT_FLAGS
MSGFMT
PG_CRC32C_OBJS
CFLAGS_CRC
PG_POPCNT_OBJS
CFLAGS_POPCNT
CFLAGS_XSAVE
LIBOBJS
OPENSSL
ZSTD
@ -17272,185 +17269,103 @@ fi
# Check for XSAVE intrinsics
#
CFLAGS_XSAVE=""
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv with CFLAGS=" >&5
$as_echo_n "checking for _xgetbv with CFLAGS=... " >&6; }
if ${pgac_cv_xsave_intrinsics_+:} false; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv" >&5
$as_echo_n "checking for _xgetbv... " >&6; }
if ${pgac_cv_xsave_intrinsics+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS "
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <immintrin.h>
#if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("xsave")))
#endif
static int xsave_test(void)
{
return _xgetbv(0) & 0xe0;
}
int
main ()
{
return _xgetbv(0) & 0xe0;
return xsave_test();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv_xsave_intrinsics_=yes
pgac_cv_xsave_intrinsics=yes
else
pgac_cv_xsave_intrinsics_=no
pgac_cv_xsave_intrinsics=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
CFLAGS="$pgac_save_CFLAGS"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics_" >&5
$as_echo "$pgac_cv_xsave_intrinsics_" >&6; }
if test x"$pgac_cv_xsave_intrinsics_" = x"yes"; then
CFLAGS_XSAVE=""
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics" >&5
$as_echo "$pgac_cv_xsave_intrinsics" >&6; }
if test x"$pgac_cv_xsave_intrinsics" = x"yes"; then
pgac_xsave_intrinsics=yes
fi
if test x"$pgac_xsave_intrinsics" != x"yes"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv with CFLAGS=-mxsave" >&5
$as_echo_n "checking for _xgetbv with CFLAGS=-mxsave... " >&6; }
if ${pgac_cv_xsave_intrinsics__mxsave+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS -mxsave"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <immintrin.h>
int
main ()
{
return _xgetbv(0) & 0xe0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv_xsave_intrinsics__mxsave=yes
else
pgac_cv_xsave_intrinsics__mxsave=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
CFLAGS="$pgac_save_CFLAGS"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics__mxsave" >&5
$as_echo "$pgac_cv_xsave_intrinsics__mxsave" >&6; }
if test x"$pgac_cv_xsave_intrinsics__mxsave" = x"yes"; then
CFLAGS_XSAVE="-mxsave"
pgac_xsave_intrinsics=yes
fi
fi
if test x"$pgac_xsave_intrinsics" = x"yes"; then
$as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
fi
# Check for AVX-512 popcount intrinsics
#
CFLAGS_POPCNT=""
PG_POPCNT_OBJS=""
if test x"$host_cpu" = x"x86_64"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=" >&5
$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=... " >&6; }
if ${pgac_cv_avx512_popcnt_intrinsics_+:} false; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64" >&5
$as_echo_n "checking for _mm512_popcnt_epi64... " >&6; }
if ${pgac_cv_avx512_popcnt_intrinsics+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS "
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <immintrin.h>
#if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("avx512vpopcntdq","avx512bw")))
#endif
static int popcount_test(void)
{
const char buf[sizeof(__m512i)];
PG_INT64_TYPE popcnt = 0;
__m512i accum = _mm512_setzero_si512();
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
const __m512i cnt = _mm512_popcnt_epi64(val);
accum = _mm512_add_epi64(accum, cnt);
popcnt = _mm512_reduce_add_epi64(accum);
return (int) popcnt;
}
int
main ()
{
const char buf[sizeof(__m512i)];
PG_INT64_TYPE popcnt = 0;
__m512i accum = _mm512_setzero_si512();
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
const __m512i cnt = _mm512_popcnt_epi64(val);
accum = _mm512_add_epi64(accum, cnt);
popcnt = _mm512_reduce_add_epi64(accum);
/* return computed value, to prevent the above being optimized away */
return popcnt == 0;
return popcount_test();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv_avx512_popcnt_intrinsics_=yes
pgac_cv_avx512_popcnt_intrinsics=yes
else
pgac_cv_avx512_popcnt_intrinsics_=no
pgac_cv_avx512_popcnt_intrinsics=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
CFLAGS="$pgac_save_CFLAGS"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics_" >&5
$as_echo "$pgac_cv_avx512_popcnt_intrinsics_" >&6; }
if test x"$pgac_cv_avx512_popcnt_intrinsics_" = x"yes"; then
CFLAGS_POPCNT=""
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics" >&5
$as_echo "$pgac_cv_avx512_popcnt_intrinsics" >&6; }
if test x"$pgac_cv_avx512_popcnt_intrinsics" = x"yes"; then
pgac_avx512_popcnt_intrinsics=yes
fi
if test x"$pgac_avx512_popcnt_intrinsics" != x"yes"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512bw" >&5
$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512bw... " >&6; }
if ${pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS -mavx512vpopcntdq -mavx512bw"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <immintrin.h>
int
main ()
{
const char buf[sizeof(__m512i)];
PG_INT64_TYPE popcnt = 0;
__m512i accum = _mm512_setzero_si512();
const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
const __m512i cnt = _mm512_popcnt_epi64(val);
accum = _mm512_add_epi64(accum, cnt);
popcnt = _mm512_reduce_add_epi64(accum);
/* return computed value, to prevent the above being optimized away */
return popcnt == 0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw=yes
else
pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
CFLAGS="$pgac_save_CFLAGS"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" >&5
$as_echo "$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" >&6; }
if test x"$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" = x"yes"; then
CFLAGS_POPCNT="-mavx512vpopcntdq -mavx512bw"
pgac_avx512_popcnt_intrinsics=yes
fi
fi
if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then
PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o"
$as_echo "#define USE_AVX512_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
fi
fi
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
#
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used

View File

@ -2050,32 +2050,19 @@ fi
# Check for XSAVE intrinsics
#
CFLAGS_XSAVE=""
PGAC_XSAVE_INTRINSICS([])
if test x"$pgac_xsave_intrinsics" != x"yes"; then
PGAC_XSAVE_INTRINSICS([-mxsave])
fi
PGAC_XSAVE_INTRINSICS()
if test x"$pgac_xsave_intrinsics" = x"yes"; then
AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
fi
AC_SUBST(CFLAGS_XSAVE)
# Check for AVX-512 popcount intrinsics
#
CFLAGS_POPCNT=""
PG_POPCNT_OBJS=""
if test x"$host_cpu" = x"x86_64"; then
PGAC_AVX512_POPCNT_INTRINSICS([])
if test x"$pgac_avx512_popcnt_intrinsics" != x"yes"; then
PGAC_AVX512_POPCNT_INTRINSICS([-mavx512vpopcntdq -mavx512bw])
fi
PGAC_AVX512_POPCNT_INTRINSICS()
if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then
PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o"
AC_DEFINE(USE_AVX512_POPCNT_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX-512 popcount instructions with a runtime check.])
fi
fi
AC_SUBST(CFLAGS_POPCNT)
AC_SUBST(PG_POPCNT_OBJS)
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
#

View File

@ -2153,25 +2153,22 @@ endforeach
# Check for the availability of XSAVE intrinsics.
###############################################################
cflags_xsave = []
if host_cpu == 'x86' or host_cpu == 'x86_64'
prog = '''
#include <immintrin.h>
#if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("xsave")))
#endif
int main(void)
{
return _xgetbv(0) & 0xe0;
}
'''
if cc.links(prog, name: 'XSAVE intrinsics without -mxsave',
args: test_c_args)
if cc.links(prog, name: 'XSAVE intrinsics', args: test_c_args)
cdata.set('HAVE_XSAVE_INTRINSICS', 1)
elif cc.links(prog, name: 'XSAVE intrinsics with -mxsave',
args: test_c_args + ['-mxsave'])
cdata.set('HAVE_XSAVE_INTRINSICS', 1)
cflags_xsave += '-mxsave'
endif
endif
@ -2181,12 +2178,14 @@ endif
# Check for the availability of AVX-512 popcount intrinsics.
###############################################################
cflags_popcnt = []
if host_cpu == 'x86_64'
prog = '''
#include <immintrin.h>
#if defined(__has_attribute) && __has_attribute (target)
__attribute__((target("avx512vpopcntdq","avx512bw")))
#endif
int main(void)
{
const char buf[sizeof(__m512i)];
@ -2201,13 +2200,9 @@ int main(void)
}
'''
if cc.links(prog, name: 'AVX-512 popcount without -mavx512vpopcntdq -mavx512bw',
if cc.links(prog, name: 'AVX-512 popcount',
args: test_c_args + ['-DINT64=@0@'.format(cdata.get('PG_INT64_TYPE'))])
cdata.set('USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 1)
elif cc.links(prog, name: 'AVX-512 popcount with -mavx512vpopcntdq -mavx512bw',
args: test_c_args + ['-DINT64=@0@'.format(cdata.get('PG_INT64_TYPE'))] + ['-mavx512vpopcntdq'] + ['-mavx512bw'])
cdata.set('USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 1)
cflags_popcnt += ['-mavx512vpopcntdq'] + ['-mavx512bw']
endif
endif

View File

@ -262,9 +262,7 @@ CFLAGS_SL_MODULE = @CFLAGS_SL_MODULE@
CXXFLAGS_SL_MODULE = @CXXFLAGS_SL_MODULE@
CFLAGS_UNROLL_LOOPS = @CFLAGS_UNROLL_LOOPS@
CFLAGS_VECTORIZE = @CFLAGS_VECTORIZE@
CFLAGS_POPCNT = @CFLAGS_POPCNT@
CFLAGS_CRC = @CFLAGS_CRC@
CFLAGS_XSAVE = @CFLAGS_XSAVE@
PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@
PERMIT_MISSING_VARIABLE_DECLARATIONS = @PERMIT_MISSING_VARIABLE_DECLARATIONS@
CXXFLAGS = @CXXFLAGS@
@ -772,9 +770,6 @@ LIBOBJS = @LIBOBJS@
# files needed for the chosen CRC-32C implementation
PG_CRC32C_OBJS = @PG_CRC32C_OBJS@
# files needed for the chosen popcount implementation
PG_POPCNT_OBJS = @PG_POPCNT_OBJS@
LIBS := -lpgcommon -lpgport $(LIBS)
# to make ws2_32.lib the last library

View File

@ -174,6 +174,18 @@
#define pg_attribute_nonnull(...)
#endif
/*
* pg_attribute_target allows specifying different target options that the
* function should be compiled with (e.g., for using special CPU instructions).
* Note that there still needs to be a configure-time check to verify that a
* specific target is understood by the compiler.
*/
#if __has_attribute (target)
#define pg_attribute_target(...) __attribute__((target(__VA_ARGS__)))
#else
#define pg_attribute_target(...)
#endif
/*
* Append PG_USED_FOR_ASSERTS_ONLY to definitions of variables that are only
* used in assert-enabled builds, to avoid compiler warnings about unused

View File

@ -102,10 +102,8 @@ pgxs_kv = {
' '.join(cflags_no_missing_var_decls),
'CFLAGS_CRC': ' '.join(cflags_crc),
'CFLAGS_POPCNT': ' '.join(cflags_popcnt),
'CFLAGS_UNROLL_LOOPS': ' '.join(unroll_loops_cflags),
'CFLAGS_VECTORIZE': ' '.join(vectorize_cflags),
'CFLAGS_XSAVE': ' '.join(cflags_xsave),
'LDFLAGS': var_ldflags,
'LDFLAGS_EX': var_ldflags_ex,
@ -181,7 +179,7 @@ pgxs_empty = [
'WANTED_LANGUAGES',
# Not needed because we don't build the server / PLs with the generated makefile
'LIBOBJS', 'PG_CRC32C_OBJS', 'PG_POPCNT_OBJS', 'TAS',
'LIBOBJS', 'PG_CRC32C_OBJS', 'TAS',
'PG_TEST_EXTRA',
'DTRACEFLAGS', # only server has dtrace probes

View File

@ -38,13 +38,13 @@ LIBS += $(PTHREAD_LIBS)
OBJS = \
$(LIBOBJS) \
$(PG_CRC32C_OBJS) \
$(PG_POPCNT_OBJS) \
bsearch_arg.o \
chklocale.o \
inet_net_ntop.o \
noblock.o \
path.o \
pg_bitutils.o \
pg_popcount_avx512.o \
pg_strong_random.o \
pgcheckdir.o \
pgmkdirp.o \
@ -92,16 +92,6 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC)
pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC)
pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_CRC)
# all versions of pg_popcount_avx512_choose.o need CFLAGS_XSAVE
pg_popcount_avx512_choose.o: CFLAGS+=$(CFLAGS_XSAVE)
pg_popcount_avx512_choose_shlib.o: CFLAGS+=$(CFLAGS_XSAVE)
pg_popcount_avx512_choose_srv.o: CFLAGS+=$(CFLAGS_XSAVE)
# all versions of pg_popcount_avx512.o need CFLAGS_POPCNT
pg_popcount_avx512.o: CFLAGS+=$(CFLAGS_POPCNT)
pg_popcount_avx512_shlib.o: CFLAGS+=$(CFLAGS_POPCNT)
pg_popcount_avx512_srv.o: CFLAGS+=$(CFLAGS_POPCNT)
#
# Shared library versions of object files
#

View File

@ -7,6 +7,7 @@ pgport_sources = [
'noblock.c',
'path.c',
'pg_bitutils.c',
'pg_popcount_avx512.c',
'pg_strong_random.c',
'pgcheckdir.c',
'pgmkdirp.c',
@ -84,8 +85,6 @@ replace_funcs_pos = [
['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK', 'crc'],
['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
['pg_popcount_avx512', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'popcnt'],
['pg_popcount_avx512_choose', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'xsave'],
# arm / aarch64
['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'],
@ -100,8 +99,8 @@ replace_funcs_pos = [
['pg_crc32c_sb8', 'USE_SLICING_BY_8_CRC32C'],
]
pgport_cflags = {'crc': cflags_crc, 'popcnt': cflags_popcnt, 'xsave': cflags_xsave}
pgport_sources_cflags = {'crc': [], 'popcnt': [], 'xsave': []}
pgport_cflags = {'crc': cflags_crc}
pgport_sources_cflags = {'crc': []}
foreach f : replace_funcs_neg
func = f.get(0)

View File

@ -12,7 +12,17 @@
*/
#include "c.h"
#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
#include <cpuid.h>
#endif
#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
#include <immintrin.h>
#endif
#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
#include <intrin.h>
#endif
#include "port/pg_bitutils.h"
@ -21,12 +31,82 @@
* use AVX-512 intrinsics, but we check it anyway to be sure. We piggy-back on
* the function pointers that are only used when TRY_POPCNT_FAST is set.
*/
#ifdef TRY_POPCNT_FAST
#if defined(TRY_POPCNT_FAST) && defined(USE_AVX512_POPCNT_WITH_RUNTIME_CHECK)
/*
* Does CPUID say there's support for XSAVE instructions?
*/
static inline bool
xsave_available(void)
{
unsigned int exx[4] = {0, 0, 0, 0};
#if defined(HAVE__GET_CPUID)
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUID)
__cpuid(exx, 1);
#else
#error cpuid instruction not available
#endif
return (exx[2] & (1 << 27)) != 0; /* osxsave */
}
/*
* Does XGETBV say the ZMM registers are enabled?
*
* NB: Caller is responsible for verifying that xsave_available() returns true
* before calling this.
*/
#ifdef HAVE_XSAVE_INTRINSICS
pg_attribute_target("xsave")
#endif
static inline bool
zmm_regs_available(void)
{
#ifdef HAVE_XSAVE_INTRINSICS
return (_xgetbv(0) & 0xe6) == 0xe6;
#else
return false;
#endif
}
/*
* Does CPUID say there's support for AVX-512 popcount and byte-and-word
* instructions?
*/
static inline bool
avx512_popcnt_available(void)
{
unsigned int exx[4] = {0, 0, 0, 0};
#if defined(HAVE__GET_CPUID_COUNT)
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUIDEX)
__cpuidex(exx, 7, 0);
#else
#error cpuid instruction not available
#endif
return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
(exx[1] & (1 << 30)) != 0; /* avx512-bw */
}
/*
* Returns true if the CPU supports the instructions required for the AVX-512
* pg_popcount() implementation.
*/
bool
pg_popcount_avx512_available(void)
{
return xsave_available() &&
zmm_regs_available() &&
avx512_popcnt_available();
}
/*
* pg_popcount_avx512
* Returns the number of 1-bits in buf
*/
pg_attribute_target("avx512vpopcntdq", "avx512bw")
uint64
pg_popcount_avx512(const char *buf, int bytes)
{
@ -82,6 +162,7 @@ pg_popcount_avx512(const char *buf, int bytes)
* pg_popcount_masked_avx512
* Returns the number of 1-bits in buf after applying the mask to each byte
*/
pg_attribute_target("avx512vpopcntdq", "avx512bw")
uint64
pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask)
{
@ -138,4 +219,5 @@ pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask)
return _mm512_reduce_add_epi64(accum);
}
#endif /* TRY_POPCNT_FAST */
#endif /* TRY_POPCNT_FAST &&
* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */

View File

@ -1,102 +0,0 @@
/*-------------------------------------------------------------------------
*
* pg_popcount_avx512_choose.c
* Test whether we can use the AVX-512 pg_popcount() implementation.
*
* Copyright (c) 2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/port/pg_popcount_avx512_choose.c
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
#include <cpuid.h>
#endif
#ifdef HAVE_XSAVE_INTRINSICS
#include <immintrin.h>
#endif
#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
#include <intrin.h>
#endif
#include "port/pg_bitutils.h"
/*
* It's probably unlikely that TRY_POPCNT_FAST won't be set if we are able to
* use AVX-512 intrinsics, but we check it anyway to be sure. We piggy-back on
* the function pointers that are only used when TRY_POPCNT_FAST is set.
*/
#ifdef TRY_POPCNT_FAST
/*
* Does CPUID say there's support for XSAVE instructions?
*/
static inline bool
xsave_available(void)
{
unsigned int exx[4] = {0, 0, 0, 0};
#if defined(HAVE__GET_CPUID)
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUID)
__cpuid(exx, 1);
#else
#error cpuid instruction not available
#endif
return (exx[2] & (1 << 27)) != 0; /* osxsave */
}
/*
* Does XGETBV say the ZMM registers are enabled?
*
* NB: Caller is responsible for verifying that xsave_available() returns true
* before calling this.
*/
static inline bool
zmm_regs_available(void)
{
#ifdef HAVE_XSAVE_INTRINSICS
return (_xgetbv(0) & 0xe6) == 0xe6;
#else
return false;
#endif
}
/*
* Does CPUID say there's support for AVX-512 popcount and byte-and-word
* instructions?
*/
static inline bool
avx512_popcnt_available(void)
{
unsigned int exx[4] = {0, 0, 0, 0};
#if defined(HAVE__GET_CPUID_COUNT)
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUIDEX)
__cpuidex(exx, 7, 0);
#else
#error cpuid instruction not available
#endif
return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
(exx[1] & (1 << 30)) != 0; /* avx512-bw */
}
/*
* Returns true if the CPU supports the instructions required for the AVX-512
* pg_popcount() implementation.
*/
bool
pg_popcount_avx512_available(void)
{
return xsave_available() &&
zmm_regs_available() &&
avx512_popcnt_available();
}
#endif /* TRY_POPCNT_FAST */