diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am index cfad90124f9..0623a0bf2d1 100644 --- a/libatomic/Makefile.am +++ b/libatomic/Makefile.am @@ -130,6 +130,9 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix _$(s)_.lo,$(SIZEOBJS))) ## On a target-specific basis, include alternates to be selected by IFUNC. if HAVE_IFUNC if ARCH_AARCH64_LINUX +if ARCH_AARCH64_HAVE_LSE128 +AM_CPPFLAGS = -DHAVE_FEAT_LSE128 +endif IFUNC_OPTIONS = -march=armv8-a+lse libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS))) libatomic_la_SOURCES += atomic_16.S diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in index dc2330b91fd..cd48fa21334 100644 --- a/libatomic/Makefile.in +++ b/libatomic/Makefile.in @@ -452,6 +452,7 @@ M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files))) libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \ _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \ $(am__append_4) $(am__append_5) +@ARCH_AARCH64_HAVE_LSE128_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@AM_CPPFLAGS = -DHAVE_FEAT_LSE128 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586 diff --git a/libatomic/acinclude.m4 b/libatomic/acinclude.m4 index f35ab5b60a5..d4f13174e2c 100644 --- a/libatomic/acinclude.m4 +++ b/libatomic/acinclude.m4 @@ -83,6 +83,25 @@ AC_DEFUN([LIBAT_TEST_ATOMIC_BUILTIN],[ ]) ]) +dnl +dnl Test if the host assembler supports armv9.4-a LSE128 isns. +dnl +AC_DEFUN([LIBAT_TEST_FEAT_AARCH64_LSE128],[ + AC_CACHE_CHECK([for armv9.4-a LSE128 insn support], + [libat_cv_have_feat_lse128],[ + AC_LANG_CONFTEST([AC_LANG_PROGRAM([],[asm(".arch armv9-a+lse128")])]) + if AC_TRY_EVAL(ac_compile); then + eval libat_cv_have_feat_lse128=yes + else + eval libat_cv_have_feat_lse128=no + fi + rm -f conftest* + ]) + LIBAT_DEFINE_YESNO([HAVE_FEAT_LSE128], [$libat_cv_have_feat_lse128], + [Have LSE128 support for 16 byte integers.]) + AM_CONDITIONAL([ARCH_AARCH64_HAVE_LSE128], [test x$libat_cv_have_feat_lse128 = xyes]) +]) + dnl dnl Test if we have __atomic_load and __atomic_store for mode $1, size $2 dnl diff --git a/libatomic/auto-config.h.in b/libatomic/auto-config.h.in index ab3424a759e..7c78933b07d 100644 --- a/libatomic/auto-config.h.in +++ b/libatomic/auto-config.h.in @@ -105,6 +105,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_DLFCN_H +/* Have LSE128 support for 16 byte integers. */ +#undef HAVE_FEAT_LSE128 + /* Define to 1 if you have the header file. */ #undef HAVE_FENV_H diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S index 33f2f569d06..d4a360a6f78 100644 --- a/libatomic/config/linux/aarch64/atomic_16.S +++ b/libatomic/config/linux/aarch64/atomic_16.S @@ -35,12 +35,17 @@ writes, this will be true when using atomics in actual code. The libat__16 entry points are ARMv8.0. - The libat__16_i1 entry points are used when LSE2 is available. */ - + The libat__16_i1 entry points are used when LSE128 is available. + The libat__16_i2 entry points are used when LSE2 is available. */ +#if HAVE_FEAT_LSE128 + .arch armv9-a+lse128 +#else .arch armv8-a+lse +#endif -#define LSE2(NAME) NAME##_i1 +#define LSE128(NAME) NAME##_i1 +#define LSE2(NAME) NAME##_i2 #define CORE(NAME) NAME #define ENTRY_FEAT(NAME, FEAT) \ @@ -202,6 +207,31 @@ ENTRY (libat_exchange_16) END (libat_exchange_16) +#if HAVE_FEAT_LSE128 +ENTRY_FEAT (libat_exchange_16, LSE128) + mov tmp0, x0 + mov res0, in0 + mov res1, in1 + cbnz w4, 1f + + /* RELAXED. */ + swpp res0, res1, [tmp0] + ret +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + swppa res0, res1, [tmp0] + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +2: swppal res0, res1, [tmp0] + ret +END_FEAT (libat_exchange_16, LSE128) +#endif + + ENTRY (libat_compare_exchange_16) ldp exp0, exp1, [x1] cbz w4, 3f @@ -395,6 +425,31 @@ ENTRY (libat_fetch_or_16) END (libat_fetch_or_16) +#if HAVE_FEAT_LSE128 +ENTRY_FEAT (libat_fetch_or_16, LSE128) + mov tmp0, x0 + mov res0, in0 + mov res1, in1 + cbnz w4, 1f + + /* RELAXED. */ + ldsetp res0, res1, [tmp0] + ret +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + ldsetpa res0, res1, [tmp0] + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +2: ldsetpal res0, res1, [tmp0] + ret +END_FEAT (libat_fetch_or_16, LSE128) +#endif + + ENTRY (libat_or_fetch_16) mov x5, x0 cbnz w4, 2f @@ -417,6 +472,36 @@ ENTRY (libat_or_fetch_16) END (libat_or_fetch_16) +#if HAVE_FEAT_LSE128 +ENTRY_FEAT (libat_or_fetch_16, LSE128) + cbnz w4, 1f + mov tmp0, in0 + mov tmp1, in1 + + /* RELAXED. */ + ldsetp in0, in1, [x0] + orr res0, in0, tmp0 + orr res1, in1, tmp1 + ret +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + ldsetpa in0, in1, [x0] + orr res0, in0, tmp0 + orr res1, in1, tmp1 + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +2: ldsetpal in0, in1, [x0] + orr res0, in0, tmp0 + orr res1, in1, tmp1 + ret +END_FEAT (libat_or_fetch_16, LSE128) +#endif + + ENTRY (libat_fetch_and_16) mov x5, x0 cbnz w4, 2f @@ -439,6 +524,32 @@ ENTRY (libat_fetch_and_16) END (libat_fetch_and_16) +#if HAVE_FEAT_LSE128 +ENTRY_FEAT (libat_fetch_and_16, LSE128) + mov tmp0, x0 + mvn res0, in0 + mvn res1, in1 + cbnz w4, 1f + + /* RELAXED. */ + ldclrp res0, res1, [tmp0] + ret + +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + ldclrpa res0, res1, [tmp0] + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +2: ldclrpal res0, res1, [tmp0] + ret +END_FEAT (libat_fetch_and_16, LSE128) +#endif + + ENTRY (libat_and_fetch_16) mov x5, x0 cbnz w4, 2f @@ -461,6 +572,37 @@ ENTRY (libat_and_fetch_16) END (libat_and_fetch_16) +#if HAVE_FEAT_LSE128 +ENTRY_FEAT (libat_and_fetch_16, LSE128) + mvn tmp0, in0 + mvn tmp0, in1 + cbnz w4, 1f + + /* RELAXED. */ + ldclrp tmp0, tmp1, [x0] + and res0, tmp0, in0 + and res1, tmp1, in1 + ret + +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + ldclrpa tmp0, tmp1, [x0] + and res0, tmp0, in0 + and res1, tmp1, in1 + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +2: ldclrpal tmp0, tmp1, [x5] + and res0, tmp0, in0 + and res1, tmp1, in1 + ret +END_FEAT (libat_and_fetch_16, LSE128) +#endif + + ENTRY (libat_fetch_xor_16) mov x5, x0 cbnz w4, 2f @@ -566,6 +708,28 @@ ENTRY (libat_test_and_set_16) END (libat_test_and_set_16) +/* Alias entry points which are the same in LSE2 and LSE128. */ + +#if !HAVE_FEAT_LSE128 +ALIAS (libat_exchange_16, LSE128, LSE2) +ALIAS (libat_fetch_or_16, LSE128, LSE2) +ALIAS (libat_fetch_and_16, LSE128, LSE2) +ALIAS (libat_or_fetch_16, LSE128, LSE2) +ALIAS (libat_and_fetch_16, LSE128, LSE2) +#endif +ALIAS (libat_load_16, LSE128, LSE2) +ALIAS (libat_store_16, LSE128, LSE2) +ALIAS (libat_compare_exchange_16, LSE128, LSE2) +ALIAS (libat_fetch_add_16, LSE128, LSE2) +ALIAS (libat_add_fetch_16, LSE128, LSE2) +ALIAS (libat_fetch_sub_16, LSE128, LSE2) +ALIAS (libat_sub_fetch_16, LSE128, LSE2) +ALIAS (libat_fetch_xor_16, LSE128, LSE2) +ALIAS (libat_xor_fetch_16, LSE128, LSE2) +ALIAS (libat_fetch_nand_16, LSE128, LSE2) +ALIAS (libat_nand_fetch_16, LSE128, LSE2) +ALIAS (libat_test_and_set_16, LSE128, LSE2) + /* Alias entry points which are the same in baseline and LSE2. */ ALIAS (libat_exchange_16, LSE2, CORE) diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h index 8fd4fe3321a..1bc7d839232 100644 --- a/libatomic/config/linux/aarch64/host-config.h +++ b/libatomic/config/linux/aarch64/host-config.h @@ -37,14 +37,17 @@ typedef struct __ifunc_arg_t { #ifdef HWCAP_USCAT # if N == 16 -# define IFUNC_COND_1 ifunc1 (hwcap, features) +# define IFUNC_COND_1 (has_lse128 (hwcap, features)) +# define IFUNC_COND_2 (has_lse2 (hwcap, features)) +# define IFUNC_NCOND(N) 2 # else -# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS) +# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS) +# define IFUNC_NCOND(N) 1 # endif #else # define IFUNC_COND_1 (false) +# define IFUNC_NCOND(N) 1 #endif -#define IFUNC_NCOND(N) (1) #endif /* HAVE_IFUNC */ @@ -59,7 +62,7 @@ typedef struct __ifunc_arg_t { #define MIDR_PARTNUM(midr) (((midr) >> 4) & 0xfff) static inline bool -ifunc1 (unsigned long hwcap, const __ifunc_arg_t *features) +has_lse2 (unsigned long hwcap, const __ifunc_arg_t *features) { if (hwcap & HWCAP_USCAT) return true; @@ -75,6 +78,37 @@ ifunc1 (unsigned long hwcap, const __ifunc_arg_t *features) return false; } + +/* LSE128 atomic support encoded in ID_AA64ISAR0_EL1.Atomic, + bits[23:20]. The expected value is 0b0011. Check that. */ + +#define AT_FEAT_FIELD(isar0) (((isar0) >> 20) & 15) + +/* Ensure backwards compatibility with glibc <= 2.38. */ +#ifndef HWCAP2_LSE128 +#define HWCAP2_LSE128 (1UL << 47) +#endif + +static inline bool +has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features) +{ + if (hwcap & _IFUNC_ARG_HWCAP + && features->_hwcap2 & HWCAP2_LSE128) + return true; + /* A 0 HWCAP2_LSE128 bit may be just as much a sign of missing HWCAP2 bit + support in older kernels as it is of CPU feature absence. Try fallback + method to guarantee LSE128 is not implemented. + + In the absence of HWCAP_CPUID, we are unable to check for LSE128. */ + if (!(hwcap & HWCAP_CPUID)) + return false; + unsigned long isar0; + asm volatile ("mrs %0, ID_AA64ISAR0_EL1" : "=r" (isar0)); + if (AT_FEAT_FIELD (isar0) >= 3) + return true; + return false; +} + #endif #include_next diff --git a/libatomic/configure b/libatomic/configure index d579bab96f8..8ab730d8082 100755 --- a/libatomic/configure +++ b/libatomic/configure @@ -656,6 +656,8 @@ LIBAT_BUILD_VERSIONED_SHLIB_FALSE LIBAT_BUILD_VERSIONED_SHLIB_TRUE OPT_LDFLAGS SECTION_LDFLAGS +ARCH_AARCH64_HAVE_LSE128_FALSE +ARCH_AARCH64_HAVE_LSE128_TRUE SYSROOT_CFLAGS_FOR_TARGET enable_aarch64_lse libtool_VERSION @@ -11456,7 +11458,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11459 "configure" +#line 11461 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11562,7 +11564,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11565 "configure" +#line 11567 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -14697,6 +14699,57 @@ _ACEOF +# Check for target-specific assembly-level support for atomic operations. + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for armv9.4-a LSE128 insn support" >&5 +$as_echo_n "checking for armv9.4-a LSE128 insn support... " >&6; } +if ${libat_cv_have_feat_lse128+:} false; then : + $as_echo_n "(cached) " >&6 +else + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +asm(".arch armv9-a+lse128") + ; + return 0; +} +_ACEOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + eval libat_cv_have_feat_lse128=yes + else + eval libat_cv_have_feat_lse128=no + fi + rm -f conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libat_cv_have_feat_lse128" >&5 +$as_echo "$libat_cv_have_feat_lse128" >&6; } + + yesno=`echo $libat_cv_have_feat_lse128 | tr 'yesno' '1 0 '` + +cat >>confdefs.h <<_ACEOF +#define HAVE_FEAT_LSE128 $yesno +_ACEOF + + + if test x$libat_cv_have_feat_lse128 = xyes; then + ARCH_AARCH64_HAVE_LSE128_TRUE= + ARCH_AARCH64_HAVE_LSE128_FALSE='#' +else + ARCH_AARCH64_HAVE_LSE128_TRUE='#' + ARCH_AARCH64_HAVE_LSE128_FALSE= +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5 $as_echo_n "checking whether byte ordering is bigendian... " >&6; } if ${ac_cv_c_bigendian+:} false; then : @@ -15989,6 +16042,10 @@ if test -z "${ENABLE_DARWIN_AT_RPATH_TRUE}" && test -z "${ENABLE_DARWIN_AT_RPATH as_fn_error $? "conditional \"ENABLE_DARWIN_AT_RPATH\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${ARCH_AARCH64_HAVE_LSE128_TRUE}" && test -z "${ARCH_AARCH64_HAVE_LSE128_FALSE}"; then + as_fn_error $? "conditional \"ARCH_AARCH64_HAVE_LSE128\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${LIBAT_BUILD_VERSIONED_SHLIB_TRUE}" && test -z "${LIBAT_BUILD_VERSIONED_SHLIB_FALSE}"; then as_fn_error $? "conditional \"LIBAT_BUILD_VERSIONED_SHLIB\" was never defined. diff --git a/libatomic/configure.ac b/libatomic/configure.ac index 32a2cdb13ae..85824fa7614 100644 --- a/libatomic/configure.ac +++ b/libatomic/configure.ac @@ -206,6 +206,9 @@ LIBAT_FORALL_MODES([LIBAT_HAVE_ATOMIC_CAS]) LIBAT_FORALL_MODES([LIBAT_HAVE_ATOMIC_FETCH_ADD]) LIBAT_FORALL_MODES([LIBAT_HAVE_ATOMIC_FETCH_OP]) +# Check for target-specific assembly-level support for atomic operations. +LIBAT_TEST_FEAT_AARCH64_LSE128() + AC_C_BIGENDIAN # I don't like the default behaviour of WORDS_BIGENDIAN undefined for LE. AH_BOTTOM(