x86-64: Implement memset family IFUNC selectors in C

Implement memset family IFUNC selectors in C.

All internal calls within libc.so can use IFUNC on x86-64 since unlike
x86, x86-64 supports PC-relative addressing to access the GOT entry so
that it can call via PLT without using an extra register.  For libc.a,
we can't use IFUNC for functions which are called before IFUNC has been
initialized.  Using IFUNC internally reduces the icache footprint since
libc.so and other code in the process use the same implementations.
This patch uses IFUNC for memset functions within libc.

2017-06-07  H.J. Lu  <hongjiu.lu@intel.com>
	    Erich Elsen  <eriche@google.com>

	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
	memset-sse2-unaligned-erms, and memset_chk-nonshared.
	* sysdeps/x86_64/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list): Add test for __memset_chk_erms.
	Update comments.
	* sysdeps/x86_64/multiarch/ifunc-memset.h: New file.
	* sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S: Likewise.
	* sysdeps/x86_64/multiarch/memset.c: Likewise.
	* sysdeps/x86_64/multiarch/memset_chk-nonshared.S: Likewise.
	* sysdeps/x86_64/multiarch/memset_chk.c: Likewise.
	* sysdeps/x86_64/multiarch/memset.S: Removed.
	* sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
	* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
	(__memset_chk_erms): New function.
This commit is contained in:
H.J. Lu 2017-06-15 08:33:25 -07:00
parent 2c0b90ab44
commit 93e46f8773
13 changed files with 236 additions and 147 deletions

View File

@ -1,3 +1,21 @@
2017-06-15 H.J. Lu <hongjiu.lu@intel.com>
Erich Elsen <eriche@google.com>
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
memset-sse2-unaligned-erms, and memset_chk-nonshared.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list): Add test for __memset_chk_erms.
Update comments.
* sysdeps/x86_64/multiarch/ifunc-memset.h: New file.
* sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S: Likewise.
* sysdeps/x86_64/multiarch/memset.c: Likewise.
* sysdeps/x86_64/multiarch/memset_chk-nonshared.S: Likewise.
* sysdeps/x86_64/multiarch/memset_chk.c: Likewise.
* sysdeps/x86_64/multiarch/memset.S: Removed.
* sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
(__memset_chk_erms): New function.
2017-06-15 Siddhesh Poyarekar <siddhesh@sourceware.org>
* configure.ac(--enable-tunables): Enable by default.

View File

@ -29,6 +29,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
memmove-sse2-unaligned-erms \
memmove-avx-unaligned-erms \
memmove-avx512-unaligned-erms \
memset-sse2-unaligned-erms \
memset-avx2-unaligned-erms \
memset-avx512-unaligned-erms
CFLAGS-varshift.c += -msse4
@ -50,6 +51,6 @@ endif
ifeq ($(subdir),debug)
sysdep_routines += memcpy_chk-nonshared mempcpy_chk-nonshared \
memmove_chk-nonshared \
memmove_chk-nonshared memset_chk-nonshared \
wmemset_chk-nonshared
endif

View File

@ -121,8 +121,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memrchr_avx2)
IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
/* Support sysdeps/x86_64/multiarch/memset_chk.S. */
/* Support sysdeps/x86_64/multiarch/memset_chk.c. */
IFUNC_IMPL (i, name, __memset_chk,
IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
__memset_chk_erms)
IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
__memset_chk_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
@ -144,7 +146,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memset_chk_avx512_no_vzeroupper)
)
/* Support sysdeps/x86_64/multiarch/memset.S. */
/* Support sysdeps/x86_64/multiarch/memset.c. */
IFUNC_IMPL (i, name, memset,
IFUNC_IMPL_ADD (array, i, memset, 1,
__memset_sse2_unaligned)

View File

@ -0,0 +1,69 @@
/* Common definition for memset/memset_chk ifunc selections.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <init-arch.h>
/* Declarations of every candidate implementation that IFUNC_SELECTOR
   below can return.  Each is declared with the type of REDIRECT_NAME
   and marked attribute_hidden.
   NOTE(review): OPTIMIZE and REDIRECT_NAME are defined outside this
   file; presumably OPTIMIZE (x) expands to the per-variant symbol name
   for the function being built -- confirm against init-arch.h.  */
extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
attribute_hidden;
/* Choose the memset/memset_chk implementation for the running CPU.

   Checked in this order, first match wins:
     1. Prefer_ERMS set: the erms variant.
     2. AVX512F_Usable set and Prefer_No_AVX512 clear: an avx512 variant
	(avx512_no_vzeroupper when Prefer_No_VZEROUPPER is set).
     3. AVX2_Usable set: an avx2 variant.
     4. Otherwise: an sse2 variant.
   Wherever an *_unaligned_erms flavour exists it is picked when the CPU
   reports ERMS, and the plain *_unaligned flavour otherwise.  */
static inline void *
IFUNC_SELECTOR (void)
{
  const struct cpu_features *cpu_features = __get_cpu_features ();

  if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
    return OPTIMIZE (erms);

  /* Every remaining tier splits on the same CPU bit, so read it once.  */
  const int has_erms = CPU_FEATURES_CPU_P (cpu_features, ERMS);

  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
      && CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable))
    {
      /* The no-vzeroupper preference outranks the ERMS split.  */
      if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
	return OPTIMIZE (avx512_no_vzeroupper);

      return (has_erms
	      ? OPTIMIZE (avx512_unaligned_erms)
	      : OPTIMIZE (avx512_unaligned));
    }

  if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
    return (has_erms
	    ? OPTIMIZE (avx2_unaligned_erms)
	    : OPTIMIZE (avx2_unaligned));

  return (has_erms
	  ? OPTIMIZE (sse2_unaligned_erms)
	  : OPTIMIZE (sse2_unaligned));
}

View File

@ -0,0 +1,41 @@
/* memset with SSE2.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2014-2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <shlib-compat.h>
#include <init-arch.h>
/* Overrides applied only when building libc, before the shared memset
   source is included at the bottom.  */
#if IS_IN (libc)
/* Build a symbol name by pasting "_sse2_" between prefix P and
   suffix S.  */
# define MEMSET_SYMBOL(p,s) p##_sse2_##s
# define WMEMSET_SYMBOL(p,s) p##_sse2_##s
# ifdef SHARED
/* In the shared build, make libc_hidden_builtin_def expand to nothing
   for the source included below.  */
# undef libc_hidden_builtin_def
# define libc_hidden_builtin_def(name)
# endif
/* Replace weak_alias so that any use, whatever its arguments, emits
   only a weak bzero symbol set equal to __bzero.  */
# undef weak_alias
# define weak_alias(original, alias) \
.weak bzero; bzero = __bzero
/* Make strong_alias expand to nothing; both arguments are ignored.  */
# undef strong_alias
# define strong_alias(ignored1, ignored2)
#endif
/* Include sysdeps/x86_64/memset.S with the definitions above in
   effect.  */
#include <sysdeps/x86_64/memset.S>

View File

@ -121,6 +121,11 @@ L(entry_from_bzero):
END (MEMSET_SYMBOL (__memset, unaligned))
# if VEC_SIZE == 16
/* Checked entry point: jump to __chk_fail when %rcx is below %rdx
   (unsigned comparison).  There is no return or jump before END, so on
   success execution continues into the code that follows, which is the
   __memset_erms entry in this file.
   NOTE(review): presumably %rdx holds the fill length and %rcx the
   destination object size, per the x86-64 calling convention for
   __memset_chk -- confirm against its prototype.  */
ENTRY (__memset_chk_erms)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (__memset_chk_erms)
/* Only used to measure performance of REP STOSB. */
ENTRY (__memset_erms)
# else

View File

@ -1,82 +0,0 @@
/* Multiple versions of memset
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2014-2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <shlib-compat.h>
#include <init-arch.h>
/* Define multiple versions only for the definition in lib. */
#if IS_IN (libc)
ENTRY(memset)
.type memset, @gnu_indirect_function
LOAD_RTLD_GLOBAL_RO_RDX
lea __memset_erms(%rip), %RAX_LP
HAS_ARCH_FEATURE (Prefer_ERMS)
jnz 2f
lea __memset_sse2_unaligned_erms(%rip), %RAX_LP
HAS_CPU_FEATURE (ERMS)
jnz 1f
lea __memset_sse2_unaligned(%rip), %RAX_LP
1:
HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
lea __memset_avx2_unaligned_erms(%rip), %RAX_LP
HAS_CPU_FEATURE (ERMS)
jnz L(AVX512F)
lea __memset_avx2_unaligned(%rip), %RAX_LP
L(AVX512F):
HAS_ARCH_FEATURE (Prefer_No_AVX512)
jnz 2f
HAS_ARCH_FEATURE (AVX512F_Usable)
jz 2f
lea __memset_avx512_no_vzeroupper(%rip), %RAX_LP
HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
jnz 2f
lea __memset_avx512_unaligned_erms(%rip), %RAX_LP
HAS_CPU_FEATURE (ERMS)
jnz 2f
lea __memset_avx512_unaligned(%rip), %RAX_LP
2: ret
END(memset)
#endif
#if IS_IN (libc)
# define MEMSET_SYMBOL(p,s) p##_sse2_##s
# define WMEMSET_SYMBOL(p,s) p##_sse2_##s
# ifdef SHARED
# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal memset calls through a PLT.
The speedup we get from using SSE2 instructions is likely eaten away
by the indirect call in the PLT. */
# define libc_hidden_builtin_def(name) \
.globl __GI_memset; __GI_memset = __memset_sse2_unaligned; \
.globl __GI_wmemset; __GI_wmemset = __wmemset_sse2_unaligned; \
.globl __GI___wmemset; __GI___wmemset = __wmemset_sse2_unaligned
# endif
# undef weak_alias
# define weak_alias(original, alias) \
.weak bzero; bzero = __bzero
# undef strong_alias
# define strong_alias(original, alias)
#endif
#include "../memset.S"

View File

@ -0,0 +1,37 @@
/* Multiple versions of memset.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc and for the
DSO. In static binaries we need memset before the initialization has
happened. */
#if IS_IN (libc)
/* Rename memset while <string.h> is included, so the header's
   declaration is attached to __redirect_memset and the name memset
   stays free to be defined below.  */
# define memset __redirect_memset
# include <string.h>
# undef memset
/* SYMBOL_NAME must be set before ifunc-memset.h is included.
   NOTE(review): presumably the REDIRECT_NAME and OPTIMIZE macros used
   in that header are derived from it -- confirm against init-arch.h.  */
# define SYMBOL_NAME memset
# include "ifunc-memset.h"
/* Define memset through libc_ifunc_redirected, using the type of
   __redirect_memset and the implementation that IFUNC_SELECTOR ()
   returns.  */
libc_ifunc_redirected (__redirect_memset, memset, IFUNC_SELECTOR ());
# ifdef SHARED
/* NOTE(review): looks like this declares the libc-internal __GI_memset
   name with hidden visibility so that calls inside libc.so also reach
   the IFUNC-selected memset -- confirm against __hidden_ver1.  */
__hidden_ver1 (memset, __GI_memset, __redirect_memset)
__attribute__ ((visibility ("hidden")));
# endif
#endif

View File

@ -0,0 +1,21 @@
/* Non-shared version of memset_chk for x86-64.
Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Only when building libc without SHARED defined does this file
   contribute code: it pulls in sysdeps/x86_64/memset_chk.S.  */
#if IS_IN (libc) && !defined SHARED
# include <sysdeps/x86_64/memset_chk.S>
#endif

View File

@ -1,61 +0,0 @@
/* Multiple versions of memset_chk
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2014-2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <init-arch.h>
/* Define multiple versions only for the definition in lib. */
#if IS_IN (libc)
# ifdef SHARED
ENTRY(__memset_chk)
.type __memset_chk, @gnu_indirect_function
LOAD_RTLD_GLOBAL_RO_RDX
lea __memset_chk_sse2_unaligned_erms(%rip), %RAX_LP
HAS_CPU_FEATURE (ERMS)
jnz 1f
lea __memset_chk_sse2_unaligned(%rip), %RAX_LP
1:
HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
lea __memset_chk_avx2_unaligned_erms(%rip), %RAX_LP
HAS_CPU_FEATURE (ERMS)
jnz L(AVX512F)
lea __memset_chk_avx2_unaligned(%rip), %RAX_LP
L(AVX512F):
HAS_ARCH_FEATURE (Prefer_No_AVX512)
jnz 2f
HAS_ARCH_FEATURE (AVX512F_Usable)
jz 2f
lea __memset_chk_avx512_no_vzeroupper(%rip), %RAX_LP
HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
jnz 2f
lea __memset_chk_avx512_unaligned_erms(%rip), %RAX_LP
HAS_CPU_FEATURE (ERMS)
jnz 2f
lea __memset_chk_avx512_unaligned(%rip), %RAX_LP
2: ret
END(__memset_chk)
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
.section .gnu.warning.__memset_zero_constant_len_parameter
.string "memset used with constant zero length parameter; this could be due to transposed parameters"
# else
# include "../memset_chk.S"
# endif
#endif

View File

@ -0,0 +1,31 @@
/* Multiple versions of __memset_chk
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc.so. */
#if IS_IN (libc) && defined SHARED
/* Rename __memset_chk while <string.h> is included, so any declaration
   the header provides is attached to __redirect_memset_chk and the name
   __memset_chk stays free to be defined below.  */
# define __memset_chk __redirect_memset_chk
# include <string.h>
# undef __memset_chk
/* SYMBOL_NAME must be set before ifunc-memset.h is included.
   NOTE(review): presumably the REDIRECT_NAME and OPTIMIZE macros used
   in that header are derived from it -- confirm against init-arch.h.  */
# define SYMBOL_NAME memset_chk
# include "ifunc-memset.h"
/* Define __memset_chk through libc_ifunc_redirected, using the type of
   __redirect_memset_chk and the implementation that IFUNC_SELECTOR ()
   returns.  */
libc_ifunc_redirected (__redirect_memset_chk, __memset_chk,
IFUNC_SELECTOR ());
#endif

View File

@ -30,4 +30,11 @@
libc_ifunc_redirected (__redirect_wmemset, __wmemset, IFUNC_SELECTOR ());
weak_alias (__wmemset, wmemset)
# ifdef SHARED
__hidden_ver1 (__wmemset, __GI___wmemset, __redirect___wmemset)
__attribute__ ((visibility ("hidden")));
__hidden_ver1 (wmemset, __GI_wmemset, __redirect_wmemset)
__attribute__ ((visibility ("hidden")));
# endif
#endif

View File

@ -17,5 +17,5 @@
<http://www.gnu.org/licenses/>. */
#if IS_IN (libc) && !defined SHARED
# include "../wmemset_chk.S"
# include <sysdeps/x86_64/wmemset_chk.S>
#endif