mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-21 01:12:26 +08:00
IFUNC for Cavium ThunderX2
* sysdeps/aarch64/multiarch/Makefile (sysdep_routines): Add memcpy_thunderx2.
* sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC): Increment to 4.
  (__libc_ifunc_impl_list): Add __memcpy_thunderx2.
* sysdeps/aarch64/multiarch/memcpy.c (libc_ifunc): Add IS_THUNDERX2 and IS_THUNDERX2PA checks.
* sysdeps/aarch64/multiarch/memcpy_thunderx.S (USE_THUNDERX2): Use macro to set name appropriately.
  (memcpy): Use USE_THUNDERX2 macro to modify prefetches.
* sysdeps/aarch64/multiarch/memcpy_thunderx2.S: New file.
* sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_THUNDERX2PA): New macro.
  (IS_THUNDERX2): New macro.
This commit is contained in:
parent
da81ae645d
commit
e9537dddc7
17
ChangeLog
17
ChangeLog
@ -1,3 +1,20 @@
|
||||
2018-02-22 Steve Ellcey <sellcey@cavium.com>
|
||||
|
||||
* sysdeps/aarch64/multiarch/Makefile (sysdep_routines):
|
||||
Add memcpy_thunderx2.
|
||||
* sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC):
|
||||
Increment to 4.
|
||||
(__libc_ifunc_impl_list): Add __memcpy_thunderx2.
|
||||
* sysdeps/aarch64/multiarch/memcpy.c (libc_ifunc): Add IS_THUNDERX2
|
||||
and IS_THUNDERX2PA checks.
|
||||
* sysdeps/aarch64/multiarch/memcpy_thunderx.S (USE_THUNDERX2):
|
||||
Use macro to set name appropriately.
|
||||
(memcpy): Use USE_THUNDERX2 macro to modify prefetches.
|
||||
* sysdeps/aarch64/multiarch/memcpy_thunderx2.S: New file.
|
||||
* sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_THUNDERX2PA):
|
||||
New macro.
|
||||
(IS_THUNDERX2): New macro.
|
||||
|
||||
2018-02-22 Stefan Liebler <stli@linux.vnet.ibm.com>
|
||||
|
||||
* sysdeps/s390/fpu/libm-test-ulps: Regenerated.
|
||||
|
@ -1,4 +1,4 @@
|
||||
ifeq ($(subdir),string)
|
||||
sysdep_routines += memcpy_generic memcpy_thunderx memcpy_falkor \
|
||||
memmove_falkor memset_generic memset_falkor
|
||||
sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \
|
||||
memcpy_falkor memmove_falkor memset_generic memset_falkor
|
||||
endif
|
||||
|
@ -25,7 +25,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
/* Maximum number of IFUNC implementations. */
|
||||
#define MAX_IFUNC 3
|
||||
#define MAX_IFUNC 4
|
||||
|
||||
size_t
|
||||
__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
@ -40,6 +40,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
/* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c. */
|
||||
IFUNC_IMPL (i, name, memcpy,
|
||||
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx)
|
||||
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2)
|
||||
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor)
|
||||
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
|
||||
IFUNC_IMPL (i, name, memmove,
|
||||
|
@ -30,6 +30,7 @@ extern __typeof (__redirect_memcpy) __libc_memcpy;
|
||||
|
||||
extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
|
||||
extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
|
||||
extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden;
|
||||
extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
|
||||
|
||||
libc_ifunc (__libc_memcpy,
|
||||
@ -37,7 +38,9 @@ libc_ifunc (__libc_memcpy,
|
||||
? __memcpy_thunderx
|
||||
: (IS_FALKOR (midr)
|
||||
? __memcpy_falkor
|
||||
: __memcpy_generic)));
|
||||
: (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
|
||||
? __memcpy_thunderx2
|
||||
: __memcpy_generic))));
|
||||
|
||||
# undef memcpy
|
||||
strong_alias (__libc_memcpy, memcpy);
|
||||
|
@ -74,11 +74,13 @@
|
||||
|
||||
#if IS_IN (libc)
|
||||
|
||||
# undef MEMCPY
|
||||
# define MEMCPY __memcpy_thunderx
|
||||
# undef MEMMOVE
|
||||
# define MEMMOVE __memmove_thunderx
|
||||
# define USE_THUNDERX
|
||||
# ifndef USE_THUNDERX2
|
||||
# undef MEMCPY
|
||||
# define MEMCPY __memcpy_thunderx
|
||||
# undef MEMMOVE
|
||||
# define MEMMOVE __memmove_thunderx
|
||||
# define USE_THUNDERX
|
||||
# endif
|
||||
|
||||
ENTRY_ALIGN (MEMMOVE, 6)
|
||||
|
||||
@ -180,7 +182,7 @@ L(copy96):
|
||||
.p2align 4
|
||||
L(copy_long):
|
||||
|
||||
# ifdef USE_THUNDERX
|
||||
# if defined(USE_THUNDERX) || defined (USE_THUNDERX2)
|
||||
|
||||
/* On thunderx, large memcpy's are helped by software prefetching.
|
||||
This loop is identical to the one below it but with prefetching
|
||||
@ -194,7 +196,11 @@ L(copy_long):
|
||||
bic dst, dstin, 15
|
||||
ldp D_l, D_h, [src]
|
||||
sub src, src, tmp1
|
||||
# if defined(USE_THUNDERX)
|
||||
prfm pldl1strm, [src, 384]
|
||||
# elif defined(USE_THUNDERX2)
|
||||
prfm pldl1strm, [src, 256]
|
||||
# endif
|
||||
add count, count, tmp1 /* Count is now 16 too large. */
|
||||
ldp A_l, A_h, [src, 16]
|
||||
stp D_l, D_h, [dstin]
|
||||
@ -204,9 +210,13 @@ L(copy_long):
|
||||
subs count, count, 128 + 16 /* Test and readjust count. */
|
||||
|
||||
L(prefetch_loop64):
|
||||
# if defined(USE_THUNDERX)
|
||||
tbz src, #6, 1f
|
||||
prfm pldl1strm, [src, 512]
|
||||
1:
|
||||
# elif defined(USE_THUNDERX2)
|
||||
prfm pldl1strm, [src, 256]
|
||||
# endif
|
||||
stp A_l, A_h, [dst, 16]
|
||||
ldp A_l, A_h, [src, 16]
|
||||
stp B_l, B_h, [dst, 32]
|
||||
|
27
sysdeps/aarch64/multiarch/memcpy_thunderx2.S
Normal file
27
sysdeps/aarch64/multiarch/memcpy_thunderx2.S
Normal file
@ -0,0 +1,27 @@
|
||||
/* A ThunderX2-optimized memcpy implementation for AArch64.
|
||||
Copyright (C) 2018 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* The actual code in this memcpy and memmove is in memcpy_thunderx.S.
|
||||
The only real differences are with the prefetching instructions. */
|
||||
|
||||
#define MEMCPY __memcpy_thunderx2
|
||||
#define MEMMOVE __memmove_thunderx2
|
||||
#define USE_THUNDERX2
|
||||
|
||||
#include "memcpy_thunderx.S"
|
@ -41,6 +41,11 @@
|
||||
#define IS_THUNDERX(midr) (MIDR_IMPLEMENTOR(midr) == 'C' \
|
||||
&& MIDR_PARTNUM(midr) == 0x0a1)
|
||||
|
||||
#define IS_THUNDERX2PA(midr) (MIDR_IMPLEMENTOR(midr) == 'B' \
|
||||
&& MIDR_PARTNUM(midr) == 0x516)
|
||||
#define IS_THUNDERX2(midr) (MIDR_IMPLEMENTOR(midr) == 'C' \
|
||||
&& MIDR_PARTNUM(midr) == 0xaf)
|
||||
|
||||
#define IS_FALKOR(midr) (MIDR_IMPLEMENTOR(midr) == 'Q' \
|
||||
&& MIDR_PARTNUM(midr) == 0xc00)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user