mirror of
git://sourceware.org/git/glibc.git
synced 2025-01-06 12:00:24 +08:00
2193311288
Here is implementation of cos containing SSE, AVX, AVX2 and AVX512 versions according to Vector ABI which had been discussed in <https://groups.google.com/forum/#!topic/x86-64-abi/LmppCfN1rZ4>. Vector math library build and ABI testing enabled by default for x86_64. * sysdeps/x86_64/fpu/Makefile: New file. * sysdeps/x86_64/fpu/Versions: New file. * sysdeps/x86_64/fpu/svml_d_cos_data.S: New file. * sysdeps/x86_64/fpu/svml_d_cos_data.h: New file. * sysdeps/x86_64/fpu/svml_d_cos2_core.S: New file. * sysdeps/x86_64/fpu/svml_d_cos4_core.S: New file. * sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S: New file. * sysdeps/x86_64/fpu/svml_d_cos8_core.S: New file. * sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S: New file. * sysdeps/x86_64/fpu/multiarch/Makefile (libmvec-sysdep_routines): Added build of SSE, AVX2 and AVX512 IFUNC versions. * sysdeps/x86/fpu/bits/math-vector.h: Added SIMD declaration for cos. * math/bits/mathcalls.h: Added cos declaration with __MATHCALL_VEC. * sysdeps/x86_64/configure.ac: Options for libmvec build. * sysdeps/x86_64/configure: Regenerated. * sysdeps/x86_64/sysdep.h (cfi_offset_rel_rsp): New macro. * sysdeps/unix/sysv/linux/x86_64/libmvec.abilist: New file. * manual/install.texi (Configuring and compiling): Document --disable-mathvec. * INSTALL: Regenerated. * NEWS: Mention addition of libmvec and x86_64 vector cos.
165 lines
4.8 KiB
C
165 lines
4.8 KiB
C
/* Assembler macros for x86-64.
|
|
Copyright (C) 2001-2015 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef _X86_64_SYSDEP_H
|
|
#define _X86_64_SYSDEP_H 1
|
|
|
|
#include <sysdeps/generic/sysdep.h>
|
|
|
|
#ifdef __ASSEMBLER__
|
|
|
|
/* Syntactic details of assembler. */
|
|
|
|
/* Emit CFI stating that register REGN is saved OFF bytes above %rsp,
   instead of at an offset from the CFA.

   The raw bytes handed to .cfi_escape are:
     0x10   DW_CFA_expression
     REGN   DWARF number of the saved register
     0x4    length of the expression that follows
     0x13   DW_OP_drop
     0x77   DW_OP_breg7 (%rsp is DWARF register 7)
     OFF    as a two-byte sleb128: low seven bits with the continuation
	    bit set, then the remaining bits.
   Since exactly two LEB128 bytes are emitted, OFF has to be in the range
   0 .. 0x1fff for the encoding to be well formed.

   OFF is parenthesized on every use: the assembler gives `&' and `>>'
   a higher precedence than `+' and `-', so without the parentheses an
   argument such as `a + b' would be masked and shifted only in part.  */
#define cfi_offset_rel_rsp(regn, off)	.cfi_escape 0x10, regn, 0x4, 0x13, \
					0x77, ((off) & 0x7F) | 0x80, (off) >> 7
|
|
|
|
/* ELF uses byte-counts for .align, most others use log2 of count of bytes.
   ALIGNARG turns a log2 value into the byte count wanted here.  Both the
   argument and the whole expansion are parenthesized so the result stays
   a single operand when it is used inside a larger expression.  */
#define ALIGNARG(log2) (1 << (log2))
|
|
/* Emit a .size directive giving NAME the size `. - NAME', i.e. the distance
   from the label NAME to the current location.  The expansion already ends
   in `;', so another statement may follow it on the same line.  */
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
|
|
|
|
|
|
/* Define an entry point visible from C.

   In order, the expansion
     - declares the C-level symbol for NAME global and of type @function,
     - aligns the location counter to ALIGNARG(4) bytes,
     - places C_LABEL(name) (defined elsewhere; presumably the label itself),
     - opens a CFI region, and
     - runs CALL_MCOUNT, which is empty unless PROF is defined.  */
#define ENTRY(name)						\
  .globl C_SYMBOL_NAME(name);					\
  .type C_SYMBOL_NAME(name),@function;				\
  .align ALIGNARG(4);						\
  C_LABEL(name)							\
  cfi_startproc;						\
  CALL_MCOUNT
|
|
|
|
/* Counterpart of ENTRY: close the CFI region and record the size of NAME.
   Any definition of END made earlier is dropped first.  */
#undef END
#define END(name)						\
  cfi_endproc;							\
  ASM_SIZE_DIRECTIVE(name)
|
|
|
|
/* ENTRY_CHK and END_CHK are plain aliases of ENTRY and END here.  */
#define ENTRY_CHK(name)	ENTRY (name)
#define END_CHK(name)	END (name)
|
|
|
|
/* CALL_MCOUNT is expanded at the start of every ENTRY.  In a profiling
   build (PROF defined) it calls `mcount'; otherwise it expands to nothing.  */
#ifdef PROF
/* mcount finds our caller through an ordinary frame pointer on the stack,
   so one is pushed just for the duration of the call.  The CFI is moved to
   %rbp while the frame is up and put back to %rsp + 8 once it is popped.  */
# define CALL_MCOUNT						\
  pushq %rbp;							\
  cfi_adjust_cfa_offset(8);					\
  movq %rsp, %rbp;						\
  cfi_def_cfa_register(%rbp);					\
  call JUMPTARGET(mcount);					\
  popq %rbp;							\
  cfi_def_cfa(rsp,8);
#else
# define CALL_MCOUNT		/* Not profiling: nothing to emit.  */
#endif
|
|
|
|
/* On this system C identifiers carry no leading underscore, so an assembler
   symbol called `syscall_error' would land in the C name space.  Redirect it
   (and `mcount') to names with leading underscores instead.  */
#define syscall_error	__syscall_error
#define mcount		_mcount
|
|
|
|
/* Define a system call stub called NAME.

   The failure path is laid out first: the label `lose' jumps to
   syscall_error, which is also declared global.  Then comes the entry
   point itself, which runs DO_CALL (syscall_name, args) and branches back
   to `lose' when the carry flag is set afterwards.  DO_CALL is defined
   elsewhere; that it reports failure through the carry flag is assumed
   from the `jb' -- confirm against its definition.

   Each assembler statement is terminated with `;'.  The `jmp' in
   particular needs it: without the separator the `.globl' that follows
   would be parsed as part of the jump operand.  */
#define PSEUDO(name, syscall_name, args)			\
lose:								\
  jmp JUMPTARGET(syscall_error);				\
  .globl syscall_error;						\
  ENTRY (name)							\
  DO_CALL (syscall_name, args);					\
  jb lose
|
|
|
|
/* Close a stub opened with PSEUDO; this is simply END.  Any definition of
   PSEUDO_END made earlier is dropped first.  */
#undef PSEUDO_END
#define PSEUDO_END(name)	END (name)
|
|
|
|
/* JUMPTARGET(name) is the operand to use for a call or jump to NAME.
   When PIC is defined it is NAME with `@PLT' appended; otherwise it is
   NAME unchanged.  Any definition made earlier is replaced.  */
#undef JUMPTARGET
#ifdef PIC
# define JUMPTARGET(name)	name##@PLT
#else
# define JUMPTARGET(name)	name
#endif
|
|
|
|
/* L(name) spells a local label for use in asm code.  A definition of L
   that is already in effect is left alone.  */
#ifndef L
/* ELF-like local names begin with `.L'.  */
# define L(name)	.L##name
#endif
|
|
|
|
/* Directive that switches to the section named ".text.atom", created with
   the section flags "ax".  */
#define atom_text_section .section ".text.atom", "ax"
|
|
|
|
/* Long/pointer abstraction, assembler flavour: bare tokens that can be
   pasted straight into instructions and directives.  */
#define LP_SIZE		8		/* Size of long and pointer, in bytes.  */
#define LP_OP(insn)	insn##q		/* INSN with the `q' suffix appended.  */
#define ASM_ADDR	.quad		/* Directive that emits an address.  */
|
|
|
|
/* Names of the registers used to hold a long or a pointer, as bare
   tokens: each *_LP macro is the plain 64-bit register name.  */
#define RAX_LP	rax
#define RBP_LP	rbp
#define RBX_LP	rbx
#define RCX_LP	rcx
#define RDI_LP	rdi
#define RDX_LP	rdx
#define RSI_LP	rsi
#define RSP_LP	rsp
#define R8_LP	r8
#define R9_LP	r9
#define R10_LP	r10
#define R11_LP	r11
#define R12_LP	r12
#define R13_LP	r13
#define R14_LP	r14
#define R15_LP	r15
|
|
|
|
#else /* __ASSEMBLER__ */
|
|
|
|
/* Long/pointer abstraction, C flavour: the same items as in the assembler
   branch, but as string literals that can be concatenated with other
   string literals.  */
#define LP_SIZE		"8"		/* Size of long and pointer, in bytes.  */
#define LP_OP(insn)	#insn "q"	/* INSN as a string, `q' appended.  */
#define ASM_ADDR	".quad"		/* Directive that emits an address.  */
|
|
|
|
/* Names of the registers used to hold a long or a pointer, as string
   literals: each *_LP macro is the plain 64-bit register name.  */
#define RAX_LP	"rax"
#define RBP_LP	"rbp"
#define RBX_LP	"rbx"
#define RCX_LP	"rcx"
#define RDI_LP	"rdi"
#define RDX_LP	"rdx"
#define RSI_LP	"rsi"
#define RSP_LP	"rsp"
#define R8_LP	"r8"
#define R9_LP	"r9"
#define R10_LP	"r10"
#define R11_LP	"r11"
#define R12_LP	"r12"
#define R13_LP	"r13"
#define R14_LP	"r14"
#define R15_LP	"r15"
|
|
|
|
#endif /* __ASSEMBLER__ */
|
|
|
|
#endif /* _X86_64_SYSDEP_H */
|