mirror of
git://sourceware.org/git/glibc.git
synced 2024-12-21 04:31:04 +08:00
7e4ba49cd3
This patch enables SSE2 memset for AMD's upcoming Orochi processor. It also fixes the following bug: when multiarch is enabled, for misaligned blocks larger than 144 bytes, memset branches into the integer code path depending on the value of the misalignment, even if the startup code chose the SSE2 code path up front.
138 lines
3.9 KiB
C
138 lines
3.9 KiB
C
/* Initialize CPU feature data.
|
|
This file is part of the GNU C Library.
|
|
Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
|
Contributed by Ulrich Drepper <drepper@redhat.com>.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, write to the Free
|
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307 USA. */
|
|
|
|
#include <atomic.h>
|
|
#include <cpuid.h>
|
|
#include "init-arch.h"
|
|
|
|
|
|
/* Global CPU feature data.  It is filled in by __init_cpu_features and
   a pointer to it is returned by __get_cpu_features.  */
struct cpu_features __cpu_features attribute_hidden;
|
|
|
|
|
|
static void
|
|
get_common_indeces (unsigned int *family, unsigned int *model)
|
|
{
|
|
__cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax,
|
|
__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
|
|
__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
|
|
__cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
|
|
|
|
unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
|
|
*family = (eax >> 8) & 0x0f;
|
|
*model = (eax >> 4) & 0x0f;
|
|
}
|
|
|
|
|
|
void
|
|
__init_cpu_features (void)
|
|
{
|
|
unsigned int ebx;
|
|
unsigned int ecx;
|
|
unsigned int edx;
|
|
unsigned int family = 0;
|
|
unsigned int model = 0;
|
|
enum cpu_features_kind kind;
|
|
|
|
__cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx);
|
|
|
|
/* This spells out "GenuineIntel". */
|
|
if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
|
|
{
|
|
kind = arch_kind_intel;
|
|
|
|
get_common_indeces (&family, &model);
|
|
|
|
/* Intel processors prefer SSE instruction for memory/string
|
|
routines if they are available. */
|
|
__cpu_features.feature[index_Prefer_SSE_for_memop]
|
|
|= bit_Prefer_SSE_for_memop;
|
|
|
|
unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
|
|
unsigned int extended_family = (eax >> 20) & 0xff;
|
|
unsigned int extended_model = (eax >> 12) & 0xf0;
|
|
if (family == 0x0f)
|
|
{
|
|
family += extended_family;
|
|
model += extended_model;
|
|
}
|
|
else if (family == 0x06)
|
|
{
|
|
model += extended_model;
|
|
switch (model)
|
|
{
|
|
case 0x1c:
|
|
case 0x26:
|
|
/* BSF is slow on Atom. */
|
|
__cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
|
|
break;
|
|
|
|
case 0x1a:
|
|
case 0x1e:
|
|
case 0x1f:
|
|
case 0x25:
|
|
case 0x2c:
|
|
case 0x2e:
|
|
case 0x2f:
|
|
/* Rep string instructions and copy backward are fast on
|
|
Intel Core i3, i5 and i7. */
|
|
#if index_Fast_Rep_String != index_Fast_Copy_Backward
|
|
# error index_Fast_Rep_String != index_Fast_Copy_Backward
|
|
#endif
|
|
__cpu_features.feature[index_Fast_Rep_String]
|
|
|= bit_Fast_Rep_String | bit_Fast_Copy_Backward;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
/* This spells out "AuthenticAMD". */
|
|
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
|
|
{
|
|
kind = arch_kind_amd;
|
|
|
|
get_common_indeces (&family, &model);
|
|
|
|
unsigned int ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
|
|
|
|
/* AMD processors prefer SSE instructions for memory/string routines
|
|
if they are available, otherwise they prefer integer instructions. */
|
|
if ((ecx & 0x200))
|
|
__cpu_features.feature[index_Prefer_SSE_for_memop]
|
|
|= bit_Prefer_SSE_for_memop;
|
|
}
|
|
else
|
|
kind = arch_kind_other;
|
|
|
|
__cpu_features.family = family;
|
|
__cpu_features.model = model;
|
|
atomic_write_barrier ();
|
|
__cpu_features.kind = kind;
|
|
}
|
|
|
|
#undef __get_cpu_features
|
|
|
|
const struct cpu_features *
|
|
__get_cpu_features (void)
|
|
{
|
|
if (__cpu_features.kind == arch_kind_unknown)
|
|
__init_cpu_features ();
|
|
|
|
return &__cpu_features;
|
|
}
|