diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 675ae9458111..a16fb6a82d1a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2008-10-11 Uros Bizjak + Andi Kleen + + * config/i386/cpuid.h (__cpuid_count): New defines. + * config/i386/driver-i386.c (struct cache_desc): New structure. + (describe_cache): Use struct cache_desc to pass cache descriptions. + (detect_l2_cache): Ditto. Rename from decode_l2_cache. + (detect_caches_amd): Use struct cache_desc to describe caches. + (decode_caches_intel): Use struct cache_desc to pass cache + descriptions. Update descriptions to match latest (rev -032, + December 2007) CPUID documentation. Do not check valid bit here. + Check for Xeon MP value 0x49 problems. + (detect_caches_cpuid2): New function, split from detect_caches_intel. + Check valid bit before calling decode_caches_intel. Detect number + of times to repeat CPUID instruction. + (detect_caches_cpuid4): New function. + (detect_caches_intel): Depending on max_level, call + detect_caches_cpuid2 or detect_caches_cpuid4. Call detect_l2_cache + only when other methods fail to provide valid L2 cache description. + 2008-10-11 John David Anglin PR middle-end/37608 @@ -13,10 +33,10 @@ 2008-10-11 David Edelsohn - * config/rs6000/rs6000.md (aux_truncdfsf2): Remove TARGET_SINGLE_FLOAT. - (addsf3, subsf3, mulsf3 ! TARGET_POWERPC): Remove TARGET_SINGLE_FLOAT - and fp_type. - (divdf3): Reformat long line. + * config/rs6000/rs6000.md (aux_truncdfsf2): Remove TARGET_SINGLE_FLOAT. + (addsf3, subsf3, mulsf3 ! TARGET_POWERPC): Remove TARGET_SINGLE_FLOAT + and fp_type. + (divdf3): Reformat long line. 2008-10-11 Michael J. Eager @@ -25,8 +45,7 @@ (rs6000_handle_option): Process -mfpu options. * config/rs6000/rs6000.h: (TARGET_XILINX_FPU): New. (enum fpu_type_t): New. - * config/rs6000/rs6000.md (attr fp_type): New. - Include xfpu.md. + * config/rs6000/rs6000.md (attr fp_type): New. Include xfpu.md. (addsf3, subsf3, mulsf3, adddf3, subdf3, muldf3, trunctfdf2): Set fp_type. (floatsisf2): Remove TARGET_SINGLE_FPU condition. diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 90a28137afb8..69d25a2fe1d0 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -74,6 +74,13 @@ "xchg{l}\t{%%}ebx, %1\n\t" \ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ : "0" (level)) + +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchg{l}\t{%%}ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) #else /* Host GCCs older than 3.0 weren't supporting Intel asm syntax nor alternatives in i386 code. */ @@ -83,12 +90,24 @@ "xchgl\t%%ebx, %1\n\t" \ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ : "0" (level)) + +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("xchgl\t%%ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchgl\t%%ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) #endif #else #define __cpuid(level, a, b, c, d) \ __asm__ ("cpuid\n\t" \ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ : "0" (level)) + +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("cpuid\n\t" \ + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) #endif /* Return highest supported input value for cpuid instruction. ext can diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 013fe93b631f..72e2c48d385b 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -1,5 +1,5 @@ /* Subroutines for the gcc driver. - Copyright (C) 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GCC. @@ -28,34 +28,45 @@ const char *host_detect_local_cpu (int argc, const char **argv); #ifdef __GNUC__ #include "cpuid.h" -/* Returns parameters that describe L1_ASSOC associative cache of size - L1_SIZEKB with lines of size L1_LINE. */ +struct cache_desc +{ + unsigned sizekb; + unsigned assoc; + unsigned line; +}; + +/* Returns command line parameters that describe size and + cache line size of the processor caches. */ static char * -describe_cache (unsigned l1_sizekb, unsigned l1_line, - unsigned l1_assoc ATTRIBUTE_UNUSED, unsigned l2_sizekb) +describe_cache (struct cache_desc level1, struct cache_desc level2) { char size[100], line[100], size2[100]; - /* At the moment, gcc middle-end does not use the information about the - associativity of the cache. */ + /* At the moment, gcc does not use the information + about the associativity of the cache. */ - sprintf (size, "--param l1-cache-size=%u", l1_sizekb); - sprintf (line, "--param l1-cache-line-size=%u", l1_line); - sprintf (size2, "--param l2-cache-size=%u", l2_sizekb); + sprintf (size, "--param l1-cache-size=%u", level1.sizekb); + sprintf (line, "--param l1-cache-line-size=%u", level1.line); + + sprintf (size2, "--param l2-cache-size=%u", level2.sizekb); return concat (size, " ", line, " ", size2, " ", NULL); } +/* Detect L2 cache parameters using CPUID extended function 0x80000006. */ + static void -decode_l2_cache (unsigned *l2_size, unsigned *l2_line, unsigned *l2_assoc) +detect_l2_cache (struct cache_desc *level2) { - unsigned eax, ebx, ecx, edx, assoc; + unsigned eax, ebx, ecx, edx; + unsigned assoc; __cpuid (0x80000006, eax, ebx, ecx, edx); - *l2_size = (ecx >> 16) & 0xffff; - *l2_line = ecx & 0xff; + level2->sizekb = (ecx >> 16) & 0xffff; + level2->line = ecx & 0xff; + assoc = (ecx >> 12) & 0xf; if (assoc == 6) assoc = 8; @@ -65,7 +76,8 @@ decode_l2_cache (unsigned *l2_size, unsigned *l2_line, unsigned *l2_assoc) assoc = 32 + (assoc - 0xa) * 16; else if (assoc >= 0xd && assoc <= 0xe) assoc = 96 + (assoc - 0xd) * 32; - *l2_assoc = assoc; + + level2->assoc = assoc; } /* Returns the description of caches for an AMD processor. */ @@ -74,243 +86,261 @@ static const char * detect_caches_amd (unsigned max_ext_level) { unsigned eax, ebx, ecx, edx; - unsigned l1_sizekb, l1_line, l1_assoc; - unsigned l2_sizekb = 0, l2_line = 0, l2_assoc = 0; + + struct cache_desc level1, level2 = {0, 0, 0}; if (max_ext_level < 0x80000005) return ""; __cpuid (0x80000005, eax, ebx, ecx, edx); - l1_line = ecx & 0xff; - l1_sizekb = (ecx >> 24) & 0xff; - l1_assoc = (ecx >> 16) & 0xff; + level1.sizekb = (ecx >> 24) & 0xff; + level1.assoc = (ecx >> 16) & 0xff; + level1.line = ecx & 0xff; if (max_ext_level >= 0x80000006) - decode_l2_cache (&l2_sizekb, &l2_line, &l2_assoc); + detect_l2_cache (&level2); - return describe_cache (l1_sizekb, l1_line, l1_assoc, l2_sizekb); + return describe_cache (level1, level2); } -/* Stores the size of the L1/2 cache and cache line, and the associativity - of the cache according to REG to L1_SIZEKB, L1_LINE, L1_ASSOC and - L2_SIZEKB. */ +/* Decodes the size, the associativity and the cache line size of + L1/L2 caches of an Intel processor. Values are based on + "Intel Processor Identification and the CPUID Instruction" + [Application Note 485], revision -032, December 2007. */ static void -decode_caches_intel (unsigned reg, unsigned *l1_sizekb, unsigned *l1_line, - unsigned *l1_assoc, unsigned *l2_sizekb, - unsigned *l2_line, unsigned *l2_assoc) +decode_caches_intel (unsigned reg, bool xeon_mp, + struct cache_desc *level1, struct cache_desc *level2) { - unsigned i, val; + int i; - if (((reg >> 31) & 1) != 0) - return; + for (i = 24; i >= 0; i -= 8) + switch ((reg >> i) & 0xff) + { + case 0x0a: + level1->sizekb = 8; level1->assoc = 2; level1->line = 32; + break; + case 0x0c: + level1->sizekb = 16; level1->assoc = 4; level1->line = 32; + break; + case 0x2c: + level1->sizekb = 32; level1->assoc = 8; level1->line = 64; + break; + case 0x39: + level2->sizekb = 128; level2->assoc = 4; level2->line = 64; + break; + case 0x3a: + level2->sizekb = 192; level2->assoc = 6; level2->line = 64; + break; + case 0x3b: + level2->sizekb = 128; level2->assoc = 2; level2->line = 64; + break; + case 0x3c: + level2->sizekb = 256; level2->assoc = 4; level2->line = 64; + break; + case 0x3d: + level2->sizekb = 384; level2->assoc = 6; level2->line = 64; + break; + case 0x3e: + level2->sizekb = 512; level2->assoc = 4; level2->line = 64; + break; + case 0x41: + level2->sizekb = 128; level2->assoc = 4; level2->line = 32; + break; + case 0x42: + level2->sizekb = 256; level2->assoc = 4; level2->line = 32; + break; + case 0x43: + level2->sizekb = 512; level2->assoc = 4; level2->line = 32; + break; + case 0x44: + level2->sizekb = 1024; level2->assoc = 4; level2->line = 32; + break; + case 0x45: + level2->sizekb = 2048; level2->assoc = 4; level2->line = 32; + break; + case 0x49: + if (xeon_mp) + break; + level2->sizekb = 4096; level2->assoc = 16; level2->line = 64; + break; + case 0x4e: + level2->sizekb = 6144; level2->assoc = 24; level2->line = 64; + break; + case 0x60: + level1->sizekb = 16; level1->assoc = 8; level1->line = 64; + break; + case 0x66: + level1->sizekb = 8; level1->assoc = 4; level1->line = 64; + break; + case 0x67: + level1->sizekb = 16; level1->assoc = 4; level1->line = 64; + break; + case 0x68: + level1->sizekb = 32; level1->assoc = 4; level1->line = 64; + break; + case 0x78: + level2->sizekb = 1024; level2->assoc = 4; level2->line = 64; + break; + case 0x79: + level2->sizekb = 128; level2->assoc = 8; level2->line = 64; + break; + case 0x7a: + level2->sizekb = 256; level2->assoc = 8; level2->line = 64; + break; + case 0x7b: + level2->sizekb = 512; level2->assoc = 8; level2->line = 64; + break; + case 0x7c: + level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; + break; + case 0x7d: + level2->sizekb = 2048; level2->assoc = 8; level2->line = 64; + break; + case 0x7f: + level2->sizekb = 512; level2->assoc = 2; level2->line = 64; + break; + case 0x82: + level2->sizekb = 256; level2->assoc = 8; level2->line = 32; + break; + case 0x83: + level2->sizekb = 512; level2->assoc = 8; level2->line = 32; + break; + case 0x84: + level2->sizekb = 1024; level2->assoc = 8; level2->line = 32; + break; + case 0x85: + level2->sizekb = 2048; level2->assoc = 8; level2->line = 32; + break; + case 0x86: + level2->sizekb = 512; level2->assoc = 4; level2->line = 64; + break; + case 0x87: + level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; - for (i = 0; i < 4; i++) + default: + break; + } +} + +/* Detect cache parameters using CPUID function 2. */ + +static void +detect_caches_cpuid2 (bool xeon_mp, + struct cache_desc *level1, struct cache_desc *level2) +{ + unsigned eax, ebx, ecx, edx; + int nreps; + + __cpuid (2, eax, ebx, ecx, edx); + + nreps = eax & 0x0f; + eax &= ~0x0f; + + while (--nreps >= 0) { - val = reg & 0xff; - reg >>= 8; + if (!((eax >> 31) & 1)) + decode_caches_intel (eax, xeon_mp, level1, level2); + if (!((ebx >> 31) & 1)) + decode_caches_intel (ebx, xeon_mp, level1, level2); + if (!((ecx >> 31) & 1)) + decode_caches_intel (ecx, xeon_mp, level1, level2); + if (!((edx >> 31) & 1)) + decode_caches_intel (edx, xeon_mp, level1, level2); - switch (val) + if (nreps) + __cpuid (2, eax, ebx, ecx, edx); + } +} + +/* Detect cache parameters using CPUID function 4. This + method doesn't require hardcoded tables. */ + +enum cache_type +{ + CACHE_END = 0, + CACHE_DATA = 1, + CACHE_INST = 2, + CACHE_UNIFIED = 3 +}; + +static void +detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2) +{ + struct cache_desc *cache; + + unsigned eax, ebx, ecx, edx; + int count; + + for (count = 0;; count++) + { + __cpuid_count(4, count, eax, ebx, ecx, edx); + switch (eax & 0x1f) { - case 0xa: - *l1_sizekb = 8; - *l1_line = 32; - *l1_assoc = 2; - break; - case 0xc: - *l1_sizekb = 16; - *l1_line = 32; - *l1_assoc = 4; - break; - case 0x2c: - *l1_sizekb = 32; - *l1_line = 64; - *l1_assoc = 8; - break; - case 0x39: - *l2_sizekb = 128; - *l2_line = 64; - *l2_assoc = 4; - break; - case 0x3a: - *l2_sizekb = 192; - *l2_line = 64; - *l2_assoc = 6; - break; - case 0x3b: - *l2_sizekb = 128; - *l2_line = 64; - *l2_assoc = 2; - break; - case 0x3c: - *l2_sizekb = 256; - *l2_line = 64; - *l2_assoc = 4; - break; - case 0x3d: - *l2_sizekb = 384; - *l2_line = 64; - *l2_assoc = 6; - break; - case 0x3e: - *l2_sizekb = 512; - *l2_line = 64; - *l2_assoc = 4; - break; - case 0x41: - *l2_sizekb = 128; - *l2_line = 32; - *l2_assoc = 4; - break; - case 0x42: - *l2_sizekb = 256; - *l2_line = 32; - *l2_assoc = 4; - break; - case 0x43: - *l2_sizekb = 512; - *l2_line = 32; - *l2_assoc = 4; - break; - case 0x44: - *l2_sizekb = 1024; - *l2_line = 32; - *l2_assoc = 4; - break; - case 0x45: - *l2_sizekb = 2048; - *l2_line = 32; - *l2_assoc = 4; - break; - case 0x49: - *l2_sizekb = 4096; - *l2_line = 64; - *l2_assoc = 16; - break; - case 0x60: - *l1_sizekb = 16; - *l1_line = 64; - *l1_assoc = 8; - break; - case 0x66: - *l1_sizekb = 8; - *l1_line = 64; - *l1_assoc = 4; - break; - case 0x67: - *l1_sizekb = 16; - *l1_line = 64; - *l1_assoc = 4; - break; - case 0x68: - *l1_sizekb = 32; - *l1_line = 64; - *l1_assoc = 4; - break; - case 0x78: - *l2_sizekb = 1024; - *l2_line = 64; - *l2_assoc = 4; - break; - case 0x79: - *l2_sizekb = 128; - *l2_line = 64; - *l2_assoc = 8; - break; - case 0x7a: - *l2_sizekb = 256; - *l2_line = 64; - *l2_assoc = 8; - break; - case 0x7b: - *l2_sizekb = 512; - *l2_line = 64; - *l2_assoc = 8; - break; - case 0x7c: - *l2_sizekb = 1024; - *l2_line = 64; - *l2_assoc = 8; - break; - case 0x7d: - *l2_sizekb = 2048; - *l2_line = 64; - *l2_assoc = 8; - break; - case 0x7f: - *l2_sizekb = 512; - *l2_line = 64; - *l2_assoc = 2; - break; - case 0x82: - *l2_sizekb = 256; - *l2_line = 32; - *l2_assoc = 8; - break; - case 0x83: - *l2_sizekb = 512; - *l2_line = 32; - *l2_assoc = 8; - break; - case 0x84: - *l2_sizekb = 1024; - *l2_line = 32; - *l2_assoc = 8; - break; - case 0x85: - *l2_sizekb = 2048; - *l2_line = 32; - *l2_assoc = 8; - break; - case 0x86: - *l2_sizekb = 512; - *l2_line = 64; - *l2_assoc = 4; - break; - case 0x87: - *l2_sizekb = 1024; - *l2_line = 64; - *l2_assoc = 8; - break; + case CACHE_END: + return; + case CACHE_DATA: + case CACHE_UNIFIED: + { + switch ((eax >> 5) & 0x07) + { + case 1: + cache = level1; + break; + case 2: + cache = level2; + break; + default: + cache = NULL; + } + if (cache) + { + unsigned sets = ecx + 1; + unsigned part; + + cache->line = (ebx & 0x0fff) + 1; + ebx >>= 12; + + part = (ebx & 0x03ff) + 1; + ebx >>= 10; + + cache->assoc = (ebx & 0x03ff) + 1; + + cache->sizekb = (cache->assoc * part + * cache->line * sets) / 1024; + } + } default: break; } } } -/* Returns the description of caches for an intel processor. */ +/* Returns the description of caches for an Intel processor. */ static const char * -detect_caches_intel (unsigned max_level, unsigned max_ext_level) +detect_caches_intel (bool xeon_mp, unsigned max_level, unsigned max_ext_level) { - unsigned eax, ebx, ecx, edx; - unsigned l1_sizekb = 0, l1_line = 0, assoc = 0; - unsigned l2_sizekb = 0, l2_line = 0, l2_assoc = 0; + struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}; - if (max_level < 2) + if (max_level >= 4) + detect_caches_cpuid4 (&level1, &level2); + else if (max_level >= 2) + detect_caches_cpuid2 (xeon_mp, &level1, &level2); + else return ""; - __cpuid (2, eax, ebx, ecx, edx); - - decode_caches_intel (eax, &l1_sizekb, &l1_line, &assoc, - &l2_sizekb, &l2_line, &l2_assoc); - decode_caches_intel (ebx, &l1_sizekb, &l1_line, &assoc, - &l2_sizekb, &l2_line, &l2_assoc); - decode_caches_intel (ecx, &l1_sizekb, &l1_line, &assoc, - &l2_sizekb, &l2_line, &l2_assoc); - decode_caches_intel (edx, &l1_sizekb, &l1_line, &assoc, - &l2_sizekb, &l2_line, &l2_assoc); - - if (!l1_sizekb) + if (level1.sizekb == 0) return ""; - /* Newer Intel CPUs are equipped with AMD style L2 cache info */ - if (max_ext_level >= 0x80000006) - decode_l2_cache (&l2_sizekb, &l2_line, &l2_assoc); + /* Intel CPUs are equipped with AMD style L2 cache info. Try this + method if other methods fail to provide L2 cache parameters. */ + if (level2.sizekb == 0 && max_ext_level >= 0x80000006) + detect_l2_cache (&level2); - return describe_cache (l1_sizekb, l1_line, assoc, l2_sizekb); + return describe_cache (level1, level2); } /* This will be called by the spec parser in gcc.c when it sees @@ -334,11 +364,12 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *cache = ""; const char *options = ""; - unsigned int eax, ebx, ecx, edx; + unsigned int eax, ebx, ecx, edx; unsigned int max_level, ext_level; + unsigned int vendor; - unsigned int family; + unsigned int model, family; unsigned int has_sse3, has_ssse3, has_cmpxchg16b; unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; @@ -364,6 +395,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) __cpuid (1, eax, ebx, ecx, edx); /* We don't care for extended family. */ + model = (eax >> 4) & 0x0f; family = (eax >> 8) & 0x0f; has_sse3 = ecx & bit_SSE3; @@ -396,7 +428,10 @@ const char *host_detect_local_cpu (int argc, const char **argv) if (vendor == *(const unsigned int*) "Auth") cache = detect_caches_amd (ext_level); else if (vendor == *(const unsigned int*) "Genu") - cache = detect_caches_intel (max_level, ext_level); + { + bool xeon_mp = (family == 15 && model == 6); + cache = detect_caches_intel (xeon_mp, max_level, ext_level); + } } if (vendor == *(const unsigned int*) "Auth")