mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-18 23:51:08 +08:00
i386.c (dimode_scalar_chain::compute_convert_gain): Use xmm_move instead of sse_move.
* i386.c (dimode_scalar_chain::compute_convert_gain): Use xmm_move instead of sse_move. (sse_store_index): New function. (ix86_register_move_cost): Be more sensible about mismatch stall; model AVX moves correctly; make difference between sse->integer and integer->sse. (ix86_builtin_vectorization_cost): Model correctly aligned and unaligned moves; make difference between SSE and AVX. * i386.h (processor_costs): Remove sse_move; add xmm_move, ymm_move and zmm_move. Increase size of sse load and store tables; add unaligned load and store tables; add ssemmx_to_integer. * x86-tune-costs.h: Update all entries according to real move latencies from Agner Fog's manual and chip documentation. From-SVN: r254012
This commit is contained in:
parent
47a6cc4e29
commit
df41dbaf7e
@ -1,3 +1,19 @@
|
||||
2017-10-23 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
* i386.c (dimode_scalar_chain::compute_convert_gain): Use
|
||||
xmm_move instead of sse_move.
|
||||
(sse_store_index): New function.
|
||||
(ix86_register_move_cost): Be more sensible about mismatch stall;
|
||||
model AVX moves correctly; make difference between sse->integer and
|
||||
integer->sse.
|
||||
(ix86_builtin_vectorization_cost): Model correctly aligned and unaligned
|
||||
moves; make difference between SSE and AVX.
|
||||
* i386.h (processor_costs): Remove sse_move; add xmm_move, ymm_move
|
||||
and zmm_move. Increase size of sse load and store tables;
|
||||
add unaligned load and store tables; add ssemmx_to_integer.
|
||||
* x86-tune-costs.h: Update all entries according to real
|
||||
move latencies from Agner Fog's manual and chip documentation.
|
||||
|
||||
2017-10-23 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/82628
|
||||
|
@ -1601,7 +1601,7 @@ dimode_scalar_chain::compute_convert_gain ()
|
||||
rtx dst = SET_DEST (def_set);
|
||||
|
||||
if (REG_P (src) && REG_P (dst))
|
||||
gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
|
||||
gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
|
||||
else if (REG_P (src) && MEM_P (dst))
|
||||
gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
|
||||
else if (MEM_P (src) && REG_P (dst))
|
||||
@ -38634,6 +38634,28 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return index of MODE in the sse load/store tables. */
|
||||
|
||||
static inline int
|
||||
sse_store_index (machine_mode mode)
|
||||
{
|
||||
switch (GET_MODE_SIZE (mode))
|
||||
{
|
||||
case 4:
|
||||
return 0;
|
||||
case 8:
|
||||
return 1;
|
||||
case 16:
|
||||
return 2;
|
||||
case 32:
|
||||
return 3;
|
||||
case 64:
|
||||
return 4;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the cost of moving data of mode M between a
|
||||
register and memory. A value of 2 is the default; this cost is
|
||||
relative to those in `REGISTER_MOVE_COST'.
|
||||
@ -38677,21 +38699,9 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
|
||||
}
|
||||
if (SSE_CLASS_P (regclass))
|
||||
{
|
||||
int index;
|
||||
switch (GET_MODE_SIZE (mode))
|
||||
{
|
||||
case 4:
|
||||
index = 0;
|
||||
break;
|
||||
case 8:
|
||||
index = 1;
|
||||
break;
|
||||
case 16:
|
||||
index = 2;
|
||||
break;
|
||||
default:
|
||||
return 100;
|
||||
}
|
||||
int index = sse_store_index (mode);
|
||||
if (index == -1)
|
||||
return 100;
|
||||
if (in == 2)
|
||||
return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
|
||||
return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
|
||||
@ -38794,8 +38804,10 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
|
||||
/* In case of copying from general_purpose_register we may emit multiple
|
||||
stores followed by single load causing memory size mismatch stall.
|
||||
Count this as arbitrarily high cost of 20. */
|
||||
if (targetm.class_max_nregs (class1, mode)
|
||||
> targetm.class_max_nregs (class2, mode))
|
||||
if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
|
||||
&& TARGET_MEMORY_MISMATCH_STALL
|
||||
&& targetm.class_max_nregs (class1, mode)
|
||||
> targetm.class_max_nregs (class2, mode))
|
||||
cost += 20;
|
||||
|
||||
/* In the case of FP/MMX moves, the registers actually overlap, and we
|
||||
@ -38817,12 +38829,19 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
|
||||
where integer modes in MMX/SSE registers are not tieable
|
||||
because of missing QImode and HImode moves to, from or between
|
||||
MMX/SSE registers. */
|
||||
return MAX (8, ix86_cost->mmxsse_to_integer);
|
||||
return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
|
||||
? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
|
||||
|
||||
if (MAYBE_FLOAT_CLASS_P (class1))
|
||||
return ix86_cost->fp_move;
|
||||
if (MAYBE_SSE_CLASS_P (class1))
|
||||
return ix86_cost->sse_move;
|
||||
{
|
||||
if (GET_MODE_BITSIZE (mode) <= 128)
|
||||
return ix86_cost->xmm_move;
|
||||
if (GET_MODE_BITSIZE (mode) <= 256)
|
||||
return ix86_cost->ymm_move;
|
||||
return ix86_cost->zmm_move;
|
||||
}
|
||||
if (MAYBE_MMX_CLASS_P (class1))
|
||||
return ix86_cost->mmx_move;
|
||||
return 2;
|
||||
@ -44370,6 +44389,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||
{
|
||||
bool fp = false;
|
||||
machine_mode mode = TImode;
|
||||
int index;
|
||||
if (vectype != NULL)
|
||||
{
|
||||
fp = FLOAT_TYPE_P (vectype);
|
||||
@ -44397,13 +44417,16 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||
true);
|
||||
|
||||
case vector_load:
|
||||
index = sse_store_index (mode);
|
||||
gcc_assert (index >= 0);
|
||||
return ix86_vec_cost (mode,
|
||||
COSTS_N_INSNS (ix86_cost->sse_load[2]) / 2,
|
||||
COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2,
|
||||
true);
|
||||
|
||||
case vector_store:
|
||||
index = sse_store_index (mode);
|
||||
return ix86_vec_cost (mode,
|
||||
COSTS_N_INSNS (ix86_cost->sse_store[2]) / 2,
|
||||
COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2,
|
||||
true);
|
||||
|
||||
case vec_to_scalar:
|
||||
@ -44414,14 +44437,18 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||
Do that incrementally. */
|
||||
case unaligned_load:
|
||||
case vector_gather_load:
|
||||
index = sse_store_index (mode);
|
||||
return ix86_vec_cost (mode,
|
||||
COSTS_N_INSNS (ix86_cost->sse_load[2]),
|
||||
COSTS_N_INSNS
|
||||
(ix86_cost->sse_unaligned_load[index]) / 2,
|
||||
true);
|
||||
|
||||
case unaligned_store:
|
||||
case vector_scatter_store:
|
||||
index = sse_store_index (mode);
|
||||
return ix86_vec_cost (mode,
|
||||
COSTS_N_INSNS (ix86_cost->sse_store[2]),
|
||||
COSTS_N_INSNS
|
||||
(ix86_cost->sse_unaligned_store[index]) / 2,
|
||||
true);
|
||||
|
||||
case cond_branch_taken:
|
||||
|
@ -242,13 +242,17 @@ struct processor_costs {
|
||||
in SImode and DImode */
|
||||
const int mmx_store[2]; /* cost of storing MMX register
|
||||
in SImode and DImode */
|
||||
const int sse_move; /* cost of moving SSE register. */
|
||||
const int sse_load[3]; /* cost of loading SSE register
|
||||
in SImode, DImode and TImode*/
|
||||
const int sse_store[3]; /* cost of storing SSE register
|
||||
in SImode, DImode and TImode*/
|
||||
const int xmm_move, ymm_move, /* cost of moving XMM, YMM and ZMM register. */
|
||||
zmm_move;
|
||||
const int sse_load[5]; /* cost of loading SSE register
|
||||
in 32bit, 64bit, 128bit, 256bit and 512bit */
|
||||
const int sse_unaligned_load[5];/* cost of unaligned load. */
|
||||
const int sse_store[5]; /* cost of storing SSE register
|
||||
in 32bit, 64bit, 128bit, 256bit and 512bit. */
|
||||
const int sse_unaligned_store[5];/* cost of unaligned store. */
|
||||
const int mmxsse_to_integer; /* cost of moving mmxsse register to
|
||||
integer and vice versa. */
|
||||
integer. */
|
||||
const int ssemmx_to_integer; /* cost of moving integer to mmxsse register. */
|
||||
const int l1_cache_size; /* size of l1 cache, in kilobytes. */
|
||||
const int l2_cache_size; /* size of l2 cache, in kilobytes. */
|
||||
const int prefetch_block; /* bytes moved to cache for prefetch. */
|
||||
|
@ -1,4 +1,26 @@
|
||||
/* Costs of operations of individual x86 CPUs.
|
||||
Copyright (C) 1988-2017 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
/* Processor costs (relative to an add) */
|
||||
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
|
||||
/* Express a size of N bytes in cost units; under the assumptions stated
   above, a 2-byte addition comes out equal to COSTS_N_INSNS (1).  */
#define COSTS_N_BYTES(N) ((N) * 2)
|
||||
@ -33,6 +55,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
|
||||
COSTS_N_BYTES (3), /* cost of movzx */
|
||||
0, /* "large" insn */
|
||||
2, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2. */
|
||||
2, /* cost for loading QImode using movzbl */
|
||||
{2, 2, 2}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -48,12 +72,16 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
|
||||
in SImode and DImode */
|
||||
{3, 3}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
3, /* cost of moving SSE register */
|
||||
{3, 3, 3}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{3, 3, 3}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
|
||||
{3, 3, 3, 3, 3}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{3, 3, 3, 3, 3}, /* cost of unaligned SSE load
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{3, 3, 3, 3, 3}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{3, 3, 3, 3, 3}, /* cost of unaligned SSE store
|
||||
in 32,64,128,256 and 512-bit */
|
||||
3, 3, /* SSE->integer and integer->SSE moves */
|
||||
0, /* size of l1 cache */
|
||||
0, /* size of l2 cache */
|
||||
0, /* size of prefetch block */
|
||||
@ -112,6 +140,9 @@ struct processor_costs i386_cost = { /* 386 specific costs */
|
||||
COSTS_N_INSNS (2), /* cost of movzx */
|
||||
15, /* "large" insn */
|
||||
3, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{2, 4, 2}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -127,12 +158,14 @@ struct processor_costs i386_cost = { /* 386 specific costs */
|
||||
in SImode and DImode */
|
||||
{4, 8}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 8, 16}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 8, 16}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{4, 8, 16, 32, 64}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
|
||||
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
3, 3, /* SSE->integer and integer->SSE moves */
|
||||
0, /* size of l1 cache */
|
||||
0, /* size of l2 cache */
|
||||
0, /* size of prefetch block */
|
||||
@ -190,6 +223,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */
|
||||
COSTS_N_INSNS (2), /* cost of movzx */
|
||||
15, /* "large" insn */
|
||||
3, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{2, 4, 2}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -205,12 +241,14 @@ struct processor_costs i486_cost = { /* 486 specific costs */
|
||||
in SImode and DImode */
|
||||
{4, 8}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 8, 16}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 8, 16}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{4, 8, 16, 32, 64}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
|
||||
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
3, 3, /* SSE->integer and integer->SSE moves */
|
||||
4, /* size of l1 cache. 486 has 8kB cache
|
||||
shared for code and data, so 4kB is
|
||||
not really precise. */
|
||||
@ -270,6 +308,9 @@ struct processor_costs pentium_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
6, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
6, /* cost for loading QImode using movzbl */
|
||||
{2, 4, 2}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -285,12 +326,14 @@ struct processor_costs pentium_cost = {
|
||||
in SImode and DImode */
|
||||
{8, 8}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 8, 16}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 8, 16}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{4, 8, 16, 32, 64}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
|
||||
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
3, 3, /* SSE->integer and integer->SSE moves */
|
||||
8, /* size of l1 cache. */
|
||||
8, /* size of l2 cache */
|
||||
0, /* size of prefetch block */
|
||||
@ -341,6 +384,9 @@ struct processor_costs lakemont_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
17, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
6, /* cost for loading QImode using movzbl */
|
||||
{2, 4, 2}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -356,12 +402,14 @@ struct processor_costs lakemont_cost = {
|
||||
in SImode and DImode */
|
||||
{8, 8}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 8, 16}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 8, 16}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{4, 8, 16, 32, 64}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
|
||||
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
3, 3, /* SSE->integer and integer->SSE moves */
|
||||
8, /* size of l1 cache. */
|
||||
8, /* size of l2 cache */
|
||||
0, /* size of prefetch block */
|
||||
@ -427,6 +475,9 @@ struct processor_costs pentiumpro_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
6, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
2, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -442,12 +493,14 @@ struct processor_costs pentiumpro_cost = {
|
||||
in SImode and DImode */
|
||||
{2, 2}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{2, 2, 8}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{2, 2, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{4, 8, 16, 32, 64}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
|
||||
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
3, 3, /* SSE->integer and integer->SSE moves */
|
||||
8, /* size of l1 cache. */
|
||||
256, /* size of l2 cache */
|
||||
32, /* size of prefetch block */
|
||||
@ -504,13 +557,16 @@ struct processor_costs geode_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
4, /* MOVE_RATIO */
|
||||
1, /* cost for loading QImode using movzbl */
|
||||
{1, 1, 1}, /* cost of loading integer registers
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
2, /* cost for loading QImode using movzbl */
|
||||
{2, 2, 2}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{1, 1, 1}, /* cost of storing integer registers */
|
||||
1, /* cost of reg,reg fld/fst */
|
||||
{1, 1, 1}, /* cost of loading fp registers
|
||||
{2, 2, 2}, /* cost of storing integer registers */
|
||||
2, /* cost of reg,reg fld/fst */
|
||||
{2, 2, 2}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 6, 6}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
@ -520,12 +576,14 @@ struct processor_costs geode_cost = {
|
||||
in SImode and DImode */
|
||||
{2, 2}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{2, 2, 8}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{2, 2, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{2, 2, 8, 16, 32}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{2, 2, 8, 16, 32}, /* cost of unaligned loads. */
|
||||
{2, 2, 8, 16, 32}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
|
||||
6, 6, /* SSE->integer and integer->SSE moves */
|
||||
64, /* size of l1 cache. */
|
||||
128, /* size of l2 cache. */
|
||||
32, /* size of prefetch block */
|
||||
@ -582,6 +640,9 @@ struct processor_costs k6_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
4, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
3, /* cost for loading QImode using movzbl */
|
||||
{4, 5, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -597,12 +658,14 @@ struct processor_costs k6_cost = {
|
||||
in SImode and DImode */
|
||||
{2, 2}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{2, 2, 8}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{2, 2, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
6, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{2, 2, 8, 16, 32}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{2, 2, 8, 16, 32}, /* cost of unaligned loads. */
|
||||
{2, 2, 8, 16, 32}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
|
||||
6, 6, /* SSE->integer and integer->SSE moves */
|
||||
32, /* size of l1 cache. */
|
||||
32, /* size of l2 cache. Some models
|
||||
have integrated l2 cache, but
|
||||
@ -665,6 +728,9 @@ struct processor_costs athlon_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{3, 4, 3}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -680,12 +746,14 @@ struct processor_costs athlon_cost = {
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 4, 6}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 5}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{4, 4, 6, 12, 24}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 4, 6, 12, 24}, /* cost of unaligned loads. */
|
||||
{4, 4, 5, 10, 20}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 4, 5, 10, 20}, /* cost of unaligned stores. */
|
||||
5, 5, /* SSE->integer and integer->SSE moves */
|
||||
64, /* size of l1 cache. */
|
||||
256, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -750,6 +818,9 @@ struct processor_costs k8_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{3, 4, 3}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -765,12 +836,14 @@ struct processor_costs k8_cost = {
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 3, 6}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 5}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{4, 3, 6, 12, 24}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 3, 6, 12, 24}, /* cost of unaligned loads. */
|
||||
{4, 4, 5, 10, 20}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 4, 5, 10, 20}, /* cost of unaligned stores. */
|
||||
5, 5, /* SSE->integer and integer->SSE moves */
|
||||
64, /* size of l1 cache. */
|
||||
512, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -839,6 +912,9 @@ struct processor_costs amdfam10_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{3, 4, 3}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -854,12 +930,14 @@ struct processor_costs amdfam10_cost = {
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 4, 3}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 5}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{4, 4, 3, 6, 12}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 4, 3, 7, 12}, /* cost of unaligned loads. */
|
||||
{4, 4, 5, 10, 20}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{4, 4, 5, 10, 20}, /* cost of unaligned stores. */
|
||||
3, 3, /* SSE->integer and integer->SSE moves */
|
||||
/* On K8:
|
||||
MOVD reg64, xmmreg Double FSTORE 4
|
||||
MOVD reg32, xmmreg Double FSTORE 4
|
||||
@ -937,35 +1015,32 @@ const struct processor_costs bdver1_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{5, 5, 4}, /* cost of loading integer registers
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
8, /* cost for loading QImode using movzbl */
|
||||
{8, 8, 8}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
2, /* cost of reg,reg fld/fst */
|
||||
{5, 5, 12}, /* cost of loading fp registers
|
||||
{8, 8, 8}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 28}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 8}, /* cost of storing fp registers
|
||||
{10, 10, 18}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{4, 4}, /* cost of loading MMX registers
|
||||
4, /* cost of moving MMX register */
|
||||
{12, 12}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
{10, 10}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 4, 4}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 4}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
2, /* MMX or SSE register to integer */
|
||||
/* On K8:
|
||||
MOVD reg64, xmmreg Double FSTORE 4
|
||||
MOVD reg32, xmmreg Double FSTORE 4
|
||||
On AMDFAM10:
|
||||
MOVD reg64, xmmreg Double FADD 3
|
||||
1/1 1/1
|
||||
MOVD reg32, xmmreg Double FADD 3
|
||||
1/1 1/1 */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{12, 12, 10, 20, 30}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{12, 12, 10, 20, 30}, /* cost of unaligned loads. */
|
||||
{10, 10, 10, 20, 30}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 10, 20, 30}, /* cost of unaligned stores. */
|
||||
16, 20, /* SSE->integer and integer->SSE moves */
|
||||
16, /* size of l1 cache. */
|
||||
2048, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1037,35 +1112,32 @@ const struct processor_costs bdver2_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{5, 5, 4}, /* cost of loading integer registers
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
8, /* cost for loading QImode using movzbl */
|
||||
{8, 8, 8}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
2, /* cost of reg,reg fld/fst */
|
||||
{5, 5, 12}, /* cost of loading fp registers
|
||||
{8, 8, 8}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 28}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 8}, /* cost of storing fp registers
|
||||
{10, 10, 18}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{4, 4}, /* cost of loading MMX registers
|
||||
4, /* cost of moving MMX register */
|
||||
{12, 12}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
{10, 10}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 4, 4}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 4}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
2, /* MMX or SSE register to integer */
|
||||
/* On K8:
|
||||
MOVD reg64, xmmreg Double FSTORE 4
|
||||
MOVD reg32, xmmreg Double FSTORE 4
|
||||
On AMDFAM10:
|
||||
MOVD reg64, xmmreg Double FADD 3
|
||||
1/1 1/1
|
||||
MOVD reg32, xmmreg Double FADD 3
|
||||
1/1 1/1 */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{12, 12, 10, 20, 30}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{12, 12, 10, 20, 30}, /* cost of unaligned loads. */
|
||||
{10, 10, 10, 20, 30}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 10, 20, 30}, /* cost of unaligned stores. */
|
||||
16, 20, /* SSE->integer and integer->SSE moves */
|
||||
16, /* size of l1 cache. */
|
||||
2048, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1136,27 +1208,32 @@ struct processor_costs bdver3_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{5, 5, 4}, /* cost of loading integer registers
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
8, /* cost for loading QImode using movzbl */
|
||||
{8, 8, 8}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
2, /* cost of reg,reg fld/fst */
|
||||
{5, 5, 12}, /* cost of loading fp registers
|
||||
{8, 8, 8}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 28}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 8}, /* cost of storing fp registers
|
||||
{10, 10, 18}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{4, 4}, /* cost of loading MMX registers
|
||||
4, /* cost of moving MMX register */
|
||||
{12, 12}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
{10, 10}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 4, 4}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 4}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
2, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{12, 12, 10, 20, 30}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{12, 12, 10, 20, 30}, /* cost of unaligned loads. */
|
||||
{10, 10, 10, 20, 30}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 10, 20, 30}, /* cost of unaligned stores. */
|
||||
16, 20, /* SSE->integer and integer->SSE moves */
|
||||
16, /* size of l1 cache. */
|
||||
2048, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1226,27 +1303,32 @@ struct processor_costs bdver4_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{5, 5, 4}, /* cost of loading integer registers
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
8, /* cost for loading QImode using movzbl */
|
||||
{8, 8, 8}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
2, /* cost of reg,reg fld/fst */
|
||||
{5, 5, 12}, /* cost of loading fp registers
|
||||
{8, 8, 8}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 28}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 8}, /* cost of storing fp registers
|
||||
{10, 10, 18}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{4, 4}, /* cost of loading MMX registers
|
||||
4, /* cost of moving MMX register */
|
||||
{12, 12}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
{10, 10}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 4, 4}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 4}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
2, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{12, 12, 10, 20, 30}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{12, 12, 10, 20, 30}, /* cost of unaligned loads. */
|
||||
{10, 10, 10, 20, 30}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 10, 20, 30}, /* cost of unaligned stores. */
|
||||
16, 20, /* SSE->integer and integer->SSE moves */
|
||||
16, /* size of l1 cache. */
|
||||
2048, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1321,6 +1403,9 @@ struct processor_costs znver1_cost = {
|
||||
8, /* "large" insn. */
|
||||
9, /* MOVE_RATIO. */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
|
||||
/* reg-reg moves are done by renaming and thus they are even cheaper than
|
||||
1 cycle. Because reg-reg move cost is 2 and the following tables correspond
|
||||
to doubles of latencies, we do not model this correctly. It does not
|
||||
@ -1342,12 +1427,14 @@ struct processor_costs znver1_cost = {
|
||||
in SImode and DImode. */
|
||||
{8, 8}, /* cost of storing MMX registers
|
||||
in SImode and DImode. */
|
||||
2, /* cost of moving SSE register. */
|
||||
{6, 6, 6}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode. */
|
||||
{8, 8, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode. */
|
||||
6, /* MMX or SSE register to integer. */
|
||||
2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
|
||||
{6, 6, 6, 10, 20}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit. */
|
||||
{6, 6, 6, 10, 20}, /* cost of unaligned loads. */
|
||||
{8, 8, 8, 8, 16}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit. */
|
||||
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
|
||||
6, 6, /* SSE->integer and integer->SSE moves. */
|
||||
32, /* size of l1 cache. */
|
||||
512, /* size of l2 cache. */
|
||||
64, /* size of prefetch block. */
|
||||
@ -1426,35 +1513,32 @@ const struct processor_costs btver1_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{3, 4, 3}, /* cost of loading integer registers
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
8, /* cost for loading QImode using movzbl */
|
||||
{6, 8, 6}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{3, 4, 3}, /* cost of storing integer registers */
|
||||
{6, 8, 6}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{4, 4, 12}, /* cost of loading fp registers
|
||||
{12, 12, 28}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{6, 6, 8}, /* cost of storing fp registers
|
||||
{12, 12, 38}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{3, 3}, /* cost of loading MMX registers
|
||||
4, /* cost of moving MMX register */
|
||||
{10, 10}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
{12, 12}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 4, 3}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 5}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
/* On K8:
|
||||
MOVD reg64, xmmreg Double FSTORE 4
|
||||
MOVD reg32, xmmreg Double FSTORE 4
|
||||
On AMDFAM10:
|
||||
MOVD reg64, xmmreg Double FADD 3
|
||||
1/1 1/1
|
||||
MOVD reg32, xmmreg Double FADD 3
|
||||
1/1 1/1 */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{10, 10, 12, 24, 48}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 12, 24, 48}, /* cost of unaligned loads. */
|
||||
{10, 10, 12, 24, 48}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 12, 24, 48}, /* cost of unaligned stores. */
|
||||
14, 14, /* SSE->integer and integer->SSE moves */
|
||||
32, /* size of l1 cache. */
|
||||
512, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1514,35 +1598,32 @@ const struct processor_costs btver2_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{3, 4, 3}, /* cost of loading integer registers
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
8, /* cost for loading QImode using movzbl */
|
||||
{8, 8, 6}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{3, 4, 3}, /* cost of storing integer registers */
|
||||
{8, 8, 6}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{4, 4, 12}, /* cost of loading fp registers
|
||||
{12, 12, 28}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{6, 6, 8}, /* cost of storing fp registers
|
||||
{12, 12, 38}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{3, 3}, /* cost of loading MMX registers
|
||||
4, /* cost of moving MMX register */
|
||||
{10, 10}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
{12, 12}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 4, 3}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 5}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
/* On K8:
|
||||
MOVD reg64, xmmreg Double FSTORE 4
|
||||
MOVD reg32, xmmreg Double FSTORE 4
|
||||
On AMDFAM10:
|
||||
MOVD reg64, xmmreg Double FADD 3
|
||||
1/1 1/1
|
||||
MOVD reg32, xmmreg Double FADD 3
|
||||
1/1 1/1 */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{10, 10, 12, 24, 48}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 12, 24, 48}, /* cost of unaligned loads. */
|
||||
{10, 10, 12, 24, 48}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 12, 24, 48}, /* cost of unaligned stores. */
|
||||
14, 14, /* SSE->integer and integer->SSE moves */
|
||||
32, /* size of l1 cache. */
|
||||
2048, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1601,27 +1682,32 @@ struct processor_costs pentium4_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
16, /* "large" insn */
|
||||
6, /* MOVE_RATIO */
|
||||
2, /* cost for loading QImode using movzbl */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
5, /* cost for loading QImode using movzbl */
|
||||
{4, 5, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{2, 3, 2}, /* cost of storing integer registers */
|
||||
2, /* cost of reg,reg fld/fst */
|
||||
{2, 2, 6}, /* cost of loading fp registers
|
||||
12, /* cost of reg,reg fld/fst */
|
||||
{14, 14, 14}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 6}, /* cost of storing fp registers
|
||||
{14, 14, 14}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{2, 2}, /* cost of loading MMX registers
|
||||
12, /* cost of moving MMX register */
|
||||
{16, 16}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{2, 2}, /* cost of storing MMX registers
|
||||
{16, 16}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
12, /* cost of moving SSE register */
|
||||
{12, 12, 12}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{2, 2, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
10, /* MMX or SSE register to integer */
|
||||
12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
|
||||
{16, 16, 16, 32, 64}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{32, 32, 32, 64, 128}, /* cost of unaligned loads. */
|
||||
{16, 16, 16, 32, 64}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{32, 32, 32, 64, 128}, /* cost of unaligned stores. */
|
||||
20, 12, /* SSE->integer and integer->SSE moves */
|
||||
8, /* size of l1 cache. */
|
||||
256, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1683,27 +1769,32 @@ struct processor_costs nocona_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
16, /* "large" insn */
|
||||
17, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
3, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 12}, /* cost of loading fp registers
|
||||
12, /* cost of reg,reg fld/fst */
|
||||
{14, 14, 14}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 4}, /* cost of storing fp registers
|
||||
{14, 14, 14}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
6, /* cost of moving MMX register */
|
||||
14, /* cost of moving MMX register */
|
||||
{12, 12}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{12, 12}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
6, /* cost of moving SSE register */
|
||||
{12, 12, 12}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{12, 12, 12}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
8, /* MMX or SSE register to integer */
|
||||
6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
|
||||
{12, 12, 12, 24, 48}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{24, 24, 24, 48, 96}, /* cost of unaligned loads. */
|
||||
{12, 12, 12, 24, 48}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{24, 24, 24, 48, 96}, /* cost of unaligned stores. */
|
||||
20, 12, /* SSE->integer and integer->SSE moves */
|
||||
8, /* size of l1 cache. */
|
||||
1024, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1763,27 +1854,32 @@ struct processor_costs atom_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
17, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
6, /* cost for loading QImode using movzbl */
|
||||
{6, 6, 6}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
{6, 6, 6}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 12}, /* cost of loading fp registers
|
||||
{6, 6, 18}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{6, 6, 8}, /* cost of storing fp registers
|
||||
{14, 14, 24}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{8, 8}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{8, 8}, /* cost of storing MMX registers
|
||||
{10, 10}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{8, 8, 8}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{8, 8, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{8, 8, 8, 16, 32}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{16, 16, 16, 32, 64}, /* cost of unaligned loads. */
|
||||
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
8, 6, /* SSE->integer and integer->SSE moves */
|
||||
32, /* size of l1 cache. */
|
||||
256, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1843,27 +1939,32 @@ struct processor_costs slm_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
17, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
8, /* cost for loading QImode using movzbl */
|
||||
{8, 8, 8}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{12, 12, 12}, /* cost of loading fp registers
|
||||
{6, 6, 6}, /* cost of storing integer registers */
|
||||
2, /* cost of reg,reg fld/fst */
|
||||
{8, 8, 18}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{6, 6, 8}, /* cost of storing fp registers
|
||||
{6, 6, 18}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{8, 8}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{8, 8}, /* cost of storing MMX registers
|
||||
{6, 6}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{8, 8, 8}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{8, 8, 8}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
5, /* MMX or SSE register to integer */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
{8, 8, 8, 16, 32}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{16, 16, 16, 32, 64}, /* cost of unaligned loads. */
|
||||
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
8, 6, /* SSE->integer and integer->SSE moves */
|
||||
32, /* size of l1 cache. */
|
||||
256, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -1923,6 +2024,9 @@ struct processor_costs intel_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
17, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
6, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -1938,12 +2042,14 @@ struct processor_costs intel_cost = {
|
||||
in SImode and DImode */
|
||||
{6, 6}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{6, 6, 6}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{6, 6, 6}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
2, /* MMX or SSE register to integer */
|
||||
2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
|
||||
{6, 6, 6, 6, 6}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 10, 10, 10}, /* cost of unaligned loads. */
|
||||
{6, 6, 6, 6, 6}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 10, 10, 10}, /* cost of unaligned stores. */
|
||||
4, 4, /* SSE->integer and integer->SSE moves */
|
||||
32, /* size of l1 cache. */
|
||||
256, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -2010,6 +2116,9 @@ struct processor_costs generic_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
17, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -2025,12 +2134,14 @@ struct processor_costs generic_cost = {
|
||||
in SImode and DImode */
|
||||
{6, 6}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{6, 6, 6}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{6, 6, 6}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
6, /* MMX or SSE register to integer */
|
||||
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
|
||||
{6, 6, 6, 10, 15}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 10, 15, 20}, /* cost of unaligned loads. */
|
||||
{6, 6, 6, 10, 15}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{10, 10, 10, 15, 20}, /* cost of unaligned stores. */
|
||||
20, 20, /* SSE->integer and integer->SSE moves */
|
||||
32, /* size of l1 cache. */
|
||||
512, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
@ -2102,6 +2213,9 @@ struct processor_costs core_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
17, /* MOVE_RATIO */
|
||||
|
||||
/* All move costs are relative to integer->integer move times 2 and thus
|
||||
they are latency*2. */
|
||||
6, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
@ -2117,12 +2231,14 @@ struct processor_costs core_cost = {
|
||||
in SImode and DImode */
|
||||
{6, 6}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{6, 6, 6}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{6, 6, 6}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
2, /* MMX or SSE register to integer */
|
||||
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
|
||||
{6, 6, 6, 6, 12}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{6, 6, 6, 6, 12}, /* cost of unaligned loads. */
|
||||
{6, 6, 6, 6, 12}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit */
|
||||
{6, 6, 6, 6, 12}, /* cost of unaligned stores. */
|
||||
2, 2, /* SSE->integer and integer->SSE moves */
|
||||
64, /* size of l1 cache. */
|
||||
512, /* size of l2 cache. */
|
||||
64, /* size of prefetch block */
|
||||
|
Loading…
x
Reference in New Issue
Block a user