mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-17 18:00:55 +08:00
aarch64: Add A64FX machine model
This patch adds support for Fujitsu A64FX, as the first step of adding A64FX machine model. A64FX is used in FUJITSU Supercomputer PRIMEHPC FX1000, PRIMEHPC FX700, and supercomputer Fugaku. The official microarchitecture information of A64FX can be read at https://github.com/fujitsu/A64FX. 2020-08-03 Qian jianhua <qianjh@cn.fujitsu.com> gcc/ * config/aarch64/aarch64-cores.def (a64fx): New core. * config/aarch64/aarch64-tune.md: Regenerated. * config/aarch64/aarch64.c (a64fx_prefetch_tune, a64fx_tunings): New. * doc/invoke.texi: Add a64fx to the list.
This commit is contained in:
parent
919c9d4bd3
commit
02f21aea06
@ -119,6 +119,9 @@ AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_
|
||||
AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
|
||||
AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
|
||||
|
||||
/* Fujitsu ('F') cores. */
|
||||
AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
|
||||
|
||||
/* HiSilicon ('H') cores. */
|
||||
AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
;; -*- buffer-read-only: t -*-
|
||||
;; Generated automatically by gentune.sh from aarch64-cores.def
|
||||
(define_attr "tune"
|
||||
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,tsv110,thunderx3t110,zeus,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
|
||||
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
|
||||
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
|
||||
|
@ -868,6 +868,17 @@ static const cpu_prefetch_tune xgene1_prefetch_tune =
|
||||
-1 /* default_opt_level */
|
||||
};
|
||||
|
||||
/* Prefetch tuning parameters for the Fujitsu A64FX core; a64fx_tunings
   points at this table through its final member.
   NOTE(review): the units of the cache-size fields and the meaning of
   -1 for minimum_stride / default_opt_level are defined by
   cpu_prefetch_tune, which is not visible here -- confirm against that
   declaration before changing any value.  */
static const cpu_prefetch_tune a64fx_prefetch_tune =
|
||||
{
|
||||
8, /* num_slots */
|
||||
64, /* l1_cache_size */
|
||||
256, /* l1_cache_line_size */
|
||||
32768, /* l2_cache_size */
|
||||
true, /* prefetch_dynamic_strides */
|
||||
-1, /* minimum_stride */
|
||||
-1 /* default_opt_level */
|
||||
};
|
||||
|
||||
static const struct tune_params generic_tunings =
|
||||
{
|
||||
&cortexa57_extra_costs,
|
||||
@ -1325,6 +1336,32 @@ static const struct tune_params neoversen1_tunings =
|
||||
&generic_prefetch_tune
|
||||
};
|
||||
|
||||
/* Tuning parameters for the Fujitsu A64FX core.  The cost tables and
   approximation modes are the shared generic_* ones, so this entry has
   no A64FX-specific cost model; the A64FX-specific parts are the scalar
   settings below (SVE width set to SVE_512, issue rate, alignments,
   fusible ops) and the prefetch table a64fx_prefetch_tune.  */
static const struct tune_params a64fx_tunings =
|
||||
{
|
||||
&generic_extra_costs,
|
||||
&generic_addrcost_table,
|
||||
&generic_regmove_cost,
|
||||
&generic_vector_cost,
|
||||
&generic_branch_cost,
|
||||
&generic_approx_modes,
|
||||
SVE_512, /* sve_width */
|
||||
4, /* memmov_cost */
|
||||
7, /* issue_rate */
|
||||
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
|
||||
"32", /* function_align. */
|
||||
"16", /* jump_align. */
|
||||
"32", /* loop_align. */
|
||||
4, /* int_reassoc_width. */
|
||||
2, /* fp_reassoc_width. */
|
||||
2, /* vec_reassoc_width. */
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&a64fx_prefetch_tune
|
||||
};
|
||||
|
||||
/* Support for fine-grained override of the tuning structures. */
|
||||
struct aarch64_tuning_override_function
|
||||
{
|
||||
|
@ -17354,6 +17354,7 @@ performance of the code. Permissible values for this option are:
|
||||
@samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96},
|
||||
@samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},
|
||||
@samp{octeontx2f95mm},
|
||||
@samp{a64fx},
|
||||
@samp{thunderx}, @samp{thunderxt88},
|
||||
@samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
|
||||
@samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus},
|
||||
|
Loading…
x
Reference in New Issue
Block a user