mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-04 14:41:14 +08:00
extend.texi (Half-Precision): New section.
2009-06-18 Sandra Loosemore <sandra@codesourcery.com> gcc/ * doc/extend.texi (Half-Precision): New section. * doc/invoke.texi (Option Summary): List -mfp16-format. (ARM Options): List neon-fp16 as -mfpu value. Document -mfp16-format. * config/arm/arm.opt (mfp16-format=): New. * config/arm/arm.c: Include intl.h. (TARGET_INVALID_PARAMETER_TYPE): Redefine. (TARGET_INVALID_RETURN_TYPE): Redefine. (TARGET_PROMOTED_TYPE): Redefine. (TARGET_CONVERT_TO_TYPE): Redefine. (arm_fp16_format): Define. (all_fpus): Add entry for neon-fp16. (fp_model_for_fpu): Likewise. (struct fp16_format): Declare. (all_fp16_formats): Define. (arm_init_libfuncs): Add entries for HFmode conversions and arithmetic functions. (arm_override_options): Set arm_fp16_format. Call sorry for fp16 and no ldrh. (arm_legitimate_index_p): Treat HFmode like HImode. (thumb1_legitimate_address_p): Make it recognize HFmode constants. (coproc_secondary_reload_class): Special-case HFmode. (arm_print_operand): Add 'z' specifier for vld1.16/vst1.16. (arm_hard_regno_mode_ok): Allow HFmode values in VFP registers. (arm_init_fp16_builtins): New. (arm_init_builtins): Call it. (arm_invalid_parameter_type): New. (arm_invalid_return_type): New. (arm_promoted_type): New. (arm_convert_to_type). (arm_file_start): Deal with neon-fp16 as fpu_name. Emit tag for fp16 format. (arm_emit_fp16_const): New function. (arm_mangle_type): Mangle __fp16 as "Dh". * config/arm/arm.h (TARGET_VFPD32): Make it know about FPUTYPE_NEON_FP16. (TARGET_NEON_FP16): New. (TARGET_NEON): Make it know about FPUTYPE_NEON_FP16. (enum fputype): Add FPUTYPE_NEON_FP16. (enum arm_fp16_format_type): Declare. (arm_fp16_format): Declare. (LARGEST_EXPONENT_IS_NORMAL): Define. * config/arm/arm-protos.h (arm_emit_fp16_const): Declare. * config/arm/arm-modes.def (HFmode): Define. * config/arm/vfp.md: (*movhf_vfp): New. (extendhfsf2): New. (truncsfhf2): New. * config/arm/arm.md: (fpu): Add neon_fp16. (floatsihf2, floatdihf2): New. (fix_trunchfsi2, fix_trunchfdi2): New. 
(truncdfhf2): New. (extendhfdf2): New. (movhf): New. (*arm32_movhf): New. (*thumb1_movhf): New. (consttable_2): Add check for HFmode constants. (consttable_4): Handle HFmode constants. From-SVN: r148654
This commit is contained in:
parent
2c2f70e1f2
commit
0fd8c3ad1e
@ -1,3 +1,62 @@
|
||||
2009-06-18 Sandra Loosemore <sandra@codesourcery.com>
|
||||
|
||||
* doc/extend.texi (Half-Precision): New section.
|
||||
* doc/invoke.texi (Option Summary): List -mfp16-format.
|
||||
(ARM Options): List neon-fp16 as -mfpu value. Document -mfp16-format.
|
||||
* config/arm/arm.opt (mfp16-format=): New.
|
||||
* config/arm/arm.c: Include intl.h.
|
||||
(TARGET_INVALID_PARAMETER_TYPE): Redefine.
|
||||
(TARGET_INVALID_RETURN_TYPE): Redefine.
|
||||
(TARGET_PROMOTED_TYPE): Redefine.
|
||||
(TARGET_CONVERT_TO_TYPE): Redefine.
|
||||
(arm_fp16_format): Define.
|
||||
(all_fpus): Add entry for neon-fp16.
|
||||
(fp_model_for_fpu): Likewise.
|
||||
(struct fp16_format): Declare.
|
||||
(all_fp16_formats): Define.
|
||||
(arm_init_libfuncs): Add entries for HFmode conversions and arithmetic
|
||||
functions.
|
||||
(arm_override_options): Set arm_fp16_format. Call sorry for fp16
|
||||
and no ldrh.
|
||||
(arm_legitimate_index_p): Treat HFmode like HImode.
|
||||
(thumb1_legitimate_address_p): Make it recognize HFmode constants.
|
||||
(coproc_secondary_reload_class): Special-case HFmode.
|
||||
(arm_print_operand): Add 'z' specifier for vld1.16/vst1.16.
|
||||
(arm_hard_regno_mode_ok): Allow HFmode values in VFP registers.
|
||||
(arm_init_fp16_builtins): New.
|
||||
(arm_init_builtins): Call it.
|
||||
(arm_invalid_parameter_type): New.
|
||||
(arm_invalid_return_type): New.
|
||||
(arm_promoted_type): New.
|
||||
(arm_convert_to_type): New.
|
||||
(arm_file_start): Deal with neon-fp16 as fpu_name. Emit tag for fp16
|
||||
format.
|
||||
(arm_emit_fp16_const): New function.
|
||||
(arm_mangle_type): Mangle __fp16 as "Dh".
|
||||
* config/arm/arm.h (TARGET_VFPD32): Make it know about
|
||||
FPUTYPE_NEON_FP16.
|
||||
(TARGET_NEON_FP16): New.
|
||||
(TARGET_NEON): Make it know about FPUTYPE_NEON_FP16.
|
||||
(enum fputype): Add FPUTYPE_NEON_FP16.
|
||||
(enum arm_fp16_format_type): Declare.
|
||||
(arm_fp16_format): Declare.
|
||||
(LARGEST_EXPONENT_IS_NORMAL): Define.
|
||||
* config/arm/arm-protos.h (arm_emit_fp16_const): Declare.
|
||||
* config/arm/arm-modes.def (HFmode): Define.
|
||||
* config/arm/vfp.md: (*movhf_vfp): New.
|
||||
(extendhfsf2): New.
|
||||
(truncsfhf2): New.
|
||||
* config/arm/arm.md: (fpu): Add neon_fp16.
|
||||
(floatsihf2, floatdihf2): New.
|
||||
(fix_trunchfsi2, fix_trunchfdi2): New.
|
||||
(truncdfhf2): New.
|
||||
(extendhfdf2): New.
|
||||
(movhf): New.
|
||||
(*arm32_movhf): New.
|
||||
(*thumb1_movhf): New.
|
||||
(consttable_2): Add check for HFmode constants.
|
||||
(consttable_4): Handle HFmode constants.
|
||||
|
||||
2009-06-18 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* convert.c (convert_to_integer): Convert (int)logb() into ilogb().
|
||||
|
@ -25,6 +25,11 @@
|
||||
FIXME What format is this? */
|
||||
FLOAT_MODE (XF, 12, 0);
|
||||
|
||||
/* Half-precision floating point */
|
||||
FLOAT_MODE (HF, 2, 0);
|
||||
ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
|
||||
? &arm_half_format : &ieee_half_format));
|
||||
|
||||
/* CCFPEmode should be used with floating inequalities,
|
||||
CCFPmode should be used with floating equalities.
|
||||
CC_NOOVmode should be used with SImode integer equalities.
|
||||
|
@ -140,6 +140,7 @@ extern void arm_final_prescan_insn (rtx);
|
||||
extern int arm_debugger_arg_offset (int, rtx);
|
||||
extern bool arm_is_long_call_p (tree);
|
||||
extern int arm_emit_vector_const (FILE *, rtx);
|
||||
extern void arm_emit_fp16_const (rtx c);
|
||||
extern const char * arm_output_load_gr (rtx *);
|
||||
extern const char *vfp_output_fstmd (rtx *);
|
||||
extern void arm_set_return_address (rtx, rtx);
|
||||
|
@ -53,6 +53,7 @@
|
||||
#include "debug.h"
|
||||
#include "langhooks.h"
|
||||
#include "df.h"
|
||||
#include "intl.h"
|
||||
|
||||
/* Forward definitions of types. */
|
||||
typedef struct minipool_node Mnode;
|
||||
@ -200,6 +201,10 @@ static bool arm_tls_symbol_p (rtx x);
|
||||
static int arm_issue_rate (void);
|
||||
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
|
||||
static bool arm_allocate_stack_slots_for_args (void);
|
||||
static const char *arm_invalid_parameter_type (const_tree t);
|
||||
static const char *arm_invalid_return_type (const_tree t);
|
||||
static tree arm_promoted_type (const_tree t);
|
||||
static tree arm_convert_to_type (tree type, tree expr);
|
||||
|
||||
|
||||
/* Initialize the GCC target structure. */
|
||||
@ -407,6 +412,18 @@ static bool arm_allocate_stack_slots_for_args (void);
|
||||
#undef TARGET_LEGITIMATE_ADDRESS_P
|
||||
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
|
||||
|
||||
#undef TARGET_INVALID_PARAMETER_TYPE
|
||||
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
|
||||
|
||||
#undef TARGET_INVALID_RETURN_TYPE
|
||||
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
|
||||
|
||||
#undef TARGET_PROMOTED_TYPE
|
||||
#define TARGET_PROMOTED_TYPE arm_promoted_type
|
||||
|
||||
#undef TARGET_CONVERT_TO_TYPE
|
||||
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type
|
||||
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
/* Obstack for minipool constant handling. */
|
||||
@ -440,6 +457,9 @@ enum fputype arm_fpu_tune;
|
||||
/* Whether to use floating point hardware. */
|
||||
enum float_abi_type arm_float_abi;
|
||||
|
||||
/* Which __fp16 format to use. */
|
||||
enum arm_fp16_format_type arm_fp16_format;
|
||||
|
||||
/* Which ABI to use. */
|
||||
enum arm_abi_type arm_abi;
|
||||
|
||||
@ -719,15 +739,16 @@ struct fpu_desc
|
||||
|
||||
static const struct fpu_desc all_fpus[] =
|
||||
{
|
||||
{"fpa", FPUTYPE_FPA},
|
||||
{"fpe2", FPUTYPE_FPA_EMU2},
|
||||
{"fpe3", FPUTYPE_FPA_EMU2},
|
||||
{"maverick", FPUTYPE_MAVERICK},
|
||||
{"vfp", FPUTYPE_VFP},
|
||||
{"vfp3", FPUTYPE_VFP3},
|
||||
{"vfpv3", FPUTYPE_VFP3},
|
||||
{"vfpv3-d16", FPUTYPE_VFP3D16},
|
||||
{"neon", FPUTYPE_NEON}
|
||||
{"fpa", FPUTYPE_FPA},
|
||||
{"fpe2", FPUTYPE_FPA_EMU2},
|
||||
{"fpe3", FPUTYPE_FPA_EMU2},
|
||||
{"maverick", FPUTYPE_MAVERICK},
|
||||
{"vfp", FPUTYPE_VFP},
|
||||
{"vfp3", FPUTYPE_VFP3},
|
||||
{"vfpv3", FPUTYPE_VFP3},
|
||||
{"vfpv3-d16", FPUTYPE_VFP3D16},
|
||||
{"neon", FPUTYPE_NEON},
|
||||
{"neon-fp16", FPUTYPE_NEON_FP16}
|
||||
};
|
||||
|
||||
|
||||
@ -745,7 +766,8 @@ static const enum arm_fp_model fp_model_for_fpu[] =
|
||||
ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
|
||||
ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
|
||||
ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
|
||||
ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
|
||||
ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */
|
||||
ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */
|
||||
};
|
||||
|
||||
|
||||
@ -766,6 +788,23 @@ static const struct float_abi all_float_abis[] =
|
||||
};
|
||||
|
||||
|
||||
/* Pairs one textual format name with its arm_fp16_format_type value.  */
struct fp16_format
|
||||
{
|
||||
/* Name string; arm_override_options compares it with
   target_fp16_format_name using streq.  */
const char *name;
|
||||
/* Value stored into arm_fp16_format when NAME matches.  */
enum arm_fp16_format_type fp16_format_type;
|
||||
};
|
||||
|
||||
|
||||
/* Available values for -mfp16-format=. */
|
||||
|
||||
/* arm_override_options scans this table in order and reports an error
   if target_fp16_format_name matches none of the names.  */
static const struct fp16_format all_fp16_formats[] =
|
||||
{
|
||||
{"none", ARM_FP16_FORMAT_NONE},
|
||||
{"ieee", ARM_FP16_FORMAT_IEEE},
|
||||
{"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
|
||||
};
|
||||
|
||||
|
||||
struct abi_name
|
||||
{
|
||||
const char *name;
|
||||
@ -923,6 +962,44 @@ arm_init_libfuncs (void)
|
||||
set_optab_libfunc (umod_optab, DImode, NULL);
|
||||
set_optab_libfunc (smod_optab, SImode, NULL);
|
||||
set_optab_libfunc (umod_optab, SImode, NULL);
|
||||
|
||||
/* Half-precision float operations. The compiler handles all operations
|
||||
with NULL libfuncs by converting to SFmode. */
|
||||
switch (arm_fp16_format)
|
||||
{
|
||||
case ARM_FP16_FORMAT_IEEE:
|
||||
case ARM_FP16_FORMAT_ALTERNATIVE:
|
||||
|
||||
/* Conversions. */
|
||||
set_conv_libfunc (trunc_optab, HFmode, SFmode,
|
||||
(arm_fp16_format == ARM_FP16_FORMAT_IEEE
|
||||
? "__gnu_f2h_ieee"
|
||||
: "__gnu_f2h_alternative"));
|
||||
set_conv_libfunc (sext_optab, SFmode, HFmode,
|
||||
(arm_fp16_format == ARM_FP16_FORMAT_IEEE
|
||||
? "__gnu_h2f_ieee"
|
||||
: "__gnu_h2f_alternative"));
|
||||
|
||||
/* Arithmetic. */
|
||||
set_optab_libfunc (add_optab, HFmode, NULL);
|
||||
set_optab_libfunc (sdiv_optab, HFmode, NULL);
|
||||
set_optab_libfunc (smul_optab, HFmode, NULL);
|
||||
set_optab_libfunc (neg_optab, HFmode, NULL);
|
||||
set_optab_libfunc (sub_optab, HFmode, NULL);
|
||||
|
||||
/* Comparisons. */
|
||||
set_optab_libfunc (eq_optab, HFmode, NULL);
|
||||
set_optab_libfunc (ne_optab, HFmode, NULL);
|
||||
set_optab_libfunc (lt_optab, HFmode, NULL);
|
||||
set_optab_libfunc (le_optab, HFmode, NULL);
|
||||
set_optab_libfunc (ge_optab, HFmode, NULL);
|
||||
set_optab_libfunc (gt_optab, HFmode, NULL);
|
||||
set_optab_libfunc (unord_optab, HFmode, NULL);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* On AAPCS systems, this is the "struct __va_list". */
|
||||
@ -1294,6 +1371,23 @@ arm_override_options (void)
|
||||
|
||||
tune_flags = all_cores[(int)arm_tune].flags;
|
||||
|
||||
if (target_fp16_format_name)
|
||||
{
|
||||
for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
|
||||
{
|
||||
if (streq (all_fp16_formats[i].name, target_fp16_format_name))
|
||||
{
|
||||
arm_fp16_format = all_fp16_formats[i].fp16_format_type;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == ARRAY_SIZE (all_fp16_formats))
|
||||
error ("invalid __fp16 format option: -mfp16-format=%s",
|
||||
target_fp16_format_name);
|
||||
}
|
||||
else
|
||||
arm_fp16_format = ARM_FP16_FORMAT_NONE;
|
||||
|
||||
if (target_abi_name)
|
||||
{
|
||||
for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
|
||||
@ -1525,6 +1619,10 @@ arm_override_options (void)
|
||||
if (TARGET_THUMB2 && TARGET_IWMMXT)
|
||||
sorry ("Thumb-2 iWMMXt");
|
||||
|
||||
/* __fp16 support currently assumes the core has ldrh. */
|
||||
if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
|
||||
sorry ("__fp16 and no ldrh");
|
||||
|
||||
/* If soft-float is specified then don't use FPU. */
|
||||
if (TARGET_SOFT_FLOAT)
|
||||
arm_fpu_arch = FPUTYPE_NONE;
|
||||
@ -4173,6 +4271,7 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
|
||||
if (GET_MODE_SIZE (mode) <= 4
|
||||
&& ! (arm_arch4
|
||||
&& (mode == HImode
|
||||
|| mode == HFmode
|
||||
|| (mode == QImode && outer == SIGN_EXTEND))))
|
||||
{
|
||||
if (code == MULT)
|
||||
@ -4201,13 +4300,15 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
|
||||
load. */
|
||||
if (arm_arch4)
|
||||
{
|
||||
if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
|
||||
if (mode == HImode
|
||||
|| mode == HFmode
|
||||
|| (outer == SIGN_EXTEND && mode == QImode))
|
||||
range = 256;
|
||||
else
|
||||
range = 4096;
|
||||
}
|
||||
else
|
||||
range = (mode == HImode) ? 4095 : 4096;
|
||||
range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
|
||||
|
||||
return (code == CONST_INT
|
||||
&& INTVAL (index) < range
|
||||
@ -4380,7 +4481,8 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
|
||||
return 1;
|
||||
|
||||
/* This is PC relative data after arm_reorg runs. */
|
||||
else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
|
||||
else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
|
||||
&& reload_completed
|
||||
&& (GET_CODE (x) == LABEL_REF
|
||||
|| (GET_CODE (x) == CONST
|
||||
&& GET_CODE (XEXP (x, 0)) == PLUS
|
||||
@ -7121,6 +7223,13 @@ arm_eliminable_register (rtx x)
|
||||
enum reg_class
|
||||
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
|
||||
{
|
||||
if (mode == HFmode)
|
||||
{
|
||||
if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
|
||||
return NO_REGS;
|
||||
return GENERAL_REGS;
|
||||
}
|
||||
|
||||
if (TARGET_NEON
|
||||
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
||||
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
||||
@ -13926,6 +14035,31 @@ arm_print_operand (FILE *stream, rtx x, int code)
|
||||
}
|
||||
return;
|
||||
|
||||
/* Register specifier for vld1.16/vst1.16. Translate the S register
|
||||
number into a D register number and element index. */
|
||||
case 'z':
|
||||
{
|
||||
int mode = GET_MODE (x);
|
||||
int regno;
|
||||
|
||||
if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
|
||||
{
|
||||
output_operand_lossage ("invalid operand for code '%c'", code);
|
||||
return;
|
||||
}
|
||||
|
||||
regno = REGNO (x);
|
||||
if (!VFP_REGNO_OK_FOR_SINGLE (regno))
|
||||
{
|
||||
output_operand_lossage ("invalid operand for code '%c'", code);
|
||||
return;
|
||||
}
|
||||
|
||||
regno = regno - FIRST_VFP_REGNUM;
|
||||
fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
|
||||
}
|
||||
return;
|
||||
|
||||
default:
|
||||
if (x == 0)
|
||||
{
|
||||
@ -14723,6 +14857,12 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
|
||||
if (mode == DFmode)
|
||||
return VFP_REGNO_OK_FOR_DOUBLE (regno);
|
||||
|
||||
/* VFP registers can hold HFmode values, but there is no point in
|
||||
putting them there unless we have the NEON extensions for
|
||||
loading/storing them, too. */
|
||||
if (mode == HFmode)
|
||||
return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
|
||||
|
||||
if (TARGET_NEON)
|
||||
return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
|
||||
|| (VALID_NEON_QREG_MODE (mode)
|
||||
@ -16208,6 +16348,15 @@ arm_init_neon_builtins (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Build a REAL_TYPE node with 16-bit precision, lay it out, and register
   it with the language front end under the name "__fp16".  */
static void
|
||||
arm_init_fp16_builtins (void)
|
||||
{
|
||||
tree fp16_type = make_node (REAL_TYPE);
|
||||
/* The precision must be set before layout_type is called on the node.  */
TYPE_PRECISION (fp16_type) = 16;
|
||||
layout_type (fp16_type);
|
||||
(*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
|
||||
}
|
||||
|
||||
static void
|
||||
arm_init_builtins (void)
|
||||
{
|
||||
@ -16218,6 +16367,56 @@ arm_init_builtins (void)
|
||||
|
||||
if (TARGET_NEON)
|
||||
arm_init_neon_builtins ();
|
||||
|
||||
if (arm_fp16_format)
|
||||
arm_init_fp16_builtins ();
|
||||
}
|
||||
|
||||
/* Implement TARGET_INVALID_PARAMETER_TYPE. */
|
||||
|
||||
static const char *
|
||||
arm_invalid_parameter_type (const_tree t)
|
||||
{
|
||||
/* A scalar floating-point type with 16-bit precision is taken to be
   __fp16; return the diagnostic text for it.  */
if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
||||
return N_("function parameters cannot have __fp16 type");
|
||||
/* NULL for every other type.  */
return NULL;
|
||||
}
|
||||
|
||||
/* Implement TARGET_INVALID_RETURN_TYPE. */
|
||||
|
||||
static const char *
|
||||
arm_invalid_return_type (const_tree t)
|
||||
{
|
||||
/* A scalar floating-point type with 16-bit precision is taken to be
   __fp16; return the diagnostic text for it.  */
if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
||||
return N_("functions cannot return __fp16 type");
|
||||
/* NULL for every other type.  */
return NULL;
|
||||
}
|
||||
|
||||
/* Implement TARGET_PROMOTED_TYPE. */
|
||||
|
||||
static tree
|
||||
arm_promoted_type (const_tree t)
|
||||
{
|
||||
/* 16-bit scalar floating-point types are promoted to float.  */
if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
||||
return float_type_node;
|
||||
/* NULL_TREE for every other type.  */
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Implement TARGET_CONVERT_TO_TYPE.
|
||||
Specifically, this hook implements the peculiarity of the ARM
|
||||
half-precision floating-point C semantics that requires conversions between
|
||||
__fp16 and double (in either direction) to do an intermediate conversion to float. */
|
||||
|
||||
static tree
|
||||
arm_convert_to_type (tree type, tree expr)
|
||||
{
|
||||
tree fromtype = TREE_TYPE (expr);
|
||||
/* Only conversions where both the source and the destination are
   scalar floating-point types are handled here.  */
if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
|
||||
return NULL_TREE;
|
||||
/* One side has 16-bit precision and the other is wider than 32 bits:
   convert EXPR to float first, then convert that result to TYPE.  */
if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
|
||||
|| (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
|
||||
return convert (type, convert (float_type_node, expr));
|
||||
/* NULL_TREE for all remaining combinations.  */
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Errors in the source file can cause expand_expr to return const0_rtx
|
||||
@ -18413,6 +18612,10 @@ arm_file_start (void)
|
||||
fpu_name = "neon";
|
||||
set_float_abi_attributes = 1;
|
||||
break;
|
||||
case FPUTYPE_NEON_FP16:
|
||||
fpu_name = "neon-fp16";
|
||||
set_float_abi_attributes = 1;
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
@ -18466,6 +18669,11 @@ arm_file_start (void)
|
||||
val = 6;
|
||||
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
|
||||
|
||||
/* Tag_ABI_FP_16bit_format. */
|
||||
if (arm_fp16_format)
|
||||
asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
|
||||
(int)arm_fp16_format);
|
||||
|
||||
if (arm_lang_output_object_attributes_hook)
|
||||
arm_lang_output_object_attributes_hook();
|
||||
}
|
||||
@ -18695,6 +18903,23 @@ arm_emit_vector_const (FILE *file, rtx x)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
|
||||
HFmode constant pool entries are actually loaded with ldr. */
|
||||
void
|
||||
arm_emit_fp16_const (rtx c)
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
long bits;
|
||||
|
||||
REAL_VALUE_FROM_CONST_DOUBLE (r, c);
|
||||
/* Obtain the HFmode target representation of the constant.  */
bits = real_to_target (NULL, &r, HFmode);
|
||||
/* The 2-byte value is padded with 2 zero bytes: the padding is emitted
   before the value when WORDS_BIG_ENDIAN, and after it otherwise.  */
if (WORDS_BIG_ENDIAN)
|
||||
assemble_zeros (2);
|
||||
assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
|
||||
if (!WORDS_BIG_ENDIAN)
|
||||
assemble_zeros (2);
|
||||
}
|
||||
|
||||
const char *
|
||||
arm_output_load_gr (rtx *operands)
|
||||
{
|
||||
@ -19724,6 +19949,10 @@ arm_mangle_type (const_tree type)
|
||||
return "St9__va_list";
|
||||
}
|
||||
|
||||
/* Half-precision float. */
|
||||
if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
|
||||
return "Dh";
|
||||
|
||||
if (TREE_CODE (type) != VECTOR_TYPE)
|
||||
return NULL;
|
||||
|
||||
|
@ -215,20 +215,25 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
|
||||
/* FPU has the full VFPv3/NEON register file of 32 D registers. */
|
||||
#define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \
|
||||
&& (arm_fpu_arch == FPUTYPE_VFP3 \
|
||||
|| arm_fpu_arch == FPUTYPE_NEON))
|
||||
|| arm_fpu_arch == FPUTYPE_NEON \
|
||||
|| arm_fpu_arch == FPUTYPE_NEON_FP16))
|
||||
|
||||
/* FPU supports VFPv3 instructions. */
|
||||
#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \
|
||||
&& (arm_fpu_arch == FPUTYPE_VFP3D16 \
|
||||
|| TARGET_VFPD32))
|
||||
|
||||
/* FPU supports NEON/VFP half-precision floating-point. */
|
||||
#define TARGET_NEON_FP16 (arm_fpu_arch == FPUTYPE_NEON_FP16)
|
||||
|
||||
/* FPU supports Neon instructions. The setting of this macro gets
|
||||
revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT
|
||||
and TARGET_HARD_FLOAT to ensure that NEON instructions are
|
||||
available. */
|
||||
#define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \
|
||||
&& arm_fp_model == ARM_FP_MODEL_VFP \
|
||||
&& arm_fpu_arch == FPUTYPE_NEON)
|
||||
&& (arm_fpu_arch == FPUTYPE_NEON \
|
||||
|| arm_fpu_arch == FPUTYPE_NEON_FP16))
|
||||
|
||||
/* "DSP" multiply instructions, eg. SMULxy. */
|
||||
#define TARGET_DSP_MULTIPLY \
|
||||
@ -308,7 +313,9 @@ enum fputype
|
||||
/* VFPv3. */
|
||||
FPUTYPE_VFP3,
|
||||
/* Neon. */
|
||||
FPUTYPE_NEON
|
||||
FPUTYPE_NEON,
|
||||
/* Neon with half-precision float extensions. */
|
||||
FPUTYPE_NEON_FP16
|
||||
};
|
||||
|
||||
/* Recast the floating point class to be the floating point attribute. */
|
||||
@ -333,6 +340,21 @@ extern enum float_abi_type arm_float_abi;
|
||||
#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT
|
||||
#endif
|
||||
|
||||
/* Which __fp16 format to use.
|
||||
The enumeration values correspond to the numbering for the
|
||||
Tag_ABI_FP_16bit_format attribute.
|
||||
*/
|
||||
enum arm_fp16_format_type
|
||||
{
|
||||
/* Selected by -mfp16-format=none, and the value arm_override_options
   uses when the option is absent.  */
ARM_FP16_FORMAT_NONE = 0,
|
||||
/* Selected by -mfp16-format=ieee.  */
ARM_FP16_FORMAT_IEEE = 1,
|
||||
/* Selected by -mfp16-format=alternative.  */
ARM_FP16_FORMAT_ALTERNATIVE = 2
|
||||
};
|
||||
|
||||
extern enum arm_fp16_format_type arm_fp16_format;
|
||||
/* Nonzero only when BITS is 16 and the alternative __fp16 format has
   been selected.  */
#define LARGEST_EXPONENT_IS_NORMAL(bits) \
|
||||
((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
|
||||
|
||||
/* Which ABI to use. */
|
||||
enum arm_abi_type
|
||||
{
|
||||
|
@ -158,7 +158,7 @@
|
||||
; Floating Point Unit. If we only have floating point emulation, then there
|
||||
; is no point in scheduling the floating point insns. (Well, for best
|
||||
; performance we should try and group them together).
|
||||
(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon"
|
||||
(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon,neon_fp16"
|
||||
(const (symbol_ref "arm_fpu_attr")))
|
||||
|
||||
; LENGTH of an instruction (in bytes)
|
||||
@ -3734,6 +3734,34 @@
|
||||
|
||||
;; Fixed <--> Floating conversion insns
|
||||
|
||||
;; SImode integer -> HFmode.  The integer is first converted into a fresh
;; SFmode pseudo with expand_float; that value is then converted to HFmode
;; and moved into operand 0.
(define_expand "floatsihf2"
|
||||
[(set (match_operand:HF 0 "general_operand" "")
|
||||
(float:HF (match_operand:SI 1 "general_operand" "")))]
|
||||
"TARGET_EITHER"
|
||||
"
|
||||
{
|
||||
rtx op1 = gen_reg_rtx (SFmode);
|
||||
expand_float (op1, operands[1], 0);
|
||||
op1 = convert_to_mode (HFmode, op1, 0);
|
||||
emit_move_insn (operands[0], op1);
|
||||
DONE;
|
||||
}"
|
||||
)
|
||||
|
||||
;; DImode integer -> HFmode.  The integer is first converted into a fresh
;; SFmode pseudo with expand_float; that value is then converted to HFmode
;; and moved into operand 0.
(define_expand "floatdihf2"
|
||||
[(set (match_operand:HF 0 "general_operand" "")
|
||||
(float:HF (match_operand:DI 1 "general_operand" "")))]
|
||||
"TARGET_EITHER"
|
||||
"
|
||||
{
|
||||
rtx op1 = gen_reg_rtx (SFmode);
|
||||
expand_float (op1, operands[1], 0);
|
||||
op1 = convert_to_mode (HFmode, op1, 0);
|
||||
emit_move_insn (operands[0], op1);
|
||||
DONE;
|
||||
}"
|
||||
)
|
||||
|
||||
(define_expand "floatsisf2"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "")
|
||||
(float:SF (match_operand:SI 1 "s_register_operand" "")))]
|
||||
@ -3758,6 +3786,30 @@
|
||||
}
|
||||
")
|
||||
|
||||
;; HFmode -> SImode integer.  Operand 1 is converted to SFmode first and
;; the SFmode value is then handed to expand_fix.
(define_expand "fix_trunchfsi2"
|
||||
[(set (match_operand:SI 0 "general_operand" "")
|
||||
(fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))]
|
||||
"TARGET_EITHER"
|
||||
"
|
||||
{
|
||||
rtx op1 = convert_to_mode (SFmode, operands[1], 0);
|
||||
expand_fix (operands[0], op1, 0);
|
||||
DONE;
|
||||
}"
|
||||
)
|
||||
|
||||
;; HFmode -> DImode integer.  Operand 1 is converted to SFmode first and
;; the SFmode value is then handed to expand_fix.
(define_expand "fix_trunchfdi2"
|
||||
[(set (match_operand:DI 0 "general_operand" "")
|
||||
(fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))]
|
||||
"TARGET_EITHER"
|
||||
"
|
||||
{
|
||||
rtx op1 = convert_to_mode (SFmode, operands[1], 0);
|
||||
expand_fix (operands[0], op1, 0);
|
||||
DONE;
|
||||
}"
|
||||
)
|
||||
|
||||
(define_expand "fix_truncsfsi2"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||||
(fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))]
|
||||
@ -3797,6 +3849,22 @@
|
||||
"TARGET_32BIT && TARGET_HARD_FLOAT"
|
||||
""
|
||||
)
|
||||
|
||||
/* DFmode -> HFmode conversions have to go through SFmode. */
|
||||
;; Expanded as two convert_to_mode steps (DF -> SF, then SF -> HF) followed
;; by a move of the HFmode result into operand 0.
(define_expand "truncdfhf2"
|
||||
[(set (match_operand:HF 0 "general_operand" "")
|
||||
(float_truncate:HF
|
||||
(match_operand:DF 1 "general_operand" "")))]
|
||||
"TARGET_EITHER"
|
||||
"
|
||||
{
|
||||
rtx op1;
|
||||
op1 = convert_to_mode (SFmode, operands[1], 0);
|
||||
op1 = convert_to_mode (HFmode, op1, 0);
|
||||
emit_move_insn (operands[0], op1);
|
||||
DONE;
|
||||
}"
|
||||
)
|
||||
|
||||
;; Zero and sign extension instructions.
|
||||
|
||||
@ -4660,6 +4728,21 @@
|
||||
"TARGET_32BIT && TARGET_HARD_FLOAT"
|
||||
""
|
||||
)
|
||||
|
||||
/* HFmode -> DFmode conversions have to go through SFmode. */
|
||||
;; Expanded as two convert_to_mode steps (HF -> SF, then SF -> DF); the
;; DFmode result is copied into operand 0 with gen_movdf.
(define_expand "extendhfdf2"
|
||||
[(set (match_operand:DF 0 "general_operand" "")
|
||||
(float_extend:DF (match_operand:HF 1 "general_operand" "")))]
|
||||
"TARGET_EITHER"
|
||||
"
|
||||
{
|
||||
rtx op1;
|
||||
op1 = convert_to_mode (SFmode, operands[1], 0);
|
||||
op1 = convert_to_mode (DFmode, op1, 0);
|
||||
emit_insn (gen_movdf (operands[0], op1));
|
||||
DONE;
|
||||
}"
|
||||
)
|
||||
|
||||
;; Move insns (including loads and stores)
|
||||
|
||||
@ -5808,6 +5891,107 @@
|
||||
(set_attr "pool_range" "*,32,*,*,*,*")]
|
||||
)
|
||||
|
||||
;; HFmode moves
|
||||
;; Expander for HFmode moves.  With TARGET_32BIT, a store to memory has its
;; source forced into a register.  In the other branch (Thumb-1), and only
;; while new pseudos can still be created, the source is forced into a
;; register whenever the destination is not a REG.
(define_expand "movhf"
|
||||
[(set (match_operand:HF 0 "general_operand" "")
|
||||
(match_operand:HF 1 "general_operand" ""))]
|
||||
"TARGET_EITHER"
|
||||
"
|
||||
if (TARGET_32BIT)
|
||||
{
|
||||
if (GET_CODE (operands[0]) == MEM)
|
||||
operands[1] = force_reg (HFmode, operands[1]);
|
||||
}
|
||||
else /* TARGET_THUMB1 */
|
||||
{
|
||||
if (can_create_pseudo_p ())
|
||||
{
|
||||
if (GET_CODE (operands[0]) != REG)
|
||||
operands[1] = force_reg (HFmode, operands[1]);
|
||||
}
|
||||
}
|
||||
"
|
||||
)
|
||||
|
||||
;; HFmode move for TARGET_32BIT when the TARGET_HARD_FLOAT &&
;; TARGET_NEON_FP16 combination is not in effect; at least one operand must
;; be a register.  Alternatives: 0 = load from memory, 1 = store to memory,
;; 2 = register copy, 3 = load of a constant.  For the constant case the
;; HFmode bit pattern is emitted with a single movw when arm_arch_thumb2 is
;; set, otherwise as a mov of (bits & 0xff00) followed by an orr of
;; (bits & 0x00ff), which is why that alternative has length 8.
(define_insn "*arm32_movhf"
|
||||
[(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r")
|
||||
(match_operand:HF 1 "general_operand" " m,r,r,F"))]
|
||||
"TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_NEON_FP16)
|
||||
&& ( s_register_operand (operands[0], HFmode)
|
||||
|| s_register_operand (operands[1], HFmode))"
|
||||
"*
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0: /* ARM register from memory */
|
||||
return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\";
|
||||
case 1: /* memory from ARM register */
|
||||
return \"str%(h%)\\t%1, %0\\t%@ __fp16\";
|
||||
case 2: /* ARM register from ARM register */
|
||||
return \"mov%?\\t%0, %1\\t%@ __fp16\";
|
||||
case 3: /* ARM register from constant */
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
long bits;
|
||||
rtx ops[4];
|
||||
|
||||
REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
|
||||
bits = real_to_target (NULL, &r, HFmode);
|
||||
ops[0] = operands[0];
|
||||
ops[1] = GEN_INT (bits);
|
||||
ops[2] = GEN_INT (bits & 0xff00);
|
||||
ops[3] = GEN_INT (bits & 0x00ff);
|
||||
|
||||
if (arm_arch_thumb2)
|
||||
output_asm_insn (\"movw%?\\t%0, %1\", ops);
|
||||
else
|
||||
output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops);
|
||||
return \"\";
|
||||
}
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "load1,store1,*,*")
|
||||
(set_attr "length" "4,4,4,8")
|
||||
(set_attr "predicable" "yes")
|
||||
]
|
||||
)
|
||||
|
||||
;; HFmode move for TARGET_THUMB1; at least one operand must be a register.
;; Alternative 1 (load) asserts that operand 1 is a MEM, then emits a word
;; ldr when the address is a LABEL_REF, or a CONST of LABEL_REF plus
;; CONST_INT (a constant pool entry), and ldrh otherwise.  Alternative 2
;; emits strh.  Every other alternative emits mov.
(define_insn "*thumb1_movhf"
|
||||
[(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h")
|
||||
(match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))]
|
||||
"TARGET_THUMB1
|
||||
&& ( s_register_operand (operands[0], HFmode)
|
||||
|| s_register_operand (operands[1], HFmode))"
|
||||
"*
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
rtx addr;
|
||||
gcc_assert (GET_CODE(operands[1]) == MEM);
|
||||
addr = XEXP (operands[1], 0);
|
||||
if (GET_CODE (addr) == LABEL_REF
|
||||
|| (GET_CODE (addr) == CONST
|
||||
&& GET_CODE (XEXP (addr, 0)) == PLUS
|
||||
&& GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF
|
||||
&& GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT))
|
||||
{
|
||||
/* Constant pool entry. */
|
||||
return \"ldr\\t%0, %1\";
|
||||
}
|
||||
return \"ldrh\\t%0, %1\";
|
||||
}
|
||||
case 2: return \"strh\\t%1, %0\";
|
||||
default: return \"mov\\t%0, %1\";
|
||||
}
|
||||
"
|
||||
[(set_attr "length" "2")
|
||||
(set_attr "type" "*,load1,store1,*,*")
|
||||
(set_attr "pool_range" "*,1020,*,*,*")]
|
||||
)
|
||||
|
||||
(define_expand "movsf"
|
||||
[(set (match_operand:SF 0 "general_operand" "")
|
||||
(match_operand:SF 1 "general_operand" ""))]
|
||||
@ -10674,6 +10858,7 @@
|
||||
"TARGET_THUMB1"
|
||||
"*
|
||||
making_const_table = TRUE;
|
||||
gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT);
|
||||
assemble_integer (operands[0], 2, BITS_PER_WORD, 1);
|
||||
assemble_zeros (2);
|
||||
return \"\";
|
||||
@ -10686,19 +10871,23 @@
|
||||
"TARGET_EITHER"
|
||||
"*
|
||||
{
|
||||
rtx x = operands[0];
|
||||
making_const_table = TRUE;
|
||||
switch (GET_MODE_CLASS (GET_MODE (operands[0])))
|
||||
switch (GET_MODE_CLASS (GET_MODE (x)))
|
||||
{
|
||||
case MODE_FLOAT:
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]);
|
||||
assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD);
|
||||
break;
|
||||
}
|
||||
if (GET_MODE (x) == HFmode)
|
||||
arm_emit_fp16_const (x);
|
||||
else
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
|
||||
assemble_real (r, GET_MODE (x), BITS_PER_WORD);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assemble_integer (operands[0], 4, BITS_PER_WORD, 1);
|
||||
mark_symbol_refs_as_used (operands[0]);
|
||||
assemble_integer (x, 4, BITS_PER_WORD, 1);
|
||||
mark_symbol_refs_as_used (x);
|
||||
break;
|
||||
}
|
||||
return \"\";
|
||||
|
@ -78,6 +78,10 @@ Specify if floating point hardware should be used
|
||||
mfp=
|
||||
Target RejectNegative Joined Undocumented Var(target_fpe_name)
|
||||
|
||||
mfp16-format=
|
||||
Target RejectNegative Joined Var(target_fp16_format_name)
|
||||
Specify the __fp16 floating-point format
|
||||
|
||||
;; Now ignored.
|
||||
mfpe
|
||||
Target RejectNegative Mask(FPE) Undocumented
|
||||
|
@ -185,6 +185,61 @@
|
||||
(set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")]
|
||||
)
|
||||
|
||||
;; HFmode moves
|
||||
;; Move pattern for HFmode values.  It is enabled only when TARGET_32BIT,
;; TARGET_HARD_FLOAT and TARGET_NEON_FP16 all hold and at least one of the
;; two operands satisfies s_register_operand.
;;
;; The nine constraint alternatives line up, in order, with the cases of
;; the switch on which_alternative in the output code and with the
;; comma-separated entries of the "type", "neon_type" and "length"
;; attributes below, so any change to one list must be mirrored in the
;; others:
;;   0  t  <- Um   vld1.16        5  r <- r   mov
;;   1  Um <- t    vst1.16        6  t <- r   fmsr
;;   2  r  <- m    ldrh           7  r <- t   fmrs
;;   3  m  <- r    strh           8  r <- F   constant (movw, or mov + orr)
;;   4  t  <- t    fcpys
;;
;; Alternative 8 converts the CONST_DOUBLE with real_to_target (presumably
;; yielding the 16-bit target image of the value -- confirm against real.c)
;; and then either emits one movw when arm_arch_thumb2 is set, or a mov of
;; (bits & 0xff00) followed by an orr of (bits & 0x00ff).  The two-insn
;; sequence is why only this alternative has length 8.
(define_insn "*movhf_vfp"
|
||||
[(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r")
|
||||
(match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))]
|
||||
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16
|
||||
&& ( s_register_operand (operands[0], HFmode)
|
||||
|| s_register_operand (operands[1], HFmode))"
|
||||
"*
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0: /* S register from memory */
|
||||
return \"vld1.16\\t{%z0}, %A1\";
|
||||
case 1: /* memory from S register */
|
||||
return \"vst1.16\\t{%z1}, %A0\";
|
||||
case 2: /* ARM register from memory */
|
||||
return \"ldrh\\t%0, %1\\t%@ __fp16\";
|
||||
case 3: /* memory from ARM register */
|
||||
return \"strh\\t%1, %0\\t%@ __fp16\";
|
||||
case 4: /* S register from S register */
|
||||
return \"fcpys\\t%0, %1\";
|
||||
case 5: /* ARM register from ARM register */
|
||||
return \"mov\\t%0, %1\\t%@ __fp16\";
|
||||
case 6: /* S register from ARM register */
|
||||
return \"fmsr\\t%0, %1\";
|
||||
case 7: /* ARM register from S register */
|
||||
return \"fmrs\\t%0, %1\";
|
||||
case 8: /* ARM register from constant */
|
||||
{
|
||||
REAL_VALUE_TYPE r;
|
||||
long bits;
|
||||
rtx ops[4];
|
||||
|
||||
REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
|
||||
bits = real_to_target (NULL, &r, HFmode);
|
||||
ops[0] = operands[0];
|
||||
ops[1] = GEN_INT (bits);
|
||||
ops[2] = GEN_INT (bits & 0xff00);
|
||||
ops[3] = GEN_INT (bits & 0x00ff);
|
||||
|
||||
if (arm_arch_thumb2)
|
||||
output_asm_insn (\"movw\\t%0, %1\", ops);
|
||||
else
|
||||
output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops);
|
||||
return \"\";
|
||||
}
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
"
|
||||
[(set_attr "conds" "unconditional")
|
||||
(set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*")
|
||||
(set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*")
|
||||
(set_attr "length" "4,4,4,4,4,4,4,4,8")]
|
||||
)
|
||||
|
||||
|
||||
;; SFmode moves
|
||||
;; Disparage the w<->r cases because reloading an invalid address is
|
||||
@ -736,6 +791,24 @@
|
||||
(set_attr "type" "f_cvt")]
|
||||
)
|
||||
|
||||
;; Widen an HFmode value to SFmode (float_extend).  Both operands must
;; satisfy s_register_operand and use the "t" constraint.  The pattern is
;; available only when TARGET_32BIT, TARGET_HARD_FLOAT and
;; TARGET_NEON_FP16 all hold, and emits a single vcvtb.f32.f16.
;; The insn is marked predicable; the %? in the template is presumably
;; where the condition suffix is printed -- confirm against
;; arm_print_operand.
(define_insn "extendhfsf2"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "=t")
|
||||
(float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))]
|
||||
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16"
|
||||
"vcvtb%?.f32.f16\\t%0, %1"
|
||||
[(set_attr "predicable" "yes")
|
||||
(set_attr "type" "f_cvt")]
|
||||
)
|
||||
|
||||
;; Narrow an SFmode value to HFmode (float_truncate).  Both operands must
;; satisfy s_register_operand and use the "t" constraint.  The pattern is
;; available only when TARGET_32BIT, TARGET_HARD_FLOAT and
;; TARGET_NEON_FP16 all hold, and emits a single vcvtb.f16.f32.
;; The insn is marked predicable; the %? in the template is presumably
;; where the condition suffix is printed -- confirm against
;; arm_print_operand.
(define_insn "truncsfhf2"
|
||||
[(set (match_operand:HF 0 "s_register_operand" "=t")
|
||||
(float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))]
|
||||
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16"
|
||||
"vcvtb%?.f16.f32\\t%0, %1"
|
||||
[(set_attr "predicable" "yes")
|
||||
(set_attr "type" "f_cvt")]
|
||||
)
|
||||
|
||||
(define_insn "*truncsisf2_vfp"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "=t")
|
||||
(fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
|
||||
|
@ -35,6 +35,7 @@ extensions, accepted by GCC in C89 mode and in C++.
|
||||
* Long Long:: Double-word integers---@code{long long int}.
|
||||
* Complex:: Data types for complex numbers.
|
||||
* Floating Types:: Additional Floating Types.
|
||||
* Half-Precision:: Half-Precision Floating Point.
|
||||
* Decimal Float:: Decimal Floating Types.
|
||||
* Hex Floats:: Hexadecimal floating-point constants.
|
||||
* Fixed-Point:: Fixed-Point Types.
|
||||
@ -921,6 +922,55 @@ Not all targets support additional floating point types. @code{__float80}
|
||||
is supported on i386, x86_64 and ia64 targets, and @code{__float128}
|
||||
is supported on x86_64 and ia64 targets.
|
||||
|
||||
@node Half-Precision
|
||||
@section Half-Precision Floating Point
|
||||
@cindex half-precision floating point
|
||||
@cindex @code{__fp16} data type
|
||||
|
||||
On ARM targets, GCC supports half-precision (16-bit) floating point via
|
||||
the @code{__fp16} type. You must enable this type explicitly
|
||||
with the @option{-mfp16-format} command-line option in order to use it.
|
||||
|
||||
ARM supports two incompatible representations for half-precision
|
||||
floating-point values. You must choose one of the representations and
|
||||
use it consistently in your program.
|
||||
|
||||
Specifying @option{-mfp16-format=ieee} selects the IEEE 754-2008 format.
|
||||
This format can represent normalized values in the range of @math{2^{-14}} to 65504.
|
||||
There are 11 bits of significand precision, approximately 3
|
||||
decimal digits.
|
||||
|
||||
Specifying @option{-mfp16-format=alternative} selects the ARM
|
||||
alternative format. This representation is similar to the IEEE
|
||||
format, but does not support infinities or NaNs. Instead, the range
|
||||
of exponents is extended, so that this format can represent normalized
|
||||
values in the range of @math{2^{-14}} to 131008.
|
||||
|
||||
The @code{__fp16} type is a storage format only. For purposes
|
||||
of arithmetic and other operations, @code{__fp16} values in C or C++
|
||||
expressions are automatically promoted to @code{float}. In addition,
|
||||
you cannot declare a function with a return value or parameters
|
||||
of type @code{__fp16}.
|
||||
|
||||
Note that conversions from @code{double} to @code{__fp16}
|
||||
involve an intermediate conversion to @code{float}. Because
|
||||
of rounding, this can sometimes produce a different result than a
|
||||
direct conversion.
|
||||
|
||||
ARM provides hardware support for conversions between
|
||||
@code{__fp16} and @code{float} values
|
||||
as an extension to VFP and NEON (Advanced SIMD). GCC generates
|
||||
code using the instructions provided by this extension if you compile
|
||||
with the options @option{-mfpu=neon-fp16 -mfloat-abi=softfp},
|
||||
in addition to the @option{-mfp16-format} option to select
|
||||
a half-precision format.
|
||||
|
||||
Language-level support for the @code{__fp16} data type is
|
||||
independent of whether GCC generates code using hardware floating-point
|
||||
instructions. In cases where hardware support is not specified, GCC
|
||||
implements conversions between @code{__fp16} and @code{float} values
|
||||
as library calls.
|
||||
|
||||
@node Decimal Float
|
||||
@section Decimal Floating Types
|
||||
@cindex decimal floating types
|
||||
|
@ -443,6 +443,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-msched-prolog -mno-sched-prolog @gol
|
||||
-mlittle-endian -mbig-endian -mwords-little-endian @gol
|
||||
-mfloat-abi=@var{name} -msoft-float -mhard-float -mfpe @gol
|
||||
-mfp16-format=@var{name} @gol
|
||||
-mthumb-interwork -mno-thumb-interwork @gol
|
||||
-mcpu=@var{name} -march=@var{name} -mfpu=@var{name} @gol
|
||||
-mstructure-size-boundary=@var{n} @gol
|
||||
@ -9301,14 +9302,21 @@ of the @option{-mcpu=} option. Permissible names are: @samp{armv2},
|
||||
@opindex mfp
|
||||
This specifies what floating point hardware (or hardware emulation) is
|
||||
available on the target. Permissible names are: @samp{fpa}, @samp{fpe2},
|
||||
@samp{fpe3}, @samp{maverick}, @samp{vfp}, @samp{vfpv3}, @samp{vfpv3-d16} and
|
||||
@samp{neon}. @option{-mfp} and @option{-mfpe}
|
||||
@samp{fpe3}, @samp{maverick}, @samp{vfp}, @samp{vfpv3}, @samp{vfpv3-d16},
|
||||
@samp{neon}, and @samp{neon-fp16}. @option{-mfp} and @option{-mfpe}
|
||||
are synonyms for @option{-mfpu}=@samp{fpe}@var{number}, for compatibility
|
||||
with older versions of GCC@.
|
||||
|
||||
If @option{-msoft-float} is specified, this option specifies the format of
|
||||
floating point values.
|
||||
|
||||
@item -mfp16-format=@var{name}
|
||||
@opindex mfp16-format
|
||||
Specify the format of the @code{__fp16} half-precision floating-point type.
|
||||
Permissible names are @samp{none}, @samp{ieee}, and @samp{alternative};
|
||||
the default is @samp{none}, in which case the @code{__fp16} type is not
|
||||
defined. @xref{Half-Precision}, for more information.
|
||||
|
||||
@item -mstructure-size-boundary=@var{n}
|
||||
@opindex mstructure-size-boundary
|
||||
The size of all structures and unions will be rounded up to a multiple
|
||||
|
Loading…
x
Reference in New Issue
Block a user