mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-09 17:25:13 +08:00
extend.texi: Document new builtin functions for Intel Prescott New Instructions.
2003-06-25 H.J. Lu <hongjiu.lu@intel.com> * doc/extend.texi: Document new builtin functions for Intel Prescott New Instructions. * doc/invoke.texi: Document new command-line options, -mpni and -mno-pni, for Intel Prescott New Instructions. * config.gcc (extra_headers): Add pmmintrin.h for i[34567]86-*-*. * config/i386/i386.c (override_options): Turn on MASK_SSE2 for -mpni. Turn on MASK_SSE for -msse2. (bdesc_2arg): Add PNI builtins with 2 args. (bdesc_1arg): Add PNI builtins with 1 arg. (ix86_init_mmx_sse_builtins): Handle PNI builtins. (ix86_expand_builtin): Likewise. * config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A, MASK_128BIT_LONG_DOUBLE, MASK_64BIT, MASK_MS_BITFIELD_LAYOUT, MASK_TLS_DIRECT_SEG_REFS): Renumbered. (TARGET_PNI): New. (TARGET_SWITCHES): Don't enable MASK_SSE for -msse2 here. Add -mpni and -mno-pni. (TARGET_CPU_CPP_BUILTINS): Define __PNI__ for PNI. (ix86_builtins): Add PNI builtins. (config/i386/i386.md): Add PNI patterns. * config/i386/pmmintrin.h: New file. From-SVN: r68502
This commit is contained in:
parent
7f8b3eb0cc
commit
22c7c85ebc
@ -1,3 +1,32 @@
|
||||
2003-06-25 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* doc/extend.texi: Document new builtin functions for Intel
|
||||
Prescott New Instructions.
|
||||
|
||||
* doc/invoke.texi: Document new command-line options, -mpni and
|
||||
-mno-pni, for Intel Prescott New Instructions.
|
||||
|
||||
* config.gcc (extra_headers): Add pmmintrin.h for i[34567]86-*-*.
|
||||
|
||||
* config/i386/i386.c (override_options): Turn on MASK_SSE2
|
||||
for -mpni. Turn on MASK_SSE for -msse2.
|
||||
(bdesc_2arg): Add PNI builtins with 2 args.
|
||||
(bdesc_1arg): Add PNI builtins with 1 arg.
|
||||
(ix86_init_mmx_sse_builtins): Handle PNI builtins.
|
||||
(ix86_expand_builtin): Likewise.
|
||||
|
||||
* config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A,
|
||||
MASK_128BIT_LONG_DOUBLE, MASK_64BIT, MASK_MS_BITFIELD_LAYOUT,
|
||||
MASK_TLS_DIRECT_SEG_REFS): Renumbered.
|
||||
(TARGET_PNI): New.
|
||||
(TARGET_SWITCHES): Don't enable MASK_SSE for -msse2 here. Add
|
||||
-mpni and -mno-pni.
|
||||
(TARGET_CPU_CPP_BUILTINS): Define __PNI__ for PNI.
|
||||
(ix86_builtins): Add PNI builtins.
|
||||
(config/i386/i386.md): Add PNI patterns.
|
||||
|
||||
* config/i386/pmmintrin.h: New file.
|
||||
|
||||
2003-06-25 Kazu Hirata <kazu@cs.umass.edu>
|
||||
|
||||
* config/h8300/h8300.md (call): Fix the insn lengths.
|
||||
|
@ -1373,6 +1373,14 @@ override_options ()
|
||||
if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
|
||||
target_flags &= ~MASK_NO_FANCY_MATH_387;
|
||||
|
||||
/* Turn on SSE2 builtins for -mpni. */
|
||||
if (TARGET_PNI)
|
||||
target_flags |= MASK_SSE2;
|
||||
|
||||
/* Turn on SSE builtins for -msse2. */
|
||||
if (TARGET_SSE2)
|
||||
target_flags |= MASK_SSE;
|
||||
|
||||
if (TARGET_64BIT)
|
||||
{
|
||||
if (TARGET_ALIGN_DOUBLE)
|
||||
@ -13058,7 +13066,15 @@ static const struct builtin_description bdesc_2arg[] =
|
||||
{ MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
|
||||
{ MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
|
||||
{ MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
|
||||
{ MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
|
||||
{ MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
|
||||
|
||||
/* PNI */
|
||||
{ MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
|
||||
{ MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
|
||||
{ MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
|
||||
{ MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
|
||||
{ MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
|
||||
{ MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
|
||||
};
|
||||
|
||||
static const struct builtin_description bdesc_1arg[] =
|
||||
@ -13104,7 +13120,12 @@ static const struct builtin_description bdesc_1arg[] =
|
||||
{ MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
|
||||
{ MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
|
||||
|
||||
{ MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
|
||||
{ MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
|
||||
|
||||
/* PNI */
|
||||
{ MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
|
||||
{ MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
|
||||
{ MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
|
||||
};
|
||||
|
||||
void
|
||||
@ -13195,6 +13216,13 @@ ix86_init_mmx_sse_builtins ()
|
||||
= build_function_type (void_type_node, void_list_node);
|
||||
tree void_ftype_unsigned
|
||||
= build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
|
||||
tree void_ftype_unsigned_unsigned
|
||||
= build_function_type_list (void_type_node, unsigned_type_node,
|
||||
unsigned_type_node, NULL_TREE);
|
||||
tree void_ftype_pcvoid_unsigned_unsigned
|
||||
= build_function_type_list (void_type_node, const_ptr_type_node,
|
||||
unsigned_type_node, unsigned_type_node,
|
||||
NULL_TREE);
|
||||
tree unsigned_ftype_void
|
||||
= build_function_type (unsigned_type_node, void_list_node);
|
||||
tree di_ftype_void
|
||||
@ -13700,6 +13728,26 @@ ix86_init_mmx_sse_builtins ()
|
||||
def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
|
||||
|
||||
def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
|
||||
|
||||
/* Prescott New Instructions. */
|
||||
def_builtin (MASK_PNI, "__builtin_ia32_monitor",
|
||||
void_ftype_pcvoid_unsigned_unsigned,
|
||||
IX86_BUILTIN_MONITOR);
|
||||
def_builtin (MASK_PNI, "__builtin_ia32_mwait",
|
||||
void_ftype_unsigned_unsigned,
|
||||
IX86_BUILTIN_MWAIT);
|
||||
def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
|
||||
v4sf_ftype_v4sf,
|
||||
IX86_BUILTIN_MOVSHDUP);
|
||||
def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
|
||||
v4sf_ftype_v4sf,
|
||||
IX86_BUILTIN_MOVSLDUP);
|
||||
def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
|
||||
v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
|
||||
def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
|
||||
v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
|
||||
def_builtin (MASK_PNI, "__builtin_ia32_movddup",
|
||||
v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
|
||||
}
|
||||
|
||||
/* Errors in the source file can cause expand_expr to return const0_rtx
|
||||
@ -14509,6 +14557,41 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
|
||||
case IX86_BUILTIN_STORED:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
|
||||
|
||||
case IX86_BUILTIN_MONITOR:
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
|
||||
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
|
||||
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
|
||||
op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
|
||||
if (!REG_P (op0))
|
||||
op0 = copy_to_mode_reg (SImode, op0);
|
||||
if (!REG_P (op1))
|
||||
op1 = copy_to_mode_reg (SImode, op1);
|
||||
if (!REG_P (op2))
|
||||
op2 = copy_to_mode_reg (SImode, op2);
|
||||
emit_insn (gen_monitor (op0, op1, op2));
|
||||
return 0;
|
||||
|
||||
case IX86_BUILTIN_MWAIT:
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
|
||||
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
|
||||
if (!REG_P (op0))
|
||||
op0 = copy_to_mode_reg (SImode, op0);
|
||||
if (!REG_P (op1))
|
||||
op1 = copy_to_mode_reg (SImode, op1);
|
||||
emit_insn (gen_mwait (op0, op1));
|
||||
return 0;
|
||||
|
||||
case IX86_BUILTIN_LOADDDUP:
|
||||
return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
|
||||
|
||||
case IX86_BUILTIN_LDDQU:
|
||||
return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
|
||||
1);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -121,12 +121,13 @@ extern int target_flags;
|
||||
#define MASK_MMX 0x00002000 /* Support MMX regs/builtins */
|
||||
#define MASK_SSE 0x00004000 /* Support SSE regs/builtins */
|
||||
#define MASK_SSE2 0x00008000 /* Support SSE2 regs/builtins */
|
||||
#define MASK_3DNOW 0x00010000 /* Support 3Dnow builtins */
|
||||
#define MASK_3DNOW_A 0x00020000 /* Support Athlon 3Dnow builtins */
|
||||
#define MASK_128BIT_LONG_DOUBLE 0x00040000 /* long double size is 128bit */
|
||||
#define MASK_64BIT 0x00080000 /* Produce 64bit code */
|
||||
#define MASK_MS_BITFIELD_LAYOUT 0x00100000 /* Use native (MS) bitfield layout */
|
||||
#define MASK_TLS_DIRECT_SEG_REFS 0x00200000 /* Avoid adding %gs:0 */
|
||||
#define MASK_PNI 0x00010000 /* Support PNI regs/builtins */
|
||||
#define MASK_3DNOW 0x00020000 /* Support 3Dnow builtins */
|
||||
#define MASK_3DNOW_A 0x00040000 /* Support Athlon 3Dnow builtins */
|
||||
#define MASK_128BIT_LONG_DOUBLE 0x00080000 /* long double size is 128bit */
|
||||
#define MASK_64BIT 0x00100000 /* Produce 64bit code */
|
||||
#define MASK_MS_BITFIELD_LAYOUT 0x00200000 /* Use native (MS) bitfield layout */
|
||||
#define MASK_TLS_DIRECT_SEG_REFS 0x00400000 /* Avoid adding %gs:0 */
|
||||
|
||||
/* Unused: 0x03e0000 */
|
||||
|
||||
@ -302,6 +303,7 @@ extern int x86_prefetch_sse;
|
||||
|
||||
#define TARGET_SSE ((target_flags & MASK_SSE) != 0)
|
||||
#define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0)
|
||||
#define TARGET_PNI ((target_flags & MASK_PNI) != 0)
|
||||
#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
|
||||
#define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \
|
||||
&& (ix86_fpmath & FPMATH_387))
|
||||
@ -393,10 +395,14 @@ extern int x86_prefetch_sse;
|
||||
N_("Support MMX and SSE built-in functions and code generation") }, \
|
||||
{ "no-sse", -MASK_SSE, \
|
||||
N_("Do not support MMX and SSE built-in functions and code generation") },\
|
||||
{ "sse2", (MASK_SSE2|MASK_SSE), \
|
||||
{ "sse2", MASK_SSE2, \
|
||||
N_("Support MMX, SSE and SSE2 built-in functions and code generation") }, \
|
||||
{ "no-sse2", -MASK_SSE2, \
|
||||
N_("Do not support MMX, SSE and SSE2 built-in functions and code generation") }, \
|
||||
{ "pni", MASK_PNI, \
|
||||
N_("Support MMX, SSE, SSE2 and PNI built-in functions and code generation") },\
|
||||
{ "no-pni", -MASK_PNI, \
|
||||
N_("Do not support MMX, SSE, SSE2 and PNI built-in functions and code generation") },\
|
||||
{ "128bit-long-double", MASK_128BIT_LONG_DOUBLE, \
|
||||
N_("sizeof(long double) is 16") }, \
|
||||
{ "96bit-long-double", -MASK_128BIT_LONG_DOUBLE, \
|
||||
@ -611,6 +617,8 @@ extern int x86_prefetch_sse;
|
||||
builtin_define ("__SSE__"); \
|
||||
if (TARGET_SSE2) \
|
||||
builtin_define ("__SSE2__"); \
|
||||
if (TARGET_PNI) \
|
||||
builtin_define ("__PNI__"); \
|
||||
if (TARGET_SSE_MATH && TARGET_SSE) \
|
||||
builtin_define ("__SSE_MATH__"); \
|
||||
if (TARGET_SSE_MATH && TARGET_SSE2) \
|
||||
@ -2516,6 +2524,22 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_MFENCE,
|
||||
IX86_BUILTIN_LFENCE,
|
||||
|
||||
/* Prescott New Instructions. */
|
||||
IX86_BUILTIN_ADDSUBPS,
|
||||
IX86_BUILTIN_HADDPS,
|
||||
IX86_BUILTIN_HSUBPS,
|
||||
IX86_BUILTIN_MOVSHDUP,
|
||||
IX86_BUILTIN_MOVSLDUP,
|
||||
IX86_BUILTIN_ADDSUBPD,
|
||||
IX86_BUILTIN_HADDPD,
|
||||
IX86_BUILTIN_HSUBPD,
|
||||
IX86_BUILTIN_LOADDDUP,
|
||||
IX86_BUILTIN_MOVDDUP,
|
||||
IX86_BUILTIN_LDDQU,
|
||||
|
||||
IX86_BUILTIN_MONITOR,
|
||||
IX86_BUILTIN_MWAIT,
|
||||
|
||||
IX86_BUILTIN_MAX
|
||||
};
|
||||
|
||||
|
@ -109,6 +109,13 @@
|
||||
(UNSPEC_MFENCE 59)
|
||||
(UNSPEC_LFENCE 60)
|
||||
(UNSPEC_PSADBW 61)
|
||||
(UNSPEC_ADDSUB 71)
|
||||
(UNSPEC_HADD 72)
|
||||
(UNSPEC_HSUB 73)
|
||||
(UNSPEC_MOVSHDUP 74)
|
||||
(UNSPEC_MOVSLDUP 75)
|
||||
(UNSPEC_LDQQU 76)
|
||||
(UNSPEC_MOVDDUP 77)
|
||||
|
||||
; x87 Floating point
|
||||
(UNSPEC_FPATAN 65)
|
||||
@ -130,6 +137,8 @@
|
||||
(UNSPECV_FEMMS 46)
|
||||
(UNSPECV_CLFLUSH 57)
|
||||
(UNSPECV_ALIGN 68)
|
||||
(UNSPECV_MONITOR 69)
|
||||
(UNSPECV_MWAIT 70)
|
||||
])
|
||||
|
||||
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
|
||||
@ -23512,3 +23521,129 @@
|
||||
"lfence"
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "memory" "unknown")])
|
||||
|
||||
;; PNI
|
||||
|
||||
(define_insn "mwait"
|
||||
[(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
|
||||
(match_operand:SI 1 "register_operand" "c")]
|
||||
UNSPECV_MWAIT)]
|
||||
"TARGET_PNI"
|
||||
"mwait\t%0, %1"
|
||||
[(set_attr "length" "3")])
|
||||
|
||||
(define_insn "monitor"
|
||||
[(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
|
||||
(match_operand:SI 1 "register_operand" "c")
|
||||
(match_operand:SI 2 "register_operand" "d")]
|
||||
UNSPECV_MONITOR)]
|
||||
"TARGET_PNI"
|
||||
"monitor\t%0, %1, %2"
|
||||
[(set_attr "length" "3")])
|
||||
|
||||
;; PNI arithmetic
|
||||
|
||||
(define_insn "addsubv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm")]
|
||||
UNSPEC_ADDSUB))]
|
||||
"TARGET_PNI"
|
||||
"addsubps\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseadd")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
||||
(define_insn "addsubv2df3"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
|
||||
(match_operand:V2DF 2 "nonimmediate_operand" "xm")]
|
||||
UNSPEC_ADDSUB))]
|
||||
"TARGET_PNI"
|
||||
"addsubpd\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseadd")
|
||||
(set_attr "mode" "V2DF")])
|
||||
|
||||
(define_insn "haddv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm")]
|
||||
UNSPEC_HADD))]
|
||||
"TARGET_PNI"
|
||||
"haddps\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseadd")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
||||
(define_insn "haddv2df3"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
|
||||
(match_operand:V2DF 2 "nonimmediate_operand" "xm")]
|
||||
UNSPEC_HADD))]
|
||||
"TARGET_PNI"
|
||||
"haddpd\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseadd")
|
||||
(set_attr "mode" "V2DF")])
|
||||
|
||||
(define_insn "hsubv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm")]
|
||||
UNSPEC_HSUB))]
|
||||
"TARGET_PNI"
|
||||
"hsubps\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseadd")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
||||
(define_insn "hsubv2df3"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
|
||||
(match_operand:V2DF 2 "nonimmediate_operand" "xm")]
|
||||
UNSPEC_HSUB))]
|
||||
"TARGET_PNI"
|
||||
"hsubpd\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseadd")
|
||||
(set_attr "mode" "V2DF")])
|
||||
|
||||
(define_insn "movshdup"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))]
|
||||
"TARGET_PNI"
|
||||
"movshdup\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
||||
(define_insn "movsldup"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))]
|
||||
"TARGET_PNI"
|
||||
"movsldup\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
||||
(define_insn "lddqu"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=x")
|
||||
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
|
||||
UNSPEC_LDQQU))]
|
||||
"TARGET_PNI"
|
||||
"lddqu\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "loadddup"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
(vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))]
|
||||
"TARGET_PNI"
|
||||
"movddup\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "DF")])
|
||||
|
||||
(define_insn "movddup"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
(vec_duplicate:V2DF
|
||||
(vec_select:DF (match_operand:V2DF 1 "register_operand" "x")
|
||||
(parallel [(const_int 0)]))))]
|
||||
"TARGET_PNI"
|
||||
"movddup\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "DF")])
|
||||
|
132
gcc/config/i386/pmmintrin.h
Normal file
132
gcc/config/i386/pmmintrin.h
Normal file
@ -0,0 +1,132 @@
|
||||
/* Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU CC.
|
||||
|
||||
GNU CC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU CC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU CC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* As a special exception, if you include this header file into source
|
||||
files compiled by GCC, this header file does not by itself cause
|
||||
the resulting executable to be covered by the GNU General Public
|
||||
License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General
|
||||
Public License. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 8.0. */
|
||||
|
||||
#ifndef _PMMINTRIN_H_INCLUDED
|
||||
#define _PMMINTRIN_H_INCLUDED
|
||||
|
||||
#ifdef __PNI__
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
/* Additional bits in the MXCSR. */
|
||||
#define _MM_DENORMALS_ZERO_MASK 0x0040
|
||||
#define _MM_DENORMALS_ZERO_ON 0x0040
|
||||
#define _MM_DENORMALS_ZERO_OFF 0x0000
|
||||
|
||||
#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
|
||||
_mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
|
||||
#define _MM_GET_DENORMALS_ZERO_MODE() \
|
||||
(_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
|
||||
|
||||
static __inline __m128
|
||||
_mm_addsub_ps (__m128 __X, __m128 __Y)
|
||||
{
|
||||
return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128
|
||||
_mm_hadd_ps (__m128 __X, __m128 __Y)
|
||||
{
|
||||
return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128
|
||||
_mm_hsub_ps (__m128 __X, __m128 __Y)
|
||||
{
|
||||
return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128
|
||||
_mm_movehdup_ps (__m128 __X)
|
||||
{
|
||||
return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
|
||||
}
|
||||
|
||||
static __inline __m128
|
||||
_mm_moveldup_ps (__m128 __X)
|
||||
{
|
||||
return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
|
||||
}
|
||||
|
||||
static __inline __m128d
|
||||
_mm_addsub_pd (__m128d __X, __m128d __Y)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128d
|
||||
_mm_hadd_pd (__m128d __X, __m128d __Y)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128d
|
||||
_mm_hsub_pd (__m128d __X, __m128d __Y)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128d
|
||||
_mm_loaddup_pd (double const *__P)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_loadddup (__P);
|
||||
}
|
||||
|
||||
static __inline __m128d
|
||||
_mm_movedup_pd (__m128d __X)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_movddup ((__v2df)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i
|
||||
_mm_lddqu_si128 (__m128i const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static __inline void
|
||||
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
|
||||
{
|
||||
__builtin_ia32_monitor (__P, __E, __H);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
_mm_mwait (unsigned int __E, unsigned int __H)
|
||||
{
|
||||
__builtin_ia32_mwait (__E, __H);
|
||||
}
|
||||
#else
|
||||
#define _mm_monitor(P, E, H) __builtin_ia32_monitor ((P), (E), (H))
|
||||
#define _mm_mwait(E, H) __builtin_ia32_mwait ((E), (H))
|
||||
#endif
|
||||
|
||||
#endif /* __PNI__ */
|
||||
|
||||
#endif /* _PMMINTRIN_H_INCLUDED */
|
@ -5592,6 +5592,31 @@ Generates the @code{movhps} machine instruction as a store to memory.
|
||||
Generates the @code{movlps} machine instruction as a store to memory.
|
||||
@end table
|
||||
|
||||
The following built-in functions are available when @option{-mpni} is used.
|
||||
All of them generate the machine instruction that is part of the name.
|
||||
|
||||
@example
|
||||
v2df __builtin_ia32_addsubpd (v2df, v2df)
|
||||
v4sf __builtin_ia32_addsubps (v4sf, v4sf)
|
||||
v2df __builtin_ia32_haddpd (v2df, v2df)
|
||||
v4sf __builtin_ia32_haddps (v4sf, v4sf)
|
||||
v2df __builtin_ia32_hsubpd (v2df, v2df)
|
||||
v4sf __builtin_ia32_hsubps (v4sf, v4sf)
|
||||
v16qi __builtin_ia32_lddqu (char const *)
|
||||
void __builtin_ia32_monitor (void const *, unsigned int, unsigned int)
|
||||
v2df __builtin_ia32_movddup (v2df)
|
||||
v4sf __builtin_ia32_movshdup (v4sf)
|
||||
v4sf __builtin_ia32_movsldup (v4sf)
|
||||
void __builtin_ia32_mwait (unsigned int, unsigned int)
|
||||
@end example
|
||||
|
||||
The following built-in functions are available when @option{-mpni} is used.
|
||||
|
||||
@table @code
|
||||
@item v2df __builtin_ia32_loadddup (double const *)
|
||||
Generates the @code{movddup} machine instruction as a load from memory.
|
||||
@end table
|
||||
|
||||
The following built-in functions are available when @option{-m3dnow} is used.
|
||||
All of them generate the machine instruction that is part of the name.
|
||||
|
||||
|
@ -487,7 +487,7 @@ in the following sections.
|
||||
-mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol
|
||||
-mno-wide-multiply -mrtd -malign-double @gol
|
||||
-mpreferred-stack-boundary=@var{num} @gol
|
||||
-mmmx -msse -msse2 -m3dnow @gol
|
||||
-mmmx -msse -msse2 -mpni -m3dnow @gol
|
||||
-mthreads -mno-align-stringops -minline-all-stringops @gol
|
||||
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
|
||||
-m96bit-long-double -mregparm=@var{num} -momit-leaf-frame-pointer @gol
|
||||
@ -8217,6 +8217,13 @@ code that expects temporaries to be 80bit.
|
||||
|
||||
This is the default choice for x86-64 compiler.
|
||||
|
||||
@item pni
|
||||
Use all SSE extensions enabled by @option{-msse2} as well as the new
|
||||
SSE extensions in Prescott New Instructions. @option{-mpni} also
|
||||
enables 2 builtin functions, @code{__builtin_ia32_monitor} and
|
||||
@code{__builtin_ia32_mwait}, for new instructions @code{monitor} and
|
||||
@code{mwait}.
|
||||
|
||||
@item sse,387
|
||||
Attempt to utilize both instruction sets at once. This effectively doubles the
|
||||
amount of available registers and on chips with separate execution units for
|
||||
@ -8398,6 +8405,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@itemx -mno-sse
|
||||
@item -msse2
|
||||
@itemx -mno-sse2
|
||||
@item -mpni
|
||||
@itemx -mno-pni
|
||||
@item -m3dnow
|
||||
@itemx -mno-3dnow
|
||||
@opindex mmmx
|
||||
|
Loading…
Reference in New Issue
Block a user