extend.texi: Document new builtin functions for Intel Prescott New Intrunctions.

2003-06-25  H.J. Lu <hongjiu.lu@intel.com>

	* doc/extend.texi: Document new builtin functions for Intel
	Prescott New Intrunctions.

	* doc/invoke.texi: Document new command-line options, -mpni and
	-mno-pni, for Intel Prescott New Intrunctions.

	* config.gcc (extra_headers): Add pmmintrin.h for i[34567]86-*-*.

	* config/i386/i386.c (override_options): Turn on MASK_SSE2
	for -mpni. Turn on MASK_SSE for -msse2.
	(bdesc_2arg): Add PNI builtins with 2 args.
	(bdesc_1arg): Add PNI builtins with 1 arg.
	(ix86_init_mmx_sse_builtins): Handle PNI builtins.
	(ix86_expand_builtin): Likewise.

	* config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A,
	MASK_128BIT_LONG_DOUBLE, MASK_64BIT, MASK_MS_BITFIELD_LAYOUT,
	MASK_TLS_DIRECT_SEG_REFS): Renumbered.
	(TARGET_PNI): New.
	(TARGET_SWITCHES): Don't enable MASK_SSE for -msse2 here. Add
	-mpni and -mno-pni.
	(TARGET_CPU_CPP_BUILTINS): Defined __PNI__ for PNI.
	(ix86_builtins): Add PNI builtins.
	(config/i386/i386.md): Add PNI patterns.

	* config/i386/pmmintrin.h: New file.

From-SVN: r68502
This commit is contained in:
H.J. Lu 2003-06-25 22:28:27 +00:00 committed by H.J. Lu
parent 7f8b3eb0cc
commit 22c7c85ebc
7 changed files with 447 additions and 10 deletions

View File

@ -1,3 +1,32 @@
2003-06-25 H.J. Lu <hongjiu.lu@intel.com>
* doc/extend.texi: Document new builtin functions for Intel
Prescott New Intrunctions.
* doc/invoke.texi: Document new command-line options, -mpni and
-mno-pni, for Intel Prescott New Intrunctions.
* config.gcc (extra_headers): Add pmmintrin.h for i[34567]86-*-*.
* config/i386/i386.c (override_options): Turn on MASK_SSE2
for -mpni. Turn on MASK_SSE for -msse2.
(bdesc_2arg): Add PNI builtins with 2 args.
(bdesc_1arg): Add PNI builtins with 1 arg.
(ix86_init_mmx_sse_builtins): Handle PNI builtins.
(ix86_expand_builtin): Likewise.
* config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A,
MASK_128BIT_LONG_DOUBLE, MASK_64BIT, MASK_MS_BITFIELD_LAYOUT,
MASK_TLS_DIRECT_SEG_REFS): Renumbered.
(TARGET_PNI): New.
(TARGET_SWITCHES): Don't enable MASK_SSE for -msse2 here. Add
-mpni and -mno-pni.
(TARGET_CPU_CPP_BUILTINS): Defined __PNI__ for PNI.
(ix86_builtins): Add PNI builtins.
(config/i386/i386.md): Add PNI patterns.
* config/i386/pmmintrin.h: New file.
2003-06-25 Kazu Hirata <kazu@cs.umass.edu>
* config/h8300/h8300.md (call): Fix the insn lengths.

View File

@ -1373,6 +1373,14 @@ override_options ()
if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
target_flags &= ~MASK_NO_FANCY_MATH_387;
/* Turn on SSE2 builtins for -mpni. */
if (TARGET_PNI)
target_flags |= MASK_SSE2;
/* Turn on SSE builtins for -msse2. */
if (TARGET_SSE2)
target_flags |= MASK_SSE;
if (TARGET_64BIT)
{
if (TARGET_ALIGN_DOUBLE)
@ -13058,7 +13066,15 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
{ MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
{ MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
/* PNI MMX */
{ MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
{ MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
{ MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
{ MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
{ MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
{ MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
@ -13104,7 +13120,12 @@ static const struct builtin_description bdesc_1arg[] =
{ MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
{ MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
/* PNI */
{ MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
{ MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
{ MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
};
void
@ -13195,6 +13216,13 @@ ix86_init_mmx_sse_builtins ()
= build_function_type (void_type_node, void_list_node);
tree void_ftype_unsigned
= build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
tree void_ftype_unsigned_unsigned
= build_function_type_list (void_type_node, unsigned_type_node,
unsigned_type_node, NULL_TREE);
tree void_ftype_pcvoid_unsigned_unsigned
= build_function_type_list (void_type_node, const_ptr_type_node,
unsigned_type_node, unsigned_type_node,
NULL_TREE);
tree unsigned_ftype_void
= build_function_type (unsigned_type_node, void_list_node);
tree di_ftype_void
@ -13700,6 +13728,26 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
/* Prescott New Instructions. */
def_builtin (MASK_PNI, "__builtin_ia32_monitor",
void_ftype_pcvoid_unsigned_unsigned,
IX86_BUILTIN_MONITOR);
def_builtin (MASK_PNI, "__builtin_ia32_mwait",
void_ftype_unsigned_unsigned,
IX86_BUILTIN_MWAIT);
def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
v4sf_ftype_v4sf,
IX86_BUILTIN_MOVSHDUP);
def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
v4sf_ftype_v4sf,
IX86_BUILTIN_MOVSLDUP);
def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
def_builtin (MASK_PNI, "__builtin_ia32_movddup",
v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
@ -14509,6 +14557,41 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_STORED:
return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
case IX86_BUILTIN_MONITOR:
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
if (!REG_P (op0))
op0 = copy_to_mode_reg (SImode, op0);
if (!REG_P (op1))
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op2 = copy_to_mode_reg (SImode, op2);
emit_insn (gen_monitor (op0, op1, op2));
return 0;
case IX86_BUILTIN_MWAIT:
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
if (!REG_P (op0))
op0 = copy_to_mode_reg (SImode, op0);
if (!REG_P (op1))
op1 = copy_to_mode_reg (SImode, op1);
emit_insn (gen_mwait (op0, op1));
return 0;
case IX86_BUILTIN_LOADDDUP:
return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
case IX86_BUILTIN_LDDQU:
return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
1);
default:
break;
}

View File

@ -121,12 +121,13 @@ extern int target_flags;
#define MASK_MMX 0x00002000 /* Support MMX regs/builtins */
#define MASK_SSE 0x00004000 /* Support SSE regs/builtins */
#define MASK_SSE2 0x00008000 /* Support SSE2 regs/builtins */
#define MASK_3DNOW 0x00010000 /* Support 3Dnow builtins */
#define MASK_3DNOW_A 0x00020000 /* Support Athlon 3Dnow builtins */
#define MASK_128BIT_LONG_DOUBLE 0x00040000 /* long double size is 128bit */
#define MASK_64BIT 0x00080000 /* Produce 64bit code */
#define MASK_MS_BITFIELD_LAYOUT 0x00100000 /* Use native (MS) bitfield layout */
#define MASK_TLS_DIRECT_SEG_REFS 0x00200000 /* Avoid adding %gs:0 */
#define MASK_PNI 0x00010000 /* Support PNI regs/builtins */
#define MASK_3DNOW 0x00020000 /* Support 3Dnow builtins */
#define MASK_3DNOW_A 0x00040000 /* Support Athlon 3Dnow builtins */
#define MASK_128BIT_LONG_DOUBLE 0x00080000 /* long double size is 128bit */
#define MASK_64BIT 0x00100000 /* Produce 64bit code */
#define MASK_MS_BITFIELD_LAYOUT 0x00200000 /* Use native (MS) bitfield layout */
#define MASK_TLS_DIRECT_SEG_REFS 0x00400000 /* Avoid adding %gs:0 */
/* Unused: 0x03e0000 */
@ -302,6 +303,7 @@ extern int x86_prefetch_sse;
#define TARGET_SSE ((target_flags & MASK_SSE) != 0)
#define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0)
#define TARGET_PNI ((target_flags & MASK_PNI) != 0)
#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
#define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \
&& (ix86_fpmath & FPMATH_387))
@ -393,10 +395,14 @@ extern int x86_prefetch_sse;
N_("Support MMX and SSE built-in functions and code generation") }, \
{ "no-sse", -MASK_SSE, \
N_("Do not support MMX and SSE built-in functions and code generation") },\
{ "sse2", (MASK_SSE2|MASK_SSE), \
{ "sse2", MASK_SSE2, \
N_("Support MMX, SSE and SSE2 built-in functions and code generation") }, \
{ "no-sse2", -MASK_SSE2, \
N_("Do not support MMX, SSE and SSE2 built-in functions and code generation") }, \
{ "pni", MASK_PNI, \
N_("Support MMX, SSE, SSE2 and PNI built-in functions and code generation") },\
{ "no-pni", -MASK_PNI, \
N_("Do not support MMX, SSE, SSE2 and PNI built-in functions and code generation") },\
{ "128bit-long-double", MASK_128BIT_LONG_DOUBLE, \
N_("sizeof(long double) is 16") }, \
{ "96bit-long-double", -MASK_128BIT_LONG_DOUBLE, \
@ -611,6 +617,8 @@ extern int x86_prefetch_sse;
builtin_define ("__SSE__"); \
if (TARGET_SSE2) \
builtin_define ("__SSE2__"); \
if (TARGET_PNI) \
builtin_define ("__PNI__"); \
if (TARGET_SSE_MATH && TARGET_SSE) \
builtin_define ("__SSE_MATH__"); \
if (TARGET_SSE_MATH && TARGET_SSE2) \
@ -2516,6 +2524,22 @@ enum ix86_builtins
IX86_BUILTIN_MFENCE,
IX86_BUILTIN_LFENCE,
/* Prescott New Instructions. */
IX86_BUILTIN_ADDSUBPS,
IX86_BUILTIN_HADDPS,
IX86_BUILTIN_HSUBPS,
IX86_BUILTIN_MOVSHDUP,
IX86_BUILTIN_MOVSLDUP,
IX86_BUILTIN_ADDSUBPD,
IX86_BUILTIN_HADDPD,
IX86_BUILTIN_HSUBPD,
IX86_BUILTIN_LOADDDUP,
IX86_BUILTIN_MOVDDUP,
IX86_BUILTIN_LDDQU,
IX86_BUILTIN_MONITOR,
IX86_BUILTIN_MWAIT,
IX86_BUILTIN_MAX
};

View File

@ -109,6 +109,13 @@
(UNSPEC_MFENCE 59)
(UNSPEC_LFENCE 60)
(UNSPEC_PSADBW 61)
(UNSPEC_ADDSUB 71)
(UNSPEC_HADD 72)
(UNSPEC_HSUB 73)
(UNSPEC_MOVSHDUP 74)
(UNSPEC_MOVSLDUP 75)
(UNSPEC_LDQQU 76)
(UNSPEC_MOVDDUP 77)
; x87 Floating point
(UNSPEC_FPATAN 65)
@ -130,6 +137,8 @@
(UNSPECV_FEMMS 46)
(UNSPECV_CLFLUSH 57)
(UNSPECV_ALIGN 68)
(UNSPECV_MONITOR 69)
(UNSPECV_MWAIT 70)
])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@ -23512,3 +23521,129 @@
"lfence"
[(set_attr "type" "sse")
(set_attr "memory" "unknown")])
;; PNI
(define_insn "mwait"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
(match_operand:SI 1 "register_operand" "c")]
UNSPECV_MWAIT)]
"TARGET_PNI"
"mwait\t%0, %1"
[(set_attr "length" "3")])
(define_insn "monitor"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
(match_operand:SI 1 "register_operand" "c")
(match_operand:SI 2 "register_operand" "d")]
UNSPECV_MONITOR)]
"TARGET_PNI"
"monitor\t%0, %1, %2"
[(set_attr "length" "3")])
;; PNI arithmetic
(define_insn "addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm")]
UNSPEC_ADDSUB))]
"TARGET_PNI"
"addsubps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "V4SF")])
(define_insn "addsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm")]
UNSPEC_ADDSUB))]
"TARGET_PNI"
"addsubpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
(define_insn "haddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm")]
UNSPEC_HADD))]
"TARGET_PNI"
"haddps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "V4SF")])
(define_insn "haddv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm")]
UNSPEC_HADD))]
"TARGET_PNI"
"haddpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
(define_insn "hsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm")]
UNSPEC_HSUB))]
"TARGET_PNI"
"hsubps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "V4SF")])
(define_insn "hsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm")]
UNSPEC_HSUB))]
"TARGET_PNI"
"hsubpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
(define_insn "movshdup"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))]
"TARGET_PNI"
"movshdup\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "V4SF")])
(define_insn "movsldup"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))]
"TARGET_PNI"
"movsldup\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "V4SF")])
(define_insn "lddqu"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
UNSPEC_LDQQU))]
"TARGET_PNI"
"lddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "TI")])
(define_insn "loadddup"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))]
"TARGET_PNI"
"movddup\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
(define_insn "movddup"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_duplicate:V2DF
(vec_select:DF (match_operand:V2DF 1 "register_operand" "x")
(parallel [(const_int 0)]))))]
"TARGET_PNI"
"movddup\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])

132
gcc/config/i386/pmmintrin.h Normal file
View File

@ -0,0 +1,132 @@
/* Copyright (C) 2003 Free Software Foundation, Inc.
This file is part of GNU CC.
GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* As a special exception, if you include this header file into source
files compiled by GCC, this header file does not by itself cause
the resulting executable to be covered by the GNU General Public
License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General
Public License. */
/* Implemented from the specification included in the Intel C++ Compiler
User Guide and Reference, version 8.0. */
#ifndef _PMMINTRIN_H_INCLUDED
#define _PMMINTRIN_H_INCLUDED
#ifdef __PNI__
#include <xmmintrin.h>
#include <emmintrin.h>
/* Additional bits in the MXCSR. */
#define _MM_DENORMALS_ZERO_MASK 0x0040
#define _MM_DENORMALS_ZERO_ON 0x0040
#define _MM_DENORMALS_ZERO_OFF 0x0000
#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
_mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
#define _MM_GET_DENORMALS_ZERO_MODE() \
(_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
static __inline __m128
_mm_addsub_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
}
static __inline __m128
_mm_hadd_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
}
static __inline __m128
_mm_hsub_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
}
static __inline __m128
_mm_movehdup_ps (__m128 __X)
{
return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
}
static __inline __m128
_mm_moveldup_ps (__m128 __X)
{
return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
}
static __inline __m128d
_mm_addsub_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
}
static __inline __m128d
_mm_hadd_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
}
static __inline __m128d
_mm_hsub_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
}
static __inline __m128d
_mm_loaddup_pd (double const *__P)
{
return (__m128d) __builtin_ia32_loadddup (__P);
}
static __inline __m128d
_mm_movedup_pd (__m128d __X)
{
return (__m128d) __builtin_ia32_movddup ((__v2df)__X);
}
static __inline __m128i
_mm_lddqu_si128 (__m128i const *__P)
{
return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
}
#if 0
static __inline void
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
{
__builtin_ia32_monitor (__P, __E, __H);
}
static __inline void
_mm_mwait (unsigned int __E, unsigned int __H)
{
__builtin_ia32_mwait (__E, __H);
}
#else
#define _mm_monitor(P, E, H) __builtin_ia32_monitor ((P), (E), (H))
#define _mm_mwait(E, H) __builtin_ia32_mwait ((E), (H))
#endif
#endif /* __PNI__ */
#endif /* _PMMINTRIN_H_INCLUDED */

View File

@ -5592,6 +5592,31 @@ Generates the @code{movhps} machine instruction as a store to memory.
Generates the @code{movlps} machine instruction as a store to memory.
@end table
The following built-in functions are available when @option{-mpni} is used.
All of them generate the machine instruction that is part of the name.
@example
v2df __builtin_ia32_addsubpd (v2df, v2df)
v2df __builtin_ia32_addsubps (v2df, v2df)
v2df __builtin_ia32_haddpd (v2df, v2df)
v2df __builtin_ia32_haddps (v2df, v2df)
v2df __builtin_ia32_hsubpd (v2df, v2df)
v2df __builtin_ia32_hsubps (v2df, v2df)
v16qi __builtin_ia32_lddqu (char const *)
void __builtin_ia32_monitor (void *, unsigned int, unsigned int)
v2df __builtin_ia32_movddup (v2df)
v4sf __builtin_ia32_movshdup (v4sf)
v4sf __builtin_ia32_movsldup (v4sf)
void __builtin_ia32_mwait (unsigned int, unsigned int)
@end example
The following built-in functions are available when @option{-mpni} is used.
@table @code
@item v2df __builtin_ia32_loadddup (double const *)
Generates the @code{movddup} machine instruction as a load from memory.
@end table
The following built-in functions are available when @option{-m3dnow} is used.
All of them generate the machine instruction that is part of the name.

View File

@ -487,7 +487,7 @@ in the following sections.
-mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} @gol
-mmmx -msse -msse2 -m3dnow @gol
-mmmx -msse -msse2 -mpni -m3dnow @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
-m96bit-long-double -mregparm=@var{num} -momit-leaf-frame-pointer @gol
@ -8217,6 +8217,13 @@ code that expects temporaries to be 80bit.
This is the default choice for x86-64 compiler.
@item pni
Use all SSE extensions enabled by @option{-msse2} as well as the new
SSE extensions in Prescott New Intrunctions. @option{-mpni} also
enables 2 builtin functions, @code{__builtin_ia32_monitor} and
@code{__builtin_ia32_mwait}, for new intrunctions @code{monitor} and
@code{mwait}.
@item sse,387
Attempt to utilize both instruction sets at once. This effectively double the
amount of available registers and on chips with separate execution units for
@ -8398,6 +8405,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mno-sse
@item -msse2
@itemx -mno-sse2
@item -mpni
@itemx -mno-pni
@item -m3dnow
@itemx -mno-3dnow
@opindex mmmx