2002-05-01 04:58:18 +08:00
|
|
|
\# $Id$
|
|
|
|
\#
|
|
|
|
\# Source code to NASM documentation
|
|
|
|
\#
|
|
|
|
\IR{-D} \c{-D} option
|
|
|
|
\IR{-E} \c{-E} option
|
|
|
|
\IR{-I} \c{-I} option
|
|
|
|
\IR{-P} \c{-P} option
|
|
|
|
\IR{-U} \c{-U} option
|
|
|
|
\IR{-a} \c{-a} option
|
|
|
|
\IR{-d} \c{-d} option
|
|
|
|
\IR{-e} \c{-e} option
|
2002-05-01 04:52:49 +08:00
|
|
|
\IR{-f} \c{-f} option
|
|
|
|
\IR{-i} \c{-i} option
|
2002-05-01 04:58:18 +08:00
|
|
|
\IR{-l} \c{-l} option
|
|
|
|
\IR{-o} \c{-o} option
|
2002-05-01 04:52:49 +08:00
|
|
|
\IR{-p} \c{-p} option
|
2002-05-01 04:58:18 +08:00
|
|
|
\IR{-s} \c{-s} option
|
|
|
|
\IR{-u} \c{-u} option
|
2002-05-01 04:52:49 +08:00
|
|
|
\IR{-w} \c{-w} option
|
|
|
|
\IR{!=} \c{!=} operator
|
|
|
|
\IR{$ here} \c{$} Here token
|
|
|
|
\IR{$$} \c{$$} token
|
|
|
|
\IR{%} \c{%} operator
|
|
|
|
\IR{%%} \c{%%} operator
|
|
|
|
\IR{%+1} \c{%+1} and \c{%-1} syntax
|
|
|
|
\IA{%-1}{%+1}
|
|
|
|
\IR{%0} \c{%0} parameter count
|
|
|
|
\IR{&} \c{&} operator
|
|
|
|
\IR{&&} \c{&&} operator
|
|
|
|
\IR{*} \c{*} operator
|
|
|
|
\IR{..@} \c{..@} symbol prefix
|
|
|
|
\IR{/} \c{/} operator
|
|
|
|
\IR{//} \c{//} operator
|
|
|
|
\IR{<} \c{<} operator
|
|
|
|
\IR{<<} \c{<<} operator
|
|
|
|
\IR{<=} \c{<=} operator
|
|
|
|
\IR{<>} \c{<>} operator
|
|
|
|
\IR{=} \c{=} operator
|
|
|
|
\IR{==} \c{==} operator
|
|
|
|
\IR{>} \c{>} operator
|
|
|
|
\IR{>=} \c{>=} operator
|
|
|
|
\IR{>>} \c{>>} operator
|
|
|
|
\IR{?} \c{?} MASM syntax
|
|
|
|
\IR{^} \c{^} operator
|
|
|
|
\IR{^^} \c{^^} operator
|
|
|
|
\IR{|} \c{|} operator
|
|
|
|
\IR{||} \c{||} operator
|
|
|
|
\IR{~} \c{~} operator
|
|
|
|
\IR{%$} \c{%$} and \c{%$$} prefixes
|
|
|
|
\IA{%$$}{%$}
|
|
|
|
\IR{+ opaddition} \c{+} operator, binary
|
|
|
|
\IR{+ opunary} \c{+} operator, unary
|
|
|
|
\IR{+ modifier} \c{+} modifier
|
|
|
|
\IR{- opsubtraction} \c{-} operator, binary
|
|
|
|
\IR{- opunary} \c{-} operator, unary
|
|
|
|
\IR{alignment, in bin sections} alignment, in \c{bin} sections
|
|
|
|
\IR{alignment, in elf sections} alignment, in \c{elf} sections
|
|
|
|
\IR{alignment, in win32 sections} alignment, in \c{win32} sections
|
|
|
|
\IR{alignment, of elf common variables} alignment, of \c{elf} common
|
|
|
|
variables
|
|
|
|
\IR{alignment, in obj sections} alignment, in \c{obj} sections
|
|
|
|
\IR{a.out, bsd version} \c{a.out}, BSD version
|
|
|
|
\IR{a.out, linux version} \c{a.out}, Linux version
|
|
|
|
\IR{autoconf} Autoconf
|
|
|
|
\IR{bitwise and} bitwise AND
|
|
|
|
\IR{bitwise or} bitwise OR
|
|
|
|
\IR{bitwise xor} bitwise XOR
|
|
|
|
\IR{block ifs} block IFs
|
|
|
|
\IR{borland pascal} Borland, Pascal
|
|
|
|
\IR{borland's win32 compilers} Borland, Win32 compilers
|
|
|
|
\IR{braces, after % sign} braces, after \c{%} sign
|
|
|
|
\IR{bsd} BSD
|
|
|
|
\IR{c calling convention} C calling convention
|
|
|
|
\IR{c symbol names} C symbol names
|
|
|
|
\IA{critical expressions}{critical expression}
|
|
|
|
\IA{command line}{command-line}
|
|
|
|
\IA{case sensitivity}{case sensitive}
|
|
|
|
\IA{case-sensitive}{case sensitive}
|
|
|
|
\IA{case-insensitive}{case sensitive}
|
|
|
|
\IA{character constants}{character constant}
|
|
|
|
\IR{common object file format} Common Object File Format
|
|
|
|
\IR{common variables, alignment in elf} common variables, alignment
|
|
|
|
in \c{elf}
|
|
|
|
\IR{common, elf extensions to} \c{COMMON}, \c{elf} extensions to
|
|
|
|
\IR{common, obj extensions to} \c{COMMON}, \c{obj} extensions to
|
|
|
|
\IR{declaring structure} declaring structures
|
|
|
|
\IR{default-wrt mechanism} default-\c{WRT} mechanism
|
|
|
|
\IR{devpac} DevPac
|
|
|
|
\IR{djgpp} DJGPP
|
|
|
|
\IR{dll symbols, exporting} DLL symbols, exporting
|
|
|
|
\IR{dll symbols, importing} DLL symbols, importing
|
|
|
|
\IR{dos} DOS
|
|
|
|
\IR{dos archive} DOS archive
|
|
|
|
\IR{dos source archive} DOS source archive
|
|
|
|
\IA{effective address}{effective addresses}
|
|
|
|
\IA{effective-address}{effective addresses}
|
|
|
|
\IR{elf shared libraries} \c{elf} shared libraries
|
|
|
|
\IR{freebsd} FreeBSD
|
|
|
|
\IR{freelink} FreeLink
|
|
|
|
\IR{functions, c calling convention} functions, C calling convention
|
|
|
|
\IR{functions, pascal calling convention} functions, Pascal calling
|
|
|
|
convention
|
|
|
|
\IR{global, aoutb extensions to} \c{GLOBAL}, \c{aoutb} extensions to
|
|
|
|
\IR{global, elf extensions to} \c{GLOBAL}, \c{elf} extensions to
|
|
|
|
\IR{got} GOT
|
|
|
|
\IR{got relocations} \c{GOT} relocations
|
|
|
|
\IR{gotoff relocation} \c{GOTOFF} relocations
|
|
|
|
\IR{gotpc relocation} \c{GOTPC} relocations
|
|
|
|
\IR{linux elf} Linux ELF
|
|
|
|
\IR{logical and} logical AND
|
|
|
|
\IR{logical or} logical OR
|
|
|
|
\IR{logical xor} logical XOR
|
|
|
|
\IR{masm} MASM
|
|
|
|
\IA{memory reference}{memory references}
|
|
|
|
\IA{misc directory}{misc subdirectory}
|
|
|
|
\IR{misc subdirectory} \c{misc} subdirectory
|
|
|
|
\IR{microsoft omf} Microsoft OMF
|
|
|
|
\IR{mmx registers} MMX registers
|
|
|
|
\IA{modr/m}{modr/m byte}
|
|
|
|
\IR{modr/m byte} ModR/M byte
|
|
|
|
\IR{ms-dos} MS-DOS
|
|
|
|
\IR{ms-dos device drivers} MS-DOS device drivers
|
|
|
|
\IR{multipush} \c{multipush} macro
|
|
|
|
\IR{nasm version} NASM version
|
|
|
|
\IR{netbsd} NetBSD
|
|
|
|
\IR{omf} OMF
|
|
|
|
\IR{openbsd} OpenBSD
|
|
|
|
\IR{operating-system} operating system
|
|
|
|
\IR{os/2} OS/2
|
|
|
|
\IR{pascal calling convention} Pascal calling convention
|
|
|
|
\IR{passes} passes, assembly
|
|
|
|
\IR{perl} Perl
|
|
|
|
\IR{pic} PIC
|
|
|
|
\IR{pharlap} PharLap
|
|
|
|
\IR{plt} PLT
|
|
|
|
\IR{plt relocations} \c{PLT} relocations
|
|
|
|
\IA{pre-defining macros}{pre-define}
|
|
|
|
\IR{qbasic} QBasic
|
|
|
|
\IA{rdoff subdirectory}{rdoff}
|
|
|
|
\IR{rdoff} \c{rdoff} subdirectory
|
|
|
|
\IR{relocatable dynamic object file format} Relocatable Dynamic
|
|
|
|
Object File Format
|
|
|
|
\IR{relocations, pic-specific} relocations, PIC-specific
|
|
|
|
\IA{repeating}{repeating code}
|
|
|
|
\IR{section alignment, in elf} section alignment, in \c{elf}
|
|
|
|
\IR{section alignment, in bin} section alignment, in \c{bin}
|
|
|
|
\IR{section alignment, in obj} section alignment, in \c{obj}
|
|
|
|
\IR{section alignment, in win32} section alignment, in \c{win32}
|
|
|
|
\IR{section, elf extensions to} \c{SECTION}, \c{elf} extensions to
|
|
|
|
\IR{section, win32 extensions to} \c{SECTION}, \c{win32} extensions to
|
|
|
|
\IR{segment alignment, in bin} segment alignment, in \c{bin}
|
|
|
|
\IR{segment alignment, in obj} segment alignment, in \c{obj}
|
|
|
|
\IR{segment, obj extensions to} \c{SEGMENT}, \c{obj} extensions to
|
|
|
|
\IR{segment names, borland pascal} segment names, Borland Pascal
|
|
|
|
\IR{shift command} \c{shift} command
|
|
|
|
\IA{sib}{sib byte}
|
|
|
|
\IR{sib byte} SIB byte
|
|
|
|
\IA{standard section names}{standardised section names}
|
|
|
|
\IR{symbols, exporting from dlls} symbols, exporting from DLLs
|
|
|
|
\IR{symbols, importing from dlls} symbols, importing from DLLs
|
|
|
|
\IR{tasm} TASM
|
|
|
|
\IR{test subdirectory} \c{test} subdirectory
|
|
|
|
\IR{tlink} TLINK
|
|
|
|
\IR{underscore, in c symbols} underscore, in C symbols
|
|
|
|
\IR{unix} Unix
|
|
|
|
\IR{unix source archive} Unix source archive
|
|
|
|
\IR{val} VAL
|
|
|
|
\IR{version number of nasm} version number of NASM
|
|
|
|
\IR{visual c++} Visual C++
|
|
|
|
\IR{www page} WWW page
|
|
|
|
\IR{win32} Win32
|
|
|
|
\IR{windows} Windows
|
|
|
|
\IR{windows 95} Windows 95
|
|
|
|
\IR{windows nt} Windows NT
|
|
|
|
\# \IC{program entry point}{entry point, program}
|
|
|
|
\# \IC{program entry point}{start point, program}
|
|
|
|
\# \IC{MS-DOS device drivers}{device drivers, MS-DOS}
|
|
|
|
\# \IC{16-bit mode, versus 32-bit mode}{32-bit mode, versus 16-bit mode}
|
|
|
|
\# \IC{c symbol names}{symbol names, in C}
|
|
|
|
|
|
|
|
\C{intro} Introduction
|
|
|
|
|
|
|
|
\H{whatsnasm} What Is NASM?
|
|
|
|
|
|
|
|
The Netwide Assembler, NASM, is an 80x86 assembler designed for
|
|
|
|
portability and modularity. It supports a range of object file
|
|
|
|
formats, including Linux \c{a.out} and ELF, NetBSD/FreeBSD, COFF,
|
|
|
|
Microsoft 16-bit OBJ and Win32. It will also output plain binary
|
|
|
|
files. Its syntax is designed to be simple and easy to understand,
|
|
|
|
similar to Intel's but less complex. It supports Pentium, P6 and MMX
|
|
|
|
opcodes, and has macro capability.
|
|
|
|
|
|
|
|
\S{yaasm} Why Yet Another Assembler?
|
|
|
|
|
|
|
|
The Netwide Assembler grew out of an idea on \i\c{comp.lang.asm.x86}
|
|
|
|
(or possibly \i\c{alt.lang.asm} - I forget which), which was
|
|
|
|
essentially that there didn't seem to be a good free x86-series
|
|
|
|
assembler around, and that maybe someone ought to write one.
|
|
|
|
|
|
|
|
\b \i\c{a86} is good, but not free, and in particular you don't get any
|
|
|
|
32-bit capability until you pay. It's DOS only, too.
|
|
|
|
|
|
|
|
\b \i\c{gas} is free, and ports over DOS and Unix, but it's not very good,
|
|
|
|
since it's designed to be a back end to \i\c{gcc}, which always feeds
|
|
|
|
it correct code. So its error checking is minimal. Also, its syntax
|
|
|
|
is horrible, from the point of view of anyone trying to actually
|
|
|
|
\e{write} anything in it. Plus you can't write 16-bit code in it
|
|
|
|
(properly).
|
|
|
|
|
|
|
|
\b \i\c{as86} is Linux-specific, and (my version at least) doesn't seem to
|
|
|
|
have much (or any) documentation.
|
|
|
|
|
|
|
|
\b \i{MASM} isn't very good, and it's expensive, and it runs only under
|
|
|
|
DOS.
|
|
|
|
|
|
|
|
\b \i{TASM} is better, but still strives for \i{MASM} compatibility, which
|
|
|
|
means millions of directives and tons of red tape. And its syntax is
|
|
|
|
essentially \i{MASM}'s, with the contradictions and quirks that entails
|
|
|
|
(although it sorts out some of those by means of Ideal mode). It's
|
|
|
|
expensive too. And it's DOS-only.
|
|
|
|
|
|
|
|
So here, for your coding pleasure, is NASM. At present it's
|
|
|
|
still in prototype stage - we don't promise that it can outperform
|
|
|
|
any of these assemblers. But please, \e{please} send us bug reports,
|
|
|
|
fixes, helpful information, and anything else you can get your hands
|
|
|
|
on (and thanks to the many people who've done this already! You all
|
|
|
|
know who you are), and we'll improve it out of all recognition.
|
|
|
|
Again.
|
|
|
|
|
|
|
|
\S{legal} Licence Conditions
|
|
|
|
|
|
|
|
Please see the file \c{Licence}, supplied as part of any NASM
|
|
|
|
distribution archive, for the \i{licence} conditions under which you
|
|
|
|
may use NASM.
|
|
|
|
|
|
|
|
\H{contact} Contact Information
|
|
|
|
|
2002-05-01 04:58:18 +08:00
|
|
|
The current versions of NASM (since 0.98) are maintained by H. Peter
|
|
|
|
Anvin, \W{mailto:hpa@zytor.com}\c{hpa@zytor.com}. If you want to report
|
|
|
|
a bug, please read \k{bugs} first.
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
NASM has a \i{WWW page} at
|
2002-05-01 04:58:18 +08:00
|
|
|
\W{http://www.cryogen.com/Nasm}\c{http://www.cryogen.com/Nasm}.
|
|
|
|
|
|
|
|
The original authors are \i{e\-mail}able as
|
2002-05-01 04:52:49 +08:00
|
|
|
\W{mailto:jules@earthcorp.com}\c{jules@earthcorp.com} and
|
2002-05-01 04:58:18 +08:00
|
|
|
\W{mailto:anakin@pobox.com}\c{anakin@pobox.com}.
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
\i{New releases} of NASM are uploaded to
|
2002-05-01 04:58:18 +08:00
|
|
|
\W{ftp://ftp.kernel.org/pub/software/devel/nasm/}\i\c{ftp.kernel.org},
|
2002-05-01 04:52:49 +08:00
|
|
|
\W{ftp://sunsite.unc.edu/pub/Linux/devel/lang/assemblers/}\i\c{sunsite.unc.edu},
|
|
|
|
\W{ftp://ftp.simtel.net/pub/simtelnet/msdos/asmutl/}\i\c{ftp.simtel.net}
|
|
|
|
and
|
|
|
|
\W{ftp://ftp.coast.net/coast/msdos/asmutil/}\i\c{ftp.coast.net}.
|
|
|
|
Announcements are posted to
|
|
|
|
\W{news:comp.lang.asm.x86}\i\c{comp.lang.asm.x86},
|
|
|
|
\W{news:alt.lang.asm}\i\c{alt.lang.asm},
|
|
|
|
\W{news:comp.os.linux.announce}\i\c{comp.os.linux.announce} and
|
|
|
|
\W{news:comp.archives.msdos.announce}\i\c{comp.archives.msdos.announce}
|
|
|
|
(the last one is done automagically by uploading to
|
|
|
|
\W{ftp://ftp.simtel.net/pub/simtelnet/msdos/asmutl/}\c{ftp.simtel.net}).
|
|
|
|
|
|
|
|
If you don't have Usenet access, or would rather be informed by
|
2002-05-01 04:58:18 +08:00
|
|
|
\i{e\-mail} when new releases come out, you can subscribe to the
|
|
|
|
\c{nasm-announce} email list by sending an email containing the line
|
|
|
|
\c{subscribe nasm-announce} to
|
|
|
|
\W{mailto:majordomo@linux.kernel.org}\c{majordomo@linux.kernel.org}.
|
|
|
|
|
|
|
|
If you want information about NASM beta releases, please subscribe to
|
|
|
|
the \c{nasm-beta} email list by sending an email containing the line
|
|
|
|
\c{subscribe nasm-beta} to
|
|
|
|
\W{mailto:majordomo@linux.kernel.org}\c{majordomo@linux.kernel.org}.
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
\H{install} Installation
|
|
|
|
|
|
|
|
\S{instdos} \i{Installing} NASM under MS-\i{DOS} or Windows
|
|
|
|
|
|
|
|
Once you've obtained the \i{DOS archive} for NASM, \i\c{nasmXXX.zip}
|
|
|
|
(where \c{XXX} denotes the version number of NASM contained in the
|
|
|
|
archive), unpack it into its own directory (for example
|
|
|
|
\c{c:\\nasm}).
|
|
|
|
|
|
|
|
The archive will contain four executable files: the NASM executable
|
|
|
|
files \i\c{nasm.exe} and \i\c{nasmw.exe}, and the NDISASM executable
|
|
|
|
files \i\c{ndisasm.exe} and \i\c{ndisasmw.exe}. In each case, the
|
|
|
|
file whose name ends in \c{w} is a \i{Win32} executable, designed to
|
|
|
|
run under \i{Windows 95} or \i{Windows NT} Intel, and the other one
|
|
|
|
is a 16-bit \i{DOS} executable.
|
|
|
|
|
|
|
|
The only file NASM needs to run is its own executable, so copy
|
|
|
|
(at least) one of \c{nasm.exe} and \c{nasmw.exe} to a directory on
|
|
|
|
your PATH, or alternatively edit \i\c{autoexec.bat} to add the
|
|
|
|
\c{nasm} directory to your \i\c{PATH}. (If you're only installing the
|
|
|
|
Win32 version, you may wish to rename it to \c{nasm.exe}.)
|
|
|
|
|
|
|
|
That's it - NASM is installed. You don't need the \c{nasm} directory
|
|
|
|
to be present to run NASM (unless you've added it to your \c{PATH}),
|
|
|
|
so you can delete it if you need to save space; however, you may
|
|
|
|
want to keep the documentation or test programs.
|
|
|
|
|
|
|
|
If you've downloaded the \i{DOS source archive}, \i\c{nasmXXXs.zip},
|
|
|
|
the \c{nasm} directory will also contain the full NASM \i{source
|
|
|
|
code}, and a selection of \i{Makefiles} you can (hopefully) use to
|
2002-05-01 04:58:18 +08:00
|
|
|
rebuild your copy of NASM from scratch. The file \c{Readme} lists the
|
|
|
|
various Makefiles and which compilers they work with.
|
|
|
|
|
|
|
|
Note that the source files \c{insnsa.c}, \c{insnsd.c}, \c{insnsi.h}
|
|
|
|
and \c{insnsn.c} are automatically generated from the master
|
|
|
|
instruction table \c{insns.dat} by a Perl script; the file
|
|
|
|
\c{macros.c} is generated from \c{standard.mac} by another Perl
|
|
|
|
script. Although the NASM 0.98 distribution includes these generated
|
|
|
|
files, you will need to rebuild them (and hence, will need a Perl
|
|
|
|
interpreter) if you change \c{insns.dat}, \c{standard.mac} or the
|
|
|
|
documentation. It is possible future source distributions may not
|
|
|
|
include these files at all. Ports of \i{Perl} for a variety of
|
|
|
|
platforms, including DOS and Windows, are available from
|
|
|
|
\W{http://www.cpan.org/ports/}\i{www.cpan.org}.
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
\S{instunix} Installing NASM under \i{Unix}
|
|
|
|
|
|
|
|
Once you've obtained the \i{Unix source archive} for NASM,
|
|
|
|
\i\c{nasm-X.XX.tar.gz} (where \c{X.XX} denotes the version number of
|
|
|
|
NASM contained in the archive), unpack it into a directory such
|
|
|
|
as \c{/usr/local/src}. The archive, when unpacked, will create its
|
|
|
|
own subdirectory \c{nasm-X.XX}.
|
|
|
|
|
|
|
|
NASM is an \I{Autoconf}\I\c{configure}auto-configuring package: once
|
|
|
|
you've unpacked it, \c{cd} to the directory it's been unpacked into
|
|
|
|
and type \c{./configure}. This shell script will find the best C
|
|
|
|
compiler to use for building NASM and set up \i{Makefiles}
|
|
|
|
accordingly.
|
|
|
|
|
|
|
|
Once NASM has auto-configured, you can type \i\c{make} to build the
|
|
|
|
\c{nasm} and \c{ndisasm} binaries, and then \c{make install} to
|
|
|
|
install them in \c{/usr/local/bin} and install the \i{man pages}
|
|
|
|
\i\c{nasm.1} and \i\c{ndisasm.1} in \c{/usr/local/man/man1}.
|
|
|
|
Alternatively, you can give options such as \c{--prefix} to the
|
|
|
|
\c{configure} script (see the file \i\c{INSTALL} for more details), or
|
|
|
|
install the programs yourself.
|
|
|
|
|
|
|
|
NASM also comes with a set of utilities for handling the RDOFF
|
|
|
|
custom object-file format, which are in the \i\c{rdoff} subdirectory
|
|
|
|
of the NASM archive. You can build these with \c{make rdf} and
|
|
|
|
install them with \c{make rdf_install}, if you want them.
|
|
|
|
|
|
|
|
If NASM fails to auto-configure, you may still be able to make it
|
|
|
|
compile by using the fall-back Unix makefile \i\c{Makefile.unx}.
|
|
|
|
Copy or rename that file to \c{Makefile} and try typing \c{make}.
|
|
|
|
There is also a \c{Makefile.unx} file in the \c{rdoff} subdirectory.
|
|
|
|
|
|
|
|
\C{running} Running NASM
|
|
|
|
|
|
|
|
\H{syntax} NASM \i{Command-Line} Syntax
|
|
|
|
|
|
|
|
To assemble a file, you issue a command of the form
|
|
|
|
|
|
|
|
\c nasm -f <format> <filename> [-o <output>]
|
|
|
|
|
|
|
|
For example,
|
|
|
|
|
|
|
|
\c nasm -f elf myfile.asm
|
|
|
|
|
|
|
|
will assemble \c{myfile.asm} into an ELF object file \c{myfile.o}. And
|
|
|
|
|
|
|
|
\c nasm -f bin myfile.asm -o myfile.com
|
|
|
|
|
|
|
|
will assemble \c{myfile.asm} into a raw binary file \c{myfile.com}.
|
|
|
|
|
|
|
|
To produce a listing file, with the hex codes output from NASM
|
|
|
|
displayed on the left of the original sources, use the \c{-l} option
|
|
|
|
to give a listing file name, for example:
|
|
|
|
|
|
|
|
\c nasm -f coff myfile.asm -l myfile.lst
|
|
|
|
|
|
|
|
To get further usage instructions from NASM, try typing
|
|
|
|
|
|
|
|
\c nasm -h
|
|
|
|
|
|
|
|
This will also list the available output file formats, and what they
|
|
|
|
are.
|
|
|
|
|
|
|
|
If you use Linux but aren't sure whether your system is \c{a.out} or
|
|
|
|
ELF, type
|
|
|
|
|
|
|
|
\c file nasm
|
|
|
|
|
|
|
|
(in the directory in which you put the NASM binary when you
|
|
|
|
installed it). If it says something like
|
|
|
|
|
|
|
|
\c nasm: ELF 32-bit LSB executable i386 (386 and up) Version 1
|
|
|
|
|
|
|
|
then your system is ELF, and you should use the option \c{-f elf}
|
|
|
|
when you want NASM to produce Linux object files. If it says
|
|
|
|
|
|
|
|
\c nasm: Linux/i386 demand-paged executable (QMAGIC)
|
|
|
|
|
|
|
|
or something similar, your system is \c{a.out}, and you should use
|
2002-05-01 04:58:18 +08:00
|
|
|
\c{-f aout} instead (Linux \c{a.out} systems are considered obsolete,
|
|
|
|
and are rare these days).
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
Like Unix compilers and assemblers, NASM is silent unless it
|
|
|
|
goes wrong: you won't see any output at all, unless it gives error
|
|
|
|
messages.
|
|
|
|
|
|
|
|
\S{opt-o} The \i\c{-o} Option: Specifying the Output File Name
|
|
|
|
|
|
|
|
NASM will normally choose the name of your output file for you;
|
|
|
|
precisely how it does this is dependent on the object file format.
|
|
|
|
For Microsoft object file formats (\i\c{obj} and \i\c{win32}), it
|
|
|
|
will remove the \c{.asm} \i{extension} (or whatever extension you
|
|
|
|
like to use - NASM doesn't care) from your source file name and
|
|
|
|
substitute \c{.obj}. For Unix object file formats (\i\c{aout},
|
|
|
|
\i\c{coff}, \i\c{elf} and \i\c{as86}) it will substitute \c{.o}. For
|
|
|
|
\i\c{rdf}, it will use \c{.rdf}, and for the \i\c{bin} format it
|
|
|
|
will simply remove the extension, so that \c{myfile.asm} produces
|
|
|
|
the output file \c{myfile}.
|
|
|
|
|
|
|
|
If the output file already exists, NASM will overwrite it, unless it
|
|
|
|
has the same name as the input file, in which case it will give a
|
|
|
|
warning and use \i\c{nasm.out} as the output file name instead.
|
|
|
|
|
|
|
|
For situations in which this behaviour is unacceptable, NASM
|
|
|
|
provides the \c{-o} command-line option, which allows you to specify
|
|
|
|
your desired output file name. You invoke \c{-o} by following it
|
|
|
|
with the name you wish for the output file, either with or without
|
|
|
|
an intervening space. For example:
|
|
|
|
|
|
|
|
\c nasm -f bin program.asm -o program.com
|
|
|
|
\c nasm -f bin driver.asm -odriver.sys
|
|
|
|
|
|
|
|
\S{opt-f} The \i\c{-f} Option: Specifying the \i{Output File Format}
|
|
|
|
|
|
|
|
If you do not supply the \c{-f} option to NASM, it will choose an
|
|
|
|
output file format for you itself. In the distribution versions of
|
|
|
|
NASM, the default is always \i\c{bin}; if you've compiled your own
|
|
|
|
copy of NASM, you can redefine \i\c{OF_DEFAULT} at compile time and
|
|
|
|
choose what you want the default to be.
|
|
|
|
|
|
|
|
Like \c{-o}, the intervening space between \c{-f} and the output
|
|
|
|
file format is optional; so \c{-f elf} and \c{-felf} are both valid.
|
|
|
|
|
|
|
|
A complete list of the available output file formats can be given by
|
|
|
|
issuing the command \i\c{nasm -h}.
|
|
|
|
|
|
|
|
\S{opt-l} The \i\c{-l} Option: Generating a \i{Listing File}
|
|
|
|
|
|
|
|
If you supply the \c{-l} option to NASM, followed (with the usual
|
|
|
|
optional space) by a file name, NASM will generate a
|
|
|
|
\i{source-listing file} for you, in which addresses and generated
|
|
|
|
code are listed on the left, and the actual source code, with
|
|
|
|
expansions of multi-line macros (except those which specifically
|
|
|
|
request no expansion in source listings: see \k{nolist}) on the
|
|
|
|
right. For example:
|
|
|
|
|
|
|
|
\c nasm -f elf myfile.asm -l myfile.lst
|
|
|
|
|
2002-05-01 04:58:18 +08:00
|
|
|
\S{opt-E} The \i\c{-E} Option: Send Errors to a File
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
Under MS-\i{DOS} it can be difficult (though there are ways) to
|
|
|
|
redirect the standard-error output of a program to a file. Since
|
|
|
|
NASM usually produces its warning and \i{error messages} on
|
|
|
|
\i\c{stderr}, this can make it hard to capture the errors if (for
|
|
|
|
example) you want to load them into an editor.
|
|
|
|
|
2002-05-01 04:58:18 +08:00
|
|
|
NASM therefore provides the \c{-E} option, taking a filename argument
|
|
|
|
which causes errors to be sent to the specified file rather than
|
2002-05-01 04:52:49 +08:00
|
|
|
standard error. Therefore you can \I{redirecting errors}redirect
|
|
|
|
the errors into a file by typing
|
|
|
|
|
2002-05-01 04:58:18 +08:00
|
|
|
\c nasm -E myfile.err -f obj myfile.asm
|
|
|
|
|
|
|
|
\S{opt-s} The \i\c{-s} Option: Send Errors to \i\c{stdout}
|
|
|
|
|
|
|
|
The \c{-s} option redirects \i{error messages} to \c{stdout} rather
|
|
|
|
than \c{stderr}, so it can be redirected under MS-\i{DOS}. To
|
|
|
|
assemble the file \c{myfile.asm} and pipe its output to the \c{more}
|
|
|
|
program, you can type:
|
|
|
|
|
|
|
|
\c nasm -s -f obj myfile.asm | more
|
|
|
|
|
|
|
|
See also the \c{-E} option, \k{opt-E}.
|
2002-05-01 04:52:49 +08:00
|
|
|
|
2002-05-01 04:58:18 +08:00
|
|
|
\S{opt-i} The \i\c{-i}\I\c{-I} Option: Include File Search Directories
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
When NASM sees the \i\c{%include} directive in a source file (see
|
|
|
|
\k{include}), it will search for the given file not only in the
|
|
|
|
current directory, but also in any directories specified on the
|
|
|
|
command line by the use of the \c{-i} option. Therefore you can
|
|
|
|
include files from a \i{macro library}, for example, by typing
|
|
|
|
|
|
|
|
\c nasm -ic:\\macrolib\\ -f obj myfile.asm
|
|
|
|
|
|
|
|
(As usual, a space between \c{-i} and the path name is allowed, and
|
|
|
|
optional).
|
|
|
|
|
|
|
|
NASM, in the interests of complete source-code portability, does not
|
|
|
|
understand the file naming conventions of the OS it is running on;
|
|
|
|
the string you provide as an argument to the \c{-i} option will be
|
|
|
|
prepended exactly as written to the name of the include file.
|
|
|
|
Therefore the trailing backslash in the above example is necessary.
|
|
|
|
Under Unix, a trailing forward slash is similarly necessary.
|
|
|
|
|
|
|
|
(You can use this to your advantage, if you're really \i{perverse},
|
|
|
|
by noting that the option \c{-ifoo} will cause \c{%include "bar.i"}
|
|
|
|
to search for the file \c{foobar.i}...)
|
|
|
|
|
|
|
|
If you want to define a \e{standard} \i{include search path},
|
|
|
|
similar to \c{/usr/include} on Unix systems, you should place one or
|
|
|
|
more \c{-i} directives in the \c{NASM} environment variable (see
|
|
|
|
\k{nasmenv}).
|
|
|
|
|
2002-05-01 04:58:18 +08:00
|
|
|
For Makefile compatibility with many C compilers, this option can also
|
|
|
|
be specified as \c{-I}.
|
|
|
|
|
|
|
|
\S{opt-p} The \i\c{-p}\I\c{-P} Option: \I{pre-including files}Pre-Include a File
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
\I\c{%include}NASM allows you to specify files to be
|
|
|
|
\e{pre-included} into your source file, by the use of the \c{-p}
|
|
|
|
option. So running
|
|
|
|
|
|
|
|
\c nasm myfile.asm -p myinc.inc
|
|
|
|
|
|
|
|
is equivalent to running \c{nasm myfile.asm} and placing the
|
|
|
|
directive \c{%include "myinc.inc"} at the start of the file.
|
|
|
|
|
2002-05-01 04:58:18 +08:00
|
|
|
For consistency with the \c{-I}, \c{-D} and \c{-U} options, this
|
|
|
|
option can also be specified as \c{-P}.
|
|
|
|
|
|
|
|
\S{opt-d} The \i\c{-d}\I\c{-D} Option: \I{pre-defining macros} Pre-Define a Macro
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
\I\c{%define}Just as the \c{-p} option gives an alternative to placing
|
|
|
|
\c{%include} directives at the start of a source file, the \c{-d}
|
|
|
|
option gives an alternative to placing a \c{%define} directive. You
|
|
|
|
could code
|
|
|
|
|
|
|
|
\c nasm myfile.asm -dFOO=100
|
|
|
|
|
|
|
|
as an alternative to placing the directive
|
|
|
|
|
|
|
|
\c %define FOO 100
|
|
|
|
|
|
|
|
at the start of the file. You can miss off the macro value, as well:
|
|
|
|
the option \c{-dFOO} is equivalent to coding \c{%define FOO}. This
|
|
|
|
form of the directive may be useful for selecting \i{assembly-time
|
|
|
|
options} which are then tested using \c{%ifdef}, for example
|
|
|
|
\c{-dDEBUG}.
|
|
|
|
|
2002-05-01 04:58:18 +08:00
|
|
|
For Makefile compatibility with many C compilers, this option can also
|
|
|
|
be specified as \c{-D}.
|
|
|
|
|
|
|
|
\S{opt-u} The \i\c{-u}\I\c{-U} Option: \I{Undefining macros} Undefine a Macro
|
|
|
|
|
|
|
|
\I\c{%undef}The \c{-u} option undefines a macro that would otherwise
|
|
|
|
have been pre-defined, either automatically or by a \c{-p} or \c{-d}
|
|
|
|
option specified earlier on the command line.
|
|
|
|
|
|
|
|
For example, the following command line:
|
|
|
|
|
|
|
|
\c nasm myfile.asm -dFOO=100 -uFOO
|
|
|
|
|
|
|
|
would result in \c{FOO} \e{not} being a predefined macro in the
|
|
|
|
program. This is useful to override options specified at a different
|
|
|
|
point in a Makefile.
|
|
|
|
|
|
|
|
For Makefile compatibility with many C compilers, this option can also
|
|
|
|
be specified as \c{-U}.
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\S{opt-e} The \i\c{-e} Option: Preprocess Only
|
|
|
|
|
|
|
|
NASM allows the \i{preprocessor} to be run on its own, up to a
|
|
|
|
point. Using the \c{-e} option (which requires no arguments) will
|
|
|
|
cause NASM to preprocess its input file, expand all the macro
|
|
|
|
references, remove all the comments and preprocessor directives, and
|
|
|
|
print the resulting file on standard output (or save it to a file,
|
|
|
|
if the \c{-o} option is also used).
|
|
|
|
|
|
|
|
This option cannot be applied to programs which require the
|
|
|
|
preprocessor to evaluate \I{preprocessor expressions}\i{expressions}
|
|
|
|
which depend on the values of symbols: so code such as
|
|
|
|
|
|
|
|
\c %assign tablesize ($-tablestart)
|
|
|
|
|
|
|
|
will cause an error in \i{preprocess-only mode}.
|
|
|
|
|
|
|
|
\S{opt-a} The \i\c{-a} Option: Don't Preprocess At All
|
|
|
|
|
|
|
|
If NASM is being used as the back end to a compiler, it might be
|
|
|
|
desirable to \I{suppressing preprocessing}suppress preprocessing
|
|
|
|
completely and assume the compiler has already done it, to save time
|
|
|
|
and increase compilation speeds. The \c{-a} option, requiring no
|
|
|
|
argument, instructs NASM to replace its powerful \i{preprocessor}
|
|
|
|
with a \i{stub preprocessor} which does nothing.
|
|
|
|
|
2002-05-01 05:01:08 +08:00
|
|
|
\S{opt-t} The \i\c{-t} Option: Enable TASM Compatibility Mode
|
|
|
|
|
|
|
|
NASM includes a limited form of compatibility with Borland's TASM.
|
|
|
|
When NASM's \c{-t} option is used, the following changes are made:
|
|
|
|
|
|
|
|
\b local labels may be prefixed with \c{@@} instead of \c{.}
|
|
|
|
|
|
|
|
\b TASM-style response files beginning with \c{@} may be specified on
|
|
|
|
the command line. This is different from the \c{-@resp} style that NASM
|
|
|
|
natively supports.
|
|
|
|
|
|
|
|
\b size override is supported within brackets. In TASM compatible mode,
|
|
|
|
a size override inside square brackets changes the size of the operand,
|
|
|
|
and not the address type of the operand as it does in NASM syntax. E.g.
|
|
|
|
\c{mov eax,[DWORD val]} is valid syntax in TASM compatibility mode.
|
|
|
|
Note that you lose the ability to override the default address type for
|
|
|
|
the instruction.
|
|
|
|
|
|
|
|
\b \c{%arg} preprocessor directive is supported which is similar to
|
|
|
|
TASM's ARG directive.
|
|
|
|
|
|
|
|
\b \c{%local} preprocessor directive
|
|
|
|
|
|
|
|
\b \c{%stacksize} preprocessor directive
|
|
|
|
|
|
|
|
\b unprefixed forms of some directives supported (arg, elif, else,
|
|
|
|
endif, if, ifdef, ifdifi, ifndef, include, local)
|
|
|
|
|
|
|
|
\b more...
|
|
|
|
|
|
|
|
For more information on the directives, see the section on TASM
|
|
|
|
Compatibility preprocessor directives in \k{tasmcompat}.
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\S{opt-w} The \i\c{-w} Option: Enable or Disable Assembly \i{Warnings}
|
|
|
|
|
|
|
|
NASM can observe many conditions during the course of assembly which
|
|
|
|
are worth mentioning to the user, but not a sufficiently severe
|
|
|
|
error to justify NASM refusing to generate an output file. These
|
|
|
|
conditions are reported like errors, but come up with the word
|
|
|
|
`warning' before the message. Warnings do not prevent NASM from
|
|
|
|
generating an output file and returning a success status to the
|
|
|
|
operating system.
|
|
|
|
|
|
|
|
Some conditions are even less severe than that: they are only
|
|
|
|
sometimes worth mentioning to the user. Therefore NASM supports the
|
|
|
|
\c{-w} command-line option, which enables or disables certain
|
|
|
|
classes of assembly warning. Such warning classes are described by a
|
|
|
|
name, for example \c{orphan-labels}; you can enable warnings of
|
|
|
|
this class by the command-line option \c{-w+orphan-labels} and
|
|
|
|
disable it by \c{-w-orphan-labels}.
|
|
|
|
|
|
|
|
The \i{suppressible warning} classes are:
|
|
|
|
|
|
|
|
\b \i\c{macro-params} covers warnings about \i{multi-line macros}
|
|
|
|
being invoked with the wrong number of parameters. This warning
|
|
|
|
class is enabled by default; see \k{mlmacover} for an example of why
|
|
|
|
you might want to disable it.
|
|
|
|
|
|
|
|
\b \i\c{orphan-labels} covers warnings about source lines which
|
|
|
|
contain no instruction but define a label without a trailing colon.
|
|
|
|
NASM does not warn about this somewhat obscure condition by default;
|
|
|
|
see \k{syntax} for an example of why you might want it to.
|
|
|
|
|
|
|
|
\b \i\c{number-overflow} covers warnings about numeric constants which
|
|
|
|
don't fit in 32 bits (for example, it's easy to type one too many Fs
|
|
|
|
and produce \c{0x7ffffffff} by mistake). This warning class is
|
|
|
|
enabled by default.
|
|
|
|
|
|
|
|
\S{nasmenv} The \c{NASM} \i{Environment} Variable
|
|
|
|
|
|
|
|
If you define an environment variable called \c{NASM}, the program
|
|
|
|
will interpret it as a list of extra command-line options, which are
|
|
|
|
processed before the real command line. You can use this to define
|
|
|
|
standard search directories for include files, by putting \c{-i}
|
|
|
|
options in the \c{NASM} variable.
|
|
|
|
|
|
|
|
The value of the variable is split up at white space, so that the
|
|
|
|
value \c{-s -ic:\\nasmlib} will be treated as two separate options.
|
|
|
|
However, that means that the value \c{-dNAME="my name"} won't do
|
|
|
|
what you might want, because it will be split at the space and the
|
|
|
|
NASM command-line processing will get confused by the two
|
|
|
|
nonsensical words \c{-dNAME="my} and \c{name"}.
|
|
|
|
|
|
|
|
To get round this, NASM provides a feature whereby, if you begin the
|
|
|
|
\c{NASM} environment variable with some character that isn't a minus
|
|
|
|
sign, then NASM will treat this character as the \i{separator
|
|
|
|
character} for options. So setting the \c{NASM} variable to the
|
|
|
|
value \c{!-s!-ic:\\nasmlib} is equivalent to setting it to \c{-s
|
|
|
|
-ic:\\nasmlib}, but \c{!-dNAME="my name"} will work.
|
|
|
|
|
|
|
|
\H{qstart} \i{Quick Start} for \i{MASM} Users
|
|
|
|
|
|
|
|
If you're used to writing programs with MASM, or with \i{TASM} in
|
|
|
|
MASM-compatible (non-Ideal) mode, or with \i\c{a86}, this section
|
|
|
|
attempts to outline the major differences between MASM's syntax and
|
|
|
|
NASM's. If you're not already used to MASM, it's probably worth
|
|
|
|
skipping this section.
|
|
|
|
|
|
|
|
\S{qscs} NASM Is \I{case sensitivity}Case-Sensitive
|
|
|
|
|
|
|
|
One simple difference is that NASM is case-sensitive. It makes a
|
|
|
|
difference whether you call your label \c{foo}, \c{Foo} or \c{FOO}.
|
|
|
|
If you're assembling to DOS or OS/2 \c{.OBJ} files, you can invoke
|
|
|
|
the \i\c{UPPERCASE} directive (documented in \k{objfmt}) to ensure
|
|
|
|
that all symbols exported to other code modules are forced to be
|
|
|
|
upper case; but even then, \e{within} a single module, NASM will
|
|
|
|
distinguish between labels differing only in case.
|
|
|
|
|
|
|
|
\S{qsbrackets} NASM Requires \i{Square Brackets} For \i{Memory References}
|
|
|
|
|
|
|
|
NASM was designed with simplicity of syntax in mind. One of the
|
|
|
|
\i{design goals} of NASM is that it should be possible, as far as is
|
|
|
|
practical, for the user to look at a single line of NASM code
|
|
|
|
and tell what opcode is generated by it. You can't do this in MASM:
|
|
|
|
if you declare, for example,
|
|
|
|
|
|
|
|
\c foo equ 1
|
|
|
|
\c bar dw 2
|
|
|
|
|
|
|
|
then the two lines of code
|
|
|
|
|
|
|
|
\c mov ax,foo
|
|
|
|
\c mov ax,bar
|
|
|
|
|
|
|
|
generate completely different opcodes, despite having
|
|
|
|
identical-looking syntaxes.
|
|
|
|
|
|
|
|
NASM avoids this undesirable situation by having a much simpler
|
|
|
|
syntax for memory references. The rule is simply that any access to
|
|
|
|
the \e{contents} of a memory location requires square brackets
|
|
|
|
around the address, and any access to the \e{address} of a variable
|
|
|
|
doesn't. So an instruction of the form \c{mov ax,foo} will
|
|
|
|
\e{always} refer to a compile-time constant, whether it's an \c{EQU}
|
|
|
|
or the address of a variable; and to access the \e{contents} of the
|
|
|
|
variable \c{bar}, you must code \c{mov ax,[bar]}.
|
|
|
|
|
|
|
|
This also means that NASM has no need for MASM's \i\c{OFFSET}
|
|
|
|
keyword, since the MASM code \c{mov ax,offset bar} means exactly the
|
|
|
|
same thing as NASM's \c{mov ax,bar}. If you're trying to get
|
|
|
|
large amounts of MASM code to assemble sensibly under NASM, you
|
|
|
|
can always code \c{%idefine offset} to make the preprocessor treat
|
|
|
|
the \c{OFFSET} keyword as a no-op.
|
|
|
|
|
|
|
|
This issue is even more confusing in \i\c{a86}, where declaring a
|
|
|
|
label with a trailing colon defines it to be a `label' as opposed to
|
|
|
|
a `variable' and causes \c{a86} to adopt NASM-style semantics; so in
|
|
|
|
\c{a86}, \c{mov ax,var} has different behaviour depending on whether
|
|
|
|
\c{var} was declared as \c{var: dw 0} (a label) or \c{var dw 0} (a
|
|
|
|
word-size variable). NASM is very simple by comparison:
|
|
|
|
\e{everything} is a label.
|
|
|
|
|
|
|
|
NASM, in the interests of simplicity, also does not support the
|
|
|
|
\i{hybrid syntaxes} supported by MASM and its clones, such as
|
|
|
|
\c{mov ax,table[bx]}, where a memory reference is denoted by one
|
|
|
|
portion outside square brackets and another portion inside. The
|
|
|
|
correct syntax for the above is \c{mov ax,[table+bx]}. Likewise,
|
|
|
|
\c{mov ax,es:[di]} is wrong and \c{mov ax,[es:di]} is right.
|
|
|
|
|
|
|
|
\S{qstypes} NASM Doesn't Store \i{Variable Types}
|
|
|
|
|
|
|
|
NASM, by design, chooses not to remember the types of variables you
|
|
|
|
declare. Whereas MASM will remember, on seeing \c{var dw 0}, that
|
|
|
|
you declared \c{var} as a word-size variable, and will then be able
|
|
|
|
to fill in the \i{ambiguity} in the size of the instruction \c{mov
|
|
|
|
var,2}, NASM will deliberately remember nothing about the symbol
|
|
|
|
\c{var} except where it begins, and so you must explicitly code
|
|
|
|
\c{mov word [var],2}.
|
|
|
|
|
|
|
|
For this reason, NASM doesn't support the \c{LODS}, \c{MOVS},
|
|
|
|
\c{STOS}, \c{SCAS}, \c{CMPS}, \c{INS}, or \c{OUTS} instructions,
|
|
|
|
but only supports the forms such as \c{LODSB}, \c{MOVSW}, and
|
|
|
|
\c{SCASD}, which explicitly specify the size of the components of
|
|
|
|
the strings being manipulated.
|
|
|
|
|
|
|
|
\S{qsassume} NASM Doesn't \i\c{ASSUME}
|
|
|
|
|
|
|
|
As part of NASM's drive for simplicity, it also does not support the
|
|
|
|
\c{ASSUME} directive. NASM will not keep track of what values you
|
|
|
|
choose to put in your segment registers, and will never
|
|
|
|
\e{automatically} generate a \i{segment override} prefix.
|
|
|
|
|
|
|
|
\S{qsmodel} NASM Doesn't Support \i{Memory Models}
|
|
|
|
|
|
|
|
NASM also does not have any directives to support different 16-bit
|
|
|
|
memory models. The programmer has to keep track of which functions
|
|
|
|
are supposed to be called with a \i{far call} and which with a
|
|
|
|
\i{near call}, and is responsible for coding the correct form of
|
|
|
|
\c{RET} instruction (\c{RETN} or \c{RETF}; NASM accepts \c{RET}
|
|
|
|
itself as an alternate form for \c{RETN}); in addition, the
|
|
|
|
programmer is responsible for coding \c{CALL FAR} instructions where
|
|
|
|
necessary when calling \e{external} functions, and must also keep
|
|
|
|
track of which external variable definitions are far and which are
|
|
|
|
near.
|
|
|
|
|
|
|
|
\S{qsfpu} \i{Floating-Point} Differences
|
|
|
|
|
|
|
|
NASM uses different names to refer to floating-point registers from
|
|
|
|
MASM: where MASM would call them \c{ST(0)}, \c{ST(1)} and so on, and
|
|
|
|
\i\c{a86} would call them simply \c{0}, \c{1} and so on, NASM
|
|
|
|
chooses to call them \c{st0}, \c{st1} etc.
|
|
|
|
|
|
|
|
As of version 0.96, NASM now treats the instructions with
|
|
|
|
\i{`nowait'} forms in the same way as MASM-compatible assemblers.
|
|
|
|
The idiosyncratic treatment employed by 0.95 and earlier was based
|
|
|
|
on a misunderstanding by the authors.
|
|
|
|
|
|
|
|
\S{qsother} Other Differences
|
|
|
|
|
|
|
|
For historical reasons, NASM uses the keyword \i\c{TWORD} where MASM
|
|
|
|
and compatible assemblers use \i\c{TBYTE}.
|
|
|
|
|
|
|
|
NASM does not declare \i{uninitialised storage} in the same way as
|
|
|
|
MASM: where a MASM programmer might use \c{stack db 64 dup (?)},
|
|
|
|
NASM requires \c{stack resb 64}, intended to be read as `reserve 64
|
|
|
|
bytes'. For a limited amount of compatibility, since NASM treats
|
|
|
|
\c{?} as a valid character in symbol names, you can code \c{? equ 0}
|
|
|
|
and then writing \c{dw ?} will at least do something vaguely useful.
|
|
|
|
\I\c{RESB}\i\c{DUP} is still not a supported syntax, however.
|
|
|
|
|
|
|
|
In addition to all of this, macros and directives work completely
|
|
|
|
differently to MASM. See \k{preproc} and \k{directive} for further
|
|
|
|
details.
|
|
|
|
|
|
|
|
\C{lang} The NASM Language
|
|
|
|
|
|
|
|
\H{syntax} Layout of a NASM Source Line
|
|
|
|
|
|
|
|
As in most assemblers, each NASM source line contains (unless it
|
|
|
|
is a macro, a preprocessor directive or an assembler directive: see
|
|
|
|
\k{preproc} and \k{directive}) some combination of the four fields
|
|
|
|
|
|
|
|
\c label: instruction operands ; comment
|
|
|
|
|
|
|
|
As usual, most of these fields are optional; the presence or absence
|
|
|
|
of any combination of a label, an instruction and a comment is allowed.
|
|
|
|
Of course, the operand field is either required or forbidden by the
|
|
|
|
presence and nature of the instruction field.
|
|
|
|
|
|
|
|
NASM places no restrictions on white space within a line: labels may
|
|
|
|
have white space before them, or instructions may have no space
|
|
|
|
before them, or anything. The \i{colon} after a label is also
|
|
|
|
optional. (Note that this means that if you intend to code \c{lodsb}
|
|
|
|
alone on a line, and type \c{lodab} by accident, then that's still a
|
|
|
|
valid source line which does nothing but define a label. Running
|
|
|
|
NASM with the command-line option
|
|
|
|
\I{orphan-labels}\c{-w+orphan-labels} will cause it to warn you if
|
|
|
|
you define a label alone on a line without a \i{trailing colon}.)
|
|
|
|
|
|
|
|
\i{Valid characters} in labels are letters, numbers, \c{_}, \c{$},
|
|
|
|
\c{#}, \c{@}, \c{~}, \c{.}, and \c{?}. The only characters which may
|
|
|
|
be used as the \e{first} character of an identifier are letters,
|
|
|
|
\c{.} (with special meaning: see \k{locallab}), \c{_} and \c{?}.
|
|
|
|
An identifier may also be prefixed with a \I{$prefix}\c{$} to
|
|
|
|
indicate that it is intended to be read as an identifier and not a
|
|
|
|
reserved word; thus, if some other module you are linking with
|
|
|
|
defines a symbol called \c{eax}, you can refer to \c{$eax} in NASM
|
|
|
|
code to distinguish the symbol from the register.
|
|
|
|
|
|
|
|
The instruction field may contain any machine instruction: Pentium
|
|
|
|
and P6 instructions, FPU instructions, MMX instructions and even
|
|
|
|
undocumented instructions are all supported. The instruction may be
|
|
|
|
prefixed by \c{LOCK}, \c{REP}, \c{REPE}/\c{REPZ} or
|
|
|
|
\c{REPNE}/\c{REPNZ}, in the usual way. Explicit \I{address-size
|
|
|
|
prefixes}address-size and \i{operand-size prefixes} \c{A16},
|
|
|
|
\c{A32}, \c{O16} and \c{O32} are provided - one example of their use
|
|
|
|
is given in \k{mixsize}. You can also use the name of a \I{segment
|
|
|
|
override}segment register as an instruction prefix: coding
|
|
|
|
\c{es mov [bx],ax} is equivalent to coding \c{mov [es:bx],ax}. We
|
|
|
|
recommend the latter syntax, since it is consistent with other
|
|
|
|
syntactic features of the language, but for instructions such as
|
|
|
|
\c{LODSB}, which has no operands and yet can require a segment
|
|
|
|
override, there is no clean syntactic way to proceed apart from
|
|
|
|
\c{es lodsb}.
|
|
|
|
|
|
|
|
An instruction is not required to use a prefix: prefixes such as
|
|
|
|
\c{CS}, \c{A32}, \c{LOCK} or \c{REPE} can appear on a line by
|
|
|
|
themselves, and NASM will just generate the prefix bytes.
|
|
|
|
|
|
|
|
In addition to actual machine instructions, NASM also supports a
|
|
|
|
number of pseudo-instructions, described in \k{pseudop}.
|
|
|
|
|
|
|
|
Instruction \i{operands} may take a number of forms: they can be
|
|
|
|
registers, described simply by the register name (e.g. \c{ax},
|
|
|
|
\c{bp}, \c{ebx}, \c{cr0}: NASM does not use the \c{gas}-style
|
|
|
|
syntax in which register names must be prefixed by a \c{%} sign), or
|
|
|
|
they can be \i{effective addresses} (see \k{effaddr}), constants
|
|
|
|
(\k{const}) or expressions (\k{expr}).
|
|
|
|
|
|
|
|
For \i{floating-point} instructions, NASM accepts a wide range of
|
|
|
|
syntaxes: you can use two-operand forms like MASM supports, or you
|
|
|
|
can use NASM's native single-operand forms in most cases. Details of
|
|
|
|
all forms of each supported instruction are given in
|
|
|
|
\k{iref}. For example, you can code:
|
|
|
|
|
|
|
|
\c fadd st1 ; this sets st0 := st0 + st1
|
|
|
|
\c fadd st0,st1 ; so does this
|
|
|
|
\c
|
|
|
|
\c fadd st1,st0 ; this sets st1 := st1 + st0
|
|
|
|
\c fadd to st1 ; so does this
|
|
|
|
|
|
|
|
Almost any floating-point instruction that references memory must
|
|
|
|
use one of the prefixes \i\c{DWORD}, \i\c{QWORD} or \i\c{TWORD} to
|
|
|
|
indicate what size of \i{memory operand} it refers to.
|
|
|
|
|
|
|
|
\H{pseudop} \i{Pseudo-Instructions}
|
|
|
|
|
|
|
|
Pseudo-instructions are things which, though not real x86 machine
|
|
|
|
instructions, are used in the instruction field anyway because
|
|
|
|
that's the most convenient place to put them. The current
|
|
|
|
pseudo-instructions are \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ} and
|
|
|
|
\i\c{DT}, their \i{uninitialised} counterparts \i\c{RESB},
|
|
|
|
\i\c{RESW}, \i\c{RESD}, \i\c{RESQ} and \i\c{REST}, the \i\c{INCBIN}
|
|
|
|
command, the \i\c{EQU} command, and the \i\c{TIMES} prefix.
|
|
|
|
|
|
|
|
\S{db} \c{DB} and friends: Declaring Initialised Data
|
|
|
|
|
|
|
|
\i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ} and \i\c{DT} are used, much
|
|
|
|
as in MASM, to declare initialised data in the output file. They can
|
|
|
|
be invoked in a wide range of ways:
|
|
|
|
\I{floating-point}\I{character constant}\I{string constant}
|
|
|
|
|
|
|
|
\c db 0x55 ; just the byte 0x55
|
|
|
|
\c db 0x55,0x56,0x57 ; three bytes in succession
|
|
|
|
\c db 'a',0x55 ; character constants are OK
|
|
|
|
\c db 'hello',13,10,'$' ; so are string constants
|
|
|
|
\c dw 0x1234 ; 0x34 0x12
|
|
|
|
\c dw 'a' ; 0x61 0x00 (it's just a number)
|
|
|
|
\c dw 'ab' ; 0x61 0x62 (character constant)
|
|
|
|
\c dw 'abc' ; 0x61 0x62 0x63 0x00 (string)
|
|
|
|
\c dd 0x12345678 ; 0x78 0x56 0x34 0x12
|
|
|
|
\c dd 1.234567e20 ; floating-point constant
|
|
|
|
\c dq 1.234567e20 ; double-precision float
|
|
|
|
\c dt 1.234567e20 ; extended-precision float
|
|
|
|
|
|
|
|
\c{DQ} and \c{DT} do not accept \i{numeric constants} or string
|
|
|
|
constants as operands.
|
|
|
|
|
|
|
|
\S{resb} \c{RESB} and friends: Declaring \i{Uninitialised} Data
|
|
|
|
|
|
|
|
\i\c{RESB}, \i\c{RESW}, \i\c{RESD}, \i\c{RESQ} and \i\c{REST} are
|
|
|
|
designed to be used in the BSS section of a module: they declare
|
|
|
|
\e{uninitialised} storage space. Each takes a single operand, which
|
|
|
|
is the number of bytes, words, doublewords or whatever to reserve.
|
|
|
|
As stated in \k{qsother}, NASM does not support the MASM/TASM syntax
|
|
|
|
of reserving uninitialised space by writing \I\c{?}\c{DW ?} or
|
|
|
|
similar things: this is what it does instead. The operand to a
|
|
|
|
\c{RESB}-type pseudo-instruction is a \i\e{critical expression}: see
|
|
|
|
\k{crit}.
|
|
|
|
|
|
|
|
For example:
|
|
|
|
|
|
|
|
\c buffer: resb 64 ; reserve 64 bytes
|
|
|
|
\c wordvar: resw 1 ; reserve a word
|
|
|
|
\c realarray resq 10 ; array of ten reals
|
|
|
|
|
|
|
|
\S{incbin} \i\c{INCBIN}: Including External \i{Binary Files}
|
|
|
|
|
|
|
|
\c{INCBIN} is borrowed from the old Amiga assembler \i{DevPac}: it
|
|
|
|
includes a binary file verbatim into the output file. This can be
|
|
|
|
handy for (for example) including \i{graphics} and \i{sound} data
|
|
|
|
directly into a game executable file. It can be called in one of
|
|
|
|
these three ways:
|
|
|
|
|
|
|
|
\c incbin "file.dat" ; include the whole file
|
|
|
|
\c incbin "file.dat",1024 ; skip the first 1024 bytes
|
|
|
|
\c incbin "file.dat",1024,512 ; skip the first 1024, and
|
|
|
|
\c ; actually include at most 512
|
|
|
|
|
|
|
|
\S{equ} \i\c{EQU}: Defining Constants
|
|
|
|
|
|
|
|
\c{EQU} defines a symbol to a given constant value: when \c{EQU} is
|
|
|
|
used, the source line must contain a label. The action of \c{EQU} is
|
|
|
|
to define the given label name to the value of its (only) operand.
|
|
|
|
This definition is absolute, and cannot change later. So, for
|
|
|
|
example,
|
|
|
|
|
|
|
|
\c message db 'hello, world'
|
|
|
|
\c msglen equ $-message
|
|
|
|
|
|
|
|
defines \c{msglen} to be the constant 12. \c{msglen} may not then be
|
|
|
|
redefined later. This is not a \i{preprocessor} definition either:
|
|
|
|
the value of \c{msglen} is evaluated \e{once}, using the value of
|
|
|
|
\c{$} (see \k{expr} for an explanation of \c{$}) at the point of
|
|
|
|
definition, rather than being evaluated wherever it is referenced
|
|
|
|
and using the value of \c{$} at the point of reference. Note that
|
|
|
|
the operand to an \c{EQU} is also a \i{critical expression}
|
|
|
|
(\k{crit}).
|
|
|
|
|
|
|
|
\S{times} \i\c{TIMES}: \i{Repeating} Instructions or Data
|
|
|
|
|
|
|
|
The \c{TIMES} prefix causes the instruction to be assembled multiple
|
|
|
|
times. This is partly present as NASM's equivalent of the \i\c{DUP}
|
|
|
|
syntax supported by \i{MASM}-compatible assemblers, in that you can
|
|
|
|
code
|
|
|
|
|
|
|
|
\c zerobuf: times 64 db 0
|
|
|
|
|
|
|
|
or similar things; but \c{TIMES} is more versatile than that. The
|
|
|
|
argument to \c{TIMES} is not just a numeric constant, but a numeric
|
|
|
|
\e{expression}, so you can do things like
|
|
|
|
|
|
|
|
\c buffer: db 'hello, world'
|
|
|
|
\c times 64-$+buffer db ' '
|
|
|
|
|
|
|
|
which will store exactly enough spaces to make the total length of
|
|
|
|
\c{buffer} up to 64. Finally, \c{TIMES} can be applied to ordinary
|
|
|
|
instructions, so you can code trivial \i{unrolled loops} in it:
|
|
|
|
|
|
|
|
\c times 100 movsb
|
|
|
|
|
|
|
|
Note that there is no effective difference between \c{times 100 resb
|
|
|
|
1} and \c{resb 100}, except that the latter will be assembled about
|
|
|
|
100 times faster due to the internal structure of the assembler.
|
|
|
|
|
|
|
|
The operand to \c{TIMES}, like that of \c{EQU} and those of \c{RESB}
|
|
|
|
and friends, is a critical expression (\k{crit}).
|
|
|
|
|
|
|
|
Note also that \c{TIMES} can't be applied to \i{macros}: the reason
|
|
|
|
for this is that \c{TIMES} is processed after the macro phase, which
|
|
|
|
allows the argument to \c{TIMES} to contain expressions such as
|
|
|
|
\c{64-$+buffer} as above. To repeat more than one line of code, or a
|
|
|
|
complex macro, use the preprocessor \i\c{%rep} directive.
|
|
|
|
|
|
|
|
\H{effaddr} Effective Addresses
|
|
|
|
|
|
|
|
An \i{effective address} is any operand to an instruction which
|
|
|
|
\I{memory reference}references memory. Effective addresses, in NASM,
|
|
|
|
have a very simple syntax: they consist of an expression evaluating
|
|
|
|
to the desired address, enclosed in \i{square brackets}. For
|
|
|
|
example:
|
|
|
|
|
|
|
|
\c wordvar dw 123
|
|
|
|
\c mov ax,[wordvar]
|
|
|
|
\c mov ax,[wordvar+1]
|
|
|
|
\c mov ax,[es:wordvar+bx]
|
|
|
|
|
|
|
|
Anything not conforming to this simple system is not a valid memory
|
|
|
|
reference in NASM, for example \c{es:wordvar[bx]}.
|
|
|
|
|
|
|
|
More complicated effective addresses, such as those involving more
|
|
|
|
than one register, work in exactly the same way:
|
|
|
|
|
|
|
|
\c mov eax,[ebx*2+ecx+offset]
|
|
|
|
\c mov ax,[bp+di+8]
|
|
|
|
|
|
|
|
NASM is capable of doing \i{algebra} on these effective addresses,
|
|
|
|
so that things which don't necessarily \e{look} legal are perfectly
|
|
|
|
all right:
|
|
|
|
|
|
|
|
\c mov eax,[ebx*5] ; assembles as [ebx*4+ebx]
|
|
|
|
\c mov eax,[label1*2-label2] ; ie [label1+(label1-label2)]
|
|
|
|
|
|
|
|
Some forms of effective address have more than one assembled form;
|
|
|
|
in most such cases NASM will generate the smallest form it can. For
|
|
|
|
example, there are distinct assembled forms for the 32-bit effective
|
|
|
|
addresses \c{[eax*2+0]} and \c{[eax+eax]}, and NASM will generally
|
|
|
|
generate the latter on the grounds that the former requires four
|
|
|
|
bytes to store a zero offset.
|
|
|
|
|
|
|
|
NASM has a hinting mechanism which will cause \c{[eax+ebx]} and
|
|
|
|
\c{[ebx+eax]} to generate different opcodes; this is occasionally
|
|
|
|
useful because \c{[esi+ebp]} and \c{[ebp+esi]} have different
|
|
|
|
default segment registers.
|
|
|
|
|
|
|
|
However, you can force NASM to generate an effective address in a
|
|
|
|
particular form by the use of the keywords \c{BYTE}, \c{WORD},
|
|
|
|
\c{DWORD} and \c{NOSPLIT}. If you need \c{[eax+3]} to be assembled
|
|
|
|
using a double-word offset field instead of the one byte NASM will
|
|
|
|
normally generate, you can code \c{[dword eax+3]}. Similarly, you
|
|
|
|
can force NASM to use a byte offset for a small value which it
|
|
|
|
hasn't seen on the first pass (see \k{crit} for an example of such a
|
|
|
|
code fragment) by using \c{[byte eax+offset]}. As special cases,
|
|
|
|
\c{[byte eax]} will code \c{[eax+0]} with a byte offset of zero, and
|
|
|
|
\c{[dword eax]} will code it with a double-word offset of zero. The
|
|
|
|
normal form, \c{[eax]}, will be coded with no offset field.
|
|
|
|
|
|
|
|
Similarly, NASM will split \c{[eax*2]} into \c{[eax+eax]} because
|
|
|
|
that allows the offset field to be absent and space to be saved; in
|
|
|
|
fact, it will also split \c{[eax*2+offset]} into
|
|
|
|
\c{[eax+eax+offset]}. You can combat this behaviour by the use of
|
|
|
|
the \c{NOSPLIT} keyword: \c{[nosplit eax*2]} will force
|
|
|
|
\c{[eax*2+0]} to be generated literally.
|
|
|
|
|
|
|
|
\H{const} \i{Constants}
|
|
|
|
|
|
|
|
NASM understands four different types of constant: numeric,
|
|
|
|
character, string and floating-point.
|
|
|
|
|
|
|
|
\S{numconst} \i{Numeric Constants}
|
|
|
|
|
|
|
|
A numeric constant is simply a number. NASM allows you to specify
|
|
|
|
numbers in a variety of number bases, in a variety of ways: you can
|
|
|
|
suffix \c{H}, \c{Q} and \c{B} for \i{hex}, \i{octal} and \i{binary},
|
|
|
|
or you can prefix \c{0x} for hex in the style of C, or you can
|
|
|
|
prefix \c{$} for hex in the style of Borland Pascal. Note, though,
|
|
|
|
that the \I{$prefix}\c{$} prefix does double duty as a prefix on
|
|
|
|
identifiers (see \k{syntax}), so a hex number prefixed with a \c{$}
|
|
|
|
sign must have a digit after the \c{$} rather than a letter.
|
|
|
|
|
|
|
|
Some examples:
|
|
|
|
|
|
|
|
\c mov ax,100 ; decimal
|
|
|
|
\c mov ax,0a2h ; hex
|
|
|
|
\c mov ax,$0a2 ; hex again: the 0 is required
|
|
|
|
\c mov ax,0xa2 ; hex yet again
|
|
|
|
\c mov ax,777q ; octal
|
|
|
|
\c mov ax,10010011b ; binary
|
|
|
|
|
|
|
|
\S{chrconst} \i{Character Constants}
|
|
|
|
|
|
|
|
A character constant consists of up to four characters enclosed in
|
|
|
|
either single or double quotes. The type of quote makes no
|
|
|
|
difference to NASM, except of course that surrounding the constant
|
|
|
|
with single quotes allows double quotes to appear within it and vice
|
|
|
|
versa.
|
|
|
|
|
|
|
|
A character constant with more than one character will be arranged
|
|
|
|
with \i{little-endian} order in mind: if you code
|
|
|
|
|
|
|
|
\c mov eax,'abcd'
|
|
|
|
|
|
|
|
then the constant generated is not \c{0x61626364}, but
|
|
|
|
\c{0x64636261}, so that if you were then to store the value into
|
|
|
|
memory, it would read \c{abcd} rather than \c{dcba}. This is also
|
|
|
|
the sense of character constants understood by the Pentium's
|
|
|
|
\i\c{CPUID} instruction (see \k{insCPUID}).
|
|
|
|
|
|
|
|
\S{strconst} String Constants
|
|
|
|
|
|
|
|
String constants are only acceptable to some pseudo-instructions,
|
|
|
|
namely the \I\c{DW}\I\c{DD}\I\c{DQ}\I\c{DT}\i\c{DB} family and
|
|
|
|
\i\c{INCBIN}.
|
|
|
|
|
|
|
|
A string constant looks like a character constant, only longer. It
|
|
|
|
is treated as a concatenation of maximum-size character constants
|
|
|
|
for the conditions. So the following are equivalent:
|
|
|
|
|
|
|
|
\c db 'hello' ; string constant
|
|
|
|
\c db 'h','e','l','l','o' ; equivalent character constants
|
|
|
|
|
|
|
|
And the following are also equivalent:
|
|
|
|
|
|
|
|
\c dd 'ninechars' ; doubleword string constant
|
|
|
|
\c dd 'nine','char','s' ; becomes three doublewords
|
|
|
|
\c db 'ninechars',0,0,0 ; and really looks like this
|
|
|
|
|
|
|
|
Note that when used as an operand to \c{db}, a constant like
|
|
|
|
\c{'ab'} is treated as a string constant despite being short enough
|
|
|
|
to be a character constant, because otherwise \c{db 'ab'} would have
|
|
|
|
the same effect as \c{db 'a'}, which would be silly. Similarly,
|
|
|
|
three-character or four-character constants are treated as strings
|
|
|
|
when they are operands to \c{dw}.
|
|
|
|
|
|
|
|
\S{fltconst} \I{floating-point, constants}Floating-Point Constants
|
|
|
|
|
|
|
|
\i{Floating-point} constants are acceptable only as arguments to
|
|
|
|
\i\c{DD}, \i\c{DQ} and \i\c{DT}. They are expressed in the
|
|
|
|
traditional form: digits, then a period, then optionally more
|
|
|
|
digits, then optionally an \c{E} followed by an exponent. The period
|
|
|
|
is mandatory, so that NASM can distinguish between \c{dd 1}, which
|
|
|
|
declares an integer constant, and \c{dd 1.0} which declares a
|
|
|
|
floating-point constant.
|
|
|
|
|
|
|
|
Some examples:
|
|
|
|
|
|
|
|
\c dd 1.2 ; an easy one
|
|
|
|
\c dq 1.e10 ; 10,000,000,000
|
|
|
|
\c dq 1.e+10 ; synonymous with 1.e10
|
|
|
|
\c dq 1.e-10 ; 0.000 000 000 1
|
|
|
|
\c dt 3.141592653589793238462 ; pi
|
|
|
|
|
|
|
|
NASM cannot do compile-time arithmetic on floating-point constants.
|
|
|
|
This is because NASM is designed to be portable - although it always
|
|
|
|
generates code to run on x86 processors, the assembler itself can
|
|
|
|
run on any system with an ANSI C compiler. Therefore, the assembler
|
|
|
|
cannot guarantee the presence of a floating-point unit capable of
|
|
|
|
handling the \i{Intel number formats}, and so for NASM to be able to
|
|
|
|
do floating arithmetic it would have to include its own complete set
|
|
|
|
of floating-point routines, which would significantly increase the
|
|
|
|
size of the assembler for very little benefit.
|
|
|
|
|
|
|
|
\H{expr} \i{Expressions}
|
|
|
|
|
|
|
|
Expressions in NASM are similar in syntax to those in C.
|
|
|
|
|
|
|
|
NASM does not guarantee the size of the integers used to evaluate
|
|
|
|
expressions at compile time: since NASM can compile and run on
|
|
|
|
64-bit systems quite happily, don't assume that expressions are
|
|
|
|
evaluated in 32-bit registers and so try to make deliberate use of
|
|
|
|
\i{integer overflow}. It might not always work. The only thing NASM
|
|
|
|
will guarantee is what's guaranteed by ANSI C: you always have \e{at
|
|
|
|
least} 32 bits to work in.
|
|
|
|
|
|
|
|
NASM supports two special tokens in expressions, allowing
|
|
|
|
calculations to involve the current assembly position: the
|
|
|
|
\I{$ here}\c{$} and \i\c{$$} tokens. \c{$} evaluates to the assembly
|
|
|
|
position at the beginning of the line containing the expression; so
|
|
|
|
you can code an \i{infinite loop} using \c{JMP $}. \c{$$} evaluates
|
|
|
|
to the beginning of the current section; so you can tell how far
|
|
|
|
into the section you are by using \c{($-$$)}.
|
|
|
|
|
|
|
|
The arithmetic \i{operators} provided by NASM are listed here, in
|
|
|
|
increasing order of \i{precedence}.
|
|
|
|
|
|
|
|
\S{expor} \i\c{|}: \i{Bitwise OR} Operator
|
|
|
|
|
|
|
|
The \c{|} operator gives a bitwise OR, exactly as performed by the
|
|
|
|
\c{OR} machine instruction. Bitwise OR is the lowest-priority
|
|
|
|
arithmetic operator supported by NASM.
|
|
|
|
|
|
|
|
\S{expxor} \i\c{^}: \i{Bitwise XOR} Operator
|
|
|
|
|
|
|
|
\c{^} provides the bitwise XOR operation.
|
|
|
|
|
|
|
|
\S{expand} \i\c{&}: \i{Bitwise AND} Operator
|
|
|
|
|
|
|
|
\c{&} provides the bitwise AND operation.
|
|
|
|
|
|
|
|
\S{expshift} \i\c{<<} and \i\c{>>}: \i{Bit Shift} Operators
|
|
|
|
|
|
|
|
\c{<<} gives a bit-shift to the left, just as it does in C. So \c{5<<3}
|
|
|
|
evaluates to 5 times 8, or 40. \c{>>} gives a bit-shift to the
|
|
|
|
right; in NASM, such a shift is \e{always} unsigned, so that
|
|
|
|
the bits shifted in from the left-hand end are filled with zero
|
|
|
|
rather than a sign-extension of the previous highest bit.
|
|
|
|
|
|
|
|
\S{expplmi} \I{+ opaddition}\c{+} and \I{- opsubtraction}\c{-}:
|
|
|
|
\i{Addition} and \i{Subtraction} Operators
|
|
|
|
|
|
|
|
The \c{+} and \c{-} operators do perfectly ordinary addition and
|
|
|
|
subtraction.
|
|
|
|
|
|
|
|
\S{expmul} \i\c{*}, \i\c{/}, \i\c{//}, \i\c{%} and \i\c{%%}:
|
|
|
|
\i{Multiplication} and \i{Division}
|
|
|
|
|
|
|
|
\c{*} is the multiplication operator. \c{/} and \c{//} are both
|
|
|
|
division operators: \c{/} is \i{unsigned division} and \c{//} is
|
|
|
|
\i{signed division}. Similarly, \c{%} and \c{%%} provide \I{unsigned
|
|
|
|
modulo}\I{modulo operators}unsigned and
|
|
|
|
\i{signed modulo} operators respectively.
|
|
|
|
|
|
|
|
NASM, like ANSI C, provides no guarantees about the sensible
|
|
|
|
operation of the signed modulo operator.
|
|
|
|
|
|
|
|
Since the \c{%} character is used extensively by the macro
|
|
|
|
\i{preprocessor}, you should ensure that both the signed and unsigned
|
|
|
|
modulo operators are followed by white space wherever they appear.
|
|
|
|
|
|
|
|
\S{expunary} \i{Unary Operators}: \I{+ opunary}\c{+}, \I{- opunary}\c{-},
|
|
|
|
\i\c{~} and \i\c{SEG}
|
|
|
|
|
|
|
|
The highest-priority operators in NASM's expression grammar are
|
|
|
|
those which only apply to one argument. \c{-} negates its operand,
|
|
|
|
\c{+} does nothing (it's provided for symmetry with \c{-}), \c{~}
|
|
|
|
computes the \i{one's complement} of its operand, and \c{SEG}
|
|
|
|
provides the \i{segment address} of its operand (explained in more
|
|
|
|
detail in \k{segwrt}).
|
|
|
|
|
|
|
|
\H{segwrt} \i\c{SEG} and \i\c{WRT}
|
|
|
|
|
|
|
|
When writing large 16-bit programs, which must be split into
|
|
|
|
multiple \i{segments}, it is often necessary to be able to refer to
|
|
|
|
the \I{segment address}segment part of the address of a symbol. NASM
|
|
|
|
supports the \c{SEG} operator to perform this function.
|
|
|
|
|
|
|
|
The \c{SEG} operator returns the \i\e{preferred} segment base of a
|
|
|
|
symbol, defined as the segment base relative to which the offset of
|
|
|
|
the symbol makes sense. So the code
|
|
|
|
|
|
|
|
\c mov ax,seg symbol
|
|
|
|
\c mov es,ax
|
|
|
|
\c mov bx,symbol
|
|
|
|
|
|
|
|
will load \c{ES:BX} with a valid pointer to the symbol \c{symbol}.
|
|
|
|
|
|
|
|
Things can be more complex than this: since 16-bit segments and
|
|
|
|
\i{groups} may \I{overlapping segments}overlap, you might occasionally
|
|
|
|
want to refer to some symbol using a different segment base from the
|
|
|
|
preferred one. NASM lets you do this, by the use of the \c{WRT}
|
|
|
|
(With Reference To) keyword. So you can do things like
|
|
|
|
|
|
|
|
\c mov ax,weird_seg ; weird_seg is a segment base
|
|
|
|
\c mov es,ax
|
|
|
|
\c mov bx,symbol wrt weird_seg
|
|
|
|
|
|
|
|
to load \c{ES:BX} with a different, but functionally equivalent,
|
|
|
|
pointer to the symbol \c{symbol}.
|
|
|
|
|
|
|
|
NASM supports far (inter-segment) calls and jumps by means of the
|
|
|
|
syntax \c{call segment:offset}, where \c{segment} and \c{offset}
|
|
|
|
both represent immediate values. So to call a far procedure, you
|
|
|
|
could code either of
|
|
|
|
|
|
|
|
\c call (seg procedure):procedure
|
|
|
|
\c call weird_seg:(procedure wrt weird_seg)
|
|
|
|
|
|
|
|
(The parentheses are included for clarity, to show the intended
|
|
|
|
parsing of the above instructions. They are not necessary in
|
|
|
|
practice.)
|
|
|
|
|
|
|
|
NASM supports the syntax \I\c{CALL FAR}\c{call far procedure} as a
|
|
|
|
synonym for the first of the above usages. \c{JMP} works identically
|
|
|
|
to \c{CALL} in these examples.
|
|
|
|
|
|
|
|
To declare a \i{far pointer} to a data item in a data segment, you
|
|
|
|
must code
|
|
|
|
|
|
|
|
\c dw symbol, seg symbol
|
|
|
|
|
|
|
|
NASM supports no convenient synonym for this, though you can always
|
|
|
|
invent one using the macro processor.
|
|
|
|
|
|
|
|
\H{crit} \i{Critical Expressions}
|
|
|
|
|
|
|
|
A limitation of NASM is that it is a \i{two-pass assembler}; unlike
|
|
|
|
TASM and others, it will always do exactly two \I{passes}\i{assembly
|
|
|
|
passes}. Therefore it is unable to cope with source files that are
|
|
|
|
complex enough to require three or more passes.
|
|
|
|
|
|
|
|
The first pass is used to determine the size of all the assembled
|
|
|
|
code and data, so that the second pass, when generating all the
|
|
|
|
code, knows all the symbol addresses the code refers to. So one
|
|
|
|
thing NASM can't handle is code whose size depends on the value of a
|
|
|
|
symbol declared after the code in question. For example,
|
|
|
|
|
|
|
|
\c times (label-$) db 0
|
|
|
|
\c label: db 'Where am I?'
|
|
|
|
|
|
|
|
The argument to \i\c{TIMES} in this case could equally legally
|
|
|
|
evaluate to anything at all; NASM will reject this example because
|
|
|
|
it cannot tell the size of the \c{TIMES} line when it first sees it.
|
|
|
|
It will just as firmly reject the slightly \I{paradox}paradoxical
|
|
|
|
code
|
|
|
|
|
|
|
|
\c times (label-$+1) db 0
|
|
|
|
\c label: db 'NOW where am I?'
|
|
|
|
|
|
|
|
in which \e{any} value for the \c{TIMES} argument is by definition
|
|
|
|
wrong!
|
|
|
|
|
|
|
|
NASM rejects these examples by means of a concept called a
|
|
|
|
\e{critical expression}, which is defined to be an expression whose
|
|
|
|
value is required to be computable in the first pass, and which must
|
|
|
|
therefore depend only on symbols defined before it. The argument to
|
|
|
|
the \c{TIMES} prefix is a critical expression; for the same reason,
|
|
|
|
the arguments to the \i\c{RESB} family of pseudo-instructions are
|
|
|
|
also critical expressions.
|
|
|
|
|
|
|
|
Critical expressions can crop up in other contexts as well: consider
|
|
|
|
the following code.
|
|
|
|
|
|
|
|
\c mov ax,symbol1
|
|
|
|
\c symbol1 equ symbol2
|
|
|
|
\c symbol2:
|
|
|
|
|
|
|
|
On the first pass, NASM cannot determine the value of \c{symbol1},
|
|
|
|
because \c{symbol1} is defined to be equal to \c{symbol2} which NASM
|
|
|
|
hasn't seen yet. On the second pass, therefore, when it encounters
|
|
|
|
the line \c{mov ax,symbol1}, it is unable to generate the code for
|
|
|
|
it because it still doesn't know the value of \c{symbol1}. On the
|
|
|
|
next line, it would see the \i\c{EQU} again and be able to determine
|
|
|
|
the value of \c{symbol1}, but by then it would be too late.
|
|
|
|
|
|
|
|
NASM avoids this problem by defining the right-hand side of an
|
|
|
|
\c{EQU} statement to be a critical expression, so the definition of
|
|
|
|
\c{symbol1} would be rejected in the first pass.
|
|
|
|
|
|
|
|
There is a related issue involving \i{forward references}: consider
|
|
|
|
this code fragment.
|
|
|
|
|
|
|
|
\c mov eax,[ebx+offset]
|
|
|
|
\c offset equ 10
|
|
|
|
|
|
|
|
NASM, on pass one, must calculate the size of the instruction \c{mov
|
|
|
|
eax,[ebx+offset]} without knowing the value of \c{offset}. It has no
|
|
|
|
way of knowing that \c{offset} is small enough to fit into a
|
|
|
|
one-byte offset field and that it could therefore get away with
|
|
|
|
generating a shorter form of the \i{effective-address} encoding; for
|
|
|
|
all it knows, in pass one, \c{offset} could be a symbol in the code
|
|
|
|
segment, and it might need the full four-byte form. So it is forced
|
|
|
|
to compute the size of the instruction to accommodate a four-byte
|
|
|
|
address part. In pass two, having made this decision, it is now
|
|
|
|
forced to honour it and keep the instruction large, so the code
|
|
|
|
generated in this case is not as small as it could have been. This
|
|
|
|
problem can be solved by defining \c{offset} before using it, or by
|
|
|
|
forcing byte size in the effective address by coding \c{[byte
|
|
|
|
ebx+offset]}.
|
|
|
|
|
|
|
|
\H{locallab} \i{Local Labels}
|
|
|
|
|
|
|
|
NASM gives special treatment to symbols beginning with a \i{period}.
|
|
|
|
A label beginning with a single period is treated as a \e{local}
|
|
|
|
label, which means that it is associated with the previous non-local
|
|
|
|
label. So, for example:
|
|
|
|
|
|
|
|
\c label1 ; some code
|
|
|
|
\c .loop ; some more code
|
|
|
|
\c jne .loop
|
|
|
|
\c ret
|
|
|
|
\c label2 ; some code
|
|
|
|
\c .loop ; some more code
|
|
|
|
\c jne .loop
|
|
|
|
\c ret
|
|
|
|
|
|
|
|
In the above code fragment, each \c{JNE} instruction jumps to the
|
|
|
|
line immediately before it, because the two definitions of \c{.loop}
|
|
|
|
are kept separate by virtue of each being associated with the
|
|
|
|
previous non-local label.
|
|
|
|
|
|
|
|
This form of local label handling is borrowed from the old Amiga
|
|
|
|
assembler \i{DevPac}; however, NASM goes one step further, in
|
|
|
|
allowing access to local labels from other parts of the code. This
|
|
|
|
is achieved by means of \e{defining} a local label in terms of the
|
|
|
|
previous non-local label: the first definition of \c{.loop} above is
|
|
|
|
really defining a symbol called \c{label1.loop}, and the second
|
|
|
|
defines a symbol called \c{label2.loop}. So, if you really needed
|
|
|
|
to, you could write
|
|
|
|
|
|
|
|
\c label3 ; some more code
|
|
|
|
\c ; and some more
|
|
|
|
\c jmp label1.loop
|
|
|
|
|
|
|
|
Sometimes it is useful - in a macro, for instance - to be able to
|
|
|
|
define a label which can be referenced from anywhere but which
|
|
|
|
doesn't interfere with the normal local-label mechanism. Such a
|
|
|
|
label can't be non-local because it would interfere with subsequent
|
|
|
|
definitions of, and references to, local labels; and it can't be
|
|
|
|
local because the macro that defined it wouldn't know the label's
|
|
|
|
full name. NASM therefore introduces a third type of label, which is
|
|
|
|
probably only useful in macro definitions: if a label begins with
|
|
|
|
the \I{label prefix}special prefix \i\c{..@}, then it does nothing
|
|
|
|
to the local label mechanism. So you could code
|
|
|
|
|
|
|
|
\c label1: ; a non-local label
|
|
|
|
\c .local: ; this is really label1.local
|
|
|
|
\c ..@foo: ; this is a special symbol
|
|
|
|
\c label2: ; another non-local label
|
|
|
|
\c .local: ; this is really label2.local
|
|
|
|
\c jmp ..@foo ; this will jump three lines up
|
|
|
|
|
|
|
|
NASM has the capacity to define other special symbols beginning with
|
|
|
|
a double period: for example, \c{..start} is used to specify the
|
|
|
|
entry point in the \c{obj} output format (see \k{dotdotstart}).
|
|
|
|
|
|
|
|
\C{preproc} The NASM \i{Preprocessor}
|
|
|
|
|
|
|
|
NASM contains a powerful \i{macro processor}, which supports
|
|
|
|
conditional assembly, multi-level file inclusion, two forms of macro
|
|
|
|
(single-line and multi-line), and a `context stack' mechanism for
|
|
|
|
extra macro power. Preprocessor directives all begin with a \c{%}
|
|
|
|
sign.
|
|
|
|
|
|
|
|
\H{slmacro} \i{Single-Line Macros}
|
|
|
|
|
|
|
|
\S{define} The Normal Way: \I\c{%idefine}\i\c{%define}
|
|
|
|
|
|
|
|
Single-line macros are defined using the \c{%define} preprocessor
|
|
|
|
directive. The definitions work in a similar way to C; so you can do
|
|
|
|
things like
|
|
|
|
|
|
|
|
\c %define ctrl 0x1F &
|
|
|
|
\c %define param(a,b) ((a)+(a)*(b))
|
|
|
|
\c mov byte [param(2,ebx)], ctrl 'D'
|
|
|
|
|
|
|
|
which will expand to
|
|
|
|
|
|
|
|
\c mov byte [((2)+(2)*(ebx))], 0x1F & 'D'
|
|
|
|
|
|
|
|
When the expansion of a single-line macro contains tokens which
|
|
|
|
invoke another macro, the expansion is performed at invocation time,
|
|
|
|
not at definition time. Thus the code
|
|
|
|
|
|
|
|
\c %define a(x) 1+b(x)
|
|
|
|
\c %define b(x) 2*x
|
|
|
|
\c mov ax,a(8)
|
|
|
|
|
|
|
|
will evaluate in the expected way to \c{mov ax,1+2*8}, even though
|
|
|
|
the macro \c{b} wasn't defined at the time of definition of \c{a}.
|
|
|
|
|
|
|
|
Macros defined with \c{%define} are \i{case sensitive}: after
|
|
|
|
\c{%define foo bar}, only \c{foo} will expand to \c{bar}: \c{Foo} or
|
|
|
|
\c{FOO} will not. By using \c{%idefine} instead of \c{%define} (the
|
|
|
|
`i' stands for `insensitive') you can define all the case variants
|
|
|
|
of a macro at once, so that \c{%idefine foo bar} would cause
|
|
|
|
\c{foo}, \c{Foo}, \c{FOO}, \c{fOO} and so on all to expand to
|
|
|
|
\c{bar}.
|
|
|
|
|
|
|
|
There is a mechanism which detects when a macro call has occurred as
|
|
|
|
a result of a previous expansion of the same macro, to guard against
|
|
|
|
\i{circular references} and infinite loops. If this happens, the
|
|
|
|
preprocessor will only expand the first occurrence of the macro.
|
|
|
|
Hence, if you code
|
|
|
|
|
|
|
|
\c %define a(x) 1+a(x)
|
|
|
|
\c mov ax,a(3)
|
|
|
|
|
|
|
|
the macro \c{a(3)} will expand once, becoming \c{1+a(3)}, and will
|
|
|
|
then expand no further. This behaviour can be useful: see \k{32c}
|
|
|
|
for an example of its use.
|
|
|
|
|
|
|
|
You can \I{overloading, single-line macros}overload single-line
|
|
|
|
macros: if you write
|
|
|
|
|
|
|
|
\c %define foo(x) 1+x
|
|
|
|
\c %define foo(x,y) 1+x*y
|
|
|
|
|
|
|
|
the preprocessor will be able to handle both types of macro call,
|
|
|
|
by counting the parameters you pass; so \c{foo(3)} will become
|
|
|
|
\c{1+3} whereas \c{foo(ebx,2)} will become \c{1+ebx*2}. However, if
|
|
|
|
you define
|
|
|
|
|
|
|
|
\c %define foo bar
|
|
|
|
|
|
|
|
then no other definition of \c{foo} will be accepted: a macro with
|
|
|
|
no parameters prohibits the definition of the same name as a macro
|
|
|
|
\e{with} parameters, and vice versa.
|
|
|
|
|
2002-05-01 04:53:16 +08:00
|
|
|
This doesn't prevent single-line macros being \e{redefined}: you can
|
|
|
|
perfectly well define a macro with
|
|
|
|
|
|
|
|
\c %define foo bar
|
|
|
|
|
|
|
|
and then re-define it later in the same source file with
|
|
|
|
|
|
|
|
\c %define foo baz
|
|
|
|
|
|
|
|
Then everywhere the macro \c{foo} is invoked, it will be expanded
|
|
|
|
according to the most recent definition. This is particularly useful
|
|
|
|
when defining single-line macros with \c{%assign} (see \k{assign}).
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
You can \i{pre-define} single-line macros using the `-d' option on
|
|
|
|
the NASM command line: see \k{opt-d}.
|
|
|
|
|
2002-05-01 04:58:18 +08:00
|
|
|
\S{undef} Undefining macros: \i\c{%undef}
|
|
|
|
|
|
|
|
Single-line macros can be removed with the \c{%undef} command. For
|
|
|
|
example, the following sequence:
|
|
|
|
|
|
|
|
\c %define foo bar
|
|
|
|
\c %undef foo
|
|
|
|
\c mov eax, foo
|
|
|
|
|
|
|
|
will expand to the instruction \c{mov eax, foo}, since after
|
|
|
|
\c{%undef} the macro \c{foo} is no longer defined.
|
|
|
|
|
|
|
|
Macros that would otherwise be pre-defined can be undefined on the
|
|
|
|
command line using the `-u' option: see
|
|
|
|
\k{opt-u}.
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\S{assign} \i{Preprocessor Variables}: \i\c{%assign}
|
|
|
|
|
|
|
|
An alternative way to define single-line macros is by means of the
|
|
|
|
\c{%assign} command (and its \I{case sensitive}case-insensitive
|
|
|
|
counterpart \i\c{%iassign}, which differs from \c{%assign} in
|
|
|
|
exactly the same way that \c{%idefine} differs from \c{%define}).
|
|
|
|
|
|
|
|
\c{%assign} is used to define single-line macros which take no
|
|
|
|
parameters and have a numeric value. This value can be specified in
|
|
|
|
the form of an expression, and it will be evaluated once, when the
|
|
|
|
\c{%assign} directive is processed.
|
|
|
|
|
2002-05-01 04:53:16 +08:00
|
|
|
Like \c{%define}, macros defined using \c{%assign} can be re-defined
|
|
|
|
later, so you can do things like
|
|
|
|
|
|
|
|
\c %assign i i+1
|
|
|
|
|
|
|
|
to increment the numeric value of a macro.
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\c{%assign} is useful for controlling the termination of \c{%rep}
|
|
|
|
preprocessor loops: see \k{rep} for an example of this. Another
|
|
|
|
use for \c{%assign} is given in \k{16c} and \k{32c}.
|
|
|
|
|
|
|
|
The expression passed to \c{%assign} is a \i{critical expression}
|
|
|
|
(see \k{crit}), and must also evaluate to a pure number (rather than
|
|
|
|
a relocatable reference such as a code or data address, or anything
|
|
|
|
involving a register).
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{strlen} \i{String Handling in Macros}: \i\c{%strlen} and \i\c{%substr}
|
|
|
|
|
|
|
|
It's often useful to be able to handle strings in macros. NASM
|
|
|
|
supports two simple string handling macro operators from which
|
|
|
|
more complex operations can be constructed.
|
|
|
|
|
|
|
|
\S{strlen} \i{String Length}: \i\c{%strlen}
|
|
|
|
|
|
|
|
The \c{%strlen} macro is like the \c{%assign} macro in that it creates
|
|
|
|
(or redefines) a macro with a numeric value. The difference is that
|
|
|
|
with \c{%strlen}, the numeric value is the length of a string. An
|
|
|
|
example of the use of this would be:
|
|
|
|
|
|
|
|
\c %strlen charcnt 'my string'
|
|
|
|
|
|
|
|
In this example, \c{charcnt} would receive the value 9, just as
|
|
|
|
if an \c{%assign} had been used. In this example, \c{'my string'}
|
|
|
|
was a literal string but it could also have been a single-line
|
|
|
|
macro that expands to a string, as in the following example:
|
|
|
|
|
|
|
|
\c %define sometext 'my string'
|
|
|
|
\c %strlen charcnt sometext
|
|
|
|
|
|
|
|
As in the first case, this would result in \c{charcnt} being
|
|
|
|
assigned the value of 9.
|
|
|
|
|
|
|
|
\S{substr} \i{Sub-strings}: \i\c{%substr}
|
|
|
|
|
|
|
|
Individual letters in strings can be extracted using \c{%substr}.
|
|
|
|
An example of its use is probably more useful than the description:
|
|
|
|
|
|
|
|
\c %substr mychar 'xyz' 1 ; equivalent to %define mychar 'x'
|
|
|
|
\c %substr mychar 'xyz' 2 ; equivalent to %define mychar 'y'
|
|
|
|
\c %substr mychar 'xyz' 3 ; equivalent to %define mychar 'z'
|
|
|
|
|
|
|
|
In this example, mychar ends up with the value 'z'. As with \c{%strlen}
|
|
|
|
(see \k{strlen}), the first parameter is the single-line macro to
|
|
|
|
be created and the second is the string. The third parameter
|
|
|
|
specifies which character is to be selected. Note that the first
|
|
|
|
index is 1, not 0 and the last index is equal to the value that
|
|
|
|
\c{%strlen} would assign given the same string. Index values out
|
|
|
|
of range result in an empty string.
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{mlmacro} \i{Multi-Line Macros}: \I\c{%imacro}\i\c{%macro}
|
|
|
|
|
|
|
|
Multi-line macros are much more like the type of macro seen in MASM
|
|
|
|
and TASM: a multi-line macro definition in NASM looks something like
|
|
|
|
this.
|
|
|
|
|
|
|
|
\c %macro prologue 1
|
|
|
|
\c push ebp
|
|
|
|
\c mov ebp,esp
|
|
|
|
\c sub esp,%1
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
This defines a C-like function prologue as a macro: so you would
|
|
|
|
invoke the macro with a call such as
|
|
|
|
|
|
|
|
\c myfunc: prologue 12
|
|
|
|
|
|
|
|
which would expand to the three lines of code
|
|
|
|
|
|
|
|
\c myfunc: push ebp
|
|
|
|
\c mov ebp,esp
|
|
|
|
\c sub esp,12
|
|
|
|
|
|
|
|
The number \c{1} after the macro name in the \c{%macro} line defines
|
|
|
|
the number of parameters the macro \c{prologue} expects to receive.
|
|
|
|
The use of \c{%1} inside the macro definition refers to the first
|
|
|
|
parameter to the macro call. With a macro taking more than one
|
|
|
|
parameter, subsequent parameters would be referred to as \c{%2},
|
|
|
|
\c{%3} and so on.
|
|
|
|
|
|
|
|
Multi-line macros, like single-line macros, are \i{case-sensitive},
|
|
|
|
unless you define them using the alternative directive \c{%imacro}.
|
|
|
|
|
|
|
|
If you need to pass a comma as \e{part} of a parameter to a
|
|
|
|
multi-line macro, you can do that by enclosing the entire parameter
|
|
|
|
in \I{braces, around macro parameters}braces. So you could code
|
|
|
|
things like
|
|
|
|
|
|
|
|
\c %macro silly 2
|
|
|
|
\c %2: db %1
|
|
|
|
\c %endmacro
|
|
|
|
\c silly 'a', letter_a ; letter_a: db 'a'
|
|
|
|
\c silly 'ab', string_ab ; string_ab: db 'ab'
|
|
|
|
\c silly {13,10}, crlf ; crlf: db 13,10
|
|
|
|
|
2002-05-01 04:53:16 +08:00
|
|
|
\S{mlmacover} \i{Overloading Multi-Line Macros}
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
As with single-line macros, multi-line macros can be overloaded by
|
|
|
|
defining the same macro name several times with different numbers of
|
|
|
|
parameters. This time, no exception is made for macros with no
|
|
|
|
parameters at all. So you could define
|
|
|
|
|
|
|
|
\c %macro prologue 0
|
|
|
|
\c push ebp
|
|
|
|
\c mov ebp,esp
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
to define an alternative form of the function prologue which
|
|
|
|
allocates no local stack space.
|
|
|
|
|
|
|
|
Sometimes, however, you might want to `overload' a machine
|
|
|
|
instruction; for example, you might want to define
|
|
|
|
|
|
|
|
\c %macro push 2
|
|
|
|
\c push %1
|
|
|
|
\c push %2
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
so that you could code
|
|
|
|
|
|
|
|
\c push ebx ; this line is not a macro call
|
|
|
|
\c push eax,ecx ; but this one is
|
|
|
|
|
|
|
|
Ordinarily, NASM will give a warning for the first of the above two
|
|
|
|
lines, since \c{push} is now defined to be a macro, and is being
|
|
|
|
invoked with a number of parameters for which no definition has been
|
|
|
|
given. The correct code will still be generated, but the assembler
|
|
|
|
will give a warning. This warning can be disabled by the use of the
|
|
|
|
\c{-w-macro-params} command-line option (see \k{opt-w}).
|
|
|
|
|
|
|
|
\S{maclocal} \i{Macro-Local Labels}
|
|
|
|
|
|
|
|
NASM allows you to define labels within a multi-line macro
|
|
|
|
definition in such a way as to make them local to the macro call: so
|
|
|
|
calling the same macro multiple times will use a different label
|
|
|
|
each time. You do this by prefixing \i\c{%%} to the label name. So
|
|
|
|
you can invent an instruction which executes a \c{RET} if the \c{Z}
|
|
|
|
flag is set by doing this:
|
|
|
|
|
|
|
|
\c %macro retz 0
|
|
|
|
\c jnz %%skip
|
|
|
|
\c ret
|
|
|
|
\c %%skip:
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
You can call this macro as many times as you want, and every time
|
|
|
|
you call it NASM will make up a different `real' name to substitute
|
|
|
|
for the label \c{%%skip}. The names NASM invents are of the form
|
|
|
|
\c{..@2345.skip}, where the number 2345 changes with every macro
|
|
|
|
call. The \i\c{..@} prefix prevents macro-local labels from
|
|
|
|
interfering with the local label mechanism, as described in
|
|
|
|
\k{locallab}. You should avoid defining your own labels in this form
|
|
|
|
(the \c{..@} prefix, then a number, then another period) in case
|
|
|
|
they interfere with macro-local labels.
|
|
|
|
|
|
|
|
\S{mlmacgre} \i{Greedy Macro Parameters}
|
|
|
|
|
|
|
|
Occasionally it is useful to define a macro which lumps its entire
|
|
|
|
command line into one parameter definition, possibly after
|
|
|
|
extracting one or two smaller parameters from the front. An example
|
|
|
|
might be a macro to write a text string to a file in MS-DOS, where
|
|
|
|
you might want to be able to write
|
|
|
|
|
|
|
|
\c writefile [filehandle],"hello, world",13,10
|
|
|
|
|
|
|
|
NASM allows you to define the last parameter of a macro to be
|
|
|
|
\e{greedy}, meaning that if you invoke the macro with more
|
|
|
|
parameters than it expects, all the spare parameters get lumped into
|
|
|
|
the last defined one along with the separating commas. So if you
|
|
|
|
code:
|
|
|
|
|
|
|
|
\c %macro writefile 2+
|
|
|
|
\c jmp %%endstr
|
|
|
|
\c %%str: db %2
|
|
|
|
\c %%endstr: mov dx,%%str
|
|
|
|
\c mov cx,%%endstr-%%str
|
|
|
|
\c mov bx,%1
|
|
|
|
\c mov ah,0x40
|
|
|
|
\c int 0x21
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
then the example call to \c{writefile} above will work as expected:
|
|
|
|
the text before the first comma, \c{[filehandle]}, is used as the
|
|
|
|
first macro parameter and expanded when \c{%1} is referred to, and
|
|
|
|
all the subsequent text is lumped into \c{%2} and placed after the
|
|
|
|
\c{db}.
|
|
|
|
|
|
|
|
The greedy nature of the macro is indicated to NASM by the use of
|
|
|
|
the \I{+ modifier}\c{+} sign after the parameter count on the
|
|
|
|
\c{%macro} line.
|
|
|
|
|
|
|
|
If you define a greedy macro, you are effectively telling NASM how
|
|
|
|
it should expand the macro given \e{any} number of parameters from
|
|
|
|
the actual number specified up to infinity; in this case, for
|
|
|
|
example, NASM now knows what to do when it sees a call to
|
|
|
|
\c{writefile} with 2, 3, 4 or more parameters. NASM will take this
|
|
|
|
into account when overloading macros, and will not allow you to
|
|
|
|
define another form of \c{writefile} taking 4 parameters (for
|
|
|
|
example).
|
|
|
|
|
|
|
|
Of course, the above macro could have been implemented as a
|
|
|
|
non-greedy macro, in which case the call to it would have had to
|
|
|
|
look like
|
|
|
|
|
|
|
|
\c writefile [filehandle], {"hello, world",13,10}
|
|
|
|
|
|
|
|
NASM provides both mechanisms for putting \i{commas in macro
|
|
|
|
parameters}, and you choose which one you prefer for each macro
|
|
|
|
definition.
|
|
|
|
|
|
|
|
See \k{sectmac} for a better way to write the above macro.
|
|
|
|
|
|
|
|
\S{mlmacdef} \i{Default Macro Parameters}
|
|
|
|
|
|
|
|
NASM also allows you to define a multi-line macro with a \e{range}
|
|
|
|
of allowable parameter counts. If you do this, you can specify
|
|
|
|
defaults for \i{omitted parameters}. So, for example:
|
|
|
|
|
|
|
|
\c %macro die 0-1 "Painful program death has occurred."
|
|
|
|
\c writefile 2,%1
|
|
|
|
\c mov ax,0x4c01
|
|
|
|
\c int 0x21
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
This macro (which makes use of the \c{writefile} macro defined in
|
|
|
|
\k{mlmacgre}) can be called with an explicit error message, which it
|
|
|
|
will display on the error output stream before exiting, or it can be
|
|
|
|
called with no parameters, in which case it will use the default
|
|
|
|
error message supplied in the macro definition.
|
|
|
|
|
|
|
|
In general, you supply a minimum and maximum number of parameters
|
|
|
|
for a macro of this type; the minimum number of parameters are then
|
|
|
|
required in the macro call, and then you provide defaults for the
|
|
|
|
optional ones. So if a macro definition began with the line
|
|
|
|
|
|
|
|
\c %macro foobar 1-3 eax,[ebx+2]
|
|
|
|
|
|
|
|
then it could be called with between one and three parameters, and
|
|
|
|
\c{%1} would always be taken from the macro call. \c{%2}, if not
|
|
|
|
specified by the macro call, would default to \c{eax}, and \c{%3} if
|
|
|
|
not specified would default to \c{[ebx+2]}.
|
|
|
|
|
|
|
|
You may omit parameter defaults from the macro definition, in which
|
|
|
|
case the parameter default is taken to be blank. This can be useful
|
|
|
|
for macros which can take a variable number of parameters, since the
|
|
|
|
\i\c{%0} token (see \k{percent0}) allows you to determine how many
|
|
|
|
parameters were really passed to the macro call.
|
|
|
|
|
|
|
|
This defaulting mechanism can be combined with the greedy-parameter
|
|
|
|
mechanism; so the \c{die} macro above could be made more powerful,
|
|
|
|
and more useful, by changing the first line of the definition to
|
|
|
|
|
|
|
|
\c %macro die 0-1+ "Painful program death has occurred.",13,10
|
|
|
|
|
|
|
|
The maximum parameter count can be infinite, denoted by \c{*}. In
|
|
|
|
this case, of course, it is impossible to provide a \e{full} set of
|
|
|
|
default parameters. Examples of this usage are shown in \k{rotate}.
|
|
|
|
|
|
|
|
\S{percent0} \i\c{%0}: \I{counting macro parameters}Macro Parameter Counter
|
|
|
|
|
|
|
|
For a macro which can take a variable number of parameters, the
|
|
|
|
parameter reference \c{%0} will return a numeric constant giving the
|
|
|
|
number of parameters passed to the macro. This can be used as an
|
|
|
|
argument to \c{%rep} (see \k{rep}) in order to iterate through all
|
|
|
|
the parameters of a macro. Examples are given in \k{rotate}.
|
|
|
|
|
|
|
|
\S{rotate} \i\c{%rotate}: \i{Rotating Macro Parameters}
|
|
|
|
|
|
|
|
Unix shell programmers will be familiar with the \I{shift
|
|
|
|
command}\c{shift} shell command, which allows the arguments passed
|
|
|
|
to a shell script (referenced as \c{$1}, \c{$2} and so on) to be
|
|
|
|
moved left by one place, so that the argument previously referenced
|
|
|
|
as \c{$2} becomes available as \c{$1}, and the argument previously
|
|
|
|
referenced as \c{$1} is no longer available at all.
|
|
|
|
|
|
|
|
NASM provides a similar mechanism, in the form of \c{%rotate}. As
|
|
|
|
its name suggests, it differs from the Unix \c{shift} in that no
|
|
|
|
parameters are lost: parameters rotated off the left end of the
|
|
|
|
argument list reappear on the right, and vice versa.
|
|
|
|
|
|
|
|
\c{%rotate} is invoked with a single numeric argument (which may be
|
|
|
|
an expression). The macro parameters are rotated to the left by that
|
|
|
|
many places. If the argument to \c{%rotate} is negative, the macro
|
|
|
|
parameters are rotated to the right.
|
|
|
|
|
|
|
|
\I{iterating over macro parameters}So a pair of macros to save and
|
|
|
|
restore a set of registers might work as follows:
|
|
|
|
|
|
|
|
\c %macro multipush 1-*
|
|
|
|
\c %rep %0
|
|
|
|
\c push %1
|
|
|
|
\c %rotate 1
|
|
|
|
\c %endrep
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
This macro invokes the \c{PUSH} instruction on each of its arguments
|
|
|
|
in turn, from left to right. It begins by pushing its first
|
|
|
|
argument, \c{%1}, then invokes \c{%rotate} to move all the arguments
|
|
|
|
one place to the left, so that the original second argument is now
|
|
|
|
available as \c{%1}. Repeating this procedure as many times as there
|
|
|
|
were arguments (achieved by supplying \c{%0} as the argument to
|
|
|
|
\c{%rep}) causes each argument in turn to be pushed.
|
|
|
|
|
|
|
|
Note also the use of \c{*} as the maximum parameter count,
|
|
|
|
indicating that there is no upper limit on the number of parameters
|
|
|
|
you may supply to the \i\c{multipush} macro.
|
|
|
|
|
|
|
|
It would be convenient, when using this macro, to have a \c{POP}
|
|
|
|
equivalent, which \e{didn't} require the arguments to be given in
|
|
|
|
reverse order. Ideally, you would write the \c{multipush} macro
|
|
|
|
call, then cut-and-paste the line to where the pop needed to be
|
|
|
|
done, and change the name of the called macro to \c{multipop}, and
|
|
|
|
the macro would take care of popping the registers in the opposite
|
|
|
|
order from the one in which they were pushed.
|
|
|
|
|
|
|
|
This can be done by the following definition:
|
|
|
|
|
|
|
|
\c %macro multipop 1-*
|
|
|
|
\c %rep %0
|
|
|
|
\c %rotate -1
|
|
|
|
\c pop %1
|
|
|
|
\c %endrep
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
This macro begins by rotating its arguments one place to the
|
|
|
|
\e{right}, so that the original \e{last} argument appears as \c{%1}.
|
|
|
|
This is then popped, and the arguments are rotated right again, so
|
|
|
|
the second-to-last argument becomes \c{%1}. Thus the arguments are
|
|
|
|
iterated through in reverse order.
|
|
|
|
|
|
|
|
\S{concat} \i{Concatenating Macro Parameters}
|
|
|
|
|
|
|
|
NASM can concatenate macro parameters on to other text surrounding
|
|
|
|
them. This allows you to declare a family of symbols, for example,
|
|
|
|
in a macro definition. If, for example, you wanted to generate a
|
|
|
|
table of key codes along with offsets into the table, you could code
|
|
|
|
something like
|
|
|
|
|
|
|
|
\c %macro keytab_entry 2
|
|
|
|
\c keypos%1 equ $-keytab
|
|
|
|
\c db %2
|
|
|
|
\c %endmacro
|
|
|
|
\c keytab:
|
|
|
|
\c keytab_entry F1,128+1
|
|
|
|
\c keytab_entry F2,128+2
|
|
|
|
\c keytab_entry Return,13
|
|
|
|
|
|
|
|
which would expand to
|
|
|
|
|
|
|
|
\c keytab:
|
|
|
|
\c keyposF1 equ $-keytab
|
|
|
|
\c db 128+1
|
|
|
|
\c keyposF2 equ $-keytab
|
|
|
|
\c db 128+2
|
|
|
|
\c keyposReturn equ $-keytab
|
|
|
|
\c db 13
|
|
|
|
|
|
|
|
You can just as easily concatenate text on to the other end of a
|
|
|
|
macro parameter, by writing \c{%1foo}.
|
|
|
|
|
|
|
|
If you need to append a \e{digit} to a macro parameter, for example
|
|
|
|
defining labels \c{foo1} and \c{foo2} when passed the parameter
|
|
|
|
\c{foo}, you can't code \c{%11} because that would be taken as the
|
|
|
|
eleventh macro parameter. Instead, you must code
|
|
|
|
\I{braces, after % sign}\c{%\{1\}1}, which will separate the first
|
|
|
|
\c{1} (giving the number of the macro parameter) from the second
|
|
|
|
(literal text to be concatenated to the parameter).
|
|
|
|
|
|
|
|
This concatenation can also be applied to other preprocessor in-line
|
|
|
|
objects, such as macro-local labels (\k{maclocal}) and context-local
|
|
|
|
labels (\k{ctxlocal}). In all cases, ambiguities in syntax can be
|
|
|
|
resolved by enclosing everything after the \c{%} sign and before the
|
|
|
|
literal text in braces: so \c{%\{%foo\}bar} concatenates the text
|
|
|
|
\c{bar} to the end of the real name of the macro-local label
|
|
|
|
\c{%%foo}. (This is unnecessary, since the form NASM uses for the
|
|
|
|
real names of macro-local labels means that the two usages
|
|
|
|
\c{%\{%foo\}bar} and \c{%%foobar} would both expand to the same
|
|
|
|
thing anyway; nevertheless, the capability is there.)
|
|
|
|
|
|
|
|
\S{mlmaccc} \i{Condition Codes as Macro Parameters}
|
|
|
|
|
|
|
|
NASM can give special treatment to a macro parameter which contains
|
|
|
|
a condition code. For a start, you can refer to the macro parameter
|
|
|
|
\c{%1} by means of the alternative syntax \i\c{%+1}, which informs
|
|
|
|
NASM that this macro parameter is supposed to contain a condition
|
|
|
|
code, and will cause the preprocessor to report an error message if
|
|
|
|
the macro is called with a parameter which is \e{not} a valid
|
|
|
|
condition code.
|
|
|
|
|
|
|
|
Far more usefully, though, you can refer to the macro parameter by
|
|
|
|
means of \i\c{%-1}, which NASM will expand as the \e{inverse}
|
|
|
|
condition code. So the \c{retz} macro defined in \k{maclocal} can be
|
|
|
|
replaced by a general \i{conditional-return macro} like this:
|
|
|
|
|
|
|
|
\c %macro retc 1
|
|
|
|
\c j%-1 %%skip
|
|
|
|
\c ret
|
|
|
|
\c %%skip:
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
This macro can now be invoked using calls like \c{retc ne}, which
|
|
|
|
will cause the conditional-jump instruction in the macro expansion
|
|
|
|
to come out as \c{JE}, or \c{retc po} which will make the jump a
|
|
|
|
\c{JPE}.
|
|
|
|
|
|
|
|
The \c{%+1} macro-parameter reference is quite happy to interpret
|
|
|
|
the arguments \c{CXZ} and \c{ECXZ} as valid condition codes;
|
|
|
|
however, \c{%-1} will report an error if passed either of these,
|
|
|
|
because no inverse condition code exists.
|
|
|
|
|
|
|
|
\S{nolist} \i{Disabling Listing Expansion}\I\c{.nolist}
|
|
|
|
|
|
|
|
When NASM is generating a listing file from your program, it will
|
|
|
|
generally expand multi-line macros by means of writing the macro
|
|
|
|
call and then listing each line of the expansion. This allows you to
|
|
|
|
see which instructions in the macro expansion are generating what
|
|
|
|
code; however, for some macros this clutters the listing up
|
|
|
|
unnecessarily.
|
|
|
|
|
|
|
|
NASM therefore provides the \c{.nolist} qualifier, which you can
|
|
|
|
include in a macro definition to inhibit the expansion of the macro
|
|
|
|
in the listing file. The \c{.nolist} qualifier comes directly after
|
|
|
|
the number of parameters, like this:
|
|
|
|
|
|
|
|
\c %macro foo 1.nolist
|
|
|
|
|
|
|
|
Or like this:
|
|
|
|
|
|
|
|
\c %macro bar 1-5+.nolist a,b,c,d,e,f,g,h
|
|
|
|
|
|
|
|
\H{condasm} \i{Conditional Assembly}\I\c{%if}
|
|
|
|
|
|
|
|
Similarly to the C preprocessor, NASM allows sections of a source
|
|
|
|
file to be assembled only if certain conditions are met. The general
|
|
|
|
syntax of this feature looks like this:
|
|
|
|
|
|
|
|
\c %if<condition>
|
|
|
|
\c ; some code which only appears if <condition> is met
|
|
|
|
\c %elif<condition2>
|
|
|
|
\c ; only appears if <condition> is not met but <condition2> is
|
|
|
|
\c %else
|
|
|
|
\c ; this appears if neither <condition> nor <condition2> was met
|
|
|
|
\c %endif
|
|
|
|
|
|
|
|
The \i\c{%else} clause is optional, as is the \i\c{%elif} clause.
|
|
|
|
You can have more than one \c{%elif} clause as well.
|
|
|
|
|
|
|
|
\S{ifdef} \i\c{%ifdef}: \i{Testing Single-Line Macro Existence}
|
|
|
|
|
|
|
|
Beginning a conditional-assembly block with the line \c{%ifdef
|
|
|
|
MACRO} will assemble the subsequent code if, and only if, a
|
|
|
|
single-line macro called \c{MACRO} is defined. If not, then the
|
|
|
|
\c{%elif} and \c{%else} blocks (if any) will be processed instead.
|
|
|
|
|
|
|
|
For example, when debugging a program, you might want to write code
|
|
|
|
such as
|
|
|
|
|
|
|
|
\c ; perform some function
|
|
|
|
\c %ifdef DEBUG
|
|
|
|
\c writefile 2,"Function performed successfully",13,10
|
|
|
|
\c %endif
|
|
|
|
\c ; go and do something else
|
|
|
|
|
|
|
|
Then you could use the command-line option \c{-dDEBUG} to create a
|
|
|
|
version of the program which produced debugging messages, and remove
|
|
|
|
the option to generate the final release version of the program.
|
|
|
|
|
|
|
|
You can test for a macro \e{not} being defined by using
|
|
|
|
\i\c{%ifndef} instead of \c{%ifdef}. You can also test for macro
|
|
|
|
definitions in \c{%elif} blocks by using \i\c{%elifdef} and
|
|
|
|
\i\c{%elifndef}.
|
|
|
|
|
|
|
|
\S{ifctx} \i\c{%ifctx}: \i{Testing the Context Stack}
|
|
|
|
|
|
|
|
The conditional-assembly construct \c{%ifctx ctxname} will cause the
|
|
|
|
subsequent code to be assembled if and only if the top context on
|
|
|
|
the preprocessor's context stack has the name \c{ctxname}. As with
|
|
|
|
\c{%ifdef}, the inverse and \c{%elif} forms \i\c{%ifnctx},
|
|
|
|
\i\c{%elifctx} and \i\c{%elifnctx} are also supported.
|
|
|
|
|
|
|
|
For more details of the context stack, see \k{ctxstack}. For a
|
|
|
|
sample use of \c{%ifctx}, see \k{blockif}.
|
|
|
|
|
|
|
|
\S{if} \i\c{%if}: \i{Testing Arbitrary Numeric Expressions}
|
|
|
|
|
|
|
|
The conditional-assembly construct \c{%if expr} will cause the
|
|
|
|
subsequent code to be assembled if and only if the value of the
|
|
|
|
numeric expression \c{expr} is non-zero. An example of the use of
|
|
|
|
this feature is in deciding when to break out of a \c{%rep}
|
|
|
|
preprocessor loop: see \k{rep} for a detailed example.
|
|
|
|
|
|
|
|
The expression given to \c{%if}, and its counterpart \i\c{%elif}, is
|
|
|
|
a critical expression (see \k{crit}).
|
|
|
|
|
|
|
|
\c{%if} extends the normal NASM expression syntax, by providing a
|
|
|
|
set of \i{relational operators} which are not normally available in
|
|
|
|
expressions. The operators \i\c{=}, \i\c{<}, \i\c{>}, \i\c{<=},
|
|
|
|
\i\c{>=} and \i\c{<>} test equality, less-than, greater-than,
|
|
|
|
less-or-equal, greater-or-equal and not-equal respectively. The
|
|
|
|
C-like forms \i\c{==} and \i\c{!=} are supported as alternative
|
|
|
|
forms of \c{=} and \c{<>}. In addition, low-priority logical
|
|
|
|
operators \i\c{&&}, \i\c{^^} and \i\c{||} are provided, supplying
|
|
|
|
\i{logical AND}, \i{logical XOR} and \i{logical OR}. These work like
|
|
|
|
the C logical operators (although C has no logical XOR), in that
|
|
|
|
they always return either 0 or 1, and treat any non-zero input as 1
|
|
|
|
(so that \c{^^}, for example, returns 1 if exactly one of its inputs
|
|
|
|
is zero, and 0 otherwise). The relational operators also return 1
|
|
|
|
for true and 0 for false.
|
|
|
|
|
|
|
|
\S{ifidn} \i\c{%ifidn} and \i\c{%ifidni}: \i{Testing Exact Text
|
|
|
|
Identity}
|
|
|
|
|
|
|
|
The construct \c{%ifidn text1,text2} will cause the subsequent code
|
|
|
|
to be assembled if and only if \c{text1} and \c{text2}, after
|
|
|
|
expanding single-line macros, are identical pieces of text.
|
|
|
|
Differences in white space are not counted.
|
|
|
|
|
|
|
|
\c{%ifidni} is similar to \c{%ifidn}, but is \i{case-insensitive}.
|
|
|
|
|
|
|
|
For example, the following macro pushes a register or number on the
|
|
|
|
stack, and allows you to treat \c{IP} as a real register:
|
|
|
|
|
|
|
|
\c %macro pushparam 1
|
|
|
|
\c %ifidni %1,ip
|
|
|
|
\c call %%label
|
|
|
|
\c %%label:
|
|
|
|
\c %else
|
|
|
|
\c push %1
|
|
|
|
\c %endif
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
Like most other \c{%if} constructs, \c{%ifidn} has a counterpart
|
|
|
|
\i\c{%elifidn}, and negative forms \i\c{%ifnidn} and \i\c{%elifnidn}.
|
|
|
|
Similarly, \c{%ifidni} has counterparts \i\c{%elifidni},
|
|
|
|
\i\c{%ifnidni} and \i\c{%elifnidni}.
|
|
|
|
|
|
|
|
\S{iftyp} \i\c{%ifid}, \i\c{%ifnum}, \i\c{%ifstr}: \i{Testing Token
|
|
|
|
Types}
|
|
|
|
|
|
|
|
Some macros will want to perform different tasks depending on
|
|
|
|
whether they are passed a number, a string, or an identifier. For
|
|
|
|
example, a string output macro might want to be able to cope with
|
|
|
|
being passed either a string constant or a pointer to an existing
|
|
|
|
string.
|
|
|
|
|
|
|
|
The conditional assembly construct \c{%ifid}, taking one parameter
|
|
|
|
(which may be blank), assembles the subsequent code if and only if
|
|
|
|
the first token in the parameter exists and is an identifier.
|
|
|
|
\c{%ifnum} works similarly, but tests for the token being a numeric
|
|
|
|
constant; \c{%ifstr} tests for it being a string.
|
|
|
|
|
|
|
|
For example, the \c{writefile} macro defined in \k{mlmacgre} can be
|
|
|
|
extended to take advantage of \c{%ifstr} in the following fashion:
|
|
|
|
|
|
|
|
\c %macro writefile 2-3+
|
|
|
|
\c %ifstr %2
|
|
|
|
\c jmp %%endstr
|
|
|
|
\c %if %0 = 3
|
|
|
|
\c %%str: db %2,%3
|
|
|
|
\c %else
|
|
|
|
\c %%str: db %2
|
|
|
|
\c %endif
|
|
|
|
\c %%endstr: mov dx,%%str
|
|
|
|
\c mov cx,%%endstr-%%str
|
|
|
|
\c %else
|
|
|
|
\c mov dx,%2
|
|
|
|
\c mov cx,%3
|
|
|
|
\c %endif
|
|
|
|
\c mov bx,%1
|
|
|
|
\c mov ah,0x40
|
|
|
|
\c int 0x21
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
Then the \c{writefile} macro can cope with being called in either of
|
|
|
|
the following two ways:
|
|
|
|
|
|
|
|
\c writefile [file], strpointer, length
|
|
|
|
\c writefile [file], "hello", 13, 10
|
|
|
|
|
|
|
|
In the first, \c{strpointer} is used as the address of an
|
|
|
|
already-declared string, and \c{length} is used as its length; in
|
|
|
|
the second, a string is given to the macro, which therefore declares
|
|
|
|
it itself and works out the address and length for itself.
|
|
|
|
|
|
|
|
Note the use of \c{%if} inside the \c{%ifstr}: this is to detect
|
|
|
|
whether the macro was passed two arguments (so the string would be a
|
|
|
|
single string constant, and \c{db %2} would be adequate) or more (in
|
|
|
|
which case, all but the first two would be lumped together into
|
|
|
|
\c{%3}, and \c{db %2,%3} would be required).
|
|
|
|
|
|
|
|
\I\c{%ifnid}\I\c{%elifid}\I\c{%elifnid}\I\c{%ifnnum}\I\c{%elifnum}\I\c{%elifnnum}\I\c{%ifnstr}\I\c{%elifstr}\I\c{%elifnstr}
|
|
|
|
The usual \c{%elifXXX}, \c{%ifnXXX} and \c{%elifnXXX} versions exist
|
|
|
|
for each of \c{%ifid}, \c{%ifnum} and \c{%ifstr}.
|
|
|
|
|
|
|
|
\S{pperror} \i\c{%error}: Reporting \i{User-Defined Errors}
|
|
|
|
|
|
|
|
The preprocessor directive \c{%error} will cause NASM to report an
|
|
|
|
error if it occurs in assembled code. So if other users are going to
|
|
|
|
try to assemble your source files, you can ensure that they define
|
|
|
|
the right macros by means of code like this:
|
|
|
|
|
|
|
|
\c %ifdef SOME_MACRO
|
|
|
|
\c ; do some setup
|
|
|
|
\c %elifdef SOME_OTHER_MACRO
|
|
|
|
\c ; do some different setup
|
|
|
|
\c %else
|
|
|
|
\c %error Neither SOME_MACRO nor SOME_OTHER_MACRO was defined.
|
|
|
|
\c %endif
|
|
|
|
|
|
|
|
Then any user who fails to understand the way your code is supposed
|
|
|
|
to be assembled will be quickly warned of their mistake, rather than
|
|
|
|
having to wait until the program crashes on being run and then not
|
|
|
|
knowing what went wrong.
|
|
|
|
|
|
|
|
\H{rep} \i{Preprocessor Loops}\I{repeating code}: \i\c{%rep}
|
|
|
|
|
|
|
|
NASM's \c{TIMES} prefix, though useful, cannot be used to invoke a
|
|
|
|
multi-line macro multiple times, because it is processed by NASM
|
|
|
|
after macros have already been expanded. Therefore NASM provides
|
|
|
|
another form of loop, this time at the preprocessor level: \c{%rep}.
|
|
|
|
|
|
|
|
The directives \c{%rep} and \i\c{%endrep} (\c{%rep} takes a numeric
|
|
|
|
argument, which can be an expression; \c{%endrep} takes no
|
|
|
|
arguments) can be used to enclose a chunk of code, which is then
|
|
|
|
replicated as many times as specified by the preprocessor:
|
|
|
|
|
|
|
|
\c %assign i 0
|
|
|
|
\c %rep 64
|
|
|
|
\c inc word [table+2*i]
|
|
|
|
\c %assign i i+1
|
|
|
|
\c %endrep
|
|
|
|
|
|
|
|
This will generate a sequence of 64 \c{INC} instructions,
|
|
|
|
incrementing every word of memory from \c{[table]} to
|
|
|
|
\c{[table+126]}.
|
|
|
|
|
|
|
|
For more complex termination conditions, or to break out of a repeat
|
|
|
|
loop part way along, you can use the \i\c{%exitrep} directive to
|
|
|
|
terminate the loop, like this:
|
|
|
|
|
|
|
|
\c fibonacci:
|
|
|
|
\c %assign i 0
|
|
|
|
\c %assign j 1
|
|
|
|
\c %rep 100
|
|
|
|
\c %if j > 65535
|
|
|
|
\c %exitrep
|
|
|
|
\c %endif
|
|
|
|
\c dw j
|
|
|
|
\c %assign k j+i
|
|
|
|
\c %assign i j
|
|
|
|
\c %assign j k
|
|
|
|
\c %endrep
|
|
|
|
\c fib_number equ ($-fibonacci)/2
|
|
|
|
|
|
|
|
This produces a list of all the Fibonacci numbers that will fit in
|
|
|
|
16 bits. Note that a maximum repeat count must still be given to
|
|
|
|
\c{%rep}. This is to prevent the possibility of NASM getting into an
|
|
|
|
infinite loop in the preprocessor, which (on multitasking or
|
|
|
|
multi-user systems) would typically cause all the system memory to
|
|
|
|
be gradually used up and other applications to start crashing.
|
|
|
|
|
|
|
|
\H{include} \i{Including Other Files}
|
|
|
|
|
|
|
|
Using, once again, a very similar syntax to the C preprocessor,
|
|
|
|
NASM's preprocessor lets you include other source files into your
|
|
|
|
code. This is done by the use of the \i\c{%include} directive:
|
|
|
|
|
|
|
|
\c %include "macros.mac"
|
|
|
|
|
|
|
|
will include the contents of the file \c{macros.mac} into the source
|
|
|
|
file containing the \c{%include} directive.
|
|
|
|
|
|
|
|
Include files are \I{searching for include files}searched for in the
|
|
|
|
current directory (the directory you're in when you run NASM, as
|
|
|
|
opposed to the location of the NASM executable or the location of
|
|
|
|
the source file), plus any directories specified on the NASM command
|
|
|
|
line using the \c{-i} option.
|
|
|
|
|
|
|
|
The standard C idiom for preventing a file being included more than
|
|
|
|
once is just as applicable in NASM: if the file \c{macros.mac} has
|
|
|
|
the form
|
|
|
|
|
|
|
|
\c %ifndef MACROS_MAC
|
|
|
|
\c %define MACROS_MAC
|
|
|
|
\c ; now define some macros
|
|
|
|
\c %endif
|
|
|
|
|
|
|
|
then including the file more than once will not cause errors,
|
|
|
|
because the second time the file is included nothing will happen
|
|
|
|
since the macro \c{MACROS_MAC} will already be defined.
|
|
|
|
|
|
|
|
You can force a file to be included even if there is no \c{%include}
|
|
|
|
directive that explicitly includes it, by using the \i\c{-p} option
|
|
|
|
on the NASM command line (see \k{opt-p}).
|
|
|
|
|
|
|
|
\H{ctxstack} The \i{Context Stack}
|
|
|
|
|
|
|
|
Having labels that are local to a macro definition is sometimes not
|
|
|
|
quite powerful enough: sometimes you want to be able to share labels
|
|
|
|
between several macro calls. An example might be a \c{REPEAT} ...
|
|
|
|
\c{UNTIL} loop, in which the expansion of the \c{REPEAT} macro
|
|
|
|
would need to be able to refer to a label which the \c{UNTIL} macro
|
|
|
|
had defined. However, for such a macro you would also want to be
|
|
|
|
able to nest these loops.
|
|
|
|
|
|
|
|
NASM provides this level of power by means of a \e{context stack}.
|
|
|
|
The preprocessor maintains a stack of \e{contexts}, each of which is
|
|
|
|
characterised by a name. You add a new context to the stack using
|
|
|
|
the \i\c{%push} directive, and remove one using \i\c{%pop}. You can
|
|
|
|
define labels that are local to a particular context on the stack.
|
|
|
|
|
|
|
|
\S{pushpop} \i\c{%push} and \i\c{%pop}: \I{creating
|
|
|
|
contexts}\I{removing contexts}Creating and Removing Contexts
|
|
|
|
|
|
|
|
The \c{%push} directive is used to create a new context and place it
|
|
|
|
on the top of the context stack. \c{%push} requires one argument,
|
|
|
|
which is the name of the context. For example:
|
|
|
|
|
|
|
|
\c %push foobar
|
|
|
|
|
|
|
|
This pushes a new context called \c{foobar} on the stack. You can
|
|
|
|
have several contexts on the stack with the same name: they can
|
|
|
|
still be distinguished.
|
|
|
|
|
|
|
|
The directive \c{%pop}, requiring no arguments, removes the top
|
|
|
|
context from the context stack and destroys it, along with any
|
|
|
|
labels associated with it.
|
|
|
|
|
|
|
|
\S{ctxlocal} \i{Context-Local Labels}
|
|
|
|
|
|
|
|
Just as the usage \c{%%foo} defines a label which is local to the
|
|
|
|
particular macro call in which it is used, the usage \I{%$}\c{%$foo}
|
|
|
|
is used to define a label which is local to the context on the top
|
|
|
|
of the context stack. So the \c{REPEAT} and \c{UNTIL} example given
|
|
|
|
above could be implemented by means of:
|
|
|
|
|
|
|
|
\c %macro repeat 0
|
|
|
|
\c %push repeat
|
|
|
|
\c %$begin:
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
\c %macro until 1
|
|
|
|
\c j%-1 %$begin
|
|
|
|
\c %pop
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
and invoked by means of, for example,
|
|
|
|
|
|
|
|
\c mov di,string
|
|
|
|
\c repeat
|
|
|
|
\c add di,3
|
|
|
|
\c scasb
|
|
|
|
\c until e
|
|
|
|
|
|
|
|
which would scan every fourth byte of a string in search of the byte
|
|
|
|
in \c{AL}.
|
|
|
|
|
|
|
|
If you need to define, or access, labels local to the context
|
|
|
|
\e{below} the top one on the stack, you can use \I{%$$}\c{%$$foo}, or
|
|
|
|
\c{%$$$foo} for the context below that, and so on.
|
|
|
|
|
|
|
|
\S{ctxdefine} \i{Context-Local Single-Line Macros}
|
|
|
|
|
|
|
|
NASM also allows you to define single-line macros which are local to
|
|
|
|
a particular context, in just the same way:
|
|
|
|
|
|
|
|
\c %define %$localmac 3
|
|
|
|
|
|
|
|
will define the single-line macro \c{%$localmac} to be local to the
|
|
|
|
top context on the stack. Of course, after a subsequent \c{%push},
|
|
|
|
it can then still be accessed by the name \c{%$$localmac}.
|
|
|
|
|
|
|
|
\S{ctxrepl} \i\c{%repl}: \I{renaming contexts}Renaming a Context
|
|
|
|
|
|
|
|
If you need to change the name of the top context on the stack (in
|
|
|
|
order, for example, to have it respond differently to \c{%ifctx}),
|
|
|
|
you can execute a \c{%pop} followed by a \c{%push}; but this will
|
|
|
|
have the side effect of destroying all context-local labels and
|
|
|
|
macros associated with the context that was just popped.
|
|
|
|
|
|
|
|
NASM provides the directive \c{%repl}, which \e{replaces} a context
|
|
|
|
with a different name, without touching the associated macros and
|
|
|
|
labels. So you could replace the destructive code
|
|
|
|
|
|
|
|
\c %pop
|
|
|
|
\c %push newname
|
|
|
|
|
|
|
|
with the non-destructive version \c{%repl newname}.
|
|
|
|
|
|
|
|
\S{blockif} Example Use of the \i{Context Stack}: \i{Block IFs}
|
|
|
|
|
|
|
|
This example makes use of almost all the context-stack features,
|
|
|
|
including the conditional-assembly construct \i\c{%ifctx}, to
|
|
|
|
implement a block IF statement as a set of macros.
|
|
|
|
|
|
|
|
\c %macro if 1
|
|
|
|
\c %push if
|
|
|
|
\c j%-1 %$ifnot
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
\c %macro else 0
|
|
|
|
\c %ifctx if
|
|
|
|
\c %repl else
|
|
|
|
\c jmp %$ifend
|
|
|
|
\c %$ifnot:
|
|
|
|
\c %else
|
|
|
|
\c %error "expected `if' before `else'"
|
|
|
|
\c %endif
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
\c %macro endif 0
|
|
|
|
\c %ifctx if
|
|
|
|
\c %$ifnot:
|
|
|
|
\c %pop
|
|
|
|
\c %elifctx else
|
|
|
|
\c %$ifend:
|
|
|
|
\c %pop
|
|
|
|
\c %else
|
|
|
|
\c %error "expected `if' or `else' before `endif'"
|
|
|
|
\c %endif
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
This code is more robust than the \c{REPEAT} and \c{UNTIL} macros
|
|
|
|
given in \k{ctxlocal}, because it uses conditional assembly to check
|
|
|
|
that the macros are issued in the right order (for example, not
|
|
|
|
calling \c{endif} before \c{if}) and issues a \c{%error} if they're
|
|
|
|
not.
|
|
|
|
|
|
|
|
In addition, the \c{endif} macro has to be able to cope with the two
|
|
|
|
distinct cases of either directly following an \c{if}, or following
|
|
|
|
an \c{else}. It achieves this, again, by using conditional assembly
|
|
|
|
to do different things depending on whether the context on top of
|
|
|
|
the stack is \c{if} or \c{else}.
|
|
|
|
|
|
|
|
The \c{else} macro has to preserve the context on the stack, in
|
|
|
|
order to have the \c{%$ifnot} referred to by the \c{if} macro be the
|
|
|
|
same as the one defined by the \c{endif} macro, but has to change
|
|
|
|
the context's name so that \c{endif} will know there was an
|
|
|
|
intervening \c{else}. It does this by the use of \c{%repl}.
|
|
|
|
|
|
|
|
A sample usage of these macros might look like:
|
|
|
|
|
|
|
|
\c cmp ax,bx
|
|
|
|
\c if ae
|
|
|
|
\c cmp bx,cx
|
|
|
|
\c if ae
|
|
|
|
\c mov ax,cx
|
|
|
|
\c else
|
|
|
|
\c mov ax,bx
|
|
|
|
\c endif
|
|
|
|
\c else
|
|
|
|
\c cmp ax,cx
|
|
|
|
\c if ae
|
|
|
|
\c mov ax,cx
|
|
|
|
\c endif
|
|
|
|
\c endif
|
|
|
|
|
|
|
|
The block-\c{IF} macros handle nesting quite happily, by means of
|
|
|
|
pushing another context, describing the inner \c{if}, on top of the
|
|
|
|
one describing the outer \c{if}; thus \c{else} and \c{endif} always
|
|
|
|
refer to the last unmatched \c{if} or \c{else}.
|
|
|
|
|
|
|
|
\H{stdmac} \i{Standard Macros}
|
|
|
|
|
|
|
|
NASM defines a set of standard macros, which are already defined
|
|
|
|
when it starts to process any source file. If you really need a
|
|
|
|
program to be assembled with no pre-defined macros, you can use the
|
|
|
|
\i\c{%clear} directive to empty the preprocessor of everything.
|
|
|
|
|
|
|
|
Most \i{user-level assembler directives} (see \k{directive}) are
|
|
|
|
implemented as macros which invoke primitive directives; these are
|
|
|
|
described in \k{directive}. The rest of the standard macro set is
|
|
|
|
described here.
|
|
|
|
|
|
|
|
\S{stdmacver} \i\c{__NASM_MAJOR__} and \i\c{__NASM_MINOR__}: \i{NASM
|
|
|
|
Version}
|
|
|
|
|
|
|
|
The single-line macros \c{__NASM_MAJOR__} and \c{__NASM_MINOR__}
|
|
|
|
expand to the major and minor parts of the \i{version number of
|
|
|
|
NASM} being used. So, under NASM 0.96 for example,
|
|
|
|
\c{__NASM_MAJOR__} would be defined to be 0 and \c{__NASM_MINOR__}
|
|
|
|
would be defined as 96.
|
|
|
|
|
|
|
|
\S{fileline} \i\c{__FILE__} and \i\c{__LINE__}: File Name and Line Number
|
|
|
|
|
|
|
|
Like the C preprocessor, NASM allows the user to find out the file
|
|
|
|
name and line number containing the current instruction. The macro
|
|
|
|
\c{__FILE__} expands to a string constant giving the name of the
|
|
|
|
current input file (which may change through the course of assembly
|
|
|
|
if \c{%include} directives are used), and \c{__LINE__} expands to a
|
|
|
|
numeric constant giving the current line number in the input file.
|
|
|
|
|
|
|
|
These macros could be used, for example, to communicate debugging
|
|
|
|
information to a macro, since invoking \c{__LINE__} inside a macro
|
|
|
|
definition (either single-line or multi-line) will return the line
|
|
|
|
number of the macro \e{call}, rather than \e{definition}. So to
|
|
|
|
determine where in a piece of code a crash is occurring, for
|
|
|
|
example, one could write a routine \c{stillhere}, which is passed a
|
|
|
|
line number in \c{EAX} and outputs something like `line 155: still
|
|
|
|
here'. You could then write a macro
|
|
|
|
|
|
|
|
\c %macro notdeadyet 0
|
|
|
|
\c push eax
|
|
|
|
\c mov eax,__LINE__
|
|
|
|
\c call stillhere
|
|
|
|
\c pop eax
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
and then pepper your code with calls to \c{notdeadyet} until you
|
|
|
|
find the crash point.
|
|
|
|
|
|
|
|
\S{struc} \i\c{STRUC} and \i\c{ENDSTRUC}: \i{Declaring Structure} Data Types
|
|
|
|
|
|
|
|
The core of NASM contains no intrinsic means of defining data
|
|
|
|
structures; instead, the preprocessor is sufficiently powerful that
|
|
|
|
data structures can be implemented as a set of macros. The macros
|
|
|
|
\c{STRUC} and \c{ENDSTRUC} are used to define a structure data type.
|
|
|
|
|
|
|
|
\c{STRUC} takes one parameter, which is the name of the data type.
|
|
|
|
This name is defined as a symbol with the value zero, and also has
|
|
|
|
the suffix \c{_size} appended to it and is then defined as an
|
|
|
|
\c{EQU} giving the size of the structure. Once \c{STRUC} has been
|
|
|
|
issued, you are defining the structure, and should define fields
|
|
|
|
using the \c{RESB} family of pseudo-instructions, and then invoke
|
|
|
|
\c{ENDSTRUC} to finish the definition.
|
|
|
|
|
|
|
|
For example, to define a structure called \c{mytype} containing a
|
|
|
|
longword, a word, a byte and a string of bytes, you might code
|
|
|
|
|
|
|
|
\c struc mytype
|
|
|
|
\c mt_long: resd 1
|
|
|
|
\c mt_word: resw 1
|
|
|
|
\c mt_byte: resb 1
|
|
|
|
\c mt_str: resb 32
|
|
|
|
\c endstruc
|
|
|
|
|
|
|
|
The above code defines six symbols: \c{mt_long} as 0 (the offset
|
|
|
|
from the beginning of a \c{mytype} structure to the longword field),
|
|
|
|
\c{mt_word} as 4, \c{mt_byte} as 6, \c{mt_str} as 7, \c{mytype_size}
|
|
|
|
as 39, and \c{mytype} itself as zero.
|
|
|
|
|
|
|
|
The reason why the structure type name is defined at zero is a side
|
|
|
|
effect of allowing structures to work with the local label
|
|
|
|
mechanism: if your structure members tend to have the same names in
|
|
|
|
more than one structure, you can define the above structure like this:
|
|
|
|
|
|
|
|
\c struc mytype
|
|
|
|
\c .long: resd 1
|
|
|
|
\c .word: resw 1
|
|
|
|
\c .byte: resb 1
|
|
|
|
\c .str: resb 32
|
|
|
|
\c endstruc
|
|
|
|
|
|
|
|
This defines the offsets to the structure fields as \c{mytype.long},
|
|
|
|
\c{mytype.word}, \c{mytype.byte} and \c{mytype.str}.
|
|
|
|
|
|
|
|
NASM, since it has no \e{intrinsic} structure support, does not
|
|
|
|
support any form of period notation to refer to the elements of a
|
|
|
|
structure once you have one (except the above local-label notation),
|
|
|
|
so code such as \c{mov ax,[mystruc.mt_word]} is not valid.
|
|
|
|
\c{mt_word} is a constant just like any other constant, so the
|
|
|
|
correct syntax is \c{mov ax,[mystruc+mt_word]} or \c{mov
|
|
|
|
ax,[mystruc+mytype.word]}.
|
|
|
|
|
|
|
|
\S{istruc} \i\c{ISTRUC}, \i\c{AT} and \i\c{IEND}: Declaring
|
|
|
|
\i{Instances of Structures}
|
|
|
|
|
|
|
|
Having defined a structure type, the next thing you typically want
|
|
|
|
to do is to declare instances of that structure in your data
|
|
|
|
segment. NASM provides an easy way to do this in the \c{ISTRUC}
|
|
|
|
mechanism. To declare a structure of type \c{mytype} in a program,
|
|
|
|
you code something like this:
|
|
|
|
|
|
|
|
\c mystruc: istruc mytype
|
|
|
|
\c at mt_long, dd 123456
|
|
|
|
\c at mt_word, dw 1024
|
|
|
|
\c at mt_byte, db 'x'
|
|
|
|
\c at mt_str, db 'hello, world', 13, 10, 0
|
|
|
|
\c iend
|
|
|
|
|
|
|
|
The function of the \c{AT} macro is to make use of the \c{TIMES}
|
|
|
|
prefix to advance the assembly position to the correct point for the
|
|
|
|
specified structure field, and then to declare the specified data.
|
|
|
|
Therefore the structure fields must be declared in the same order as
|
|
|
|
they were specified in the structure definition.
|
|
|
|
|
|
|
|
If the data to go in a structure field requires more than one source
|
|
|
|
line to specify, the remaining source lines can easily come after
|
|
|
|
the \c{AT} line. For example:
|
|
|
|
|
|
|
|
\c at mt_str, db 123,134,145,156,167,178,189
|
|
|
|
\c db 190,100,0
|
|
|
|
|
|
|
|
Depending on personal taste, you can also omit the code part of the
|
|
|
|
\c{AT} line completely, and start the structure field on the next
|
|
|
|
line:
|
|
|
|
|
|
|
|
\c at mt_str
|
|
|
|
\c db 'hello, world'
|
|
|
|
\c db 13,10,0
|
|
|
|
|
|
|
|
\S{align} \i\c{ALIGN} and \i\c{ALIGNB}: Data Alignment
|
|
|
|
|
|
|
|
The \c{ALIGN} and \c{ALIGNB} macros provide a convenient way to
|
|
|
|
align code or data on a word, longword, paragraph or other boundary.
|
|
|
|
(Some assemblers call this directive \i\c{EVEN}.) The syntax of the
|
|
|
|
\c{ALIGN} and \c{ALIGNB} macros is
|
|
|
|
|
|
|
|
\c align 4 ; align on 4-byte boundary
|
|
|
|
\c align 16 ; align on 16-byte boundary
|
|
|
|
\c align 8,db 0 ; pad with 0s rather than NOPs
|
|
|
|
\c align 4,resb 1 ; align to 4 in the BSS
|
|
|
|
\c alignb 4 ; equivalent to previous line
|
|
|
|
|
|
|
|
Both macros require their first argument to be a power of two; they
|
|
|
|
both compute the number of additional bytes required to bring the
|
|
|
|
length of the current section up to a multiple of that power of two,
|
|
|
|
and then apply the \c{TIMES} prefix to their second argument to
|
|
|
|
perform the alignment.
|
|
|
|
|
|
|
|
If the second argument is not specified, the default for \c{ALIGN}
|
|
|
|
is \c{NOP}, and the default for \c{ALIGNB} is \c{RESB 1}. So if the
|
|
|
|
second argument is specified, the two macros are equivalent.
|
|
|
|
Normally, you can just use \c{ALIGN} in code and data sections and
|
|
|
|
\c{ALIGNB} in BSS sections, and never need the second argument
|
|
|
|
except for special purposes.
|
|
|
|
|
|
|
|
\c{ALIGN} and \c{ALIGNB}, being simple macros, perform no error
|
|
|
|
checking: they cannot warn you if their first argument fails to be a
|
|
|
|
power of two, or if their second argument generates more than one
|
|
|
|
byte of code. In each of these cases they will silently do the wrong
|
|
|
|
thing.
|
|
|
|
|
|
|
|
\c{ALIGNB} (or \c{ALIGN} with a second argument of \c{RESB 1}) can
|
|
|
|
be used within structure definitions:
|
|
|
|
|
|
|
|
\c struc mytype2
|
|
|
|
\c mt_byte: resb 1
|
|
|
|
\c alignb 2
|
|
|
|
\c mt_word: resw 1
|
|
|
|
\c alignb 4
|
|
|
|
\c mt_long: resd 1
|
|
|
|
\c mt_str: resb 32
|
|
|
|
\c endstruc
|
|
|
|
|
|
|
|
This will ensure that the structure members are sensibly aligned
|
|
|
|
relative to the base of the structure.
|
|
|
|
|
|
|
|
A final caveat: \c{ALIGN} and \c{ALIGNB} work relative to the
|
|
|
|
beginning of the \e{section}, not the beginning of the address space
|
|
|
|
in the final executable. Aligning to a 16-byte boundary when the
|
|
|
|
section you're in is only guaranteed to be aligned to a 4-byte
|
|
|
|
boundary, for example, is a waste of effort. Again, NASM does not
|
|
|
|
check that the section's alignment characteristics are sensible for
|
|
|
|
the use of \c{ALIGN} or \c{ALIGNB}.
|
|
|
|
|
2002-05-01 05:01:08 +08:00
|
|
|
\H{tasmcompat} \i{TASM Compatible Preprocessor Directives}
|
|
|
|
|
|
|
|
The following preprocessor directives may only be used when TASM
|
|
|
|
compatibility is turned on using the \c{-t} command line switch.
|
|
|
|
(This switch is described in \k{opt-t}.)
|
|
|
|
|
|
|
|
\b\c{%arg} (see \k{arg})
|
|
|
|
|
|
|
|
\b\c{%stacksize} (see \k{stacksize})
|
|
|
|
|
|
|
|
\b\c{%local} (see \k{local})
|
|
|
|
|
|
|
|
\S{arg} \i\c{%arg} Directive
|
|
|
|
|
|
|
|
The \c{%arg} directive is used to simplify the handling of
|
|
|
|
parameters passed on the stack. Stack based parameter passing
|
|
|
|
is used by many high level languages, including C, C++ and Pascal.
|
|
|
|
|
|
|
|
While NASM comes with macros which attempt to duplicate this
|
|
|
|
functionality (see \k{16cmacro}), the syntax is not particularly
|
|
|
|
convenient to use and is not TASM compatible. Here is an example
|
|
|
|
which shows the use of \c{%arg} without any external macros:
|
|
|
|
|
|
|
|
\c some_function:
|
|
|
|
\c %push mycontext ; save the current context
|
|
|
|
\c %stacksize large ; tell NASM to use bp
|
|
|
|
\c %arg i:word, j_ptr:word
|
|
|
|
\c mov ax,[i]
|
|
|
|
\c mov bx,[j_ptr]
|
|
|
|
\c add ax,[bx]
|
|
|
|
\c ret
|
|
|
|
\c %pop ; restore original context
|
|
|
|
|
|
|
|
This is similar to the procedure defined in \k{16cmacro} and adds
|
|
|
|
the value in i to the value pointed to by j_ptr and returns the
|
|
|
|
sum in the ax register. See \k{pushpop} for an explanation of
|
|
|
|
\c{%push} and \c{%pop} and the use of context stacks.
|
|
|
|
|
|
|
|
\S{stacksize} \i\c{%stacksize} Directive
|
|
|
|
|
|
|
|
The \c{%stacksize} directive is used in conjunction with the
|
|
|
|
\c{%arg} (see \k{arg}) and the \c{%local} (see \k{local}) directives.
|
|
|
|
It tells NASM the default size to use for subsequent \c{%arg} and
|
|
|
|
\c{%local} directives. The \c{%stacksize} directive takes one
|
|
|
|
required argument which is one of \c{flat}, \c{large} or \c{small}.
|
|
|
|
|
|
|
|
\c %stacksize flat
|
|
|
|
|
|
|
|
This form causes NASM to use stack-based parameter addressing
|
|
|
|
relative to \c{ebp} and it assumes that a near form of call was used
|
|
|
|
to get to this label (i.e. that \c{eip} is on the stack).
|
|
|
|
|
|
|
|
\c %stacksize large
|
|
|
|
|
|
|
|
This form uses \c{bp} to do stack-based parameter addressing and
|
|
|
|
assumes that a far form of call was used to get to this address
|
|
|
|
(i.e. that \c{ip} and \c{cs} are on the stack).
|
|
|
|
|
|
|
|
\c %stacksize small
|
|
|
|
|
|
|
|
This form also uses \c{bp} to address stack parameters, but it is
|
|
|
|
different from \c{large} because it also assumes that the old value
|
|
|
|
of bp is pushed onto the stack (i.e. it expects an \c{ENTER}
|
|
|
|
instruction). In other words, it expects that \c{bp}, \c{ip} and
|
|
|
|
\c{cs} are on the top of the stack, underneath any local space which
|
|
|
|
may have been allocated by \c{ENTER}. This form is probably most
|
|
|
|
useful when used in combination with the \c{%local} directive
|
|
|
|
(see \k{local}).
|
|
|
|
|
|
|
|
\S{local} \i\c{%local} Directive
|
|
|
|
|
|
|
|
The \c{%local} directive is used to simplify the use of local
|
|
|
|
temporary stack variables allocated in a stack frame. Automatic
|
|
|
|
local variables in C are an example of this kind of variable. The
|
|
|
|
\c{%local} directive is most useful when used with the \c{%stacksize}
|
|
|
|
directive (see \k{stacksize}) and is also compatible with the \c{%arg} directive
|
|
|
|
(see \k{arg}). It allows simplified reference to variables on the
|
|
|
|
stack which have been allocated typically by using the \c{ENTER}
|
|
|
|
instruction (see \k{insENTER} for a description of that instruction).
|
|
|
|
An example of its use is the following:
|
|
|
|
|
|
|
|
\c silly_swap:
|
|
|
|
\c %push mycontext ; save the current context
|
|
|
|
\c %stacksize small ; tell NASM to use bp
|
|
|
|
\c %assign %$localsize 0 ; see text for explanation
|
|
|
|
\c %local old_ax:word, old_dx:word
|
|
|
|
\c enter %$localsize,0 ; see text for explanation
|
|
|
|
\c mov [old_ax],ax ; swap ax & bx
|
|
|
|
\c mov [old_dx],dx ; and swap dx & cx
|
|
|
|
\c mov ax,bx
|
|
|
|
\c mov dx,cx
|
|
|
|
\c mov bx,[old_ax]
|
|
|
|
\c mov cx,[old_dx]
|
|
|
|
\c leave ; restore old bp
|
|
|
|
\c ret ;
|
|
|
|
\c %pop ; restore original context
|
|
|
|
|
|
|
|
The \c{%$localsize} variable is used internally by the
|
|
|
|
\c{%local} directive and \e{must} be defined within the
|
|
|
|
current context before the \c{%local} directive may be used.
|
|
|
|
Failure to do so will result in one expression syntax error for
|
|
|
|
each \c{%local} variable declared. It then may be used in
|
|
|
|
the construction of an appropriately sized \c{ENTER} instruction
|
|
|
|
as shown in the example.
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\C{directive} \i{Assembler Directives}
|
|
|
|
|
|
|
|
NASM, though it attempts to avoid the bureaucracy of assemblers like
|
|
|
|
MASM and TASM, is nevertheless forced to support a \e{few}
|
|
|
|
directives. These are described in this chapter.
|
|
|
|
|
|
|
|
NASM's directives come in two types: \I{user-level
|
|
|
|
directives}\e{user-level} directives and \I{primitive
|
|
|
|
directives}\e{primitive} directives. Typically, each directive has a
|
|
|
|
user-level form and a primitive form. In almost all cases, we
|
|
|
|
recommend that users use the user-level forms of the directives,
|
|
|
|
which are implemented as macros which call the primitive forms.
|
|
|
|
|
|
|
|
Primitive directives are enclosed in square brackets; user-level
|
|
|
|
directives are not.
|
|
|
|
|
|
|
|
In addition to the universal directives described in this chapter,
|
|
|
|
each object file format can optionally supply extra directives in
|
|
|
|
order to control particular features of that file format. These
|
|
|
|
\I{format-specific directives}\e{format-specific} directives are
|
|
|
|
documented along with the formats that implement them, in \k{outfmt}.
|
|
|
|
|
|
|
|
\H{bits} \i\c{BITS}: Specifying Target \i{Processor Mode}
|
|
|
|
|
|
|
|
The \c{BITS} directive specifies whether NASM should generate code
|
|
|
|
\I{16-bit mode, versus 32-bit mode}designed to run on a processor
|
|
|
|
operating in 16-bit mode, or code designed to run on a processor
|
|
|
|
operating in 32-bit mode. The syntax is \c{BITS 16} or \c{BITS 32}.
|
|
|
|
|
|
|
|
In most cases, you should not need to use \c{BITS} explicitly. The
|
|
|
|
\c{aout}, \c{coff}, \c{elf} and \c{win32} object formats, which are
|
|
|
|
designed for use in 32-bit operating systems, all cause NASM to
|
|
|
|
select 32-bit mode by default. The \c{obj} object format allows you
|
|
|
|
to specify each segment you define as either \c{USE16} or \c{USE32},
|
|
|
|
and NASM will set its operating mode accordingly, so the use of the
|
|
|
|
\c{BITS} directive is once again unnecessary.
|
|
|
|
|
|
|
|
The most likely reason for using the \c{BITS} directive is to write
|
|
|
|
32-bit code in a flat binary file; this is because the \c{bin}
|
|
|
|
output format defaults to 16-bit mode in anticipation of it being
|
|
|
|
used most frequently to write DOS \c{.COM} programs, DOS \c{.SYS}
|
|
|
|
device drivers and boot loader software.
|
|
|
|
|
|
|
|
You do \e{not} need to specify \c{BITS 32} merely in order to use
|
|
|
|
32-bit instructions in a 16-bit DOS program; if you do, the
|
|
|
|
assembler will generate incorrect code because it will be writing
|
|
|
|
code targeted at a 32-bit platform, to be run on a 16-bit one.
|
|
|
|
|
|
|
|
When NASM is in \c{BITS 16} state, instructions which use 32-bit
|
|
|
|
data are prefixed with an 0x66 byte, and those referring to 32-bit
|
|
|
|
addresses have an 0x67 prefix. In \c{BITS 32} state, the reverse is
|
|
|
|
true: 32-bit instructions require no prefixes, whereas instructions
|
|
|
|
using 16-bit data need an 0x66 and those working in 16-bit addresses
|
|
|
|
need an 0x67.
|
|
|
|
|
|
|
|
The \c{BITS} directive has an exactly equivalent primitive form,
|
|
|
|
\c{[BITS 16]} and \c{[BITS 32]}. The user-level form is a macro
|
|
|
|
which has no function other than to call the primitive form.
|
|
|
|
|
|
|
|
\H{section} \i\c{SECTION} or \i\c{SEGMENT}: Changing and \i{Defining
|
|
|
|
Sections}
|
|
|
|
|
|
|
|
\I{changing sections}\I{switching between sections}The \c{SECTION}
|
|
|
|
directive (\c{SEGMENT} is an exactly equivalent synonym) changes
|
|
|
|
which section of the output file the code you write will be
|
|
|
|
assembled into. In some object file formats, the number and names of
|
|
|
|
sections are fixed; in others, the user may make up as many as they
|
|
|
|
wish. Hence \c{SECTION} may sometimes give an error message, or may
|
|
|
|
define a new section, if you try to switch to a section that does
|
|
|
|
not (yet) exist.
|
|
|
|
|
|
|
|
The Unix object formats, and the \c{bin} object format, all support
|
|
|
|
the \i{standardised section names} \c{.text}, \c{.data} and \c{.bss}
|
|
|
|
for the code, data and uninitialised-data sections. The \c{obj}
|
|
|
|
format, by contrast, does not recognise these section names as being
|
|
|
|
special, and indeed will strip off the leading period of any section
|
|
|
|
name that has one.
|
|
|
|
|
|
|
|
\S{sectmac} The \i\c{__SECT__} Macro
|
|
|
|
|
|
|
|
The \c{SECTION} directive is unusual in that its user-level form
|
|
|
|
functions differently from its primitive form. The primitive form,
|
|
|
|
\c{[SECTION xyz]}, simply switches the current target section to the
|
|
|
|
one given. The user-level form, \c{SECTION xyz}, however, first
|
|
|
|
defines the single-line macro \c{__SECT__} to be the primitive
|
|
|
|
\c{[SECTION]} directive which it is about to issue, and then issues
|
|
|
|
it. So the user-level directive
|
|
|
|
|
|
|
|
\c SECTION .text
|
|
|
|
|
|
|
|
expands to the two lines
|
|
|
|
|
|
|
|
\c %define __SECT__ [SECTION .text]
|
|
|
|
\c [SECTION .text]
|
|
|
|
|
|
|
|
Users may find it useful to make use of this in their own macros.
|
|
|
|
For example, the \c{writefile} macro defined in \k{mlmacgre} can be
|
|
|
|
usefully rewritten in the following more sophisticated form:
|
|
|
|
|
|
|
|
\c %macro writefile 2+
|
|
|
|
\c [section .data]
|
|
|
|
\c %%str: db %2
|
|
|
|
\c %%endstr:
|
|
|
|
\c __SECT__
|
|
|
|
\c mov dx,%%str
|
|
|
|
\c mov cx,%%endstr-%%str
|
|
|
|
\c mov bx,%1
|
|
|
|
\c mov ah,0x40
|
|
|
|
\c int 0x21
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
This form of the macro, once passed a string to output, first
|
|
|
|
switches temporarily to the data section of the file, using the
|
|
|
|
primitive form of the \c{SECTION} directive so as not to modify
|
|
|
|
\c{__SECT__}. It then declares its string in the data section, and
|
|
|
|
then invokes \c{__SECT__} to switch back to \e{whichever} section
|
|
|
|
the user was previously working in. It thus avoids the need, in the
|
|
|
|
previous version of the macro, to include a \c{JMP} instruction to
|
|
|
|
jump over the data, and also does not fail if, in a complicated
|
|
|
|
\c{OBJ} format module, the user could potentially be assembling the
|
|
|
|
code in any of several separate code sections.
|
|
|
|
|
|
|
|
\H{absolute} \i\c{ABSOLUTE}: Defining Absolute Labels
|
|
|
|
|
|
|
|
The \c{ABSOLUTE} directive can be thought of as an alternative form
|
|
|
|
of \c{SECTION}: it causes the subsequent code to be directed at no
|
|
|
|
physical section, but at the hypothetical section starting at the
|
|
|
|
given absolute address. The only instructions you can use in this
|
|
|
|
mode are the \c{RESB} family.
|
|
|
|
|
|
|
|
\c{ABSOLUTE} is used as follows:
|
|
|
|
|
|
|
|
\c absolute 0x1A
|
|
|
|
\c kbuf_chr resw 1
|
|
|
|
\c kbuf_free resw 1
|
|
|
|
\c kbuf resw 16
|
|
|
|
|
|
|
|
This example describes a section of the PC BIOS data area, at
|
|
|
|
segment address 0x40: the above code defines \c{kbuf_chr} to be
|
|
|
|
0x1A, \c{kbuf_free} to be 0x1C, and \c{kbuf} to be 0x1E.
|
|
|
|
|
|
|
|
The user-level form of \c{ABSOLUTE}, like that of \c{SECTION},
|
|
|
|
redefines the \i\c{__SECT__} macro when it is invoked.
|
|
|
|
|
|
|
|
\i\c{STRUC} and \i\c{ENDSTRUC} are defined as macros which use
|
|
|
|
\c{ABSOLUTE} (and also \c{__SECT__}).
|
|
|
|
|
|
|
|
\c{ABSOLUTE} doesn't have to take an absolute constant as an
|
|
|
|
argument: it can take an expression (actually, a \i{critical
|
|
|
|
expression}: see \k{crit}) and it can be a value in a segment. For
|
|
|
|
example, a TSR can re-use its setup code as run-time BSS like this:
|
|
|
|
|
|
|
|
\c org 100h ; it's a .COM program
|
|
|
|
\c jmp setup ; setup code comes last
|
|
|
|
\c ; the resident part of the TSR goes here
|
|
|
|
\c setup: ; now write the code that installs the TSR here
|
|
|
|
\c absolute setup
|
|
|
|
\c runtimevar1 resw 1
|
|
|
|
\c runtimevar2 resd 20
|
|
|
|
\c tsr_end:
|
|
|
|
|
|
|
|
This defines some variables `on top of' the setup code, so that
|
|
|
|
after the setup has finished running, the space it took up can be
|
|
|
|
re-used as data storage for the running TSR. The symbol \c{tsr_end}
|
|
|
|
can be used to calculate the total size of the part of the TSR that
|
|
|
|
needs to be made resident.
|
|
|
|
|
|
|
|
\H{extern} \i\c{EXTERN}: \i{Importing Symbols} from Other Modules
|
|
|
|
|
|
|
|
\c{EXTERN} is similar to the MASM directive \c{EXTRN} and the C
|
|
|
|
keyword \c{extern}: it is used to declare a symbol which is not
|
|
|
|
defined anywhere in the module being assembled, but is assumed to be
|
|
|
|
defined in some other module and needs to be referred to by this
|
|
|
|
one. Not every object-file format can support external variables:
|
|
|
|
the \c{bin} format cannot.
|
|
|
|
|
|
|
|
The \c{EXTERN} directive takes as many arguments as you like. Each
|
|
|
|
argument is the name of a symbol:
|
|
|
|
|
|
|
|
\c extern _printf
|
|
|
|
\c extern _sscanf,_fscanf
|
|
|
|
|
|
|
|
Some object-file formats provide extra features to the \c{EXTERN}
|
|
|
|
directive. In all cases, the extra features are used by suffixing a
|
|
|
|
colon to the symbol name followed by object-format specific text.
|
|
|
|
For example, the \c{obj} format allows you to declare that the
|
|
|
|
default segment base of an external should be the group \c{dgroup}
|
|
|
|
by means of the directive
|
|
|
|
|
|
|
|
\c extern _variable:wrt dgroup
|
|
|
|
|
|
|
|
The primitive form of \c{EXTERN} differs from the user-level form
|
|
|
|
only in that it can take only one argument at a time: the support
|
|
|
|
for multiple arguments is implemented at the preprocessor level.
|
|
|
|
|
|
|
|
You can declare the same variable as \c{EXTERN} more than once: NASM
|
|
|
|
will quietly ignore the second and later redeclarations. You can't
|
|
|
|
declare a variable as \c{EXTERN} as well as something else, though.
|
|
|
|
|
|
|
|
\H{global} \i\c{GLOBAL}: \i{Exporting Symbols} to Other Modules
|
|
|
|
|
|
|
|
\c{GLOBAL} is the other end of \c{EXTERN}: if one module declares a
|
|
|
|
symbol as \c{EXTERN} and refers to it, then in order to prevent
|
|
|
|
linker errors, some other module must actually \e{define} the
|
|
|
|
symbol and declare it as \c{GLOBAL}. Some assemblers use the name
|
|
|
|
\i\c{PUBLIC} for this purpose.
|
|
|
|
|
|
|
|
The \c{GLOBAL} directive applying to a symbol must appear \e{before}
|
|
|
|
the definition of the symbol.
|
|
|
|
|
|
|
|
\c{GLOBAL} uses the same syntax as \c{EXTERN}, except that it must
|
|
|
|
refer to symbols which \e{are} defined in the same module as the
|
|
|
|
\c{GLOBAL} directive. For example:
|
|
|
|
|
|
|
|
\c global _main
|
|
|
|
\c _main: ; some code
|
|
|
|
|
|
|
|
\c{GLOBAL}, like \c{EXTERN}, allows object formats to define private
|
|
|
|
extensions by means of a colon. The \c{elf} object format, for
|
|
|
|
example, lets you specify whether global data items are functions or
|
|
|
|
data:
|
|
|
|
|
|
|
|
\c global hashlookup:function, hashtable:data
|
|
|
|
|
|
|
|
Like \c{EXTERN}, the primitive form of \c{GLOBAL} differs from the
|
|
|
|
user-level form only in that it can take only one argument at a
|
|
|
|
time.
|
|
|
|
|
|
|
|
\H{common} \i\c{COMMON}: Defining Common Data Areas
|
|
|
|
|
|
|
|
The \c{COMMON} directive is used to declare \i\e{common variables}.
|
|
|
|
A common variable is much like a global variable declared in the
|
|
|
|
uninitialised data section, so that
|
|
|
|
|
|
|
|
\c common intvar 4
|
|
|
|
|
|
|
|
is similar in function to
|
|
|
|
|
|
|
|
\c global intvar
|
|
|
|
\c section .bss
|
|
|
|
\c intvar resd 1
|
|
|
|
|
|
|
|
The difference is that if more than one module defines the same
|
|
|
|
common variable, then at link time those variables will be
|
|
|
|
\e{merged}, and references to \c{intvar} in all modules will point
|
|
|
|
at the same piece of memory.
|
|
|
|
|
|
|
|
Like \c{GLOBAL} and \c{EXTERN}, \c{COMMON} supports object-format
|
|
|
|
specific extensions. For example, the \c{obj} format allows common
|
|
|
|
variables to be NEAR or FAR, and the \c{elf} format allows you to
|
|
|
|
specify the alignment requirements of a common variable:
|
|
|
|
|
|
|
|
\c common commvar 4:near ; works in OBJ
|
|
|
|
\c common intarray 100:4 ; works in ELF: 4 byte aligned
|
|
|
|
|
|
|
|
Once again, like \c{EXTERN} and \c{GLOBAL}, the primitive form of
|
|
|
|
\c{COMMON} differs from the user-level form only in that it can take
|
|
|
|
only one argument at a time.
|
|
|
|
|
|
|
|
\C{outfmt} \i{Output Formats}
|
|
|
|
|
|
|
|
NASM is a portable assembler, designed to be able to compile on any
|
|
|
|
ANSI C-supporting platform and produce output to run on a variety of
|
|
|
|
Intel x86 operating systems. For this reason, it has a large number
|
|
|
|
of available output formats, selected using the \i\c{-f} option on
|
|
|
|
the NASM \i{command line}. Each of these formats, along with its
|
|
|
|
extensions to the base NASM syntax, is detailed in this chapter.
|
|
|
|
|
|
|
|
As stated in \k{opt-o}, NASM chooses a \i{default name} for your
|
|
|
|
output file based on the input file name and the chosen output
|
|
|
|
format. This will be generated by removing the \i{extension}
|
|
|
|
(\c{.asm}, \c{.s}, or whatever you like to use) from the input file
|
|
|
|
name, and substituting an extension defined by the output format.
|
|
|
|
The extensions are given with each format below.
|
|
|
|
|
|
|
|
\H{binfmt} \i\c{bin}: \i{Flat-Form Binary}\I{pure binary} Output
|
|
|
|
|
|
|
|
The \c{bin} format does not produce object files: it generates
|
|
|
|
nothing in the output file except the code you wrote. Such `pure
|
|
|
|
binary' files are used by \i{MS-DOS}: \i\c{.COM} executables and
|
|
|
|
\i\c{.SYS} device drivers are pure binary files. Pure binary output
|
|
|
|
is also useful for \i{operating-system} and \i{boot loader}
|
|
|
|
development.
|
|
|
|
|
|
|
|
\c{bin} supports the three \i{standardised section names} \i\c{.text},
|
|
|
|
\i\c{.data} and \i\c{.bss} only. The file NASM outputs will contain the
|
|
|
|
contents of the \c{.text} section first, followed by the contents of
|
|
|
|
the \c{.data} section, aligned on a four-byte boundary. The \c{.bss}
|
|
|
|
section is not stored in the output file at all, but is assumed to
|
|
|
|
appear directly after the end of the \c{.data} section, again
|
|
|
|
aligned on a four-byte boundary.
|
|
|
|
|
|
|
|
If you specify no explicit \c{SECTION} directive, the code you write
|
|
|
|
will be directed by default into the \c{.text} section.
|
|
|
|
|
|
|
|
Using the \c{bin} format puts NASM by default into 16-bit mode (see
|
|
|
|
\k{bits}). In order to use \c{bin} to write 32-bit code such as an
|
|
|
|
OS kernel, you need to explicitly issue the \I\c{BITS}\c{BITS 32}
|
|
|
|
directive.
|
|
|
|
|
|
|
|
\c{bin} has no default output file name extension: instead, it
|
|
|
|
leaves your file name as it is once the original extension has been
|
|
|
|
removed. Thus, the default is for NASM to assemble \c{binprog.asm}
|
|
|
|
into a binary file called \c{binprog}.
|
|
|
|
|
|
|
|
\S{org} \i\c{ORG}: Binary File \i{Program Origin}
|
|
|
|
|
|
|
|
The \c{bin} format provides an additional directive to the list
|
|
|
|
given in \k{directive}: \c{ORG}. The function of the \c{ORG}
|
|
|
|
directive is to specify the origin address which NASM will assume
|
|
|
|
the program begins at when it is loaded into memory.
|
|
|
|
|
|
|
|
For example, the following code will generate the longword
|
|
|
|
\c{0x00000104}:
|
|
|
|
|
|
|
|
\c org 0x100
|
|
|
|
\c dd label
|
|
|
|
\c label:
|
|
|
|
|
|
|
|
Unlike the \c{ORG} directive provided by MASM-compatible assemblers,
|
|
|
|
which allows you to jump around in the object file and overwrite
|
|
|
|
code you have already generated, NASM's \c{ORG} does exactly what
|
|
|
|
the directive says: \e{origin}. Its sole function is to specify one
|
|
|
|
offset which is added to all internal address references within the
|
|
|
|
file; it does not permit any of the trickery that MASM's version
|
|
|
|
does. See \k{proborg} for further comments.
|
|
|
|
|
|
|
|
\S{binseg} \c{bin} Extensions to the \c{SECTION}
|
|
|
|
Directive\I{SECTION, bin extensions to}
|
|
|
|
|
|
|
|
The \c{bin} output format extends the \c{SECTION} (or \c{SEGMENT})
|
|
|
|
directive to allow you to specify the alignment requirements of
|
|
|
|
segments. This is done by appending the \i\c{ALIGN} qualifier to the
|
|
|
|
end of the section-definition line. For example,
|
|
|
|
|
|
|
|
\c section .data align=16
|
|
|
|
|
|
|
|
switches to the section \c{.data} and also specifies that it must be
|
|
|
|
aligned on a 16-byte boundary.
|
|
|
|
|
|
|
|
The parameter to \c{ALIGN} specifies how many low bits of the
|
|
|
|
section start address must be forced to zero. The alignment value
|
|
|
|
given may be any power of two.\I{section alignment, in
|
|
|
|
bin}\I{segment alignment, in bin}\I{alignment, in bin sections}
|
|
|
|
|
|
|
|
\H{objfmt} \i\c{obj}: \i{Microsoft OMF}\I{OMF} Object Files
|
|
|
|
|
|
|
|
The \c{obj} file format (NASM calls it \c{obj} rather than \c{omf}
|
|
|
|
for historical reasons) is the one produced by \i{MASM} and
|
|
|
|
\i{TASM}, which is typically fed to 16-bit DOS linkers to produce
|
|
|
|
\i\c{.EXE} files. It is also the format used by \i{OS/2}.
|
|
|
|
|
|
|
|
\c{obj} provides a default output file-name extension of \c{.obj}.
|
|
|
|
|
|
|
|
\c{obj} is not exclusively a 16-bit format, though: NASM has full
|
|
|
|
support for the 32-bit extensions to the format. In particular,
|
|
|
|
32-bit \c{obj} format files are used by \i{Borland's Win32
|
|
|
|
compilers}, instead of using Microsoft's newer \i\c{win32} object
|
|
|
|
file format.
|
|
|
|
|
|
|
|
The \c{obj} format does not define any special segment names: you
|
|
|
|
can call your segments anything you like. Typical names for segments
|
|
|
|
in \c{obj} format files are \c{CODE}, \c{DATA} and \c{BSS}.
|
|
|
|
|
|
|
|
If your source file contains code before specifying an explicit
|
|
|
|
\c{SEGMENT} directive, then NASM will invent its own segment called
|
|
|
|
\i\c{__NASMDEFSEG} for you.
|
|
|
|
|
|
|
|
When you define a segment in an \c{obj} file, NASM defines the
|
|
|
|
segment name as a symbol as well, so that you can access the segment
|
|
|
|
address of the segment. So, for example:
|
|
|
|
|
|
|
|
\c segment data
|
|
|
|
\c dvar: dw 1234
|
|
|
|
\c segment code
|
|
|
|
\c function: mov ax,data ; get segment address of data
|
|
|
|
\c mov ds,ax ; and move it into DS
|
|
|
|
\c inc word [dvar] ; now this reference will work
|
|
|
|
\c ret
|
|
|
|
|
|
|
|
The \c{obj} format also enables the use of the \i\c{SEG} and
|
|
|
|
\i\c{WRT} operators, so that you can write code which does things
|
|
|
|
like
|
|
|
|
|
|
|
|
\c extern foo
|
|
|
|
\c mov ax,seg foo ; get preferred segment of foo
|
|
|
|
\c mov ds,ax
|
|
|
|
\c mov ax,data ; a different segment
|
|
|
|
\c mov es,ax
|
|
|
|
\c mov ax,[ds:foo] ; this accesses `foo'
|
|
|
|
\c mov [es:foo wrt data],bx ; so does this
|
|
|
|
|
|
|
|
\S{objseg} \c{obj} Extensions to the \c{SEGMENT}
|
|
|
|
Directive\I{SEGMENT, obj extensions to}
|
|
|
|
|
|
|
|
The \c{obj} output format extends the \c{SEGMENT} (or \c{SECTION})
|
|
|
|
directive to allow you to specify various properties of the segment
|
|
|
|
you are defining. This is done by appending extra qualifiers to the
|
|
|
|
end of the segment-definition line. For example,
|
|
|
|
|
|
|
|
\c segment code private align=16
|
|
|
|
|
|
|
|
defines the segment \c{code}, but also declares it to be a private
|
|
|
|
segment, and requires that the portion of it described in this code
|
|
|
|
module must be aligned on a 16-byte boundary.
|
|
|
|
|
|
|
|
The available qualifiers are:
|
|
|
|
|
|
|
|
\b \i\c{PRIVATE}, \i\c{PUBLIC}, \i\c{COMMON} and \i\c{STACK} specify
|
|
|
|
the combination characteristics of the segment. \c{PRIVATE} segments
|
|
|
|
do not get combined with any others by the linker; \c{PUBLIC} and
|
|
|
|
\c{STACK} segments get concatenated together at link time; and
|
|
|
|
\c{COMMON} segments all get overlaid on top of each other rather
|
|
|
|
than stuck end-to-end.
|
|
|
|
|
|
|
|
\b \i\c{ALIGN} is used, as shown above, to specify how many low bits
|
|
|
|
of the segment start address must be forced to zero. The alignment
|
|
|
|
value given may be any power of two from 1 to 4096; in reality, the
|
|
|
|
only values supported are 1, 2, 4, 16, 256 and 4096, so if 8 is
|
|
|
|
specified it will be rounded up to 16, and 32, 64 and 128 will all
|
|
|
|
be rounded up to 256, and so on. Note that alignment to 4096-byte
|
|
|
|
boundaries is a \i{PharLap} extension to the format and may not be
|
|
|
|
supported by all linkers.\I{section alignment, in OBJ}\I{segment
|
|
|
|
alignment, in OBJ}\I{alignment, in OBJ sections}
|
|
|
|
|
|
|
|
\b \i\c{CLASS} can be used to specify the segment class; this feature
|
|
|
|
indicates to the linker that segments of the same class should be
|
|
|
|
placed near each other in the output file. The class name can be any
|
|
|
|
word, e.g. \c{CLASS=CODE}.
|
|
|
|
|
|
|
|
\b \i\c{OVERLAY}, like \c{CLASS}, is specified with an arbitrary word
|
|
|
|
as an argument, and provides overlay information to an
|
|
|
|
overlay-capable linker.
|
|
|
|
|
|
|
|
\b Segments can be declared as \i\c{USE16} or \i\c{USE32}, which has
|
|
|
|
the effect of recording the choice in the object file and also
|
|
|
|
ensuring that NASM's default assembly mode when assembling in that
|
|
|
|
segment is 16-bit or 32-bit respectively.
|
|
|
|
|
|
|
|
\b When writing \i{OS/2} object files, you should declare 32-bit
|
|
|
|
segments as \i\c{FLAT}, which causes the default segment base for
|
|
|
|
anything in the segment to be the special group \c{FLAT}, and also
|
|
|
|
defines the group if it is not already defined.
|
|
|
|
|
|
|
|
\b The \c{obj} file format also allows segments to be declared as
|
|
|
|
having a pre-defined absolute segment address, although no linkers
|
|
|
|
are currently known to make sensible use of this feature;
|
|
|
|
nevertheless, NASM allows you to declare a segment such as
|
|
|
|
\c{SEGMENT SCREEN ABSOLUTE=0xB800} if you need to. The \i\c{ABSOLUTE}
|
|
|
|
and \c{ALIGN} keywords are mutually exclusive.
|
|
|
|
|
|
|
|
NASM's default segment attributes are \c{PUBLIC}, \c{ALIGN=1}, no
|
|
|
|
class, no overlay, and \c{USE16}.
|
|
|
|
|
|
|
|
\S{group} \i\c{GROUP}: Defining Groups of Segments\I{segments, groups of}
|
|
|
|
|
|
|
|
The \c{obj} format also allows segments to be grouped, so that a
|
|
|
|
single segment register can be used to refer to all the segments in
|
|
|
|
a group. NASM therefore supplies the \c{GROUP} directive, whereby
|
|
|
|
you can code
|
|
|
|
|
|
|
|
\c segment data
|
|
|
|
\c ; some data
|
|
|
|
\c segment bss
|
|
|
|
\c ; some uninitialised data
|
|
|
|
\c group dgroup data bss
|
|
|
|
|
|
|
|
which will define a group called \c{dgroup} to contain the segments
|
|
|
|
\c{data} and \c{bss}. Like \c{SEGMENT}, \c{GROUP} causes the group
|
|
|
|
name to be defined as a symbol, so that you can refer to a variable
|
|
|
|
\c{var} in the \c{data} segment as \c{var wrt data} or as \c{var wrt
|
|
|
|
dgroup}, depending on which segment value is currently in your
|
|
|
|
segment register.
|
|
|
|
|
|
|
|
If you just refer to \c{var}, however, and \c{var} is declared in a
|
|
|
|
segment which is part of a group, then NASM will default to giving
|
|
|
|
you the offset of \c{var} from the beginning of the \e{group}, not
|
|
|
|
the \e{segment}. Therefore \c{SEG var}, also, will return the group
|
|
|
|
base rather than the segment base.
|
|
|
|
|
|
|
|
NASM will allow a segment to be part of more than one group, but
|
|
|
|
will generate a warning if you do this. Variables declared in a
|
|
|
|
segment which is part of more than one group will default to being
|
|
|
|
relative to the first group that was defined to contain the segment.
|
|
|
|
|
|
|
|
A group does not have to contain any segments; you can still make
|
|
|
|
\c{WRT} references to a group which does not contain the variable
|
|
|
|
you are referring to. OS/2, for example, defines the special group
|
|
|
|
\c{FLAT} with no segments in it.
|
|
|
|
|
|
|
|
\S{uppercase} \i\c{UPPERCASE}: Disabling Case Sensitivity in Output
|
|
|
|
|
|
|
|
Although NASM itself is \i{case sensitive}, some OMF linkers are
|
|
|
|
not; therefore it can be useful for NASM to output single-case
|
|
|
|
object files. The \c{UPPERCASE} format-specific directive causes all
|
|
|
|
segment, group and symbol names that are written to the object file
|
|
|
|
to be forced to upper case just before being written. Within a
|
|
|
|
source file, NASM is still case-sensitive; but the object file can
|
|
|
|
be written entirely in upper case if desired.
|
|
|
|
|
|
|
|
\c{UPPERCASE} is used alone on a line; it requires no parameters.
|
|
|
|
|
|
|
|
\S{import} \i\c{IMPORT}: Importing DLL Symbols\I{DLL symbols,
|
|
|
|
importing}\I{symbols, importing from DLLs}
|
|
|
|
|
|
|
|
The \c{IMPORT} format-specific directive defines a symbol to be
|
|
|
|
imported from a DLL, for use if you are writing a DLL's \i{import
|
|
|
|
library} in NASM. You still need to declare the symbol as \c{EXTERN}
|
|
|
|
as well as using the \c{IMPORT} directive.
|
|
|
|
|
|
|
|
The \c{IMPORT} directive takes two required parameters, separated by
|
|
|
|
white space, which are (respectively) the name of the symbol you
|
|
|
|
wish to import and the name of the library you wish to import it
|
|
|
|
from. For example:
|
|
|
|
|
|
|
|
\c import WSAStartup wsock32.dll
|
|
|
|
|
|
|
|
A third optional parameter gives the name by which the symbol is
|
|
|
|
known in the library you are importing it from, in case this is not
|
|
|
|
the same as the name you wish the symbol to be known by to your code
|
|
|
|
once you have imported it. For example:
|
|
|
|
|
|
|
|
\c import asyncsel wsock32.dll WSAAsyncSelect
|
|
|
|
|
|
|
|
\S{export} \i\c{EXPORT}: Exporting DLL Symbols\I{DLL symbols,
|
|
|
|
exporting}\I{symbols, exporting from DLLs}
|
|
|
|
|
|
|
|
The \c{EXPORT} format-specific directive defines a global symbol to
|
|
|
|
be exported as a DLL symbol, for use if you are writing a DLL in
|
|
|
|
NASM. You still need to declare the symbol as \c{GLOBAL} as well as
|
|
|
|
using the \c{EXPORT} directive.
|
|
|
|
|
|
|
|
\c{EXPORT} takes one required parameter, which is the name of the
|
|
|
|
symbol you wish to export, as it was defined in your source file. An
|
|
|
|
optional second parameter (separated by white space from the first)
|
|
|
|
gives the \e{external} name of the symbol: the name by which you
|
|
|
|
wish the symbol to be known to programs using the DLL. If this name
|
|
|
|
is the same as the internal name, you may leave the second parameter
|
|
|
|
off.
|
|
|
|
|
|
|
|
Further parameters can be given to define attributes of the exported
|
|
|
|
symbol. These parameters, like the second, are separated by white
|
|
|
|
space. If further parameters are given, the external name must also
|
|
|
|
be specified, even if it is the same as the internal name. The
|
|
|
|
available attributes are:
|
|
|
|
|
|
|
|
\b \c{resident} indicates that the exported name is to be kept
|
|
|
|
resident by the system loader. This is an optimisation for
|
|
|
|
frequently used symbols imported by name.
|
|
|
|
|
|
|
|
\b \c{nodata} indicates that the exported symbol is a function which
|
|
|
|
does not make use of any initialised data.
|
|
|
|
|
|
|
|
\b \c{parm=NNN}, where \c{NNN} is an integer, sets the number of
|
|
|
|
parameter words for the case in which the symbol is a call gate
|
|
|
|
between 32-bit and 16-bit segments.
|
|
|
|
|
|
|
|
\b An attribute which is just a number indicates that the symbol
|
|
|
|
should be exported with an identifying number (ordinal), and gives
|
|
|
|
the desired number.
|
|
|
|
|
|
|
|
For example:
|
|
|
|
|
|
|
|
\c export myfunc
|
|
|
|
\c export myfunc TheRealMoreFormalLookingFunctionName
|
|
|
|
\c export myfunc myfunc 1234 ; export by ordinal
|
|
|
|
\c export myfunc myfunc resident parm=23 nodata
|
|
|
|
|
|
|
|
\S{dotdotstart} \i\c{..start}: Defining the \i{Program Entry
|
|
|
|
Point}
|
|
|
|
|
|
|
|
OMF linkers require exactly one of the object files being linked to
|
|
|
|
define the program entry point, where execution will begin when the
|
|
|
|
program is run. If the object file that defines the entry point is
|
|
|
|
assembled using NASM, you specify the entry point by declaring the
|
|
|
|
special symbol \c{..start} at the point where you wish execution to
|
|
|
|
begin.
|
|
|
|
|
|
|
|
\S{objextern} \c{obj} Extensions to the \c{EXTERN}
|
|
|
|
Directive\I{EXTERN, obj extensions to}
|
|
|
|
|
|
|
|
If you declare an external symbol with the directive
|
|
|
|
|
|
|
|
\c extern foo
|
|
|
|
|
|
|
|
then references such as \c{mov ax,foo} will give you the offset of
|
|
|
|
\c{foo} from its preferred segment base (as specified in whichever
|
|
|
|
module \c{foo} is actually defined in). So to access the contents of
|
|
|
|
\c{foo} you will usually need to do something like
|
|
|
|
|
|
|
|
\c mov ax,seg foo ; get preferred segment base
|
|
|
|
\c mov es,ax ; move it into ES
|
|
|
|
\c mov ax,[es:foo] ; and use offset `foo' from it
|
|
|
|
|
|
|
|
This is a little unwieldy, particularly if you know that an external
|
|
|
|
is going to be accessible from a given segment or group, say
|
|
|
|
\c{dgroup}. So if \c{DS} already contained \c{dgroup}, you could
|
|
|
|
simply code
|
|
|
|
|
|
|
|
\c mov ax,[foo wrt dgroup]
|
|
|
|
|
|
|
|
However, having to type this every time you want to access \c{foo}
|
|
|
|
can be a pain; so NASM allows you to declare \c{foo} in the
|
|
|
|
alternative form
|
|
|
|
|
|
|
|
\c extern foo:wrt dgroup
|
|
|
|
|
|
|
|
This form causes NASM to pretend that the preferred segment base of
|
|
|
|
\c{foo} is in fact \c{dgroup}; so the expression \c{seg foo} will
|
|
|
|
now return \c{dgroup}, and the expression \c{foo} is equivalent to
|
|
|
|
\c{foo wrt dgroup}.
|
|
|
|
|
|
|
|
This \I{default-WRT mechanism}default-\c{WRT} mechanism can be used
|
|
|
|
to make externals appear to be relative to any group or segment in
|
|
|
|
your program. It can also be applied to common variables: see
|
|
|
|
\k{objcommon}.
|
|
|
|
|
|
|
|
\S{objcommon} \c{obj} Extensions to the \c{COMMON}
|
|
|
|
Directive\I{COMMON, obj extensions to}
|
|
|
|
|
|
|
|
The \c{obj} format allows common variables to be either near\I{near
|
|
|
|
common variables} or far\I{far common variables}; NASM allows you to
|
|
|
|
specify which your variables should be by the use of the syntax
|
|
|
|
|
|
|
|
\c common nearvar 2:near ; `nearvar' is a near common
|
|
|
|
\c common farvar 10:far ; and `farvar' is far
|
|
|
|
|
|
|
|
Far common variables may be greater in size than 64Kb, and so the
|
|
|
|
OMF specification says that they are declared as a number of
|
|
|
|
\e{elements} of a given size. So a 10-byte far common variable could
|
|
|
|
be declared as ten one-byte elements, five two-byte elements, two
|
|
|
|
five-byte elements or one ten-byte element.
|
|
|
|
|
|
|
|
Some OMF linkers require the \I{element size, in common
|
|
|
|
variables}\I{common variables, element size}element size, as well as
|
|
|
|
the variable size, to match when resolving common variables declared
|
|
|
|
in more than one module. Therefore NASM must allow you to specify
|
|
|
|
the element size on your far common variables. This is done by the
|
|
|
|
following syntax:
|
|
|
|
|
|
|
|
\c common c_5by2 10:far 5 ; two five-byte elements
|
|
|
|
\c common c_2by5 10:far 2 ; five two-byte elements
|
|
|
|
|
|
|
|
If no element size is specified, the default is 1. Also, the \c{FAR}
|
|
|
|
keyword is not required when an element size is specified, since
|
|
|
|
only far commons may have element sizes at all. So the above
|
|
|
|
declarations could equivalently be
|
|
|
|
|
|
|
|
\c common c_5by2 10:5 ; two five-byte elements
|
|
|
|
\c common c_2by5 10:2 ; five two-byte elements
|
|
|
|
|
|
|
|
In addition to these extensions, the \c{COMMON} directive in \c{obj}
|
|
|
|
also supports default-\c{WRT} specification like \c{EXTERN} does
|
|
|
|
(explained in \k{objextern}). So you can also declare things like
|
|
|
|
|
|
|
|
\c common foo 10:wrt dgroup
|
|
|
|
\c common bar 16:far 2:wrt data
|
|
|
|
\c common baz 24:wrt data:6
|
|
|
|
|
|
|
|
\H{win32fmt} \i\c{win32}: Microsoft Win32 Object Files
|
|
|
|
|
|
|
|
The \c{win32} output format generates Microsoft Win32 object files,
|
|
|
|
suitable for passing to Microsoft linkers such as \i{Visual C++}.
|
|
|
|
Note that Borland Win32 compilers do not use this format, but use
|
|
|
|
\c{obj} instead (see \k{objfmt}).
|
|
|
|
|
|
|
|
\c{win32} provides a default output file-name extension of \c{.obj}.
|
|
|
|
|
|
|
|
Note that although Microsoft say that Win32 object files follow the
|
|
|
|
COFF (Common Object File Format) standard, the object files produced
|
|
|
|
by Microsoft Win32 compilers are not compatible with COFF linkers
|
|
|
|
such as DJGPP's, and vice versa. This is due to a difference of
|
|
|
|
opinion over the precise semantics of PC-relative relocations. To
|
|
|
|
produce COFF files suitable for DJGPP, use NASM's \c{coff} output
|
|
|
|
format; conversely, the \c{coff} format does not produce object
|
|
|
|
files that Win32 linkers can generate correct output from.
|
|
|
|
|
|
|
|
\S{win32sect} \c{win32} Extensions to the \c{SECTION}
|
|
|
|
Directive\I{SECTION, win32 extensions to}
|
|
|
|
|
|
|
|
Like the \c{obj} format, \c{win32} allows you to specify additional
|
|
|
|
information on the \c{SECTION} directive line, to control the type
|
|
|
|
and properties of sections you declare. Section types and properties
|
|
|
|
are generated automatically by NASM for the \i{standard section names}
|
|
|
|
\c{.text}, \c{.data} and \c{.bss}, but may still be overridden by
|
|
|
|
these qualifiers.
|
|
|
|
|
|
|
|
The available qualifiers are:
|
|
|
|
|
|
|
|
\b \c{code}, or equivalently \c{text}, defines the section to be a
|
|
|
|
code section. This marks the section as readable and executable, but
|
|
|
|
not writable, and also indicates to the linker that the type of the
|
|
|
|
section is code.
|
|
|
|
|
|
|
|
\b \c{data} and \c{bss} define the section to be a data section,
|
|
|
|
analogously to \c{code}. Data sections are marked as readable and
|
|
|
|
writable, but not executable. \c{data} declares an initialised data
|
|
|
|
section, whereas \c{bss} declares an uninitialised data section.
|
|
|
|
|
|
|
|
\b \c{info} defines the section to be an \i{informational section},
|
|
|
|
which is not included in the executable file by the linker, but may
|
|
|
|
(for example) pass information \e{to} the linker. For example,
|
|
|
|
declaring an \c{info}-type section called \i\c{.drectve} causes the
|
|
|
|
linker to interpret the contents of the section as command-line
|
|
|
|
options.
|
|
|
|
|
|
|
|
\b \c{align=}, used with a trailing number as in \c{obj}, gives the
|
|
|
|
\I{section alignment, in win32}\I{alignment, in win32
|
|
|
|
sections}alignment requirements of the section. The maximum you may
|
|
|
|
specify is 64: the Win32 object file format contains no means to
|
|
|
|
request a greater section alignment than this. If alignment is not
|
|
|
|
explicitly specified, the defaults are 16-byte alignment for code
|
|
|
|
sections, and 4-byte alignment for data (and BSS) sections.
|
|
|
|
Informational sections get a default alignment of 1 byte (no
|
|
|
|
alignment), though the value does not matter.
|
|
|
|
|
|
|
|
The defaults assumed by NASM if you do not specify the above
|
|
|
|
qualifiers are:
|
|
|
|
|
|
|
|
\c section .text code align=16
|
|
|
|
\c section .data data align=4
|
|
|
|
\c section .bss bss align=4
|
|
|
|
|
|
|
|
Any other section name is treated by default like \c{.text}.
|
|
|
|
|
|
|
|
\H{cofffmt} \i\c{coff}: \i{Common Object File Format}
|
|
|
|
|
|
|
|
The \c{coff} output type produces COFF object files suitable for
|
|
|
|
linking with the \i{DJGPP} linker.
|
|
|
|
|
|
|
|
\c{coff} provides a default output file-name extension of \c{.o}.
|
|
|
|
|
|
|
|
The \c{coff} format supports the same extensions to the \c{SECTION}
|
|
|
|
directive as \c{win32} does, except that the \c{align} qualifier and
|
|
|
|
the \c{info} section type are not supported.
|
|
|
|
|
|
|
|
\H{elffmt} \i\c{elf}: \i{Linux ELF}\I{Executable and Linkable
|
|
|
|
Format} Object Files
|
|
|
|
|
|
|
|
The \c{elf} output format generates ELF32 (Executable and Linkable
|
|
|
|
Format) object files, as used by Linux. \c{elf} provides a default
|
|
|
|
output file-name extension of \c{.o}.
|
|
|
|
|
|
|
|
\S{elfsect} \c{elf} Extensions to the \c{SECTION}
|
|
|
|
Directive\I{SECTION, elf extensions to}
|
|
|
|
|
|
|
|
Like the \c{obj} format, \c{elf} allows you to specify additional
|
|
|
|
information on the \c{SECTION} directive line, to control the type
|
|
|
|
and properties of sections you declare. Section types and properties
|
|
|
|
are generated automatically by NASM for the \i{standard section
|
|
|
|
names} \i\c{.text}, \i\c{.data} and \i\c{.bss}, but may still be
|
|
|
|
overridden by these qualifiers.
|
|
|
|
|
|
|
|
The available qualifiers are:
|
|
|
|
|
|
|
|
\b \i\c{alloc} defines the section to be one which is loaded into
|
|
|
|
memory when the program is run. \i\c{noalloc} defines it to be one
|
|
|
|
which is not, such as an informational or comment section.
|
|
|
|
|
|
|
|
\b \i\c{exec} defines the section to be one which should have execute
|
|
|
|
permission when the program is run. \i\c{noexec} defines it as one
|
|
|
|
which should not.
|
|
|
|
|
|
|
|
\b \i\c{write} defines the section to be one which should be writable
|
|
|
|
when the program is run. \i\c{nowrite} defines it as one which should
|
|
|
|
not.
|
|
|
|
|
|
|
|
\b \i\c{progbits} defines the section to be one with explicit contents
|
|
|
|
stored in the object file: an ordinary code or data section, for
|
|
|
|
example. \i\c{nobits} defines the section to be one with no explicit
|
|
|
|
contents given, such as a BSS section.
|
|
|
|
|
|
|
|
\b \c{align=}, used with a trailing number as in \c{obj}, gives the
|
|
|
|
\I{section alignment, in elf}\I{alignment, in elf sections}alignment
|
|
|
|
requirements of the section.
|
|
|
|
|
|
|
|
The defaults assumed by NASM if you do not specify the above
|
|
|
|
qualifiers are:
|
|
|
|
|
|
|
|
\c section .text progbits alloc exec nowrite align=16
|
|
|
|
\c section .data progbits alloc noexec write align=4
|
|
|
|
\c section .bss nobits alloc noexec write align=4
|
|
|
|
\c section other progbits alloc noexec nowrite align=1
|
|
|
|
|
|
|
|
(Any section name other than \c{.text}, \c{.data} and \c{.bss} is
|
|
|
|
treated by default like \c{other} in the above code.)
|
|
|
|
|
|
|
|
\S{elfwrt} \i{Position-Independent Code}\I{PIC}: \c{elf} Special
|
|
|
|
Symbols and \i\c{WRT}
|
|
|
|
|
|
|
|
The ELF specification contains enough features to allow
|
|
|
|
position-independent code (PIC) to be written, which makes \i{ELF
|
|
|
|
shared libraries} very flexible. However, it also means NASM has to
|
|
|
|
be able to generate a variety of strange relocation types in ELF
|
|
|
|
object files, if it is to be an assembler which can write PIC.
|
|
|
|
|
|
|
|
Since ELF does not support segment-base references, the \c{WRT}
|
|
|
|
operator is not used for its normal purpose; therefore NASM's
|
|
|
|
\c{elf} output format makes use of \c{WRT} for a different purpose,
|
|
|
|
namely the PIC-specific \I{relocations, PIC-specific}relocation
|
|
|
|
types.
|
|
|
|
|
|
|
|
\c{elf} defines five special symbols which you can use as the
|
|
|
|
right-hand side of the \c{WRT} operator to obtain PIC relocation
|
|
|
|
types. They are \i\c{..gotpc}, \i\c{..gotoff}, \i\c{..got},
|
|
|
|
\i\c{..plt} and \i\c{..sym}. Their functions are summarised here:
|
|
|
|
|
|
|
|
\b Referring to the symbol marking the global offset table base
|
|
|
|
using \c{wrt ..gotpc} will end up giving the distance from the
|
|
|
|
beginning of the current section to the global offset table.
|
|
|
|
(\i\c{_GLOBAL_OFFSET_TABLE_} is the standard symbol name used to
|
|
|
|
refer to the \i{GOT}.) So you would then need to add \i\c{$$} to the
|
|
|
|
result to get the real address of the GOT.
|
|
|
|
|
|
|
|
\b Referring to a location in one of your own sections using \c{wrt
|
|
|
|
..gotoff} will give the distance from the beginning of the GOT to
|
|
|
|
the specified location, so that adding on the address of the GOT
|
|
|
|
would give the real address of the location you wanted.
|
|
|
|
|
|
|
|
\b Referring to an external or global symbol using \c{wrt ..got}
|
|
|
|
causes the linker to build an entry \e{in} the GOT containing the
|
|
|
|
address of the symbol, and the reference gives the distance from the
|
|
|
|
beginning of the GOT to the entry; so you can add on the address of
|
|
|
|
the GOT, load from the resulting address, and end up with the
|
|
|
|
address of the symbol.
|
|
|
|
|
|
|
|
\b Referring to a procedure name using \c{wrt ..plt} causes the
|
|
|
|
linker to build a \i{procedure linkage table} entry for the symbol,
|
|
|
|
and the reference gives the address of the \i{PLT} entry. You can
|
|
|
|
only use this in contexts which would generate a PC-relative
|
|
|
|
relocation normally (i.e. as the destination for \c{CALL} or
|
|
|
|
\c{JMP}), since ELF contains no relocation type to refer to PLT
|
|
|
|
entries absolutely.
|
|
|
|
|
|
|
|
\b Referring to a symbol name using \c{wrt ..sym} causes NASM to
|
|
|
|
write an ordinary relocation, but instead of making the relocation
|
|
|
|
relative to the start of the section and then adding on the offset
|
|
|
|
to the symbol, it will write a relocation record aimed directly at
|
|
|
|
the symbol in question. The distinction is a necessary one due to a
|
|
|
|
peculiarity of the dynamic linker.
|
|
|
|
|
|
|
|
A fuller explanation of how to use these relocation types to write
|
|
|
|
shared libraries entirely in NASM is given in \k{picdll}.
|
|
|
|
|
|
|
|
\S{elfglob} \c{elf} Extensions to the \c{GLOBAL} Directive\I{GLOBAL,
|
|
|
|
elf extensions to}\I{GLOBAL, aoutb extensions to}
|
|
|
|
|
|
|
|
ELF object files can contain more information about a global symbol
|
|
|
|
than just its address: they can contain the \I{symbol sizes,
|
|
|
|
specifying}\I{size, of symbols}size of the symbol and its \I{symbol
|
|
|
|
types, specifying}\I{type, of symbols}type as well. These are not
|
|
|
|
merely debugger conveniences, but are actually necessary when the
|
|
|
|
program being written is a \i{shared library}. NASM therefore
|
|
|
|
supports some extensions to the \c{GLOBAL} directive, allowing you
|
|
|
|
to specify these features.
|
|
|
|
|
|
|
|
You can specify whether a global symbol is a function or a data
|
|
|
|
object by suffixing the name with a colon and the word
|
|
|
|
\i\c{function} or \i\c{data}. (\i\c{object} is a synonym for
|
|
|
|
\c{data}.) For example:
|
|
|
|
|
|
|
|
\c global hashlookup:function, hashtable:data
|
|
|
|
|
|
|
|
exports the global symbol \c{hashlookup} as a function and
|
|
|
|
\c{hashtable} as a data object.
|
|
|
|
|
|
|
|
You can also specify the size of the data associated with the
|
|
|
|
symbol, as a numeric expression (which may involve labels, and even
|
|
|
|
forward references) after the type specifier. Like this:
|
|
|
|
|
|
|
|
\c global hashtable:data (hashtable.end - hashtable)
|
|
|
|
\c hashtable:
|
|
|
|
\c db this,that,theother ; some data here
|
|
|
|
\c .end:
|
|
|
|
|
|
|
|
This makes NASM automatically calculate the length of the table and
|
|
|
|
place that information into the ELF symbol table.
|
|
|
|
|
|
|
|
Declaring the type and size of global symbols is necessary when
|
|
|
|
writing shared library code. For more information, see
|
|
|
|
\k{picglobal}.
|
|
|
|
|
|
|
|
\S{elfcomm} \c{elf} Extensions to the \c{COMMON} Directive\I{COMMON,
|
|
|
|
elf extensions to}
|
|
|
|
|
|
|
|
ELF also allows you to specify alignment requirements \I{common
|
|
|
|
variables, alignment in elf}\I{alignment, of elf common variables}on
|
|
|
|
common variables. This is done by putting a number (which must be a
|
|
|
|
power of two) after the name and size of the common variable,
|
|
|
|
separated (as usual) by a colon. For example, an array of
|
|
|
|
doublewords would benefit from 4-byte alignment:
|
|
|
|
|
|
|
|
\c common dwordarray 128:4
|
|
|
|
|
|
|
|
This declares the total size of the array to be 128 bytes, and
|
|
|
|
requires that it be aligned on a 4-byte boundary.
|
|
|
|
|
|
|
|
\H{aoutfmt} \i\c{aout}: Linux \I{a.out, Linux version}\c{a.out} Object Files
|
|
|
|
|
|
|
|
The \c{aout} format generates \c{a.out} object files, in the form
|
|
|
|
used by early Linux systems. (These differ from other \c{a.out}
|
|
|
|
object files in that the magic number in the first four bytes of the
|
|
|
|
file is different. Also, some implementations of \c{a.out}, for
|
|
|
|
example NetBSD's, support position-independent code, which Linux's
|
|
|
|
implementation doesn't.)
|
|
|
|
|
|
|
|
\c{aout} provides a default output file-name extension of \c{.o}.
|
|
|
|
|
|
|
|
\c{a.out} is a very simple object format. It supports no special
|
|
|
|
directives, no special symbols, no use of \c{SEG} or \c{WRT}, and no
|
|
|
|
extensions to any standard directives. It supports only the three
|
|
|
|
\i{standard section names} \i\c{.text}, \i\c{.data} and \i\c{.bss}.
|
|
|
|
|
|
|
|
\H{aoutbfmt} \i\c{aoutb}: \i{NetBSD}/\i{FreeBSD}/\i{OpenBSD}
|
|
|
|
\I{a.out, BSD version}\c{a.out} Object Files
|
|
|
|
|
|
|
|
The \c{aoutb} format generates \c{a.out} object files, in the form
|
|
|
|
used by the various free BSD Unix clones, NetBSD, FreeBSD and
|
|
|
|
OpenBSD. For simple object files, this object format is exactly the
|
|
|
|
same as \c{aout} except for the magic number in the first four bytes
|
|
|
|
of the file. However, the \c{aoutb} format supports
|
|
|
|
\I{PIC}\i{position-independent code} in the same way as the \c{elf}
|
|
|
|
format, so you can use it to write BSD \i{shared libraries}.
|
|
|
|
|
|
|
|
\c{aoutb} provides a default output file-name extension of \c{.o}.
|
|
|
|
|
|
|
|
\c{aoutb} supports no special directives, no special symbols, and
|
|
|
|
only the three \i{standard section names} \i\c{.text}, \i\c{.data}
|
|
|
|
and \i\c{.bss}. However, it also supports the same use of \i\c{WRT} as
|
|
|
|
\c{elf} does, to provide position-independent code relocation types.
|
|
|
|
See \k{elfwrt} for full documentation of this feature.
|
|
|
|
|
|
|
|
\c{aoutb} also supports the same extensions to the \c{GLOBAL}
|
|
|
|
directive as \c{elf} does: see \k{elfglob} for documentation of
|
|
|
|
this.
|
|
|
|
|
|
|
|
\H{as86fmt} \c{as86}: Linux \i\c{as86} Object Files
|
|
|
|
|
|
|
|
The Linux 16-bit assembler \c{as86} has its own non-standard object
|
|
|
|
file format. Although its companion linker \i\c{ld86} produces
|
|
|
|
something close to ordinary \c{a.out} binaries as output, the object
|
|
|
|
file format used to communicate between \c{as86} and \c{ld86} is not
|
|
|
|
itself \c{a.out}.
|
|
|
|
|
|
|
|
NASM supports this format, just in case it is useful, as \c{as86}.
|
|
|
|
\c{as86} provides a default output file-name extension of \c{.o}.
|
|
|
|
|
|
|
|
\c{as86} is a very simple object format (from the NASM user's point
|
|
|
|
of view). It supports no special directives, no special symbols, no
|
|
|
|
use of \c{SEG} or \c{WRT}, and no extensions to any standard
|
|
|
|
directives. It supports only the three \i{standard section names}
|
|
|
|
\i\c{.text}, \i\c{.data} and \i\c{.bss}.
|
|
|
|
|
|
|
|
\H{rdffmt} \I{RDOFF}\i\c{rdf}: \i{Relocatable Dynamic Object File
|
|
|
|
Format}
|
|
|
|
|
|
|
|
The \c{rdf} output format produces RDOFF object files. RDOFF
|
|
|
|
(Relocatable Dynamic Object File Format) is a home-grown object-file
|
|
|
|
format, designed alongside NASM itself and reflecting in its file
|
|
|
|
format the internal structure of the assembler.
|
|
|
|
|
|
|
|
RDOFF is not used by any well-known operating systems. Those writing
|
|
|
|
their own systems, however, may well wish to use RDOFF as their
|
|
|
|
object format, on the grounds that it is designed primarily for
|
|
|
|
simplicity and contains very little file-header bureaucracy.
|
|
|
|
|
|
|
|
The Unix NASM archive, and the DOS archive which includes sources,
|
|
|
|
both contain an \I{rdoff subdirectory}\c{rdoff} subdirectory holding
|
|
|
|
a set of RDOFF utilities: an RDF linker, an RDF static-library
|
|
|
|
manager, an RDF file dump utility, and a program which will load and
|
|
|
|
execute an RDF executable under Linux.
|
|
|
|
|
|
|
|
\c{rdf} supports only the \i{standard section names} \i\c{.text},
|
|
|
|
\i\c{.data} and \i\c{.bss}.
|
|
|
|
|
|
|
|
\S{rdflib} Requiring a Library: The \i\c{LIBRARY} Directive
|
|
|
|
|
|
|
|
RDOFF contains a mechanism for an object file to demand a given
|
|
|
|
library to be linked to the module, either at load time or run time.
|
|
|
|
This is done by the \c{LIBRARY} directive, which takes one argument
|
|
|
|
which is the name of the module:
|
|
|
|
|
|
|
|
\c library mylib.rdl
|
|
|
|
|
|
|
|
\H{dbgfmt} \i\c{dbg}: Debugging Format
|
|
|
|
|
|
|
|
The \c{dbg} output format is not built into NASM in the default
|
|
|
|
configuration. If you are building your own NASM executable from the
|
|
|
|
sources, you can define \i\c{OF_DBG} in \c{outform.h} or on the
|
|
|
|
compiler command line, and obtain the \c{dbg} output format.
|
|
|
|
|
|
|
|
The \c{dbg} format does not output an object file as such; instead,
|
|
|
|
it outputs a text file which contains a complete list of all the
|
|
|
|
transactions between the main body of NASM and the output-format
|
|
|
|
back end module. It is primarily intended to aid people who want to
|
|
|
|
write their own output drivers, so that they can get a clearer idea
|
|
|
|
of the various requests the main program makes of the output driver,
|
|
|
|
and in what order they happen.
|
|
|
|
|
|
|
|
For simple files, one can easily use the \c{dbg} format like this:
|
|
|
|
|
|
|
|
\c nasm -f dbg filename.asm
|
|
|
|
|
|
|
|
which will generate a diagnostic file called \c{filename.dbg}.
|
|
|
|
However, this will not work well on files which were designed for a
|
|
|
|
different object format, because each object format defines its own
|
|
|
|
macros (usually user-level forms of directives), and those macros
|
|
|
|
will not be defined in the \c{dbg} format. Therefore it can be
|
|
|
|
useful to run NASM twice, in order to do the preprocessing with the
|
|
|
|
native object format selected:
|
|
|
|
|
|
|
|
\c nasm -e -f rdf -o rdfprog.i rdfprog.asm
|
|
|
|
\c nasm -a -f dbg rdfprog.i
|
|
|
|
|
|
|
|
This preprocesses \c{rdfprog.asm} into \c{rdfprog.i}, keeping the
|
|
|
|
\c{rdf} object format selected in order to make sure RDF special
|
|
|
|
directives are converted into primitive form correctly. Then the
|
|
|
|
preprocessed source is fed through the \c{dbg} format to generate
|
|
|
|
the final diagnostic output.
|
|
|
|
|
|
|
|
This workaround will still typically not work for programs intended
|
|
|
|
for \c{obj} format, because the \c{obj} \c{SEGMENT} and \c{GROUP}
|
|
|
|
directives have side effects of defining the segment and group names
|
|
|
|
as symbols; \c{dbg} will not do this, so the program will not
|
|
|
|
assemble. You will have to work around that by defining the symbols
|
|
|
|
yourself (using \c{EXTERN}, for example) if you really need to get a
|
|
|
|
\c{dbg} trace of an \c{obj}-specific source file.
|
|
|
|
|
|
|
|
\c{dbg} accepts any section name and any directives at all, and logs
|
|
|
|
them all to its output file.
|
|
|
|
|
|
|
|
\C{16bit} Writing 16-bit Code (DOS, Windows 3/3.1)
|
|
|
|
|
|
|
|
This chapter attempts to cover some of the common issues encountered
|
|
|
|
when writing 16-bit code to run under MS-DOS or Windows 3.x. It
|
|
|
|
covers how to link programs to produce \c{.EXE} or \c{.COM} files,
|
|
|
|
how to write \c{.SYS} device drivers, and how to interface assembly
|
|
|
|
language code with 16-bit C compilers and with Borland Pascal.
|
|
|
|
|
|
|
|
\H{exefiles} Producing \i\c{.EXE} Files
|
|
|
|
|
|
|
|
Any large program written under DOS needs to be built as a \c{.EXE}
|
|
|
|
file: only \c{.EXE} files have the necessary internal structure
|
|
|
|
required to span more than one 64K segment. \i{Windows} programs,
|
|
|
|
also, have to be built as \c{.EXE} files, since Windows does not
|
|
|
|
support the \c{.COM} format.
|
|
|
|
|
|
|
|
In general, you generate \c{.EXE} files by using the \c{obj} output
|
|
|
|
format to produce one or more \i\c{.OBJ} files, and then linking
|
|
|
|
them together using a linker. However, NASM also supports the direct
|
|
|
|
generation of simple DOS \c{.EXE} files using the \c{bin} output
|
|
|
|
format (by using \c{DB} and \c{DW} to construct the \c{.EXE} file
|
|
|
|
header), and a macro package is supplied to do this. Thanks to
|
|
|
|
Yann Guidon for contributing the code for this.
|
|
|
|
|
|
|
|
NASM may also support \c{.EXE} natively as another output format in
|
|
|
|
future releases.
|
|
|
|
|
|
|
|
\S{objexe} Using the \c{obj} Format To Generate \c{.EXE} Files
|
|
|
|
|
|
|
|
This section describes the usual method of generating \c{.EXE} files
|
|
|
|
by linking \c{.OBJ} files together.
|
|
|
|
|
|
|
|
Most 16-bit programming language packages come with a suitable
|
|
|
|
linker; if you have none of these, there is a free linker called
|
|
|
|
\i{VAL}\I{linker, free}, available in \c{LZH} archive format from
|
|
|
|
\W{ftp://x2ftp.oulu.fi/pub/msdos/programming/lang/}\i\c{x2ftp.oulu.fi}.
|
|
|
|
An LZH archiver can be found at
|
|
|
|
\W{ftp://ftp.simtel.net/pub/simtelnet/msdos/arcers}\i\c{ftp.simtel.net}.
|
|
|
|
There is another `free' linker (though this one doesn't come with
|
|
|
|
sources) called \i{FREELINK}, available from
|
|
|
|
\W{http://www.pcorner.com/tpc/old/3-101.html}\i\c{www.pcorner.com}.
|
|
|
|
A third, \i\c{djlink}, written by DJ Delorie, is available at
|
|
|
|
\W{http://www.delorie.com/djgpp/16bit/djlink/}\i\c{www.delorie.com}.
|
|
|
|
|
|
|
|
When linking several \c{.OBJ} files into a \c{.EXE} file, you should
|
|
|
|
ensure that exactly one of them has a start point defined (using the
|
|
|
|
\I{program entry point}\i\c{..start} special symbol defined by the
|
|
|
|
\c{obj} format: see \k{dotdotstart}). If no module defines a start
|
|
|
|
point, the linker will not know what value to give the entry-point
|
|
|
|
field in the output file header; if more than one defines a start
|
|
|
|
point, the linker will not know \e{which} value to use.
|
|
|
|
|
|
|
|
An example of a NASM source file which can be assembled to a
|
|
|
|
\c{.OBJ} file and linked on its own to a \c{.EXE} is given here. It
|
|
|
|
demonstrates the basic principles of defining a stack, initialising
|
|
|
|
the segment registers, and declaring a start point. This file is
|
|
|
|
also provided in the \I{test subdirectory}\c{test} subdirectory of
|
|
|
|
the NASM archives, under the name \c{objexe.asm}.
|
|
|
|
|
|
|
|
\c segment code
|
|
|
|
\c
|
|
|
|
\c ..start: mov ax,data
|
|
|
|
\c mov ds,ax
|
|
|
|
\c mov ax,stack
|
|
|
|
\c mov ss,ax
|
|
|
|
\c mov sp,stacktop
|
|
|
|
|
|
|
|
This initial piece of code sets up \c{DS} to point to the data
|
|
|
|
segment, and initialises \c{SS} and \c{SP} to point to the top of
|
|
|
|
the provided stack. Notice that interrupts are implicitly disabled
|
|
|
|
for one instruction after a move into \c{SS}, precisely for this
|
|
|
|
situation, so that there's no chance of an interrupt occurring
|
|
|
|
between the loads of \c{SS} and \c{SP} and not having a stack to
|
|
|
|
execute on.
|
|
|
|
|
|
|
|
Note also that the special symbol \c{..start} is defined at the
|
|
|
|
beginning of this code, which means that this will be the entry point
|
|
|
|
into the resulting executable file.
|
|
|
|
|
|
|
|
\c mov dx,hello
|
|
|
|
\c mov ah,9
|
|
|
|
\c int 0x21
|
|
|
|
|
|
|
|
The above is the main program: load \c{DS:DX} with a pointer to the
|
|
|
|
greeting message (\c{hello} is implicitly relative to the segment
|
|
|
|
\c{data}, which was loaded into \c{DS} in the setup code, so the
|
|
|
|
full pointer is valid), and call the DOS print-string function.
|
|
|
|
|
|
|
|
\c mov ax,0x4c00
|
|
|
|
\c int 0x21
|
|
|
|
|
|
|
|
This terminates the program using another DOS system call.
|
|
|
|
|
|
|
|
\c segment data
|
|
|
|
\c hello: db 'hello, world', 13, 10, '$'
|
|
|
|
|
|
|
|
The data segment contains the string we want to display.
|
|
|
|
|
|
|
|
\c segment stack stack
|
|
|
|
\c resb 64
|
|
|
|
\c stacktop:
|
|
|
|
|
|
|
|
The above code declares a stack segment containing 64 bytes of
|
|
|
|
uninitialised stack space, and points \c{stacktop} at the top of it.
|
|
|
|
The directive \c{segment stack stack} defines a segment \e{called}
|
|
|
|
\c{stack}, and also of \e{type} \c{STACK}. The latter is not
|
|
|
|
necessary to the correct running of the program, but linkers are
|
|
|
|
likely to issue warnings or errors if your program has no segment of
|
|
|
|
type \c{STACK}.
|
|
|
|
|
|
|
|
The above file, when assembled into a \c{.OBJ} file, will link on
|
|
|
|
its own to a valid \c{.EXE} file, which when run will print `hello,
|
|
|
|
world' and then exit.
|
|
|
|
|
|
|
|
\S{binexe} Using the \c{bin} Format To Generate \c{.EXE} Files
|
|
|
|
|
|
|
|
The \c{.EXE} file format is simple enough that it's possible to
|
|
|
|
build a \c{.EXE} file by writing a pure-binary program and sticking
|
|
|
|
a 32-byte header on the front. This header is simple enough that it
|
|
|
|
can be generated using \c{DB} and \c{DW} commands by NASM itself, so
|
|
|
|
that you can use the \c{bin} output format to directly generate
|
|
|
|
\c{.EXE} files.
|
|
|
|
|
|
|
|
Included in the NASM archives, in the \I{misc subdirectory}\c{misc}
|
|
|
|
subdirectory, is a file \i\c{exebin.mac} of macros. It defines three
|
|
|
|
macros: \i\c{EXE_begin}, \i\c{EXE_stack} and \i\c{EXE_end}.
|
|
|
|
|
|
|
|
To produce a \c{.EXE} file using this method, you should start by
|
|
|
|
using \c{%include} to load the \c{exebin.mac} macro package into
|
|
|
|
your source file. You should then issue the \c{EXE_begin} macro call
|
|
|
|
(which takes no arguments) to generate the file header data. Then
|
|
|
|
write code as normal for the \c{bin} format - you can use all three
|
|
|
|
standard sections \c{.text}, \c{.data} and \c{.bss}. At the end of
|
|
|
|
the file you should call the \c{EXE_end} macro (again, no arguments),
|
|
|
|
which defines some symbols to mark section sizes, and these symbols
|
|
|
|
are referred to in the header code generated by \c{EXE_begin}.
|
|
|
|
|
|
|
|
In this model, the code you end up writing starts at \c{0x100}, just
|
|
|
|
like a \c{.COM} file - in fact, if you strip off the 32-byte header
|
|
|
|
from the resulting \c{.EXE} file, you will have a valid \c{.COM}
|
|
|
|
program. All the segment bases are the same, so you are limited to a
|
|
|
|
64K program, again just like a \c{.COM} file. Note that an \c{ORG}
|
|
|
|
directive is issued by the \c{EXE_begin} macro, so you should not
|
|
|
|
explicitly issue one of your own.
|
|
|
|
|
|
|
|
You can't directly refer to your segment base value, unfortunately,
|
|
|
|
since this would require a relocation in the header, and things
|
|
|
|
would get a lot more complicated. So you should get your segment
|
|
|
|
base by copying it out of \c{CS} instead.
|
|
|
|
|
|
|
|
On entry to your \c{.EXE} file, \c{SS:SP} are already set up to
|
|
|
|
point to the top of a 2KB stack. You can adjust the default stack
|
|
|
|
size of 2KB by calling the \c{EXE_stack} macro. For example, to
|
|
|
|
change the stack size of your program to 64 bytes, you would call
|
|
|
|
\c{EXE_stack 64}.
|
|
|
|
|
|
|
|
A sample program which generates a \c{.EXE} file in this way is
|
|
|
|
given in the \c{test} subdirectory of the NASM archive, as
|
|
|
|
\c{binexe.asm}.
|
|
|
|
|
|
|
|
\H{comfiles} Producing \i\c{.COM} Files
|
|
|
|
|
|
|
|
While large DOS programs must be written as \c{.EXE} files, small
|
|
|
|
ones are often better written as \c{.COM} files. \c{.COM} files are
|
|
|
|
pure binary, and therefore most easily produced using the \c{bin}
|
|
|
|
output format.
|
|
|
|
|
|
|
|
\S{combinfmt} Using the \c{bin} Format To Generate \c{.COM} Files
|
|
|
|
|
|
|
|
\c{.COM} files expect to be loaded at offset \c{100h} into their
|
|
|
|
segment (though the segment may change). Execution then begins at
|
|
|
|
\I\c{ORG}\c{100h}, i.e. right at the start of the program. So to
|
|
|
|
write a \c{.COM} program, you would create a source file looking
|
|
|
|
like
|
|
|
|
|
|
|
|
\c org 100h
|
|
|
|
\c section .text
|
|
|
|
\c start: ; put your code here
|
|
|
|
\c section .data
|
|
|
|
\c ; put data items here
|
|
|
|
\c section .bss
|
|
|
|
\c ; put uninitialised data here
|
|
|
|
|
|
|
|
The \c{bin} format puts the \c{.text} section first in the file, so
|
|
|
|
you can declare data or BSS items before beginning to write code if
|
|
|
|
you want to and the code will still end up at the front of the file
|
|
|
|
where it belongs.
|
|
|
|
|
|
|
|
The BSS (uninitialised data) section does not take up space in the
|
|
|
|
\c{.COM} file itself: instead, addresses of BSS items are resolved
|
|
|
|
to point at space beyond the end of the file, on the grounds that
|
|
|
|
this will be free memory when the program is run. Therefore you
|
|
|
|
should not rely on your BSS being initialised to all zeros when you
|
|
|
|
run.
|
|
|
|
|
|
|
|
To assemble the above program, you should use a command line like
|
|
|
|
|
|
|
|
\c nasm myprog.asm -fbin -o myprog.com
|
|
|
|
|
|
|
|
The \c{bin} format would produce a file called \c{myprog} if no
|
|
|
|
explicit output file name were specified, so you have to override it
|
|
|
|
and give the desired file name.
|
|
|
|
|
|
|
|
\S{comobjfmt} Using the \c{obj} Format To Generate \c{.COM} Files
|
|
|
|
|
|
|
|
If you are writing a \c{.COM} program as more than one module, you
|
|
|
|
may wish to assemble several \c{.OBJ} files and link them together
|
|
|
|
into a \c{.COM} program. You can do this, provided you have a linker
|
|
|
|
capable of outputting \c{.COM} files directly (\i{TLINK} does this),
|
|
|
|
or alternatively a converter program such as \i\c{EXE2BIN} to
|
|
|
|
transform the \c{.EXE} file output from the linker into a \c{.COM}
|
|
|
|
file.
|
|
|
|
|
|
|
|
If you do this, you need to take care of several things:
|
|
|
|
|
|
|
|
\b The first object file containing code should start its code
|
|
|
|
segment with a line like \c{RESB 100h}. This is to ensure that the
|
|
|
|
code begins at offset \c{100h} relative to the beginning of the code
|
|
|
|
segment, so that the linker or converter program does not have to
|
|
|
|
adjust address references within the file when generating the
|
|
|
|
\c{.COM} file. Other assemblers use an \i\c{ORG} directive for this
|
|
|
|
purpose, but \c{ORG} in NASM is a format-specific directive to the
|
|
|
|
\c{bin} output format, and does not mean the same thing as it does
|
|
|
|
in MASM-compatible assemblers.
|
|
|
|
|
|
|
|
\b You don't need to define a stack segment.
|
|
|
|
|
|
|
|
\b All your segments should be in the same group, so that every time
|
|
|
|
your code or data references a symbol offset, all offsets are
|
|
|
|
relative to the same segment base. This is because, when a \c{.COM}
|
|
|
|
file is loaded, all the segment registers contain the same value.
|
|
|
|
|
|
|
|
\H{sysfiles} Producing \i\c{.SYS} Files
|
|
|
|
|
|
|
|
\i{MS-DOS device drivers} - \c{.SYS} files - are pure binary files,
|
|
|
|
similar to \c{.COM} files, except that they start at origin zero
|
|
|
|
rather than \c{100h}. Therefore, if you are writing a device driver
|
|
|
|
using the \c{bin} format, you do not need the \c{ORG} directive,
|
|
|
|
since the default origin for \c{bin} is zero. Similarly, if you are
|
|
|
|
using \c{obj}, you do not need the \c{RESB 100h} at the start of
|
|
|
|
your code segment.
|
|
|
|
|
|
|
|
\c{.SYS} files start with a header structure, containing pointers to
|
|
|
|
the various routines inside the driver which do the work. This
|
|
|
|
structure should be defined at the start of the code segment, even
|
|
|
|
though it is not actually code.
|
|
|
|
|
|
|
|
For more information on the format of \c{.SYS} files, and the data
|
|
|
|
which has to go in the header structure, a list of books is given in
|
|
|
|
the Frequently Asked Questions list for the newsgroup
|
|
|
|
\W{news:comp.os.msdos.programmer}\i\c{comp.os.msdos.programmer}.
|
|
|
|
|
|
|
|
\H{16c} Interfacing to 16-bit C Programs
|
|
|
|
|
|
|
|
This section covers the basics of writing assembly routines that
|
|
|
|
call, or are called from, C programs. To do this, you would
|
|
|
|
typically write an assembly module as a \c{.OBJ} file, and link it
|
|
|
|
with your C modules to produce a \i{mixed-language program}.
|
|
|
|
|
|
|
|
\S{16cunder} External Symbol Names
|
|
|
|
|
|
|
|
\I{C symbol names}\I{underscore, in C symbols}C compilers have the
|
|
|
|
convention that the names of all global symbols (functions or data)
|
|
|
|
they define are formed by prefixing an underscore to the name as it
|
|
|
|
appears in the C program. So, for example, the function a C
|
|
|
|
programmer thinks of as \c{printf} appears to an assembly language
|
|
|
|
programmer as \c{_printf}. This means that in your assembly
|
|
|
|
programs, you can define symbols without a leading underscore, and
|
|
|
|
not have to worry about name clashes with C symbols.
|
|
|
|
|
|
|
|
If you find the underscores inconvenient, you can define macros to
|
|
|
|
replace the \c{GLOBAL} and \c{EXTERN} directives as follows:
|
|
|
|
|
|
|
|
\c %macro cglobal 1
|
|
|
|
\c global _%1
|
|
|
|
\c %define %1 _%1
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
\c %macro cextern 1
|
|
|
|
\c extern _%1
|
|
|
|
\c %define %1 _%1
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
(These forms of the macros only take one argument at a time; a
|
|
|
|
\c{%rep} construct could solve this.)
|
|
|
|
|
|
|
|
If you then declare an external like this:
|
|
|
|
|
|
|
|
\c cextern printf
|
|
|
|
|
|
|
|
then the macro will expand it as
|
|
|
|
|
|
|
|
\c extern _printf
|
|
|
|
\c %define printf _printf
|
|
|
|
|
|
|
|
Thereafter, you can reference \c{printf} as if it were a symbol, and
|
|
|
|
the preprocessor will put the leading underscore on where necessary.
|
|
|
|
|
|
|
|
The \c{cglobal} macro works similarly. You must use \c{cglobal}
|
|
|
|
before defining the symbol in question, but you would have had to do
|
|
|
|
that anyway if you used \c{GLOBAL}.
|
|
|
|
|
|
|
|
\S{16cmodels} \i{Memory Models}
|
|
|
|
|
|
|
|
NASM contains no mechanism to support the various C memory models
|
|
|
|
directly; you have to keep track yourself of which one you are
|
|
|
|
writing for. This means you have to keep track of the following
|
|
|
|
things:
|
|
|
|
|
|
|
|
\b In models using a single code segment (tiny, small and compact),
|
|
|
|
functions are near. This means that function pointers, when stored
|
|
|
|
in data segments or pushed on the stack as function arguments, are
|
|
|
|
16 bits long and contain only an offset field (the \c{CS} register
|
|
|
|
never changes its value, and always gives the segment part of the
|
|
|
|
full function address), and that functions are called using ordinary
|
|
|
|
near \c{CALL} instructions and return using \c{RETN} (which, in
|
|
|
|
NASM, is synonymous with \c{RET} anyway). This means both that you
|
|
|
|
should write your own routines to return with \c{RETN}, and that you
|
|
|
|
should call external C routines with near \c{CALL} instructions.
|
|
|
|
|
|
|
|
\b In models using more than one code segment (medium, large and
|
|
|
|
huge), functions are far. This means that function pointers are 32
|
|
|
|
bits long (consisting of a 16-bit offset followed by a 16-bit
|
|
|
|
segment), and that functions are called using \c{CALL FAR} (or
|
|
|
|
\c{CALL seg:offset}) and return using \c{RETF}. Again, you should
|
|
|
|
therefore write your own routines to return with \c{RETF} and use
|
|
|
|
\c{CALL FAR} to call external routines.
|
|
|
|
|
|
|
|
\b In models using a single data segment (tiny, small and medium),
|
|
|
|
data pointers are 16 bits long, containing only an offset field (the
|
|
|
|
\c{DS} register doesn't change its value, and always gives the
|
|
|
|
segment part of the full data item address).
|
|
|
|
|
|
|
|
\b In models using more than one data segment (compact, large and
|
|
|
|
huge), data pointers are 32 bits long, consisting of a 16-bit offset
|
|
|
|
followed by a 16-bit segment. You should still be careful not to
|
|
|
|
modify \c{DS} in your routines without restoring it afterwards, but
|
|
|
|
\c{ES} is free for you to use to access the contents of 32-bit data
|
|
|
|
pointers you are passed.
|
|
|
|
|
|
|
|
\b The huge memory model allows single data items to exceed 64K in
|
|
|
|
size. In all other memory models, you can access the whole of a data
|
|
|
|
item just by doing arithmetic on the offset field of the pointer you
|
|
|
|
are given, whether a segment field is present or not; in huge model,
|
|
|
|
you have to be more careful of your pointer arithmetic.
|
|
|
|
|
|
|
|
\b In most memory models, there is a \e{default} data segment, whose
|
|
|
|
segment address is kept in \c{DS} throughout the program. This data
|
|
|
|
segment is typically the same segment as the stack, kept in \c{SS},
|
|
|
|
so that functions' local variables (which are stored on the stack)
|
|
|
|
and global data items can both be accessed easily without changing
|
|
|
|
\c{DS}. Particularly large data items are typically stored in other
|
|
|
|
segments. However, some memory models (though not the standard
|
|
|
|
ones, usually) allow the assumption that \c{SS} and \c{DS} hold the
|
|
|
|
same value to be removed. Be careful about functions' local
|
|
|
|
variables in this latter case.
|
|
|
|
|
|
|
|
In models with a single code segment, the segment is called
|
|
|
|
\i\c{_TEXT}, so your code segment must also go by this name in order
|
|
|
|
to be linked into the same place as the main code segment. In models
|
|
|
|
with a single data segment, or with a default data segment, it is
|
|
|
|
called \i\c{_DATA}.
|
|
|
|
|
|
|
|
\S{16cfunc} Function Definitions and Function Calls
|
|
|
|
|
|
|
|
\I{functions, C calling convention}The \i{C calling convention} in
|
|
|
|
16-bit programs is as follows. In the following description, the
|
|
|
|
words \e{caller} and \e{callee} are used to denote the function
|
|
|
|
doing the calling and the function which gets called.
|
|
|
|
|
|
|
|
\b The caller pushes the function's parameters on the stack, one
|
|
|
|
after another, in reverse order (right to left, so that the first
|
|
|
|
argument specified to the function is pushed last).
|
|
|
|
|
|
|
|
\b The caller then executes a \c{CALL} instruction to pass control
|
|
|
|
to the callee. This \c{CALL} is either near or far depending on the
|
|
|
|
memory model.
|
|
|
|
|
|
|
|
\b The callee receives control, and typically (although this is not
|
|
|
|
actually necessary, in functions which do not need to access their
|
|
|
|
parameters) starts by saving the value of \c{SP} in \c{BP} so as to
|
|
|
|
be able to use \c{BP} as a base pointer to find its parameters on
|
|
|
|
the stack. However, the caller was probably doing this too, so part
|
|
|
|
of the calling convention states that \c{BP} must be preserved by
|
|
|
|
any C function. Hence the callee, if it is going to set up \c{BP} as
|
|
|
|
a \i\e{frame pointer}, must push the previous value first.
|
|
|
|
|
|
|
|
\b The callee may then access its parameters relative to \c{BP}.
|
|
|
|
The word at \c{[BP]} holds the previous value of \c{BP} as it was
|
|
|
|
pushed; the next word, at \c{[BP+2]}, holds the offset part of the
|
|
|
|
return address, pushed implicitly by \c{CALL}. In a small-model
|
|
|
|
(near) function, the parameters start after that, at \c{[BP+4]}; in
|
|
|
|
a large-model (far) function, the segment part of the return address
|
|
|
|
lives at \c{[BP+4]}, and the parameters begin at \c{[BP+6]}. The
|
|
|
|
leftmost parameter of the function, since it was pushed last, is
|
|
|
|
accessible at this offset from \c{BP}; the others follow, at
|
|
|
|
successively greater offsets. Thus, in a function such as \c{printf}
|
|
|
|
which takes a variable number of parameters, the pushing of the
|
|
|
|
parameters in reverse order means that the function knows where to
|
|
|
|
find its first parameter, which tells it the number and type of the
|
|
|
|
remaining ones.
|
|
|
|
|
|
|
|
\b The callee may also wish to decrease \c{SP} further, so as to
|
|
|
|
allocate space on the stack for local variables, which will then be
|
|
|
|
accessible at negative offsets from \c{BP}.
|
|
|
|
|
|
|
|
\b The callee, if it wishes to return a value to the caller, should
|
|
|
|
leave the value in \c{AL}, \c{AX} or \c{DX:AX} depending on the size
|
|
|
|
of the value. Floating-point results are sometimes (depending on the
|
|
|
|
compiler) returned in \c{ST0}.
|
|
|
|
|
|
|
|
\b Once the callee has finished processing, it restores \c{SP} from
|
|
|
|
\c{BP} if it had allocated local stack space, then pops the previous
|
|
|
|
value of \c{BP}, and returns via \c{RETN} or \c{RETF} depending on
|
|
|
|
memory model.
|
|
|
|
|
|
|
|
\b When the caller regains control from the callee, the function
|
|
|
|
parameters are still on the stack, so it typically adds an immediate
|
|
|
|
constant to \c{SP} to remove them (instead of executing a number of
|
|
|
|
slow \c{POP} instructions). Thus, if a function is accidentally
|
|
|
|
called with the wrong number of parameters due to a prototype
|
|
|
|
mismatch, the stack will still be returned to a sensible state since
|
|
|
|
the caller, which \e{knows} how many parameters it pushed, does the
|
|
|
|
removing.
|
|
|
|
|
|
|
|
It is instructive to compare this calling convention with that for
|
|
|
|
Pascal programs (described in \k{16bpfunc}). Pascal has a simpler
|
|
|
|
convention, since no functions have variable numbers of parameters.
|
|
|
|
Therefore the callee knows how many parameters it should have been
|
|
|
|
passed, and is able to deallocate them from the stack itself by
|
|
|
|
passing an immediate argument to the \c{RET} or \c{RETF}
|
|
|
|
instruction, so the caller does not have to do it. Also, the
|
|
|
|
parameters are pushed in left-to-right order, not right-to-left,
|
|
|
|
which means that a compiler can give better guarantees about
|
|
|
|
sequence points without performance suffering.
|
|
|
|
|
|
|
|
Thus, you would define a function in C style in the following way.
|
|
|
|
The following example is for small model:
|
|
|
|
|
|
|
|
\c global _myfunc
|
|
|
|
\c _myfunc: push bp
|
|
|
|
\c mov bp,sp
|
|
|
|
\c sub sp,0x40 ; 64 bytes of local stack space
|
|
|
|
\c mov bx,[bp+4] ; first parameter to function
|
|
|
|
\c ; some more code
|
|
|
|
\c mov sp,bp ; undo "sub sp,0x40" above
|
|
|
|
\c pop bp
|
|
|
|
\c ret
|
|
|
|
|
|
|
|
For a large-model function, you would replace \c{RET} by \c{RETF},
|
|
|
|
and look for the first parameter at \c{[BP+6]} instead of
|
|
|
|
\c{[BP+4]}. Of course, if one of the parameters is a pointer, then
|
|
|
|
the offsets of \e{subsequent} parameters will change depending on
|
|
|
|
the memory model as well: far pointers take up four bytes on the
|
|
|
|
stack when passed as a parameter, whereas near pointers take up two.
|
|
|
|
|
|
|
|
At the other end of the process, to call a C function from your
|
|
|
|
assembly code, you would do something like this:
|
|
|
|
|
|
|
|
\c extern _printf
|
|
|
|
\c ; and then, further down...
|
|
|
|
\c push word [myint] ; one of my integer variables
|
|
|
|
\c push word mystring ; pointer into my data segment
|
|
|
|
\c call _printf
|
|
|
|
\c add sp,byte 4 ; `byte' saves space
|
|
|
|
\c ; then those data items...
|
|
|
|
\c segment _DATA
|
|
|
|
\c myint dw 1234
|
|
|
|
\c mystring db 'This number -> %d <- should be 1234',10,0
|
|
|
|
|
|
|
|
This piece of code is the small-model assembly equivalent of the C
|
|
|
|
code
|
|
|
|
|
|
|
|
\c int myint = 1234;
|
|
|
|
\c printf("This number -> %d <- should be 1234\n", myint);
|
|
|
|
|
|
|
|
In large model, the function-call code might look more like this. In
|
|
|
|
this example, it is assumed that \c{DS} already holds the segment
|
|
|
|
base of the segment \c{_DATA}. If not, you would have to initialise
|
|
|
|
it first.
|
|
|
|
|
|
|
|
\c push word [myint]
|
|
|
|
\c push word seg mystring ; Now push the segment, and...
|
|
|
|
\c push word mystring ; ... offset of "mystring"
|
|
|
|
\c call far _printf
|
|
|
|
\c add sp,byte 6
|
|
|
|
|
|
|
|
The integer value still takes up one word on the stack, since large
|
|
|
|
model does not affect the size of the \c{int} data type. The first
|
|
|
|
argument (pushed last) to \c{printf}, however, is a data pointer,
|
|
|
|
and therefore has to contain a segment and offset part. The segment
|
|
|
|
should be stored second in memory, and therefore must be pushed
|
|
|
|
first. (Of course, \c{PUSH DS} would have been a shorter instruction
|
|
|
|
than \c{PUSH WORD SEG mystring}, if \c{DS} was set up as the above
|
|
|
|
example assumed.) Then the actual call becomes a far call, since
|
|
|
|
functions expect far calls in large model; and \c{SP} has to be
|
|
|
|
increased by 6 rather than 4 afterwards to make up for the extra
|
|
|
|
word of parameters.
|
|
|
|
|
|
|
|
\S{16cdata} Accessing Data Items
|
|
|
|
|
|
|
|
To get at the contents of C variables, or to declare variables which
|
|
|
|
C can access, you need only declare the names as \c{GLOBAL} or
|
|
|
|
\c{EXTERN}. (Again, the names require leading underscores, as stated
|
|
|
|
in \k{16cunder}.) Thus, a C variable declared as \c{int i} can be
|
|
|
|
accessed from assembler as
|
|
|
|
|
|
|
|
\c extern _i
|
|
|
|
\c mov ax,[_i]
|
|
|
|
|
|
|
|
And to declare your own integer variable which C programs can access
|
|
|
|
as \c{extern int j}, you do this (making sure you are assembling in
|
|
|
|
the \c{_DATA} segment, if necessary):
|
|
|
|
|
|
|
|
\c global _j
|
|
|
|
\c _j dw 0
|
|
|
|
|
|
|
|
To access a C array, you need to know the size of the components of
|
|
|
|
the array. For example, \c{int} variables are two bytes long, so if
|
|
|
|
a C program declares an array as \c{int a[10]}, you can access
|
|
|
|
\c{a[3]} by coding \c{mov ax,[_a+6]}. (The byte offset 6 is obtained
|
|
|
|
by multiplying the desired array index, 3, by the size of the array
|
|
|
|
element, 2.) The sizes of the C base types in 16-bit compilers are:
|
|
|
|
1 for \c{char}, 2 for \c{short} and \c{int}, 4 for \c{long} and
|
|
|
|
\c{float}, and 8 for \c{double}.
|
|
|
|
|
|
|
|
To access a C \i{data structure}, you need to know the offset from
|
|
|
|
the base of the structure to the field you are interested in. You
|
|
|
|
can either do this by converting the C structure definition into a
|
|
|
|
NASM structure definition (using \i\c{STRUC}), or by calculating the
|
|
|
|
one offset and using just that.
|
|
|
|
|
|
|
|
To do either of these, you should read your C compiler's manual to
|
|
|
|
find out how it organises data structures. NASM gives no special
|
|
|
|
alignment to structure members in its own \c{STRUC} macro, so you
|
|
|
|
have to specify alignment yourself if the C compiler generates it.
|
|
|
|
Typically, you might find that a structure like
|
|
|
|
|
|
|
|
\c struct {
|
|
|
|
\c char c;
|
|
|
|
\c int i;
|
|
|
|
\c } foo;
|
|
|
|
|
|
|
|
might be four bytes long rather than three, since the \c{int} field
|
|
|
|
would be aligned to a two-byte boundary. However, this sort of
|
|
|
|
feature tends to be a configurable option in the C compiler, either
|
|
|
|
using command-line options or \c{#pragma} lines, so you have to find
|
|
|
|
out how your own compiler does it.
|
|
|
|
|
|
|
|
\S{16cmacro} \i\c{c16.mac}: Helper Macros for the 16-bit C Interface
|
|
|
|
|
|
|
|
Included in the NASM archives, in the \I{misc subdirectory}\c{misc}
|
|
|
|
directory, is a file \c{c16.mac} of macros. It defines three macros:
|
|
|
|
\i\c{proc}, \i\c{arg} and \i\c{endproc}. These are intended to be
|
|
|
|
used for C-style procedure definitions, and they automate a lot of
|
|
|
|
the work involved in keeping track of the calling convention.
|
|
|
|
|
2002-05-01 05:01:08 +08:00
|
|
|
(An alternative, TASM compatible form of \c{arg} is also now built
|
|
|
|
into NASM's preprocessor. See \k{tasmcompat} for details.)
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
An example of an assembly function using the macro set is given
|
|
|
|
here:
|
|
|
|
|
|
|
|
\c proc _nearproc
|
|
|
|
\c %$i arg
|
|
|
|
\c %$j arg
|
|
|
|
\c mov ax,[bp + %$i]
|
|
|
|
\c mov bx,[bp + %$j]
|
|
|
|
\c add ax,[bx]
|
|
|
|
\c endproc
|
|
|
|
|
|
|
|
This defines \c{_nearproc} to be a procedure taking two arguments,
|
|
|
|
the first (\c{i}) an integer and the second (\c{j}) a pointer to an
|
|
|
|
integer. It returns \c{i + *j}.
|
|
|
|
|
|
|
|
Note that the \c{arg} macro has an \c{EQU} as the first line of its
|
|
|
|
expansion, and since the label before the macro call gets prepended
|
|
|
|
to the first line of the expanded macro, the \c{EQU} works, defining
|
|
|
|
\c{%$i} to be an offset from \c{BP}. A context-local variable is
|
|
|
|
used, local to the context pushed by the \c{proc} macro and popped
|
|
|
|
by the \c{endproc} macro, so that the same argument name can be used
|
|
|
|
in later procedures. Of course, you don't \e{have} to do that.
|
|
|
|
|
|
|
|
The macro set produces code for near functions (tiny, small and
|
|
|
|
compact-model code) by default. You can have it generate far
|
|
|
|
functions (medium, large and huge-model code) by means of coding
|
|
|
|
\I\c{FARCODE}\c{%define FARCODE}. This changes the kind of return
|
|
|
|
instruction generated by \c{endproc}, and also changes the starting
|
|
|
|
point for the argument offsets. The macro set contains no intrinsic
|
|
|
|
dependency on whether data pointers are far or not.
|
|
|
|
|
|
|
|
\c{arg} can take an optional parameter, giving the size of the
|
|
|
|
argument. If no size is given, 2 is assumed, since it is likely that
|
|
|
|
many function parameters will be of type \c{int}.
|
|
|
|
|
|
|
|
The large-model equivalent of the above function would look like this:
|
|
|
|
|
|
|
|
\c %define FARCODE
|
|
|
|
\c proc _farproc
|
|
|
|
\c %$i arg
|
|
|
|
\c %$j arg 4
|
|
|
|
\c mov ax,[bp + %$i]
|
|
|
|
\c mov bx,[bp + %$j]
|
|
|
|
\c mov es,[bp + %$j + 2]
|
|
|
|
\c add ax,[es:bx]
|
|
|
|
\c endproc
|
|
|
|
|
|
|
|
This makes use of the argument to the \c{arg} macro to define a
|
|
|
|
parameter of size 4, because \c{j} is now a far pointer. When we
|
|
|
|
load from \c{j}, we must load a segment and an offset.
|
|
|
|
|
|
|
|
\H{16bp} Interfacing to \i{Borland Pascal} Programs
|
|
|
|
|
|
|
|
Interfacing to Borland Pascal programs is similar in concept to
|
|
|
|
interfacing to 16-bit C programs. The differences are:
|
|
|
|
|
|
|
|
\b The leading underscore required for interfacing to C programs is
|
|
|
|
not required for Pascal.
|
|
|
|
|
|
|
|
\b The memory model is always large: functions are far, data
|
|
|
|
pointers are far, and no data item can be more than 64K long.
|
|
|
|
(Actually, some functions are near, but only those functions that
|
|
|
|
are local to a Pascal unit and never called from outside it. All
|
|
|
|
assembly functions that Pascal calls, and all Pascal functions that
|
|
|
|
assembly routines are able to call, are far.) However, all static
|
|
|
|
data declared in a Pascal program goes into the default data
|
|
|
|
segment, which is the one whose segment address will be in \c{DS}
|
|
|
|
when control is passed to your assembly code. The only things that
|
|
|
|
do not live in the default data segment are local variables (they
|
|
|
|
live in the stack segment) and dynamically allocated variables. All
|
|
|
|
data \e{pointers}, however, are far.
|
|
|
|
|
|
|
|
\b The function calling convention is different - described below.
|
|
|
|
|
|
|
|
\b Some data types, such as strings, are stored differently.
|
|
|
|
|
|
|
|
\b There are restrictions on the segment names you are allowed to
|
|
|
|
use - Borland Pascal will ignore code or data declared in a segment
|
|
|
|
it doesn't like the name of. The restrictions are described below.
|
|
|
|
|
|
|
|
\S{16bpfunc} The Pascal Calling Convention
|
|
|
|
|
|
|
|
\I{functions, Pascal calling convention}\I{Pascal calling
|
|
|
|
convention}The 16-bit Pascal calling convention is as follows. In
|
|
|
|
the following description, the words \e{caller} and \e{callee} are
|
|
|
|
used to denote the function doing the calling and the function which
|
|
|
|
gets called.
|
|
|
|
|
|
|
|
\b The caller pushes the function's parameters on the stack, one
|
|
|
|
after another, in normal order (left to right, so that the first
|
|
|
|
argument specified to the function is pushed first).
|
|
|
|
|
|
|
|
\b The caller then executes a far \c{CALL} instruction to pass
|
|
|
|
control to the callee.
|
|
|
|
|
|
|
|
\b The callee receives control, and typically (although this is not
|
|
|
|
actually necessary, in functions which do not need to access their
|
|
|
|
parameters) starts by saving the value of \c{SP} in \c{BP} so as to
|
|
|
|
be able to use \c{BP} as a base pointer to find its parameters on
|
|
|
|
the stack. However, the caller was probably doing this too, so part
|
|
|
|
of the calling convention states that \c{BP} must be preserved by
|
|
|
|
any function. Hence the callee, if it is going to set up \c{BP} as a
|
|
|
|
\i{frame pointer}, must push the previous value first.
|
|
|
|
|
|
|
|
\b The callee may then access its parameters relative to \c{BP}.
|
|
|
|
The word at \c{[BP]} holds the previous value of \c{BP} as it was
|
|
|
|
pushed. The next word, at \c{[BP+2]}, holds the offset part of the
|
|
|
|
return address, and the next one at \c{[BP+4]} the segment part. The
|
|
|
|
parameters begin at \c{[BP+6]}. The rightmost parameter of the
|
|
|
|
function, since it was pushed last, is accessible at this offset
|
|
|
|
from \c{BP}; the others follow, at successively greater offsets.
|
|
|
|
|
|
|
|
\b The callee may also wish to decrease \c{SP} further, so as to
|
|
|
|
allocate space on the stack for local variables, which will then be
|
|
|
|
accessible at negative offsets from \c{BP}.
|
|
|
|
|
|
|
|
\b The callee, if it wishes to return a value to the caller, should
|
|
|
|
leave the value in \c{AL}, \c{AX} or \c{DX:AX} depending on the size
|
|
|
|
of the value. Floating-point results are returned in \c{ST0}.
|
|
|
|
Results of type \c{Real} (Borland's own custom floating-point data
|
|
|
|
type, not handled directly by the FPU) are returned in \c{DX:BX:AX}.
|
|
|
|
To return a result of type \c{String}, the caller pushes a pointer
|
|
|
|
to a temporary string before pushing the parameters, and the callee
|
|
|
|
places the returned string value at that location. The pointer is
|
|
|
|
not a parameter, and should not be removed from the stack by the
|
|
|
|
\c{RETF} instruction.
|
|
|
|
|
|
|
|
\b Once the callee has finished processing, it restores \c{SP} from
|
|
|
|
\c{BP} if it had allocated local stack space, then pops the previous
|
|
|
|
value of \c{BP}, and returns via \c{RETF}. It uses the form of
|
|
|
|
\c{RETF} with an immediate parameter, giving the number of bytes
|
|
|
|
taken up by the parameters on the stack. This causes the parameters
|
|
|
|
to be removed from the stack as a side effect of the return
|
|
|
|
instruction.
|
|
|
|
|
|
|
|
\b When the caller regains control from the callee, the function
|
|
|
|
parameters have already been removed from the stack, so it needs to
|
|
|
|
do nothing further.
|
|
|
|
|
|
|
|
Thus, you would define a function in Pascal style, taking two
|
|
|
|
\c{Integer}-type parameters, in the following way:
|
|
|
|
|
|
|
|
\c global myfunc
|
|
|
|
\c myfunc: push bp
|
|
|
|
\c mov bp,sp
|
|
|
|
\c sub sp,0x40 ; 64 bytes of local stack space
|
|
|
|
\c mov bx,[bp+8] ; first parameter to function
|
|
|
|
\c mov bx,[bp+6] ; second parameter to function
|
|
|
|
\c ; some more code
|
|
|
|
\c mov sp,bp ; undo "sub sp,0x40" above
|
|
|
|
\c pop bp
|
|
|
|
\c retf 4 ; total size of params is 4
|
|
|
|
|
|
|
|
At the other end of the process, to call a Pascal function from your
|
|
|
|
assembly code, you would do something like this:
|
|
|
|
|
|
|
|
\c extern SomeFunc
|
|
|
|
\c ; and then, further down...
|
|
|
|
\c push word seg mystring ; Now push the segment, and...
|
|
|
|
\c push word mystring ; ... offset of "mystring"
|
|
|
|
\c push word [myint] ; one of my variables
|
|
|
|
\c call far SomeFunc
|
|
|
|
|
|
|
|
This is equivalent to the Pascal code
|
|
|
|
|
|
|
|
\c procedure SomeFunc(String: PChar; Int: Integer);
|
|
|
|
\c SomeFunc(@mystring, myint);
|
|
|
|
|
|
|
|
\S{16bpseg} Borland Pascal \I{segment names, Borland Pascal}Segment
|
|
|
|
Name Restrictions
|
|
|
|
|
|
|
|
Since Borland Pascal's internal unit file format is completely
|
|
|
|
different from \c{OBJ}, it only makes a very sketchy job of actually
|
|
|
|
reading and understanding the various information contained in a
|
|
|
|
real \c{OBJ} file when it links that in. Therefore an object file
|
|
|
|
intended to be linked to a Pascal program must obey a number of
|
|
|
|
restrictions:
|
|
|
|
|
|
|
|
\b Procedures and functions must be in a segment whose name is
|
|
|
|
either \c{CODE}, \c{CSEG}, or something ending in \c{_TEXT}.
|
|
|
|
|
|
|
|
\b Initialised data must be in a segment whose name is either
|
|
|
|
\c{CONST} or something ending in \c{_DATA}.
|
|
|
|
|
|
|
|
\b Uninitialised data must be in a segment whose name is either
|
|
|
|
\c{DATA}, \c{DSEG}, or something ending in \c{_BSS}.
|
|
|
|
|
|
|
|
\b Any other segments in the object file are completely ignored.
|
|
|
|
\c{GROUP} directives and segment attributes are also ignored.
|
|
|
|
|
|
|
|
\S{16bpmacro} Using \i\c{c16.mac} With Pascal Programs
|
|
|
|
|
|
|
|
The \c{c16.mac} macro package, described in \k{16cmacro}, can also
|
|
|
|
be used to simplify writing functions to be called from Pascal
|
|
|
|
programs, if you code \I\c{PASCAL}\c{%define PASCAL}. This
|
|
|
|
definition ensures that functions are far (it implies
|
|
|
|
\i\c{FARCODE}), and also causes procedure return instructions to be
|
|
|
|
generated with an operand.
|
|
|
|
|
|
|
|
Defining \c{PASCAL} does not change the code which calculates the
|
|
|
|
argument offsets; you must declare your function's arguments in
|
|
|
|
reverse order. For example:
|
|
|
|
|
|
|
|
\c %define PASCAL
|
|
|
|
\c proc _pascalproc
|
|
|
|
\c %$j arg 4
|
|
|
|
\c %$i arg
|
|
|
|
\c mov ax,[bp + %$i]
|
|
|
|
\c mov bx,[bp + %$j]
|
|
|
|
\c mov es,[bp + %$j + 2]
|
|
|
|
\c add ax,[bx]
|
|
|
|
\c endproc
|
|
|
|
|
|
|
|
This defines the same routine, conceptually, as the example in
|
|
|
|
\k{16cmacro}: it defines a function taking two arguments, an integer
|
|
|
|
and a pointer to an integer, which returns the sum of the integer
|
|
|
|
and the contents of the pointer. The only difference between this
|
|
|
|
code and the large-model C version is that \c{PASCAL} is defined
|
|
|
|
instead of \c{FARCODE}, and that the arguments are declared in
|
|
|
|
reverse order.
|
|
|
|
|
|
|
|
\C{32bit} Writing 32-bit Code (Unix, Win32, DJGPP)
|
|
|
|
|
|
|
|
This chapter attempts to cover some of the common issues involved
|
|
|
|
when writing 32-bit code, to run under \i{Win32} or Unix, or to be
|
|
|
|
linked with C code generated by a Unix-style C compiler such as
|
|
|
|
\i{DJGPP}. It covers how to write assembly code to interface with
|
|
|
|
32-bit C routines, and how to write position-independent code for
|
|
|
|
shared libraries.
|
|
|
|
|
|
|
|
Almost all 32-bit code, and in particular all code running under
|
|
|
|
Win32, DJGPP or any of the PC Unix variants, runs in \I{flat memory
|
|
|
|
model}\e{flat} memory model. This means that the segment registers
|
|
|
|
and paging have already been set up to give you the same 32-bit 4Gb
|
|
|
|
address space no matter what segment you work relative to, and that
|
|
|
|
you should ignore all segment registers completely. When writing
|
|
|
|
flat-model application code, you never need to use a segment
|
|
|
|
override or modify any segment register, and the code-section
|
|
|
|
addresses you pass to \c{CALL} and \c{JMP} live in the same address
|
|
|
|
space as the data-section addresses you access your variables by and
|
|
|
|
the stack-section addresses you access local variables and procedure
|
|
|
|
parameters by. Every address is 32 bits long and contains only an
|
|
|
|
offset part.
|
|
|
|
|
|
|
|
\H{32c} Interfacing to 32-bit C Programs
|
|
|
|
|
|
|
|
A lot of the discussion in \k{16c}, about interfacing to 16-bit C
|
|
|
|
programs, still applies when working in 32 bits. The absence of
|
|
|
|
memory models or segmentation worries simplifies things a lot.
|
|
|
|
|
|
|
|
\S{32cunder} External Symbol Names
|
|
|
|
|
|
|
|
Most 32-bit C compilers share the convention used by 16-bit
|
|
|
|
compilers, that the names of all global symbols (functions or data)
|
|
|
|
they define are formed by prefixing an underscore to the name as it
|
|
|
|
appears in the C program. However, not all of them do: the ELF
|
|
|
|
specification states that C symbols do \e{not} have a leading
|
|
|
|
underscore on their assembly-language names.
|
|
|
|
|
|
|
|
The older Linux \c{a.out} C compiler, all Win32 compilers, DJGPP,
|
|
|
|
and NetBSD and FreeBSD, all use the leading underscore; for these
|
|
|
|
compilers, the macros \c{cextern} and \c{cglobal}, as given in
|
|
|
|
\k{16cunder}, will still work. For ELF, though, the leading
|
|
|
|
underscore should not be used.
|
|
|
|
|
|
|
|
\S{32cfunc} Function Definitions and Function Calls
|
|
|
|
|
|
|
|
\I{functions, C calling convention}\I{C calling convention}The C
|
|
|
|
calling convention in 32-bit programs is as follows. In the
|
|
|
|
following description, the words \e{caller} and \e{callee} are used
|
|
|
|
to denote the function doing the calling and the function which gets
|
|
|
|
called.
|
|
|
|
|
|
|
|
\b The caller pushes the function's parameters on the stack, one
|
|
|
|
after another, in reverse order (right to left, so that the first
|
|
|
|
argument specified to the function is pushed last).
|
|
|
|
|
|
|
|
\b The caller then executes a near \c{CALL} instruction to pass
|
|
|
|
control to the callee.
|
|
|
|
|
|
|
|
\b The callee receives control, and typically (although this is not
|
|
|
|
actually necessary, in functions which do not need to access their
|
|
|
|
parameters) starts by saving the value of \c{ESP} in \c{EBP} so as
|
|
|
|
to be able to use \c{EBP} as a base pointer to find its parameters
|
|
|
|
on the stack. However, the caller was probably doing this too, so
|
|
|
|
part of the calling convention states that \c{EBP} must be preserved
|
|
|
|
by any C function. Hence the callee, if it is going to set up
|
|
|
|
\c{EBP} as a \i{frame pointer}, must push the previous value first.
|
|
|
|
|
|
|
|
\b The callee may then access its parameters relative to \c{EBP}.
|
|
|
|
The doubleword at \c{[EBP]} holds the previous value of \c{EBP} as
|
|
|
|
it was pushed; the next doubleword, at \c{[EBP+4]}, holds the return
|
|
|
|
address, pushed implicitly by \c{CALL}. The parameters start after
|
|
|
|
that, at \c{[EBP+8]}. The leftmost parameter of the function, since
|
|
|
|
it was pushed last, is accessible at this offset from \c{EBP}; the
|
|
|
|
others follow, at successively greater offsets. Thus, in a function
|
|
|
|
such as \c{printf} which takes a variable number of parameters, the
|
|
|
|
pushing of the parameters in reverse order means that the function
|
|
|
|
knows where to find its first parameter, which tells it the number
|
|
|
|
and type of the remaining ones.
|
|
|
|
|
|
|
|
\b The callee may also wish to decrease \c{ESP} further, so as to
|
|
|
|
allocate space on the stack for local variables, which will then be
|
|
|
|
accessible at negative offsets from \c{EBP}.
|
|
|
|
|
|
|
|
\b The callee, if it wishes to return a value to the caller, should
|
|
|
|
leave the value in \c{AL}, \c{AX} or \c{EAX} depending on the size
|
|
|
|
of the value. Floating-point results are typically returned in
|
|
|
|
\c{ST0}.
|
|
|
|
|
|
|
|
\b Once the callee has finished processing, it restores \c{ESP} from
|
|
|
|
\c{EBP} if it had allocated local stack space, then pops the previous
|
|
|
|
value of \c{EBP}, and returns via \c{RET} (equivalently, \c{RETN}).
|
|
|
|
|
|
|
|
\b When the caller regains control from the callee, the function
|
|
|
|
parameters are still on the stack, so it typically adds an immediate
|
|
|
|
constant to \c{ESP} to remove them (instead of executing a number of
|
|
|
|
slow \c{POP} instructions). Thus, if a function is accidentally
|
|
|
|
called with the wrong number of parameters due to a prototype
|
|
|
|
mismatch, the stack will still be returned to a sensible state since
|
|
|
|
the caller, which \e{knows} how many parameters it pushed, does the
|
|
|
|
removing.
|
|
|
|
|
|
|
|
There is an alternative calling convention used by Win32 programs
|
|
|
|
for Windows API calls, and also for functions called \e{by} the
|
|
|
|
Windows API such as window procedures: they follow what Microsoft
|
|
|
|
calls the \c{__stdcall} convention. This is slightly closer to the
|
|
|
|
Pascal convention, in that the callee clears the stack by passing a
|
|
|
|
parameter to the \c{RET} instruction. However, the parameters are
|
|
|
|
still pushed in right-to-left order.
|
|
|
|
|
|
|
|
Thus, you would define a function in C style in the following way:
|
|
|
|
|
|
|
|
\c global _myfunc
|
|
|
|
\c _myfunc: push ebp
|
|
|
|
\c mov ebp,esp
|
|
|
|
\c sub esp,0x40 ; 64 bytes of local stack space
|
|
|
|
\c mov ebx,[ebp+8] ; first parameter to function
|
|
|
|
\c ; some more code
|
|
|
|
\c leave ; mov esp,ebp / pop ebp
|
|
|
|
\c ret
|
|
|
|
|
|
|
|
At the other end of the process, to call a C function from your
|
|
|
|
assembly code, you would do something like this:
|
|
|
|
|
|
|
|
\c extern _printf
|
|
|
|
\c ; and then, further down...
|
|
|
|
\c push dword [myint] ; one of my integer variables
|
|
|
|
\c push dword mystring ; pointer into my data segment
|
|
|
|
\c call _printf
|
|
|
|
\c add esp,byte 8 ; `byte' saves space
|
|
|
|
\c ; then those data items...
|
|
|
|
\c segment _DATA
|
|
|
|
\c myint dd 1234
|
|
|
|
\c mystring db 'This number -> %d <- should be 1234',10,0
|
|
|
|
|
|
|
|
This piece of code is the assembly equivalent of the C code
|
|
|
|
|
|
|
|
\c int myint = 1234;
|
|
|
|
\c printf("This number -> %d <- should be 1234\n", myint);
|
|
|
|
|
|
|
|
\S{32cdata} Accessing Data Items
|
|
|
|
|
|
|
|
To get at the contents of C variables, or to declare variables which
|
|
|
|
C can access, you need only declare the names as \c{GLOBAL} or
|
|
|
|
\c{EXTERN}. (Again, the names require leading underscores, as stated
|
|
|
|
in \k{32cunder}.) Thus, a C variable declared as \c{int i} can be
|
|
|
|
accessed from assembler as
|
|
|
|
|
|
|
|
\c extern _i
|
|
|
|
\c mov eax,[_i]
|
|
|
|
|
|
|
|
And to declare your own integer variable which C programs can access
|
|
|
|
as \c{extern int j}, you do this (making sure you are assembling in
|
|
|
|
the \c{_DATA} segment, if necessary):
|
|
|
|
|
|
|
|
\c global _j
|
|
|
|
\c _j dd 0
|
|
|
|
|
|
|
|
To access a C array, you need to know the size of the components of
|
|
|
|
the array. For example, \c{int} variables are four bytes long, so if
|
|
|
|
a C program declares an array as \c{int a[10]}, you can access
|
|
|
|
\c{a[3]} by coding \c{mov eax,[_a+12]}. (The byte offset 12 is obtained
|
|
|
|
by multiplying the desired array index, 3, by the size of the array
|
|
|
|
element, 4.) The sizes of the C base types in 32-bit compilers are:
|
|
|
|
1 for \c{char}, 2 for \c{short}, 4 for \c{int}, \c{long} and
|
|
|
|
\c{float}, and 8 for \c{double}. Pointers, being 32-bit addresses,
|
|
|
|
are also 4 bytes long.
|
|
|
|
|
|
|
|
To access a C \i{data structure}, you need to know the offset from
|
|
|
|
the base of the structure to the field you are interested in. You
|
|
|
|
can either do this by converting the C structure definition into a
|
|
|
|
NASM structure definition (using \c{STRUC}), or by calculating the
|
|
|
|
one offset and using just that.
|
|
|
|
|
|
|
|
To do either of these, you should read your C compiler's manual to
|
|
|
|
find out how it organises data structures. NASM gives no special
|
|
|
|
alignment to structure members in its own \i\c{STRUC} macro, so you
|
|
|
|
have to specify alignment yourself if the C compiler generates it.
|
|
|
|
Typically, you might find that a structure like
|
|
|
|
|
|
|
|
\c struct {
|
|
|
|
\c char c;
|
|
|
|
\c int i;
|
|
|
|
\c } foo;
|
|
|
|
|
|
|
|
might be eight bytes long rather than five, since the \c{int} field
|
|
|
|
would be aligned to a four-byte boundary. However, this sort of
|
|
|
|
feature is sometimes a configurable option in the C compiler, either
|
|
|
|
using command-line options or \c{#pragma} lines, so you have to find
|
|
|
|
out how your own compiler does it.
|
|
|
|
|
|
|
|
\S{32cmacro} \i\c{c32.mac}: Helper Macros for the 32-bit C Interface
|
|
|
|
|
|
|
|
Included in the NASM archives, in the \I{misc directory}\c{misc}
|
|
|
|
directory, is a file \c{c32.mac} of macros. It defines three macros:
|
|
|
|
\i\c{proc}, \i\c{arg} and \i\c{endproc}. These are intended to be
|
|
|
|
used for C-style procedure definitions, and they automate a lot of
|
|
|
|
the work involved in keeping track of the calling convention.
|
|
|
|
|
|
|
|
An example of an assembly function using the macro set is given
|
|
|
|
here:
|
|
|
|
|
|
|
|
\c proc _proc32
|
|
|
|
\c %$i arg
|
|
|
|
\c %$j arg
|
|
|
|
\c mov eax,[ebp + %$i]
|
|
|
|
\c mov ebx,[ebp + %$j]
|
|
|
|
\c add eax,[ebx]
|
|
|
|
\c endproc
|
|
|
|
|
|
|
|
This defines \c{_proc32} to be a procedure taking two arguments, the
|
|
|
|
first (\c{i}) an integer and the second (\c{j}) a pointer to an
|
|
|
|
integer. It returns \c{i + *j}.
|
|
|
|
|
|
|
|
Note that the \c{arg} macro has an \c{EQU} as the first line of its
|
|
|
|
expansion, and since the label before the macro call gets prepended
|
|
|
|
to the first line of the expanded macro, the \c{EQU} works, defining
|
|
|
|
\c{%$i} to be an offset from \c{EBP}. A context-local variable is
|
|
|
|
used, local to the context pushed by the \c{proc} macro and popped
|
|
|
|
by the \c{endproc} macro, so that the same argument name can be used
|
|
|
|
in later procedures. Of course, you don't \e{have} to do that.
|
|
|
|
|
|
|
|
\c{arg} can take an optional parameter, giving the size of the
|
|
|
|
argument. If no size is given, 4 is assumed, since it is likely that
|
|
|
|
many function parameters will be of type \c{int} or pointers.
|
|
|
|
|
|
|
|
\H{picdll} Writing NetBSD/FreeBSD/OpenBSD and Linux/ELF \i{Shared
|
|
|
|
Libraries}
|
|
|
|
|
|
|
|
ELF replaced the older \c{a.out} object file format under Linux
|
|
|
|
because it contains support for \i{position-independent code}
|
|
|
|
(\i{PIC}), which makes writing shared libraries much easier. NASM
|
|
|
|
supports the ELF position-independent code features, so you can
|
|
|
|
write Linux ELF shared libraries in NASM.
|
|
|
|
|
|
|
|
\i{NetBSD}, and its close cousins \i{FreeBSD} and \i{OpenBSD}, take
|
|
|
|
a different approach by hacking PIC support into the \c{a.out}
|
|
|
|
format. NASM supports this as the \i\c{aoutb} output format, so you
|
|
|
|
can write \i{BSD} shared libraries in NASM too.
|
|
|
|
|
|
|
|
The operating system loads a PIC shared library by memory-mapping
|
|
|
|
the library file at an arbitrarily chosen point in the address space
|
|
|
|
of the running process. The contents of the library's code section
|
|
|
|
must therefore not depend on where it is loaded in memory.
|
|
|
|
|
|
|
|
Therefore, you cannot get at your variables by writing code like
|
|
|
|
this:
|
|
|
|
|
|
|
|
\c mov eax,[myvar] ; WRONG
|
|
|
|
|
|
|
|
Instead, the linker provides an area of memory called the
|
|
|
|
\i\e{global offset table}, or \i{GOT}; the GOT is situated at a
|
|
|
|
constant distance from your library's code, so if you can find out
|
|
|
|
where your library is loaded (which is typically done using a
|
|
|
|
\c{CALL} and \c{POP} combination), you can obtain the address of the
|
|
|
|
GOT, and you can then load the addresses of your variables out of
|
|
|
|
linker-generated entries in the GOT.
|
|
|
|
|
|
|
|
The \e{data} section of a PIC shared library does not have these
|
|
|
|
restrictions: since the data section is writable, it has to be
|
|
|
|
copied into memory anyway rather than just paged in from the library
|
|
|
|
file, so as long as it's being copied it can be relocated too. So
|
|
|
|
you can put ordinary types of relocation in the data section without
|
|
|
|
too much worry (but see \k{picglobal} for a caveat).
|
|
|
|
|
|
|
|
\S{picgot} Obtaining the Address of the GOT
|
|
|
|
|
|
|
|
Each code module in your shared library should define the GOT as an
|
|
|
|
external symbol:
|
|
|
|
|
|
|
|
\c extern _GLOBAL_OFFSET_TABLE_ ; in ELF
|
|
|
|
\c extern __GLOBAL_OFFSET_TABLE_ ; in BSD a.out
|
|
|
|
|
|
|
|
At the beginning of any function in your shared library which plans
|
|
|
|
to access your data or BSS sections, you must first calculate the
|
|
|
|
address of the GOT. This is typically done by writing the function
|
|
|
|
in this form:
|
|
|
|
|
|
|
|
\c func: push ebp
|
|
|
|
\c mov ebp,esp
|
|
|
|
\c push ebx
|
|
|
|
\c call .get_GOT
|
|
|
|
\c .get_GOT: pop ebx
|
|
|
|
\c add ebx,_GLOBAL_OFFSET_TABLE_+$$-.get_GOT wrt ..gotpc
|
|
|
|
\c ; the function body comes here
|
|
|
|
\c mov ebx,[ebp-4]
|
|
|
|
\c mov esp,ebp
|
|
|
|
\c pop ebp
|
|
|
|
\c ret
|
|
|
|
|
|
|
|
(For BSD, again, the symbol \c{_GLOBAL_OFFSET_TABLE_} requires a
|
|
|
|
second leading underscore.)
|
|
|
|
|
|
|
|
The first two lines of this function are simply the standard C
|
|
|
|
prologue to set up a stack frame, and the last three lines are
|
|
|
|
standard C function epilogue. The third line, and the fourth to last
|
|
|
|
line, save and restore the \c{EBX} register, because PIC shared
|
|
|
|
libraries use this register to store the address of the GOT.
|
|
|
|
|
|
|
|
The interesting bit is the \c{CALL} instruction and the following
|
|
|
|
two lines. The \c{CALL} and \c{POP} combination obtains the address
|
|
|
|
of the label \c{.get_GOT}, without having to know in advance where
|
|
|
|
the program was loaded (since the \c{CALL} instruction is encoded
|
|
|
|
relative to the current position). The \c{ADD} instruction makes use
|
|
|
|
of one of the special PIC relocation types: \i{GOTPC relocation}.
|
|
|
|
With the \i\c{WRT ..gotpc} qualifier specified, the symbol
|
|
|
|
referenced (here \c{_GLOBAL_OFFSET_TABLE_}, the special symbol
|
|
|
|
assigned to the GOT) is given as an offset from the beginning of the
|
|
|
|
section. (Actually, ELF encodes it as the offset from the operand
|
|
|
|
field of the \c{ADD} instruction, but NASM simplifies this
|
|
|
|
deliberately, so you do things the same way for both ELF and BSD.)
|
|
|
|
So the instruction then \e{adds} the beginning of the section, to
|
|
|
|
get the real address of the GOT, and subtracts the value of
|
|
|
|
\c{.get_GOT} which it knows is in \c{EBX}. Therefore, by the time
|
|
|
|
that instruction has finished,
|
|
|
|
\c{EBX} contains the address of the GOT.
|
|
|
|
|
|
|
|
If you didn't follow that, don't worry: it's never necessary to
|
|
|
|
obtain the address of the GOT by any other means, so you can put
|
|
|
|
those three instructions into a macro and safely ignore them:
|
|
|
|
|
|
|
|
\c %macro get_GOT 0
|
|
|
|
\c call %%getgot
|
|
|
|
\c %%getgot: pop ebx
|
|
|
|
\c add ebx,_GLOBAL_OFFSET_TABLE_+$$-%%getgot wrt ..gotpc
|
|
|
|
\c %endmacro
|
|
|
|
|
|
|
|
\S{piclocal} Finding Your Local Data Items
|
|
|
|
|
|
|
|
Having got the GOT, you can then use it to obtain the addresses of
|
|
|
|
your data items. Most variables will reside in the sections you have
|
|
|
|
declared; they can be accessed using the \I{GOTOFF
|
|
|
|
relocation}\c{..gotoff} special \I\c{WRT ..gotoff}\c{WRT} type. The
|
|
|
|
way this works is like this:
|
|
|
|
|
|
|
|
\c lea eax,[ebx+myvar wrt ..gotoff]
|
|
|
|
|
|
|
|
The expression \c{myvar wrt ..gotoff} is calculated, when the shared
|
|
|
|
library is linked, to be the offset to the local variable \c{myvar}
|
|
|
|
from the beginning of the GOT. Therefore, adding it to \c{EBX} as
|
|
|
|
above will place the real address of \c{myvar} in \c{EAX}.
|
|
|
|
|
|
|
|
If you declare variables as \c{GLOBAL} without specifying a size for
|
|
|
|
them, they are shared between code modules in the library, but do
|
|
|
|
not get exported from the library to the program that loaded it.
|
|
|
|
They will still be in your ordinary data and BSS sections, so you
|
|
|
|
can access them in the same way as local variables, using the above
|
|
|
|
\c{..gotoff} mechanism.
|
|
|
|
|
|
|
|
Note that due to a peculiarity of the way BSD \c{a.out} format
|
|
|
|
handles this relocation type, there must be at least one non-local
|
|
|
|
symbol in the same section as the address you're trying to access.
|
|
|
|
|
|
|
|
\S{picextern} Finding External and Common Data Items
|
|
|
|
|
|
|
|
If your library needs to get at an external variable (external to
|
|
|
|
the \e{library}, not just to one of the modules within it), you must
|
|
|
|
use the \I{GOT relocations}\I\c{WRT ..got}\c{..got} type to get at
|
|
|
|
it. The \c{..got} type, instead of giving you the offset from the
|
|
|
|
GOT base to the variable, gives you the offset from the GOT base to
|
|
|
|
a GOT \e{entry} containing the address of the variable. The linker
|
|
|
|
will set up this GOT entry when it builds the library, and the
|
|
|
|
dynamic linker will place the correct address in it at load time. So
|
|
|
|
to obtain the address of an external variable \c{extvar} in \c{EAX},
|
|
|
|
you would code
|
|
|
|
|
|
|
|
\c mov eax,[ebx+extvar wrt ..got]
|
|
|
|
|
|
|
|
This loads the address of \c{extvar} out of an entry in the GOT. The
|
|
|
|
linker, when it builds the shared library, collects together every
|
|
|
|
relocation of type \c{..got}, and builds the GOT so as to ensure it
|
|
|
|
has every necessary entry present.
|
|
|
|
|
|
|
|
Common variables must also be accessed in this way.
|
|
|
|
|
|
|
|
\S{picglobal} Exporting Symbols to the Library User
|
|
|
|
|
|
|
|
If you want to export symbols to the user of the library, you have
|
|
|
|
to declare whether they are functions or data, and if they are data,
|
|
|
|
you have to give the size of the data item. This is because the
|
|
|
|
dynamic linker has to build \I{PLT}\i{procedure linkage table}
|
|
|
|
entries for any exported functions, and also moves exported data
|
|
|
|
items away from the library's data section in which they were
|
|
|
|
declared.
|
|
|
|
|
|
|
|
So to export a function to users of the library, you must use
|
|
|
|
|
|
|
|
\c global func:function ; declare it as a function
|
|
|
|
\c func: push ebp
|
|
|
|
\c ; etc.
|
|
|
|
|
|
|
|
And to export a data item such as an array, you would have to code
|
|
|
|
|
|
|
|
\c global array:data array.end-array ; give the size too
|
|
|
|
\c array: resd 128
|
|
|
|
\c .end:
|
|
|
|
|
|
|
|
Be careful: If you export a variable to the library user, by
|
|
|
|
declaring it as \c{GLOBAL} and supplying a size, the variable will
|
|
|
|
end up living in the data section of the main program, rather than
|
|
|
|
in your library's data section, where you declared it. So you will
|
|
|
|
have to access your own global variable with the \c{..got} mechanism
|
|
|
|
rather than \c{..gotoff}, as if it were external (which,
|
|
|
|
effectively, it has become).
|
|
|
|
|
|
|
|
Equally, if you need to store the address of an exported global in
|
|
|
|
one of your data sections, you can't do it by means of the standard
|
|
|
|
sort of code:
|
|
|
|
|
|
|
|
\c dataptr: dd global_data_item ; WRONG
|
|
|
|
|
|
|
|
NASM will interpret this code as an ordinary relocation, in which
|
|
|
|
\c{global_data_item} is merely an offset from the beginning of the
|
|
|
|
\c{.data} section (or whatever); so this reference will end up
|
|
|
|
pointing at your data section instead of at the exported global
|
|
|
|
which resides elsewhere.
|
|
|
|
|
|
|
|
Instead of the above code, then, you must write
|
|
|
|
|
|
|
|
\c dataptr: dd global_data_item wrt ..sym
|
|
|
|
|
|
|
|
which makes use of the special \c{WRT} type \I\c{WRT ..sym}\c{..sym}
|
|
|
|
to instruct NASM to search the symbol table for a particular symbol
|
|
|
|
at that address, rather than just relocating by section base.
|
|
|
|
|
|
|
|
Either method will work for functions: referring to one of your
|
|
|
|
functions by means of
|
|
|
|
|
|
|
|
\c funcptr: dd my_function
|
|
|
|
|
|
|
|
will give the user the address of the code you wrote, whereas
|
|
|
|
|
|
|
|
\c funcptr: dd my_function wrt ..sym
|
|
|
|
|
|
|
|
will give the address of the procedure linkage table for the
|
|
|
|
function, which is where the calling program will \e{believe} the
|
|
|
|
function lives. Either address is a valid way to call the function.
|
|
|
|
|
|
|
|
\S{picproc} Calling Procedures Outside the Library
|
|
|
|
|
|
|
|
Calling procedures outside your shared library has to be done by
|
|
|
|
means of a \i\e{procedure linkage table}, or \i{PLT}. The PLT is
|
|
|
|
placed at a known offset from where the library is loaded, so the
|
|
|
|
library code can make calls to the PLT in a position-independent
|
|
|
|
way. Within the PLT there is code to jump to offsets contained in
|
|
|
|
the GOT, so function calls to other shared libraries or to routines
|
|
|
|
in the main program can be transparently passed off to their real
|
|
|
|
destinations.
|
|
|
|
|
|
|
|
To call an external routine, you must use another special PIC
|
|
|
|
relocation type, \I{PLT relocations}\i\c{WRT ..plt}. This is much
|
|
|
|
easier than the GOT-based ones: you simply replace calls such as
|
|
|
|
\c{CALL printf} with the PLT-relative version \c{CALL printf WRT
|
|
|
|
..plt}.
|
|
|
|
|
|
|
|
\S{link} Generating the Library File
|
|
|
|
|
|
|
|
Having written some code modules and assembled them to \c{.o} files,
|
|
|
|
you then generate your shared library with a command such as
|
|
|
|
|
|
|
|
\c ld -shared -o library.so module1.o module2.o # for ELF
|
|
|
|
\c ld -Bshareable -o library.so module1.o module2.o # for BSD
|
|
|
|
|
|
|
|
For ELF, if your shared library is going to reside in system
|
|
|
|
directories such as \c{/usr/lib} or \c{/lib}, it is usually worth
|
|
|
|
using the \i\c{-soname} flag to the linker, to store the final
|
|
|
|
library file name, with a version number, into the library:
|
|
|
|
|
|
|
|
\c ld -shared -soname library.so.1 -o library.so.1.2 *.o
|
|
|
|
|
|
|
|
You would then copy \c{library.so.1.2} into the library directory,
|
|
|
|
and create \c{library.so.1} as a symbolic link to it.
|
|
|
|
|
|
|
|
\C{mixsize} Mixing 16 and 32 Bit Code
|
|
|
|
|
|
|
|
This chapter tries to cover some of the issues, largely related to
|
|
|
|
unusual forms of addressing and jump instructions, encountered when
|
|
|
|
writing operating system code such as protected-mode initialisation
|
|
|
|
routines, which require code that operates in mixed segment sizes,
|
|
|
|
such as code in a 16-bit segment trying to modify data in a 32-bit
|
|
|
|
one, or jumps between different-size segments.
|
|
|
|
|
|
|
|
\H{mixjump} Mixed-Size Jumps\I{jumps, mixed-size}
|
|
|
|
|
|
|
|
\I{operating system, writing}\I{writing operating systems}The most
|
|
|
|
common form of \i{mixed-size instruction} is the one used when
|
|
|
|
writing a 32-bit OS: having done your setup in 16-bit mode, such as
|
|
|
|
loading the kernel, you then have to boot it by switching into
|
|
|
|
protected mode and jumping to the 32-bit kernel start address. In a
|
|
|
|
fully 32-bit OS, this tends to be the \e{only} mixed-size
|
|
|
|
instruction you need, since everything before it can be done in pure
|
|
|
|
16-bit code, and everything after it can be pure 32-bit.
|
|
|
|
|
|
|
|
This jump must specify a 48-bit far address, since the target
|
|
|
|
segment is a 32-bit one. However, it must be assembled in a 16-bit
|
|
|
|
segment, so just coding, for example,
|
|
|
|
|
|
|
|
\c jmp 0x1234:0x56789ABC ; wrong!
|
|
|
|
|
|
|
|
will not work, since the offset part of the address will be
|
|
|
|
truncated to \c{0x9ABC} and the jump will be an ordinary 16-bit far
|
|
|
|
one.
|
|
|
|
|
|
|
|
The Linux kernel setup code gets round the inability of \c{as86} to
|
|
|
|
generate the required instruction by coding it manually, using
|
|
|
|
\c{DB} instructions. NASM can go one better than that, by actually
|
|
|
|
generating the right instruction itself. Here's how to do it right:
|
|
|
|
|
|
|
|
\c jmp dword 0x1234:0x56789ABC ; right
|
|
|
|
|
|
|
|
\I\c{JMP DWORD}The \c{DWORD} prefix (strictly speaking, it should
|
|
|
|
come \e{after} the colon, since it is declaring the \e{offset} field
|
|
|
|
to be a doubleword; but NASM will accept either form, since both are
|
|
|
|
unambiguous) forces the offset part to be treated as far, on the
|
|
|
|
assumption that you are deliberately writing a jump from a 16-bit
|
|
|
|
segment to a 32-bit one.
|
|
|
|
|
|
|
|
You can do the reverse operation, jumping from a 32-bit segment to a
|
|
|
|
16-bit one, by means of the \c{WORD} prefix:
|
|
|
|
|
|
|
|
\c jmp word 0x8765:0x4321 ; 32 to 16 bit
|
|
|
|
|
|
|
|
If the \c{WORD} prefix is specified in 16-bit mode, or the \c{DWORD}
|
|
|
|
prefix in 32-bit mode, they will be ignored, since each is
|
|
|
|
explicitly forcing NASM into a mode it was in anyway.
|
|
|
|
|
|
|
|
\H{mixaddr} Addressing Between Different-Size Segments\I{addressing,
|
|
|
|
mixed-size}\I{mixed-size addressing}
|
|
|
|
|
|
|
|
If your OS is mixed 16 and 32-bit, or if you are writing a DOS
|
|
|
|
extender, you are likely to have to deal with some 16-bit segments
|
|
|
|
and some 32-bit ones. At some point, you will probably end up
|
|
|
|
writing code in a 16-bit segment which has to access data in a
|
|
|
|
32-bit segment, or vice versa.
|
|
|
|
|
|
|
|
If the data you are trying to access in a 32-bit segment lies within
|
|
|
|
the first 64K of the segment, you may be able to get away with using
|
|
|
|
an ordinary 16-bit addressing operation for the purpose; but sooner
|
|
|
|
or later, you will want to do 32-bit addressing from 16-bit mode.
|
|
|
|
|
|
|
|
The easiest way to do this is to make sure you use a register for
|
|
|
|
the address, since any effective address containing a 32-bit
|
|
|
|
register is forced to be a 32-bit address. So you can do
|
|
|
|
|
|
|
|
\c mov eax,offset_into_32_bit_segment_specified_by_fs
|
|
|
|
\c mov dword [fs:eax],0x11223344
|
|
|
|
|
|
|
|
This is fine, but slightly cumbersome (since it wastes an
|
|
|
|
instruction and a register) if you already know the precise offset
|
|
|
|
you are aiming at. The x86 architecture does allow 32-bit effective
|
|
|
|
addresses to specify nothing but a 4-byte offset, so why shouldn't
|
|
|
|
NASM be able to generate the best instruction for the purpose?
|
|
|
|
|
|
|
|
It can. As in \k{mixjump}, you need only prefix the address with the
|
|
|
|
\c{DWORD} keyword, and it will be forced to be a 32-bit address:
|
|
|
|
|
|
|
|
\c mov dword [fs:dword my_offset],0x11223344
|
|
|
|
|
|
|
|
Also as in \k{mixjump}, NASM is not fussy about whether the
|
|
|
|
\c{DWORD} prefix comes before or after the segment override, so
|
|
|
|
arguably a nicer-looking way to code the above instruction is
|
|
|
|
|
|
|
|
\c mov dword [dword fs:my_offset],0x11223344
|
|
|
|
|
|
|
|
Don't confuse the \c{DWORD} prefix \e{outside} the square brackets,
|
|
|
|
which controls the size of the data stored at the address, with the
|
|
|
|
one \e{inside} the square brackets which controls the length of the
|
|
|
|
address itself. The two can quite easily be different:
|
|
|
|
|
|
|
|
\c mov word [dword 0x12345678],0x9ABC
|
|
|
|
|
|
|
|
This moves 16 bits of data to an address specified by a 32-bit
|
|
|
|
offset.
|
|
|
|
|
|
|
|
You can also specify \c{WORD} or \c{DWORD} prefixes along with the
|
|
|
|
\c{FAR} prefix to indirect far jumps or calls. For example:
|
|
|
|
|
|
|
|
\c call dword far [fs:word 0x4321]
|
|
|
|
|
|
|
|
This instruction contains an address specified by a 16-bit offset;
|
|
|
|
it loads a 48-bit far pointer from that (16-bit segment and 32-bit
|
|
|
|
offset), and calls that address.
|
|
|
|
|
|
|
|
\H{mixother} Other Mixed-Size Instructions
|
|
|
|
|
|
|
|
Another way you might want to access data is by using the
|
|
|
|
string instructions (\c{LODSx}, \c{STOSx} and so on) or the
|
|
|
|
\c{XLATB} instruction. These instructions, since they take no
|
|
|
|
parameters, might seem to have no easy way to make them perform
|
|
|
|
32-bit addressing when assembled in a 16-bit segment.
|
|
|
|
|
|
|
|
This is the purpose of NASM's \i\c{a16} and \i\c{a32} prefixes. If
|
|
|
|
you are coding \c{LODSB} in a 16-bit segment but it is supposed to
|
|
|
|
be accessing a string in a 32-bit segment, you should load the
|
|
|
|
desired address into \c{ESI} and then code
|
|
|
|
|
|
|
|
\c a32 lodsb
|
|
|
|
|
|
|
|
The prefix forces the addressing size to 32 bits, meaning that
|
|
|
|
\c{LODSB} loads from \c{[DS:ESI]} instead of \c{[DS:SI]}. To access
|
|
|
|
a string in a 16-bit segment when coding in a 32-bit one, the
|
|
|
|
corresponding \c{a16} prefix can be used.
|
|
|
|
|
|
|
|
The \c{a16} and \c{a32} prefixes can be applied to any instruction
|
|
|
|
in NASM's instruction table, but most instructions can generate all the
|
|
|
|
useful forms without them. The prefixes are necessary only for
|
|
|
|
instructions with implicit addressing: \c{CMPSx} (\k{insCMPSB}),
|
|
|
|
\c{SCASx} (\k{insSCASB}), \c{LODSx} (\k{insLODSB}), \c{STOSx}
|
|
|
|
(\k{insSTOSB}), \c{MOVSx} (\k{insMOVSB}), \c{INSx} (\k{insINSB}),
|
|
|
|
\c{OUTSx} (\k{insOUTSB}), and \c{XLATB} (\k{insXLATB}). Also, the
|
|
|
|
various push and pop instructions (\c{PUSHA} and \c{POPF} as well as
|
|
|
|
the more usual \c{PUSH} and \c{POP}) can accept \c{a16} or \c{a32}
|
|
|
|
prefixes to force a particular one of \c{SP} or \c{ESP} to be used
|
|
|
|
as a stack pointer, in case the stack segment in use is a different
|
|
|
|
size from the code segment.
|
|
|
|
|
|
|
|
\c{PUSH} and \c{POP}, when applied to segment registers in 32-bit
|
|
|
|
mode, also have the slightly odd behaviour that they push and pop 4
|
|
|
|
bytes at a time, of which the top two are ignored and the bottom two
|
|
|
|
give the value of the segment register being manipulated. To force
|
|
|
|
the 16-bit behaviour of segment-register push and pop instructions,
|
|
|
|
you can use the operand-size prefix \i\c{o16}:
|
|
|
|
|
|
|
|
\c o16 push ss
|
|
|
|
\c o16 push ds
|
|
|
|
|
|
|
|
This code saves a doubleword of stack space by fitting two segment
|
|
|
|
registers into the space which would normally be consumed by pushing
|
|
|
|
one.
|
|
|
|
|
|
|
|
(You can also use the \i\c{o32} prefix to force the 32-bit behaviour
|
|
|
|
when in 16-bit mode, but this seems less useful.)
|
|
|
|
|
|
|
|
\C{trouble} Troubleshooting
|
|
|
|
|
|
|
|
This chapter describes some of the common problems that users have
|
|
|
|
been known to encounter with NASM, and answers them. It also gives
|
|
|
|
instructions for reporting bugs in NASM if you find a difficulty
|
|
|
|
that isn't listed here.
|
|
|
|
|
|
|
|
\H{problems} Common Problems
|
|
|
|
|
|
|
|
\S{inefficient} NASM Generates \i{Inefficient Code}
|
|
|
|
|
|
|
|
I get a lot of `bug' reports about NASM generating inefficient, or
|
|
|
|
even `wrong', code on instructions such as \c{ADD ESP,8}. This is a
|
|
|
|
deliberate design feature, connected to predictability of output:
|
|
|
|
NASM, on seeing \c{ADD ESP,8}, will generate the form of the
|
|
|
|
instruction which leaves room for a 32-bit offset. You need to code
|
|
|
|
\I\c{BYTE}\c{ADD ESP,BYTE 8} if you want the space-efficient
|
|
|
|
form of the instruction. This isn't a bug: at worst it's a
|
|
|
|
misfeature, and that's a matter of opinion only.
|
|
|
|
|
|
|
|
\S{jmprange} My Jumps are Out of Range\I{out of range, jumps}
|
|
|
|
|
|
|
|
Similarly, people complain that when they issue \i{conditional
|
|
|
|
jumps} (which are \c{SHORT} by default) that try to jump too far,
|
|
|
|
NASM reports `short jump out of range' instead of making the jumps
|
|
|
|
longer.
|
|
|
|
|
|
|
|
This, again, is partly a predictability issue, but in fact has a
|
|
|
|
more practical reason as well. NASM has no means of being told what
|
|
|
|
type of processor the code it is generating will be run on; so it
|
|
|
|
cannot decide for itself that it should generate \i\c{Jcc NEAR} type
|
|
|
|
instructions, because it doesn't know that it's working for a 386 or
|
|
|
|
above. Alternatively, it could replace the out-of-range short
|
|
|
|
\c{JNE} instruction with a very short \c{JE} instruction that jumps
|
|
|
|
over a \c{JMP NEAR}; this is a sensible solution for processors
|
|
|
|
below a 386, but hardly efficient on processors which have good
|
|
|
|
branch prediction \e{and} could have used \c{JNE NEAR} instead. So,
|
|
|
|
once again, it's up to the user, not the assembler, to decide what
|
|
|
|
instructions should be generated.
|
|
|
|
|
|
|
|
\S{proborg} \i\c{ORG} Doesn't Work
|
|
|
|
|
|
|
|
People writing \i{boot sector} programs in the \c{bin} format often
|
|
|
|
complain that \c{ORG} doesn't work the way they'd like: in order to
|
|
|
|
place the \c{0xAA55} signature word at the end of a 512-byte boot
|
|
|
|
sector, people who are used to MASM tend to code
|
|
|
|
|
|
|
|
\c ORG 0
|
|
|
|
\c ; some boot sector code
|
|
|
|
\c ORG 510
|
|
|
|
\c DW 0xAA55
|
|
|
|
|
|
|
|
This is not the intended use of the \c{ORG} directive in NASM, and
|
|
|
|
will not work. The correct way to solve this problem in NASM is to
|
|
|
|
use the \i\c{TIMES} directive, like this:
|
|
|
|
|
|
|
|
\c ORG 0
|
|
|
|
\c ; some boot sector code
|
|
|
|
\c TIMES 510-($-$$) DB 0
|
|
|
|
\c DW 0xAA55
|
|
|
|
|
|
|
|
The \c{TIMES} directive will insert exactly enough zero bytes into
|
|
|
|
the output to move the assembly point up to 510. This method also
|
|
|
|
has the advantage that if you accidentally fill your boot sector too
|
|
|
|
full, NASM will catch the problem at assembly time and report it, so
|
|
|
|
you won't end up with a boot sector that you have to disassemble to
|
|
|
|
find out what's wrong with it.
|
|
|
|
|
|
|
|
\S{probtimes} \i\c{TIMES} Doesn't Work
|
|
|
|
|
|
|
|
The other common problem with the above code is people who write the
|
|
|
|
\c{TIMES} line as
|
|
|
|
|
|
|
|
\c TIMES 510-$ DB 0
|
|
|
|
|
|
|
|
by reasoning that \c{$} should be a pure number, just like 510, so
|
|
|
|
the difference between them is also a pure number and can happily be
|
|
|
|
fed to \c{TIMES}.
|
|
|
|
|
|
|
|
NASM is a \e{modular} assembler: the various component parts are
|
|
|
|
designed to be easily separable for re-use, so they don't exchange
|
|
|
|
information unnecessarily. In consequence, the \c{bin} output
|
|
|
|
format, even though it has been told by the \c{ORG} directive that
|
|
|
|
the \c{.text} section should start at 0, does not pass that
|
|
|
|
information back to the expression evaluator. So from the
|
|
|
|
evaluator's point of view, \c{$} isn't a pure number: it's an offset
|
|
|
|
from a section base. Therefore the difference between \c{$} and 510
|
|
|
|
is also not a pure number, but involves a section base. Values
|
|
|
|
involving section bases cannot be passed as arguments to \c{TIMES}.
|
|
|
|
|
|
|
|
The solution, as in the previous section, is to code the \c{TIMES}
|
|
|
|
line in the form
|
|
|
|
|
|
|
|
\c TIMES 510-($-$$) DB 0
|
|
|
|
|
|
|
|
in which \c{$} and \c{$$} are offsets from the same section base,
|
|
|
|
and so their difference is a pure number. This will solve the
|
|
|
|
problem and generate sensible code.
|
|
|
|
|
|
|
|
\H{bugs} \i{Bugs}\I{reporting bugs}
|
|
|
|
|
|
|
|
We have never yet released a version of NASM with any \e{known}
|
|
|
|
bugs. That doesn't usually stop there being plenty we didn't know
|
|
|
|
about, though. Any that you find should be reported to
|
2002-05-01 04:58:18 +08:00
|
|
|
\W{mailto:hpa@zytor.com}\c{hpa@zytor.com}.
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
Please read \k{qstart} first, and don't report the bug if it's
|
|
|
|
listed in there as a deliberate feature. (If you think the feature
|
|
|
|
is badly thought out, feel free to send us reasons why you think it
|
|
|
|
should be changed, but don't just send us mail saying `This is a
|
|
|
|
bug' if the documentation says we did it on purpose.) Then read
|
|
|
|
\k{problems}, and don't bother reporting the bug if it's listed
|
|
|
|
there.
|
|
|
|
|
|
|
|
If you do report a bug, \e{please} give us all of the following
|
|
|
|
information:
|
|
|
|
|
|
|
|
\b What operating system you're running NASM under. DOS, Linux,
|
|
|
|
NetBSD, Win16, Win32, VMS (I'd be impressed), whatever.
|
|
|
|
|
|
|
|
\b If you're running NASM under DOS or Win32, tell us whether you've
|
|
|
|
compiled your own executable from the DOS source archive, or whether
|
|
|
|
you were using the standard distribution binaries out of the
|
|
|
|
archive. If you were using a locally built executable, try to
|
|
|
|
reproduce the problem using one of the standard binaries, as this
|
|
|
|
will make it easier for us to reproduce your problem prior to fixing
|
|
|
|
it.
|
|
|
|
|
|
|
|
\b Which version of NASM you're using, and exactly how you invoked
|
|
|
|
it. Give us the precise command line, and the contents of the
|
|
|
|
\c{NASM} environment variable if any.
|
|
|
|
|
|
|
|
\b Which versions of any supplementary programs you're using, and
|
|
|
|
how you invoked them. If the problem only becomes visible at link
|
|
|
|
time, tell us what linker you're using, what version of it you've
|
|
|
|
got, and the exact linker command line. If the problem involves
|
|
|
|
linking against object files generated by a compiler, tell us what
|
|
|
|
compiler, what version, and what command line or options you used.
|
|
|
|
(If you're compiling in an IDE, please try to reproduce the problem
|
|
|
|
with the command-line version of the compiler.)
|
|
|
|
|
|
|
|
\b If at all possible, send us a NASM source file which exhibits the
|
|
|
|
problem. If this causes copyright problems (e.g. you can only
|
|
|
|
reproduce the bug in restricted-distribution code) then bear in mind
|
|
|
|
the following two points: firstly, we guarantee that any source code
|
|
|
|
sent to us for the purposes of debugging NASM will be used \e{only}
|
|
|
|
for the purposes of debugging NASM, and that we will delete all our
|
|
|
|
copies of it as soon as we have found and fixed the bug or bugs in
|
|
|
|
question; and secondly, we would prefer \e{not} to be mailed large
|
|
|
|
chunks of code anyway. The smaller the file, the better. A
|
|
|
|
three-line sample file that does nothing useful \e{except}
|
|
|
|
demonstrate the problem is much easier to work with than a
|
|
|
|
fully fledged ten-thousand-line program. (Of course, some errors
|
|
|
|
\e{do} only crop up in large files, so this may not be possible.)
|
|
|
|
|
|
|
|
\b A description of what the problem actually \e{is}. `It doesn't
|
|
|
|
work' is \e{not} a helpful description! Please describe exactly what
|
|
|
|
is happening that shouldn't be, or what isn't happening that should.
|
|
|
|
Examples might be: `NASM generates an error message saying Line 3
|
|
|
|
for an error that's actually on Line 5'; `NASM generates an error
|
|
|
|
message that I believe it shouldn't be generating at all'; `NASM
|
|
|
|
fails to generate an error message that I believe it \e{should} be
|
|
|
|
generating'; `the object file produced from this source code crashes
|
|
|
|
my linker'; `the ninth byte of the output file is 66 and I think it
|
|
|
|
should be 77 instead'.
|
|
|
|
|
|
|
|
\b If you believe the output file from NASM to be faulty, send it to
|
|
|
|
us. That allows us to determine whether our own copy of NASM
|
|
|
|
generates the same file, or whether the problem is related to
|
|
|
|
portability issues between our development platforms and yours. We
|
|
|
|
can handle binary files mailed to us as MIME attachments, uuencoded,
|
|
|
|
and even BinHex. Alternatively, we may be able to provide an FTP
|
|
|
|
site you can upload the suspect files to; but mailing them is easier
|
|
|
|
for us.
|
|
|
|
|
|
|
|
\b Any other information or data files that might be helpful. If,
|
|
|
|
for example, the problem involves NASM failing to generate an object
|
|
|
|
file while TASM can generate an equivalent file without trouble,
|
|
|
|
then send us \e{both} object files, so we can see what TASM is doing
|
|
|
|
differently from us.
|
|
|
|
|
|
|
|
\A{iref} Intel x86 Instruction Reference
|
|
|
|
|
|
|
|
This appendix provides a complete list of the machine instructions
|
|
|
|
which NASM will assemble, and a short description of the function of
|
|
|
|
each one.
|
|
|
|
|
|
|
|
It is not intended to be exhaustive documentation on the fine
|
|
|
|
details of the instructions' function, such as which exceptions they
|
|
|
|
can trigger: for such documentation, you should go to Intel's Web
|
2002-05-01 04:58:18 +08:00
|
|
|
site, \W{http://www.intel.com/}\c{http://www.intel.com/}.
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
Instead, this appendix is intended primarily to provide
|
|
|
|
documentation on the way the instructions may be used within NASM.
|
|
|
|
For example, looking up \c{LOOP} will tell you that NASM allows
|
|
|
|
\c{CX} or \c{ECX} to be specified as an optional second argument to
|
|
|
|
the \c{LOOP} instruction, to enforce which of the two possible
|
|
|
|
counter registers should be used if the default is not the one
|
|
|
|
desired.
|
|
|
|
|
|
|
|
The instructions are not quite listed in alphabetical order, since
|
|
|
|
groups of instructions with similar functions are lumped together in
|
|
|
|
the same entry. Most of them don't move very far from their
|
|
|
|
alphabetic position because of this.
|
|
|
|
|
|
|
|
\H{iref-opr} Key to Operand Specifications
|
|
|
|
|
|
|
|
The instruction descriptions in this appendix specify their operands
|
|
|
|
using the following notation:
|
|
|
|
|
|
|
|
\b Registers: \c{reg8} denotes an 8-bit \i{general purpose
|
|
|
|
register}, \c{reg16} denotes a 16-bit general purpose register, and
|
|
|
|
\c{reg32} a 32-bit one. \c{fpureg} denotes one of the eight FPU
|
|
|
|
stack registers, \c{mmxreg} denotes one of the eight 64-bit MMX
|
|
|
|
registers, and \c{segreg} denotes a segment register. In addition,
|
|
|
|
some registers (such as \c{AL}, \c{DX} or
|
|
|
|
\c{ECX}) may be specified explicitly.
|
|
|
|
|
|
|
|
\b Immediate operands: \c{imm} denotes a generic \i{immediate operand}.
|
|
|
|
\c{imm8}, \c{imm16} and \c{imm32} are used when the operand is
|
|
|
|
intended to be a specific size. For some of these instructions, NASM
|
|
|
|
needs an explicit specifier: for example, \c{ADD ESP,16} could be
|
|
|
|
interpreted as either \c{ADD r/m32,imm32} or \c{ADD r/m32,imm8}.
|
|
|
|
NASM chooses the former by default, and so you must specify \c{ADD
|
|
|
|
ESP,BYTE 16} for the latter.
|
|
|
|
|
|
|
|
\b Memory references: \c{mem} denotes a generic \i{memory reference};
|
|
|
|
\c{mem8}, \c{mem16}, \c{mem32}, \c{mem64} and \c{mem80} are used
|
|
|
|
when the operand needs to be a specific size. Again, a specifier is
|
|
|
|
needed in some cases: \c{DEC [address]} is ambiguous and will be
|
|
|
|
rejected by NASM. You must specify \c{DEC BYTE [address]}, \c{DEC
|
|
|
|
WORD [address]} or \c{DEC DWORD [address]} instead.
|
|
|
|
|
|
|
|
\b \i{Restricted memory references}: one form of the \c{MOV}
|
|
|
|
instruction allows a memory address to be specified \e{without}
|
|
|
|
allowing the normal range of register combinations and effective
|
|
|
|
address processing. This is denoted by \c{memoffs8}, \c{memoffs16}
|
|
|
|
and \c{memoffs32}.
|
|
|
|
|
|
|
|
\b Register or memory choices: many instructions can accept either a
|
|
|
|
register \e{or} a memory reference as an operand. \c{r/m8} is a
|
|
|
|
shorthand for \c{reg8/mem8}; similarly \c{r/m16} and \c{r/m32}.
|
|
|
|
\c{r/m64} is MMX-related, and is a shorthand for \c{mmxreg/mem64}.
|
|
|
|
|
|
|
|
\H{iref-opc} Key to Opcode Descriptions
|
|
|
|
|
|
|
|
This appendix also provides the opcodes which NASM will generate for
|
|
|
|
each form of each instruction. The opcodes are listed in the
|
|
|
|
following way:
|
|
|
|
|
|
|
|
\b A hex number, such as \c{3F}, indicates a fixed byte containing
|
|
|
|
that number.
|
|
|
|
|
|
|
|
\b A hex number followed by \c{+r}, such as \c{C8+r}, indicates that
|
|
|
|
one of the operands to the instruction is a register, and the
|
|
|
|
`register value' of that register should be added to the hex number
|
|
|
|
to produce the generated byte. For example, \c{EDX} has register value
|
|
|
|
2, so the code \c{C8+r}, when the register operand is \c{EDX}, generates
|
|
|
|
the hex byte \c{CA}. Register values for specific registers are
|
|
|
|
given in \k{iref-rv}.
|
|
|
|
|
|
|
|
\b A hex number followed by \c{+cc}, such as \c{40+cc}, indicates
|
|
|
|
that the instruction name has a condition code suffix, and the
|
|
|
|
numeric representation of the condition code should be added to the
|
|
|
|
hex number to produce the generated byte. For example, the code
|
|
|
|
\c{40+cc}, when the instruction contains the \c{NE} condition,
|
|
|
|
generates the hex byte \c{45}. Condition codes and their numeric
|
|
|
|
representations are given in \k{iref-cc}.
|
|
|
|
|
|
|
|
\b A slash followed by a digit, such as \c{/2}, indicates that one
|
|
|
|
of the operands to the instruction is a memory address or register
|
|
|
|
(denoted \c{mem} or \c{r/m}, with an optional size). This is to be
|
|
|
|
encoded as an effective address, with a \i{ModR/M byte}, an optional
|
|
|
|
\i{SIB byte}, and an optional displacement, and the spare (register)
|
|
|
|
field of the ModR/M byte should be the digit given (which will be
|
|
|
|
from 0 to 7, so it fits in three bits). The encoding of effective
|
|
|
|
addresses is given in \k{iref-ea}.
|
|
|
|
|
|
|
|
\b The code \c{/r} combines the above two: it indicates that one of
|
|
|
|
the operands is a memory address or \c{r/m}, and another is a
|
|
|
|
register, and that an effective address should be generated with the
|
|
|
|
spare (register) field in the ModR/M byte being equal to the
|
|
|
|
`register value' of the register operand. The encoding of effective
|
|
|
|
addresses is given in \k{iref-ea}; register values are given in
|
|
|
|
\k{iref-rv}.
|
|
|
|
|
|
|
|
\b The codes \c{ib}, \c{iw} and \c{id} indicate that one of the
|
|
|
|
operands to the instruction is an immediate value, and that this is
|
|
|
|
to be encoded as a byte, little-endian word or little-endian
|
|
|
|
doubleword respectively.
|
|
|
|
|
|
|
|
\b The codes \c{rb}, \c{rw} and \c{rd} indicate that one of the
|
|
|
|
operands to the instruction is an immediate value, and that the
|
|
|
|
\e{difference} between this value and the address of the end of the
|
|
|
|
instruction is to be encoded as a byte, word or doubleword
|
|
|
|
respectively. Where the form \c{rw/rd} appears, it indicates that
|
|
|
|
either \c{rw} or \c{rd} should be used according to whether assembly
|
|
|
|
is being performed in \c{BITS 16} or \c{BITS 32} state respectively.
|
|
|
|
|
|
|
|
\b The codes \c{ow} and \c{od} indicate that one of the operands to
|
|
|
|
the instruction is a reference to the contents of a memory address
|
|
|
|
specified as an immediate value: this encoding is used in some forms
|
|
|
|
of the \c{MOV} instruction in place of the standard
|
|
|
|
effective-address mechanism. The displacement is encoded as a word
|
|
|
|
or doubleword. Again, \c{ow/od} denotes that \c{ow} or \c{od} should
|
|
|
|
be chosen according to the \c{BITS} setting.
|
|
|
|
|
|
|
|
\b The codes \c{o16} and \c{o32} indicate that the given form of the
|
|
|
|
instruction should be assembled with operand size 16 or 32 bits. In
|
|
|
|
other words, \c{o16} indicates a \c{66} prefix in \c{BITS 32} state,
|
|
|
|
but generates no code in \c{BITS 16} state; and \c{o32} indicates a
|
|
|
|
\c{66} prefix in \c{BITS 16} state but generates nothing in \c{BITS
|
|
|
|
32}.
|
|
|
|
|
|
|
|
\b The codes \c{a16} and \c{a32}, similarly to \c{o16} and \c{o32},
|
|
|
|
indicate the address size of the given form of the instruction.
|
|
|
|
Where this does not match the \c{BITS} setting, a \c{67} prefix is
|
|
|
|
required.
|
|
|
|
|
|
|
|
\S{iref-rv} Register Values
|
|
|
|
|
|
|
|
Where an instruction requires a register value, it is already
|
|
|
|
implicit in the encoding of the rest of the instruction what type of
|
|
|
|
register is intended: an 8-bit general-purpose register, a segment
|
|
|
|
register, a debug register, an MMX register, or whatever. Therefore
|
|
|
|
there is no problem with registers of different types sharing an
|
|
|
|
encoding value.
|
|
|
|
|
|
|
|
The encodings for the various classes of register are:
|
|
|
|
|
|
|
|
\b 8-bit general registers: \c{AL} is 0, \c{CL} is 1, \c{DL} is 2,
|
|
|
|
\c{BL} is 3, \c{AH} is 4, \c{CH} is 5, \c{DH} is 6, and \c{BH} is
|
|
|
|
7.
|
|
|
|
|
|
|
|
\b 16-bit general registers: \c{AX} is 0, \c{CX} is 1, \c{DX} is 2,
|
|
|
|
\c{BX} is 3, \c{SP} is 4, \c{BP} is 5, \c{SI} is 6, and \c{DI} is 7.
|
|
|
|
|
|
|
|
\b 32-bit general registers: \c{EAX} is 0, \c{ECX} is 1, \c{EDX} is
|
|
|
|
2, \c{EBX} is 3, \c{ESP} is 4, \c{EBP} is 5, \c{ESI} is 6, and
|
|
|
|
\c{EDI} is 7.
|
|
|
|
|
|
|
|
\b \i{Segment registers}: \c{ES} is 0, \c{CS} is 1, \c{SS} is 2, \c{DS}
|
|
|
|
is 3, \c{FS} is 4, and \c{GS} is 5.
|
|
|
|
|
2002-05-01 05:01:08 +08:00
|
|
|
\b \I{floating-point, registers}Floating-point registers: \c{ST0}
|
2002-05-01 04:52:49 +08:00
|
|
|
is 0, \c{ST1} is 1, \c{ST2} is 2, \c{ST3} is 3, \c{ST4} is 4,
|
|
|
|
\c{ST5} is 5, \c{ST6} is 6, and \c{ST7} is 7.
|
|
|
|
|
|
|
|
\b 64-bit \i{MMX registers}: \c{MM0} is 0, \c{MM1} is 1, \c{MM2} is 2,
|
|
|
|
\c{MM3} is 3, \c{MM4} is 4, \c{MM5} is 5, \c{MM6} is 6, and \c{MM7}
|
|
|
|
is 7.
|
|
|
|
|
|
|
|
\b \i{Control registers}: \c{CR0} is 0, \c{CR2} is 2, \c{CR3} is 3,
|
|
|
|
and \c{CR4} is 4.
|
|
|
|
|
|
|
|
\b \i{Debug registers}: \c{DR0} is 0, \c{DR1} is 1, \c{DR2} is 2,
|
|
|
|
\c{DR3} is 3, \c{DR6} is 6, and \c{DR7} is 7.
|
|
|
|
|
|
|
|
\b \i{Test registers}: \c{TR3} is 3, \c{TR4} is 4, \c{TR5} is 5,
|
|
|
|
\c{TR6} is 6, and \c{TR7} is 7.
|
|
|
|
|
|
|
|
(Note that wherever a register name contains a number, that number
|
|
|
|
is also the register value for that register.)
|
|
|
|
|
|
|
|
\S{iref-cc} \i{Condition Codes}
|
|
|
|
|
|
|
|
The available condition codes are given here, along with their
|
|
|
|
numeric representations as part of opcodes. Many of these condition
|
|
|
|
codes have synonyms, so several will be listed at a time.
|
|
|
|
|
|
|
|
In the following descriptions, the word `either', when applied to two
|
|
|
|
possible trigger conditions, is used to mean `either or both'. If
|
|
|
|
`either but not both' is meant, the phrase `exactly one of' is used.
|
|
|
|
|
|
|
|
\b \c{O} is 0 (trigger if the overflow flag is set); \c{NO} is 1.
|
|
|
|
|
|
|
|
\b \c{B}, \c{C} and \c{NAE} are 2 (trigger if the carry flag is
|
|
|
|
set); \c{AE}, \c{NB} and \c{NC} are 3.
|
|
|
|
|
|
|
|
\b \c{E} and \c{Z} are 4 (trigger if the zero flag is set); \c{NE}
|
|
|
|
and \c{NZ} are 5.
|
|
|
|
|
|
|
|
\b \c{BE} and \c{NA} are 6 (trigger if either of the carry or zero
|
|
|
|
flags is set); \c{A} and \c{NBE} are 7.
|
|
|
|
|
|
|
|
\b \c{S} is 8 (trigger if the sign flag is set); \c{NS} is 9.
|
|
|
|
|
|
|
|
\b \c{P} and \c{PE} are 10 (trigger if the parity flag is set);
|
|
|
|
\c{NP} and \c{PO} are 11.
|
|
|
|
|
|
|
|
\b \c{L} and \c{NGE} are 12 (trigger if exactly one of the sign and
|
|
|
|
overflow flags is set); \c{GE} and \c{NL} are 13.
|
|
|
|
|
|
|
|
\b \c{LE} and \c{NG} are 14 (trigger if either the zero flag is set,
|
|
|
|
or exactly one of the sign and overflow flags is set); \c{G} and
|
|
|
|
\c{NLE} are 15.
|
|
|
|
|
|
|
|
Note that in all cases, the sense of a condition code may be
|
|
|
|
reversed by changing the low bit of the numeric representation.
|
|
|
|
|
|
|
|
\S{iref-ea} Effective Address Encoding: \i{ModR/M} and \i{SIB}
|
|
|
|
|
|
|
|
An \i{effective address} is encoded in up to three parts: a ModR/M
|
|
|
|
byte, an optional SIB byte, and an optional byte, word or doubleword
|
|
|
|
displacement field.
|
|
|
|
|
|
|
|
The ModR/M byte consists of three fields: the \c{mod} field, ranging
|
|
|
|
from 0 to 3, in the upper two bits of the byte, the \c{r/m} field,
|
|
|
|
ranging from 0 to 7, in the lower three bits, and the spare
|
|
|
|
(register) field in the middle (bit 3 to bit 5). The spare field is
|
|
|
|
not relevant to the effective address being encoded, and either
|
|
|
|
contains an extension to the instruction opcode or the register
|
|
|
|
value of another operand.
|
|
|
|
|
|
|
|
The ModR/M system can be used to encode a direct register reference
|
|
|
|
rather than a memory access. This is always done by setting the
|
|
|
|
\c{mod} field to 3 and the \c{r/m} field to the register value of
|
|
|
|
the register in question (it must be a general-purpose register, and
|
|
|
|
the size of the register must already be implicit in the encoding of
|
|
|
|
the rest of the instruction). In this case, the SIB byte and
|
|
|
|
displacement field are both absent.
|
|
|
|
|
|
|
|
In 16-bit addressing mode (either \c{BITS 16} with no \c{67} prefix,
|
|
|
|
or \c{BITS 32} with a \c{67} prefix), the SIB byte is never used.
|
|
|
|
The general rules for \c{mod} and \c{r/m} (there is an exception,
|
|
|
|
given below) are:
|
|
|
|
|
|
|
|
\b The \c{mod} field gives the length of the displacement field: 0
|
|
|
|
means no displacement, 1 means one byte, and 2 means two bytes.
|
|
|
|
|
|
|
|
\b The \c{r/m} field encodes the combination of registers to be
|
|
|
|
added to the displacement to give the accessed address: 0 means
|
|
|
|
\c{BX+SI}, 1 means \c{BX+DI}, 2 means \c{BP+SI}, 3 means \c{BP+DI},
|
|
|
|
4 means \c{SI} only, 5 means \c{DI} only, 6 means \c{BP} only, and 7
|
|
|
|
means \c{BX} only.
|
|
|
|
|
|
|
|
However, there is a special case:
|
|
|
|
|
|
|
|
\b If \c{mod} is 0 and \c{r/m} is 6, the effective address encoded
|
|
|
|
is not \c{[BP]} as the above rules would suggest, but instead
|
|
|
|
\c{[disp16]}: the displacement field is present and is two bytes
|
|
|
|
long, and no registers are added to the displacement.
|
|
|
|
|
|
|
|
Therefore the effective address \c{[BP]} cannot be encoded as
|
|
|
|
efficiently as \c{[BX]}; so if you code \c{[BP]} in a program, NASM
|
|
|
|
adds a notional 8-bit zero displacement, and sets \c{mod} to 1,
|
|
|
|
\c{r/m} to 6, and the one-byte displacement field to 0.
|
|
|
|
|
|
|
|
In 32-bit addressing mode (either \c{BITS 16} with a \c{67} prefix,
|
|
|
|
or \c{BITS 32} with no \c{67} prefix) the general rules (again,
|
|
|
|
there are exceptions) for \c{mod} and \c{r/m} are:
|
|
|
|
|
|
|
|
\b The \c{mod} field gives the length of the displacement field: 0
|
|
|
|
means no displacement, 1 means one byte, and 2 means four bytes.
|
|
|
|
|
|
|
|
\b If only one register is to be added to the displacement, and it
|
|
|
|
is not \c{ESP}, the \c{r/m} field gives its register value, and the
|
|
|
|
SIB byte is absent. If the \c{r/m} field is 4 (which would encode
|
|
|
|
\c{ESP}), the SIB byte is present and gives the combination and
|
|
|
|
scaling of registers to be added to the displacement.
|
|
|
|
|
|
|
|
If the SIB byte is present, it describes the combination of
|
|
|
|
registers (an optional base register, and an optional index register
|
|
|
|
scaled by multiplication by 1, 2, 4 or 8) to be added to the
|
|
|
|
displacement. The SIB byte is divided into the \c{scale} field, in
|
|
|
|
the top two bits, the \c{index} field in the next three, and the
|
|
|
|
\c{base} field in the bottom three. The general rules are:
|
|
|
|
|
|
|
|
\b The \c{base} field encodes the register value of the base
|
|
|
|
register.
|
|
|
|
|
|
|
|
\b The \c{index} field encodes the register value of the index
|
|
|
|
register, unless it is 4, in which case no index register is used
|
|
|
|
(so \c{ESP} cannot be used as an index register).
|
|
|
|
|
|
|
|
\b The \c{scale} field encodes the multiplier by which the index
|
|
|
|
register is scaled before adding it to the base and displacement: 0
|
|
|
|
encodes a multiplier of 1, 1 encodes 2, 2 encodes 4 and 3 encodes 8.
|
|
|
|
|
|
|
|
The exceptions to the 32-bit encoding rules are:
|
|
|
|
|
|
|
|
\b If \c{mod} is 0 and \c{r/m} is 5, the effective address encoded
|
|
|
|
is not \c{[EBP]} as the above rules would suggest, but instead
|
|
|
|
\c{[disp32]}: the displacement field is present and is four bytes
|
|
|
|
long, and no registers are added to the displacement.
|
|
|
|
|
|
|
|
\b If \c{mod} is 0, \c{r/m} is 4 (meaning the SIB byte is present)
|
|
|
|
and \c{base} is 5, the effective address encoded is not
|
|
|
|
\c{[EBP+index]} as the above rules would suggest, but instead
|
|
|
|
\c{[disp32+index]}: the displacement field is present and is four
|
|
|
|
bytes long, and there is no base register (but the index register is
|
|
|
|
still processed in the normal way).
|
|
|
|
|
|
|
|
\H{iref-flg} Key to Instruction Flags
|
|
|
|
|
|
|
|
Given along with each instruction in this appendix is a set of
|
|
|
|
flags, denoting the type of the instruction. The types are as follows:
|
|
|
|
|
|
|
|
\b \c{8086}, \c{186}, \c{286}, \c{386}, \c{486}, \c{PENT} and \c{P6}
|
|
|
|
denote the lowest processor type that supports the instruction. Most
|
|
|
|
instructions run on all processors above the given type; those that
|
|
|
|
do not are documented. The Pentium II contains no additional
|
|
|
|
instructions beyond the P6 (Pentium Pro); from the point of view of
|
|
|
|
its instruction set, it can be thought of as a P6 with MMX
|
|
|
|
capability.
|
|
|
|
|
|
|
|
\b \c{CYRIX} indicates that the instruction is specific to Cyrix
|
|
|
|
processors, for example the extra MMX instructions in the Cyrix
|
|
|
|
extended MMX instruction set.
|
|
|
|
|
|
|
|
\b \c{FPU} indicates that the instruction is a floating-point one,
|
|
|
|
and will only run on machines with a coprocessor (automatically
|
|
|
|
including 486DX, Pentium and above).
|
|
|
|
|
|
|
|
\b \c{MMX} indicates that the instruction is an MMX one, and will
|
|
|
|
run on MMX-capable Pentium processors and the Pentium II.
|
|
|
|
|
|
|
|
\b \c{PRIV} indicates that the instruction is a protected-mode
|
|
|
|
management instruction. Many of these may only be used in protected
|
|
|
|
mode, or only at privilege level zero.
|
|
|
|
|
|
|
|
\b \c{UNDOC} indicates that the instruction is an undocumented one,
|
|
|
|
and not part of the official Intel Architecture; it may or may not
|
|
|
|
be supported on any given machine.
|
|
|
|
|
|
|
|
\H{insAAA} \i\c{AAA}, \i\c{AAS}, \i\c{AAM}, \i\c{AAD}: ASCII
|
|
|
|
Adjustments
|
|
|
|
|
|
|
|
\c AAA ; 37 [8086]
|
|
|
|
|
|
|
|
\c AAS ; 3F [8086]
|
|
|
|
|
|
|
|
\c AAD ; D5 0A [8086]
|
|
|
|
\c AAD imm ; D5 ib [8086]
|
|
|
|
|
|
|
|
\c AAM ; D4 0A [8086]
|
|
|
|
\c AAM imm ; D4 ib [8086]
|
|
|
|
|
|
|
|
These instructions are used in conjunction with the add, subtract,
|
|
|
|
multiply and divide instructions to perform binary-coded decimal
|
|
|
|
arithmetic in \e{unpacked} (one BCD digit per byte - easy to
|
|
|
|
translate to and from ASCII, hence the instruction names) form.
|
|
|
|
There are also packed BCD instructions \c{DAA} and \c{DAS}: see
|
|
|
|
\k{insDAA}.
|
|
|
|
|
|
|
|
\c{AAA} should be used after a one-byte \c{ADD} instruction whose
|
|
|
|
destination was the \c{AL} register: by means of examining the value
|
|
|
|
in the low nibble of \c{AL} and also the auxiliary carry flag
|
|
|
|
\c{AF}, it determines whether the addition has overflowed, and
|
|
|
|
adjusts it (and sets the carry flag) if so. You can add long BCD
|
|
|
|
strings together by doing \c{ADD}/\c{AAA} on the low digits, then
|
|
|
|
doing \c{ADC}/\c{AAA} on each subsequent digit.
|
|
|
|
|
|
|
|
\c{AAS} works similarly to \c{AAA}, but is for use after \c{SUB}
|
|
|
|
instructions rather than \c{ADD}.
|
|
|
|
|
|
|
|
\c{AAM} is for use after you have multiplied two decimal digits
|
|
|
|
together and left the result in \c{AL}: it divides \c{AL} by ten and
|
|
|
|
stores the quotient in \c{AH}, leaving the remainder in \c{AL}. The
|
|
|
|
divisor 10 can be changed by specifying an operand to the
|
|
|
|
instruction: a particularly handy use of this is \c{AAM 16}, causing
|
|
|
|
the two nibbles in \c{AL} to be separated into \c{AH} and \c{AL}.
|
|
|
|
|
|
|
|
\c{AAD} performs the inverse operation to \c{AAM}: it multiplies
|
|
|
|
\c{AH} by ten, adds it to \c{AL}, and sets \c{AH} to zero. Again,
|
|
|
|
the multiplier 10 can be changed.
|
|
|
|
|
|
|
|
\H{insADC} \i\c{ADC}: Add with Carry
|
|
|
|
|
|
|
|
\c ADC r/m8,reg8 ; 10 /r [8086]
|
|
|
|
\c ADC r/m16,reg16 ; o16 11 /r [8086]
|
|
|
|
\c ADC r/m32,reg32 ; o32 11 /r [386]
|
|
|
|
|
|
|
|
\c ADC reg8,r/m8 ; 12 /r [8086]
|
|
|
|
\c ADC reg16,r/m16 ; o16 13 /r [8086]
|
|
|
|
\c ADC reg32,r/m32 ; o32 13 /r [386]
|
|
|
|
|
|
|
|
\c ADC r/m8,imm8 ; 80 /2 ib [8086]
|
|
|
|
\c ADC r/m16,imm16 ; o16 81 /2 iw [8086]
|
|
|
|
\c ADC r/m32,imm32 ; o32 81 /2 id [386]
|
|
|
|
|
|
|
|
\c ADC r/m16,imm8 ; o16 83 /2 ib [8086]
|
|
|
|
\c ADC r/m32,imm8 ; o32 83 /2 ib [386]
|
|
|
|
|
|
|
|
\c ADC AL,imm8 ; 14 ib [8086]
|
|
|
|
\c ADC AX,imm16 ; o16 15 iw [8086]
|
|
|
|
\c ADC EAX,imm32 ; o32 15 id [386]
|
|
|
|
|
|
|
|
\c{ADC} performs integer addition: it adds its two operands
|
|
|
|
together, plus the value of the carry flag, and leaves the result in
|
|
|
|
its destination (first) operand. The flags are set according to the
|
|
|
|
result of the operation: in particular, the carry flag is affected
|
|
|
|
and can be used by a subsequent \c{ADC} instruction.
|
|
|
|
|
|
|
|
In the forms with an 8-bit immediate second operand and a longer
|
|
|
|
first operand, the second operand is considered to be signed, and is
|
|
|
|
sign-extended to the length of the first operand. In these cases,
|
|
|
|
the \c{BYTE} qualifier is necessary to force NASM to generate this
|
|
|
|
form of the instruction.
|
|
|
|
|
|
|
|
To add two numbers without also adding the contents of the carry
|
|
|
|
flag, use \c{ADD} (\k{insADD}).
|
|
|
|
|
|
|
|
\H{insADD} \i\c{ADD}: Add Integers
|
|
|
|
|
|
|
|
\c ADD r/m8,reg8 ; 00 /r [8086]
|
|
|
|
\c ADD r/m16,reg16 ; o16 01 /r [8086]
|
|
|
|
\c ADD r/m32,reg32 ; o32 01 /r [386]
|
|
|
|
|
|
|
|
\c ADD reg8,r/m8 ; 02 /r [8086]
|
|
|
|
\c ADD reg16,r/m16 ; o16 03 /r [8086]
|
|
|
|
\c ADD reg32,r/m32 ; o32 03 /r [386]
|
|
|
|
|
|
|
|
\c ADD r/m8,imm8 ; 80 /0 ib [8086]
|
|
|
|
\c ADD r/m16,imm16 ; o16 81 /0 iw [8086]
|
|
|
|
\c ADD r/m32,imm32 ; o32 81 /0 id [386]
|
|
|
|
|
|
|
|
\c ADD r/m16,imm8 ; o16 83 /0 ib [8086]
|
|
|
|
\c ADD r/m32,imm8 ; o32 83 /0 ib [386]
|
|
|
|
|
|
|
|
\c ADD AL,imm8 ; 04 ib [8086]
|
|
|
|
\c ADD AX,imm16 ; o16 05 iw [8086]
|
|
|
|
\c ADD EAX,imm32 ; o32 05 id [386]
|
|
|
|
|
|
|
|
\c{ADD} performs integer addition: it adds its two operands
|
|
|
|
together, and leaves the result in its destination (first) operand.
|
|
|
|
The flags are set according to the result of the operation: in
|
|
|
|
particular, the carry flag is affected and can be used by a
|
|
|
|
subsequent \c{ADC} instruction (\k{insADC}).
|
|
|
|
|
|
|
|
In the forms with an 8-bit immediate second operand and a longer
|
|
|
|
first operand, the second operand is considered to be signed, and is
|
|
|
|
sign-extended to the length of the first operand. In these cases,
|
|
|
|
the \c{BYTE} qualifier is necessary to force NASM to generate this
|
|
|
|
form of the instruction.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insADDPS} \i\c{ADDPS}: Packed Single FP ADD
|
|
|
|
|
|
|
|
\c ADDPS xmmreg,mem128 ; 0f 58 /r [KATMAI,SSE]
|
|
|
|
\c ADDPS xmmreg,xmmreg ; 0f 58 /r [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{ADDPS} performs addition on each of four packed SP FP
|
2002-05-01 05:01:38 +08:00
|
|
|
number items dst(0-31):=dst(0-31)+src(0-31)
|
|
|
|
, dst(32-63):=dst(32-63)+src(32-63), etc.
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insADDSS} \i\c{ADDSS}: Scalar Single FP ADD
|
|
|
|
|
|
|
|
\c ADDSS xmmreg,mem32 ; f3 0f 58 /r [KATMAI,SSE]
|
|
|
|
\c ADDSS xmmreg,xmmreg ; f3 0f 58 /r [KATMAI,SSE]
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insAND} \i\c{AND}: Bitwise AND
|
|
|
|
|
|
|
|
\c AND r/m8,reg8 ; 20 /r [8086]
|
|
|
|
\c AND r/m16,reg16 ; o16 21 /r [8086]
|
|
|
|
\c AND r/m32,reg32 ; o32 21 /r [386]
|
|
|
|
|
|
|
|
\c AND reg8,r/m8 ; 22 /r [8086]
|
|
|
|
\c AND reg16,r/m16 ; o16 23 /r [8086]
|
|
|
|
\c AND reg32,r/m32 ; o32 23 /r [386]
|
|
|
|
|
|
|
|
\c AND r/m8,imm8 ; 80 /4 ib [8086]
|
|
|
|
\c AND r/m16,imm16 ; o16 81 /4 iw [8086]
|
|
|
|
\c AND r/m32,imm32 ; o32 81 /4 id [386]
|
|
|
|
|
|
|
|
\c AND r/m16,imm8 ; o16 83 /4 ib [8086]
|
|
|
|
\c AND r/m32,imm8 ; o32 83 /4 ib [386]
|
|
|
|
|
|
|
|
\c AND AL,imm8 ; 24 ib [8086]
|
|
|
|
\c AND AX,imm16 ; o16 25 iw [8086]
|
|
|
|
\c AND EAX,imm32 ; o32 25 id [386]
|
|
|
|
|
|
|
|
\c{AND} performs a bitwise AND operation between its two operands
|
|
|
|
(i.e. each bit of the result is 1 if and only if the corresponding
|
|
|
|
bits of the two inputs were both 1), and stores the result in the
|
|
|
|
destination (first) operand.
|
|
|
|
|
|
|
|
In the forms with an 8-bit immediate second operand and a longer
|
|
|
|
first operand, the second operand is considered to be signed, and is
|
|
|
|
sign-extended to the length of the first operand. In these cases,
|
|
|
|
the \c{BYTE} qualifier is necessary to force NASM to generate this
|
|
|
|
form of the instruction.
|
|
|
|
|
|
|
|
The MMX instruction \c{PAND} (see \k{insPAND}) performs the same
|
|
|
|
operation on the 64-bit MMX registers.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insANDNPS} \i\c{ANDNPS}: Bitwise Logical AND NOT For Single FP
|
|
|
|
|
|
|
|
\c ANDNPS xmmreg,mem128 ; 0f 55 /r [KATMAI,SSE]
|
|
|
|
\c ANDNPS xmmreg,xmmreg ; 0f 55 /r [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insANDPS} \i\c{ANDPS}: Bitwise Logical AND For Single FP
|
|
|
|
|
|
|
|
\c ANDPS xmmreg,mem128 ; 0f 54 /r [KATMAI,SSE]
|
|
|
|
\c ANDPS xmmreg,xmmreg ; 0f 54 /r [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insARPL} \i\c{ARPL}: Adjust RPL Field of Selector
|
|
|
|
|
|
|
|
\c ARPL r/m16,reg16 ; 63 /r [286,PRIV]
|
|
|
|
|
|
|
|
\c{ARPL} expects its two word operands to be segment selectors. It
|
|
|
|
adjusts the RPL (requested privilege level - stored in the bottom
|
|
|
|
two bits of the selector) field of the destination (first) operand
|
|
|
|
to ensure that it is no less than (i.e. no more privileged than) the RPL
|
|
|
|
field of the source operand. The zero flag is set if and only if a
|
|
|
|
change had to be made.
|
|
|
|
|
|
|
|
\H{insBOUND} \i\c{BOUND}: Check Array Index against Bounds
|
|
|
|
|
|
|
|
\c BOUND reg16,mem ; o16 62 /r [186]
|
|
|
|
\c BOUND reg32,mem ; o32 62 /r [386]
|
|
|
|
|
|
|
|
\c{BOUND} expects its second operand to point to an area of memory
|
|
|
|
containing two signed values of the same size as its first operand
|
|
|
|
(i.e. two words for the 16-bit form; two doublewords for the 32-bit
|
|
|
|
form). It performs two signed comparisons: if the value in the
|
|
|
|
register passed as its first operand is less than the first of the
|
|
|
|
in-memory values, or is greater than the second, it
|
|
|
|
throws a BR exception. Otherwise, it does nothing.
|
|
|
|
|
|
|
|
\H{insBSF} \i\c{BSF}, \i\c{BSR}: Bit Scan
|
|
|
|
|
|
|
|
\c BSF reg16,r/m16 ; o16 0F BC /r [386]
|
|
|
|
\c BSF reg32,r/m32 ; o32 0F BC /r [386]
|
|
|
|
|
|
|
|
\c BSR reg16,r/m16 ; o16 0F BD /r [386]
|
|
|
|
\c BSR reg32,r/m32 ; o32 0F BD /r [386]
|
|
|
|
|
|
|
|
\c{BSF} searches for a set bit in its source (second) operand,
|
|
|
|
starting from the bottom, and if it finds one, stores the index in
|
|
|
|
its destination (first) operand. If no set bit is found, the
|
|
|
|
contents of the destination operand are undefined.
|
|
|
|
|
|
|
|
\c{BSR} performs the same function, but searches from the top
|
|
|
|
instead, so it finds the most significant set bit.
|
|
|
|
|
|
|
|
Bit indices are from 0 (least significant) to 15 or 31 (most
|
|
|
|
significant).
|
|
|
|
|
|
|
|
\H{insBSWAP} \i\c{BSWAP}: Byte Swap
|
|
|
|
|
|
|
|
\c BSWAP reg32 ; o32 0F C8+r [486]
|
|
|
|
|
|
|
|
\c{BSWAP} swaps the order of the four bytes of a 32-bit register:
|
|
|
|
bits 0-7 exchange places with bits 24-31, and bits 8-15 swap with
|
|
|
|
bits 16-23. There is no explicit 16-bit equivalent: to byte-swap
|
|
|
|
\c{AX}, \c{BX}, \c{CX} or \c{DX}, \c{XCHG} can be used.
|
|
|
|
|
|
|
|
\H{insBT} \i\c{BT}, \i\c{BTC}, \i\c{BTR}, \i\c{BTS}: Bit Test
|
|
|
|
|
|
|
|
\c BT r/m16,reg16 ; o16 0F A3 /r [386]
|
|
|
|
\c BT r/m32,reg32 ; o32 0F A3 /r [386]
|
|
|
|
\c BT r/m16,imm8 ; o16 0F BA /4 ib [386]
|
|
|
|
\c BT r/m32,imm8 ; o32 0F BA /4 ib [386]
|
|
|
|
|
|
|
|
\c BTC r/m16,reg16 ; o16 0F BB /r [386]
|
|
|
|
\c BTC r/m32,reg32 ; o32 0F BB /r [386]
|
|
|
|
\c BTC r/m16,imm8 ; o16 0F BA /7 ib [386]
|
|
|
|
\c BTC r/m32,imm8 ; o32 0F BA /7 ib [386]
|
|
|
|
|
|
|
|
\c BTR r/m16,reg16 ; o16 0F B3 /r [386]
|
|
|
|
\c BTR r/m32,reg32 ; o32 0F B3 /r [386]
|
|
|
|
\c BTR r/m16,imm8 ; o16 0F BA /6 ib [386]
|
|
|
|
\c BTR r/m32,imm8 ; o32 0F BA /6 ib [386]
|
|
|
|
|
|
|
|
\c BTS r/m16,reg16 ; o16 0F AB /r [386]
|
|
|
|
\c BTS r/m32,reg32 ; o32 0F AB /r [386]
|
|
|
|
\c BTS r/m16,imm8 ; o16 0F BA /5 ib [386]
|
|
|
|
\c BTS r/m32,imm8 ; o32 0F BA /5 ib [386]
|
|
|
|
|
|
|
|
These instructions all test one bit of their first operand, whose
|
|
|
|
index is given by the second operand, and store the value of that
|
|
|
|
bit into the carry flag. Bit indices are from 0 (least significant)
|
|
|
|
to 15 or 31 (most significant).
|
|
|
|
|
|
|
|
In addition to storing the original value of the bit into the carry
|
|
|
|
flag, \c{BTR} also resets (clears) the bit in the operand itself.
|
|
|
|
\c{BTS} sets the bit, and \c{BTC} complements the bit. \c{BT} does
|
|
|
|
not modify its operands.
|
|
|
|
|
|
|
|
The bit offset should be less than the size of the operand in bits.
|
|
|
|
|
|
|
|
\H{insCALL} \i\c{CALL}: Call Subroutine
|
|
|
|
|
|
|
|
\c CALL imm ; E8 rw/rd [8086]
|
|
|
|
\c CALL imm:imm16 ; o16 9A iw iw [8086]
|
|
|
|
\c CALL imm:imm32 ; o32 9A id iw [386]
|
|
|
|
\c CALL FAR mem16 ; o16 FF /3 [8086]
|
|
|
|
\c CALL FAR mem32 ; o32 FF /3 [386]
|
|
|
|
\c CALL r/m16 ; o16 FF /2 [8086]
|
|
|
|
\c CALL r/m32 ; o32 FF /2 [386]
|
|
|
|
|
|
|
|
\c{CALL} calls a subroutine, by means of pushing the current
|
|
|
|
instruction pointer (\c{IP}) and optionally \c{CS} as well on the
|
|
|
|
stack, and then jumping to a given address.
|
|
|
|
|
|
|
|
\c{CS} is pushed as well as \c{IP} if and only if the call is a far
|
|
|
|
call, i.e. a destination segment address is specified in the
|
|
|
|
instruction. The forms involving two colon-separated arguments are
|
|
|
|
far calls; so are the \c{CALL FAR mem} forms.
|
|
|
|
|
|
|
|
You can choose between the two immediate \i{far call} forms (\c{CALL
|
|
|
|
imm:imm}) by the use of the \c{WORD} and \c{DWORD} keywords: \c{CALL
|
|
|
|
WORD 0x1234:0x5678} or \c{CALL DWORD 0x1234:0x56789abc}.
|
|
|
|
|
|
|
|
The \c{CALL FAR mem} forms execute a far call by loading the
|
|
|
|
destination address out of memory. The address loaded consists of 16
|
|
|
|
or 32 bits of offset (depending on the operand size), and 16 bits of
|
|
|
|
segment. The operand size may be overridden using \c{CALL WORD FAR
|
|
|
|
mem} or \c{CALL DWORD FAR mem}.
|
|
|
|
|
|
|
|
The \c{CALL r/m} forms execute a \i{near call} (within the same
|
|
|
|
segment), loading the destination address out of memory or out of a
|
|
|
|
register. The keyword \c{NEAR} may be specified, for clarity, in
|
|
|
|
these forms, but is not necessary. Again, operand size can be
|
|
|
|
overridden using \c{CALL WORD mem} or \c{CALL DWORD mem}.
|
|
|
|
|
|
|
|
As a convenience, NASM does not require you to call a far procedure
|
|
|
|
symbol by coding the cumbersome \c{CALL SEG routine:routine}, but
|
|
|
|
instead allows the easier synonym \c{CALL FAR routine}.
|
|
|
|
|
|
|
|
The \c{CALL r/m} forms given above are near calls; NASM will accept
|
|
|
|
the \c{NEAR} keyword (e.g. \c{CALL NEAR [address]}), even though it
|
|
|
|
is not strictly necessary.
|
|
|
|
|
|
|
|
\H{insCBW} \i\c{CBW}, \i\c{CWD}, \i\c{CDQ}, \i\c{CWDE}: Sign Extensions
|
|
|
|
|
|
|
|
\c CBW ; o16 98 [8086]
|
|
|
|
\c CWD ; o16 99 [8086]
|
|
|
|
\c CDQ ; o32 99 [386]
|
|
|
|
\c CWDE ; o32 98 [386]
|
|
|
|
|
|
|
|
All these instructions sign-extend a short value into a longer one,
|
|
|
|
by replicating the top bit of the original value to fill the
|
|
|
|
extended one.
|
|
|
|
|
|
|
|
\c{CBW} extends \c{AL} into \c{AX} by repeating the top bit of
|
|
|
|
\c{AL} in every bit of \c{AH}. \c{CWD} extends \c{AX} into \c{DX:AX}
|
|
|
|
by repeating the top bit of \c{AX} throughout \c{DX}. \c{CWDE}
|
|
|
|
extends \c{AX} into \c{EAX}, and \c{CDQ} extends \c{EAX} into
|
|
|
|
\c{EDX:EAX}.
|
|
|
|
|
|
|
|
\H{insCLC} \i\c{CLC}, \i\c{CLD}, \i\c{CLI}, \i\c{CLTS}: Clear Flags
|
|
|
|
|
|
|
|
\c CLC ; F8 [8086]
|
|
|
|
\c CLD ; FC [8086]
|
|
|
|
\c CLI ; FA [8086]
|
|
|
|
\c CLTS ; 0F 06 [286,PRIV]
|
|
|
|
|
|
|
|
These instructions clear various flags. \c{CLC} clears the carry
|
|
|
|
flag; \c{CLD} clears the direction flag; \c{CLI} clears the
|
|
|
|
interrupt flag (thus disabling interrupts); and \c{CLTS} clears the
|
|
|
|
task-switched (\c{TS}) flag in \c{CR0}.
|
|
|
|
|
|
|
|
To set the carry, direction, or interrupt flags, use the \c{STC},
|
|
|
|
\c{STD} and \c{STI} instructions (\k{insSTC}). To invert the carry
|
|
|
|
flag, use \c{CMC} (\k{insCMC}).
|
|
|
|
|
|
|
|
\H{insCMC} \i\c{CMC}: Complement Carry Flag
|
|
|
|
|
|
|
|
\c CMC ; F5 [8086]
|
|
|
|
|
|
|
|
\c{CMC} changes the value of the carry flag: if it was 0, it sets it
|
|
|
|
to 1, and vice versa.
|
|
|
|
|
|
|
|
\H{insCMOVcc} \i\c{CMOVcc}: Conditional Move
|
|
|
|
|
|
|
|
\c CMOVcc reg16,r/m16 ; o16 0F 40+cc /r [P6]
|
|
|
|
\c CMOVcc reg32,r/m32 ; o32 0F 40+cc /r [P6]
|
|
|
|
|
|
|
|
\c{CMOV} moves its source (second) operand into its destination
|
|
|
|
(first) operand if the given condition code is satisfied; otherwise
|
|
|
|
it does nothing.
|
|
|
|
|
|
|
|
For a list of condition codes, see \k{iref-cc}.
|
|
|
|
|
|
|
|
Although the \c{CMOV} instructions are flagged \c{P6} above, they
|
|
|
|
may not be supported by all Pentium Pro processors; the \c{CPUID}
|
|
|
|
instruction (\k{insCPUID}) will return a bit which indicates whether
|
|
|
|
conditional moves are supported.
|
|
|
|
|
|
|
|
\H{insCMP} \i\c{CMP}: Compare Integers
|
|
|
|
|
|
|
|
\c CMP r/m8,reg8 ; 38 /r [8086]
|
|
|
|
\c CMP r/m16,reg16 ; o16 39 /r [8086]
|
|
|
|
\c CMP r/m32,reg32 ; o32 39 /r [386]
|
|
|
|
|
|
|
|
\c CMP reg8,r/m8 ; 3A /r [8086]
|
|
|
|
\c CMP reg16,r/m16 ; o16 3B /r [8086]
|
|
|
|
\c CMP reg32,r/m32 ; o32 3B /r [386]
|
|
|
|
|
|
|
|
\c CMP r/m8,imm8 ; 80 /7 ib [8086]
|
|
|
|
\c CMP r/m16,imm16 ; o16 81 /7 iw [8086]
|
|
|
|
\c CMP r/m32,imm32 ; o32 81 /7 id [386]
|
|
|
|
|
|
|
|
\c CMP r/m16,imm8 ; o16 83 /7 ib [8086]
|
|
|
|
\c CMP r/m32,imm8 ; o32 83 /7 ib [386]
|
|
|
|
|
|
|
|
\c CMP AL,imm8 ; 3C ib [8086]
|
|
|
|
\c CMP AX,imm16 ; o16 3D iw [8086]
|
|
|
|
\c CMP EAX,imm32 ; o32 3D id [386]
|
|
|
|
|
|
|
|
\c{CMP} performs a `mental' subtraction of its second operand from
|
|
|
|
its first operand, and affects the flags as if the subtraction had
|
|
|
|
taken place, but does not store the result of the subtraction
|
|
|
|
anywhere.
|
|
|
|
|
|
|
|
In the forms with an 8-bit immediate second operand and a longer
|
|
|
|
first operand, the second operand is considered to be signed, and is
|
|
|
|
sign-extended to the length of the first operand. In these cases,
|
|
|
|
the \c{BYTE} qualifier is necessary to force NASM to generate this
|
|
|
|
form of the instruction.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insCMPEQPS} \i\c{CMPEQPS}: Packed Single FP Compare (CMPPS)
|
|
|
|
|
|
|
|
\c CMPEQPS xmmreg,memory ; 0f c2 /r ib [KATMAI,SSE]
|
|
|
|
\c CMPEQPS xmmreg,xmmreg ; 0f c2 /r ib [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{CMPPS} with the condition preset to EQ; see \c{CMPPS} (\k{insCMPPS}).
|
|
|
|
|
|
|
|
\H{insCMPEQSS} \i\c{CMPEQSS}: Scalar Single FP Compare (CMPSS)
|
|
|
|
|
|
|
|
\c CMPEQSS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPEQSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{CMPSS} with the condition preset to EQ; see \c{CMPSS} (\k{insCMPSS}).
|
|
|
|
|
|
|
|
\H{insCMPLEPS} \i\c{CMPLEPS}: Packed Single FP Compare (CMPPS)
|
|
|
|
|
|
|
|
\c CMPLEPS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPLEPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPLESS} \i\c{CMPLESS}: Scalar Single FP Compare (CMPSS)
|
|
|
|
|
|
|
|
\c CMPLESS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPLESS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPLTPS} \i\c{CMPLTPS}: Packed Single FP Compare (CMPPS)
|
|
|
|
|
|
|
|
\c CMPLTPS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPLTPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPLTSS} \i\c{CMPLTSS}: Scalar Single FP Compare (CMPSS)
|
|
|
|
|
|
|
|
\c CMPLTSS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPLTSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPNEQPS} \i\c{CMPNEQPS}: Packed Single FP Compare (CMPPS)
|
|
|
|
|
|
|
|
\c CMPNEQPS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPNEQPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPNEQSS} \i\c{CMPNEQSS}: Scalar Single FP Compare (CMPSS)
|
|
|
|
|
|
|
|
\c CMPNEQSS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPNEQSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPNLEPS} \i\c{CMPNLEPS}: Packed Single FP Compare (CMPPS)
|
|
|
|
|
|
|
|
\c CMPNLEPS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPNLEPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPNLESS} \i\c{CMPNLESS}: Scalar Single FP Compare (CMPSS)
|
|
|
|
|
|
|
|
\c CMPNLESS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPNLESS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPNLTPS} \i\c{CMPNLTPS}: Packed Single FP Compare (CMPPS)
|
|
|
|
|
|
|
|
\c CMPNLTPS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPNLTPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPNLTSS} \i\c{CMPNLTSS}: Scalar Single FP Compare (CMPSS)
|
|
|
|
|
|
|
|
\c CMPNLTSS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPNLTSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPORDPS} \i\c{CMPORDPS}: Packed Single FP Compare (CMPPS)
|
|
|
|
|
|
|
|
\c CMPORDPS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPORDPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPORDSS} \i\c{CMPORDSS}: Scalar Single FP Compare (CMPSS)
|
|
|
|
|
|
|
|
\c CMPORDSS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPORDSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPPS} \i\c{CMPPS}: Packed Single FP Compare
|
|
|
|
|
|
|
|
\c CMPPS xmmreg,memory,immediate ; ?? [KATMAI,SSE,SB,AR2]
|
|
|
|
\c CMPPS xmmreg,xmmreg,immediate ; ?? [KATMAI,SSE,SB,AR2]
|
|
|
|
|
|
|
|
\c{CMP(cc)PS} and \c{CMP(cc)SS} conditions (cc):
|
|
|
|
EQ, LT, LE, UNORD, NEQ, NLT, NLE, ORD
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insCMPSB} \i\c{CMPSB}, \i\c{CMPSW}, \i\c{CMPSD}: Compare Strings
|
|
|
|
|
|
|
|
\c CMPSB ; A6 [8086]
|
|
|
|
\c CMPSW ; o16 A7 [8086]
|
|
|
|
\c CMPSD ; o32 A7 [386]
|
|
|
|
|
|
|
|
\c{CMPSB} compares the byte at \c{[DS:SI]} or \c{[DS:ESI]} with the
|
|
|
|
byte at \c{[ES:DI]} or \c{[ES:EDI]}, and sets the flags accordingly.
|
|
|
|
It then increments or decrements (depending on the direction flag:
|
|
|
|
increments if the flag is clear, decrements if it is set) \c{SI} and
|
|
|
|
\c{DI} (or \c{ESI} and \c{EDI}).
|
|
|
|
|
|
|
|
The registers used are \c{SI} and \c{DI} if the address size is 16
|
|
|
|
bits, and \c{ESI} and \c{EDI} if it is 32 bits. If you need to use
|
|
|
|
an address size not equal to the current \c{BITS} setting, you can
|
|
|
|
use an explicit \i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
The segment register used to load from \c{[SI]} or \c{[ESI]} can be
|
|
|
|
overridden by using a segment register name as a prefix (for
|
|
|
|
example, \c{es cmpsb}). The use of \c{ES} for the load from \c{[DI]}
|
|
|
|
or \c{[EDI]} cannot be overridden.
|
|
|
|
|
|
|
|
\c{CMPSW} and \c{CMPSD} work in the same way, but they compare a
|
|
|
|
word or a doubleword instead of a byte, and increment or decrement
|
|
|
|
the addressing registers by 2 or 4 instead of 1.
|
|
|
|
|
|
|
|
The \c{REPE} and \c{REPNE} prefixes (equivalently, \c{REPZ} and
|
|
|
|
\c{REPNZ}) may be used to repeat the instruction up to \c{CX} (or
|
|
|
|
\c{ECX} - again, the address size chooses which) times until the
|
|
|
|
first unequal or equal byte is found.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
|
|
|
|
\H{insCMPSS} \i\c{CMPSS}: Scalar Single FP Compare
|
|
|
|
|
|
|
|
\c CMPSS xmmreg,memory,immediate ; ?? [KATMAI,SSE,SB,AR2]
|
|
|
|
\c CMPSS xmmreg,xmmreg,immediate ; ?? [KATMAI,SSE,SB,AR2]
|
|
|
|
|
|
|
|
\c{CMP(cc)PS} and \c{CMP(cc)SS} conditions (cc):
|
|
|
|
EQ, LT, LE, UNORD, NEQ, NLT, NLE, ORD
|
|
|
|
|
|
|
|
|
2002-05-01 05:01:38 +08:00
|
|
|
\H{insCMPUNORDPS} \i\c{CMPUNORDPS}: Packed Single FP Compare
|
|
|
|
|
|
|
|
(CMPPS)
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\c CMPUNORDPS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPUNORDPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
2002-05-01 05:01:38 +08:00
|
|
|
\H{insCMPUNORDSS} \i\c{CMPUNORDSS}: Scalar Single FP Compare
|
|
|
|
|
|
|
|
(CMPSS)
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\c CMPUNORDSS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CMPUNORDSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insCMPXCHG} \i\c{CMPXCHG}, \i\c{CMPXCHG486}: Compare and Exchange
|
|
|
|
|
|
|
|
\c CMPXCHG r/m8,reg8 ; 0F B0 /r [PENT]
|
|
|
|
\c CMPXCHG r/m16,reg16 ; o16 0F B1 /r [PENT]
|
|
|
|
\c CMPXCHG r/m32,reg32 ; o32 0F B1 /r [PENT]
|
|
|
|
|
|
|
|
\c CMPXCHG486 r/m8,reg8 ; 0F A6 /r [486,UNDOC]
|
|
|
|
\c CMPXCHG486 r/m16,reg16 ; o16 0F A7 /r [486,UNDOC]
|
|
|
|
\c CMPXCHG486 r/m32,reg32 ; o32 0F A7 /r [486,UNDOC]
|
|
|
|
|
|
|
|
These two instructions perform exactly the same operation; however,
|
|
|
|
apparently some (not all) 486 processors support it under a
|
|
|
|
non-standard opcode, so NASM provides the undocumented
|
|
|
|
\c{CMPXCHG486} form to generate the non-standard opcode.
|
|
|
|
|
|
|
|
\c{CMPXCHG} compares its destination (first) operand to the value in
|
|
|
|
\c{AL}, \c{AX} or \c{EAX} (depending on the size of the
|
|
|
|
instruction). If they are equal, it copies its source (second)
|
|
|
|
operand into the destination and sets the zero flag. Otherwise, it
|
|
|
|
clears the zero flag and leaves the destination alone.
|
|
|
|
|
|
|
|
\c{CMPXCHG} is intended to be used for atomic operations in
|
|
|
|
multitasking or multiprocessor environments. To safely update a
|
|
|
|
value in shared memory, for example, you might load the value into
|
|
|
|
\c{EAX}, load the updated value into \c{EBX}, and then execute the
|
|
|
|
instruction \c{lock cmpxchg [value],ebx}. If \c{value} has not
|
|
|
|
changed since being loaded, it is updated with your desired new
|
|
|
|
value, and the zero flag is set to let you know it has worked. (The
|
|
|
|
\c{LOCK} prefix prevents another processor doing anything in the
|
|
|
|
middle of this operation: it guarantees atomicity.) However, if
|
|
|
|
another processor has modified the value in between your load and
|
|
|
|
your attempted store, the store does not happen, and you are
|
|
|
|
notified of the failure by a cleared zero flag, so you can go round
|
|
|
|
and try again.
|
|
|
|
|
|
|
|
\H{insCMPXCHG8B} \i\c{CMPXCHG8B}: Compare and Exchange Eight Bytes
|
|
|
|
|
|
|
|
\c CMPXCHG8B mem ; 0F C7 /1 [PENT]
|
|
|
|
|
|
|
|
This is a larger and more unwieldy version of \c{CMPXCHG}: it
|
|
|
|
compares the 64-bit (eight-byte) value stored at \c{[mem]} with the
|
|
|
|
value in \c{EDX:EAX}. If they are equal, it sets the zero flag and
|
|
|
|
stores \c{ECX:EBX} into the memory area. If they are unequal, it
|
|
|
|
clears the zero flag and leaves the memory area untouched.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insCOMISS} \i\c{COMISS}: Scalar Ordered Single-FP Compare and Set EFLAGS
|
|
|
|
|
|
|
|
\c COMISS xmmreg,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c COMISS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
Set Z, P, C according to comparison, clear O, S, A bits of EFLAGS.
|
|
|
|
Z=P=C=1 for "unordered" result (QNaN).
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insCPUID} \i\c{CPUID}: Get CPU Identification Code
|
|
|
|
|
|
|
|
\c CPUID ; 0F A2 [PENT]
|
|
|
|
|
|
|
|
\c{CPUID} returns various information about the processor it is
|
|
|
|
being executed on. It fills the four registers \c{EAX}, \c{EBX},
|
|
|
|
\c{ECX} and \c{EDX} with information, which varies depending on the
|
|
|
|
input contents of \c{EAX}.
|
|
|
|
|
|
|
|
\c{CPUID} also acts as a barrier to serialise instruction execution:
|
|
|
|
executing the \c{CPUID} instruction guarantees that all the effects
|
|
|
|
(memory modification, flag modification, register modification) of
|
|
|
|
previous instructions have been completed before the next
|
|
|
|
instruction gets fetched.
|
|
|
|
|
|
|
|
The information returned is as follows:
|
|
|
|
|
|
|
|
\b If \c{EAX} is zero on input, \c{EAX} on output holds the maximum
|
|
|
|
acceptable input value of \c{EAX}, and \c{EBX:EDX:ECX} contain the
|
|
|
|
string \c{"GenuineIntel"} (or not, if you have a clone processor).
|
|
|
|
That is to say, \c{EBX} contains \c{"Genu"} (in NASM's own sense of
|
|
|
|
character constants, described in \k{chrconst}), \c{EDX} contains
|
|
|
|
\c{"ineI"} and \c{ECX} contains \c{"ntel"}.
|
|
|
|
|
|
|
|
\b If \c{EAX} is one on input, \c{EAX} on output contains version
|
|
|
|
information about the processor, and \c{EDX} contains a set of
|
|
|
|
feature flags, showing the presence and absence of various features.
|
|
|
|
For example, bit 8 is set if the \c{CMPXCHG8B} instruction
|
|
|
|
(\k{insCMPXCHG8B}) is supported, bit 15 is set if the conditional
|
|
|
|
move instructions (\k{insCMOVcc} and \k{insFCMOVB}) are supported,
|
|
|
|
and bit 23 is set if MMX instructions are supported.
|
|
|
|
|
|
|
|
\b If \c{EAX} is two on input, \c{EAX}, \c{EBX}, \c{ECX} and \c{EDX}
|
|
|
|
all contain information about caches and TLBs (Translation Lookaside
|
|
|
|
Buffers).
|
|
|
|
|
|
|
|
For more information on the data returned from \c{CPUID}, see the
|
|
|
|
documentation on Intel's web site.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insCVTPI2PS} \i\c{CVTPI2PS}:
|
|
|
|
Packed Signed INT32 to Packed Single-FP Conversion
|
|
|
|
|
|
|
|
\c CVTPI2PS xmmreg,mem64 ; ?? [KATMAI,SSE,MMX]
|
|
|
|
\c CVTPI2PS xmmreg,mmxreg ; ?? [KATMAI,SSE,MMX]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCVTPS2PI} \i\c{CVTPS2PI}:
|
|
|
|
Packed Single-FP to Packed INT32 Conversion
|
|
|
|
|
|
|
|
\c CVTPS2PI mmxreg,mem64 ; ?? [KATMAI,SSE,MMX]
|
|
|
|
\c CVTPS2PI mmxreg,xmmreg ; ?? [KATMAI,SSE,MMX]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCVTSI2SS} \i\c{CVTSI2SS}:
|
|
|
|
Scalar Signed INT32 to Single-FP Conversion
|
|
|
|
|
|
|
|
\c CVTSI2SS xmmreg,memory ; ?? [KATMAI,SSE,SD,AR1]
|
|
|
|
\c CVTSI2SS xmmreg,reg32 ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCVTSS2SI} \i\c{CVTSS2SI}:
|
|
|
|
Scalar Single-FP to Signed INT32 Conversion
|
|
|
|
|
|
|
|
\c CVTSS2SI reg32,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CVTSS2SI reg32,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCVTTPS2PI} \i\c{CVTTPS2PI}:
|
|
|
|
Packed Single-FP to Packed INT32 Conversion with Truncation
|
|
|
|
|
|
|
|
\c CVTTPS2PI mmxreg,memory ; ?? [KATMAI,SSE,MMX]
|
|
|
|
\c CVTTPS2PI mmxreg,xmmreg ; ?? [KATMAI,SSE,MMX]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insCVTTSS2SI} \i\c{CVTTSS2SI}:
|
|
|
|
Scalar Single-FP to Signed INT32 Conversion with Truncation
|
|
|
|
|
|
|
|
\c CVTTSS2SI reg32,memory ; ?? [KATMAI,SSE]
|
|
|
|
\c CVTTSS2SI reg32,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insDAA} \i\c{DAA}, \i\c{DAS}: Decimal Adjustments
|
|
|
|
|
|
|
|
\c DAA ; 27 [8086]
|
|
|
|
\c DAS ; 2F [8086]
|
|
|
|
|
|
|
|
These instructions are used in conjunction with the add and subtract
|
|
|
|
instructions to perform binary-coded decimal arithmetic in
|
|
|
|
\e{packed} (one BCD digit per nibble) form. For the unpacked
|
|
|
|
equivalents, see \k{insAAA}.
|
|
|
|
|
|
|
|
\c{DAA} should be used after a one-byte \c{ADD} instruction whose
|
|
|
|
destination was the \c{AL} register: by means of examining the value
|
|
|
|
in \c{AL} and also the auxiliary carry flag \c{AF}, it
|
|
|
|
determines whether either digit of the addition has overflowed, and
|
|
|
|
adjusts it (and sets the carry and auxiliary-carry flags) if so. You
|
|
|
|
can add long BCD strings together by doing \c{ADD}/\c{DAA} on the
|
|
|
|
low two digits, then doing \c{ADC}/\c{DAA} on each subsequent pair
|
|
|
|
of digits.
|
|
|
|
|
|
|
|
\c{DAS} works similarly to \c{DAA}, but is for use after \c{SUB}
|
|
|
|
instructions rather than \c{ADD}.
|
|
|
|
|
|
|
|
\H{insDEC} \i\c{DEC}: Decrement Integer
|
|
|
|
|
|
|
|
\c DEC reg16 ; o16 48+r [8086]
|
|
|
|
\c DEC reg32 ; o32 48+r [386]
|
|
|
|
\c DEC r/m8 ; FE /1 [8086]
|
|
|
|
\c DEC r/m16 ; o16 FF /1 [8086]
|
|
|
|
\c DEC r/m32 ; o32 FF /1 [386]
|
|
|
|
|
|
|
|
\c{DEC} subtracts 1 from its operand. It does \e{not} affect the
|
|
|
|
carry flag: to affect the carry flag, use \c{SUB something,1} (see
|
|
|
|
\k{insSUB}). See also \c{INC} (\k{insINC}).
|
|
|
|
|
|
|
|
\H{insDIV} \i\c{DIV}: Unsigned Integer Divide
|
|
|
|
|
|
|
|
\c DIV r/m8 ; F6 /6 [8086]
|
|
|
|
\c DIV r/m16 ; o16 F7 /6 [8086]
|
|
|
|
\c DIV r/m32 ; o32 F7 /6 [386]
|
|
|
|
|
|
|
|
\c{DIV} performs unsigned integer division. The explicit operand
|
|
|
|
provided is the divisor; the dividend and destination operands are
|
|
|
|
implicit, in the following way:
|
|
|
|
|
|
|
|
\b For \c{DIV r/m8}, \c{AX} is divided by the given operand; the
|
|
|
|
quotient is stored in \c{AL} and the remainder in \c{AH}.
|
|
|
|
|
|
|
|
\b For \c{DIV r/m16}, \c{DX:AX} is divided by the given operand; the
|
|
|
|
quotient is stored in \c{AX} and the remainder in \c{DX}.
|
|
|
|
|
|
|
|
\b For \c{DIV r/m32}, \c{EDX:EAX} is divided by the given operand;
|
|
|
|
the quotient is stored in \c{EAX} and the remainder in \c{EDX}.
|
|
|
|
|
|
|
|
Signed integer division is performed by the \c{IDIV} instruction:
|
|
|
|
see \k{insIDIV}.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insDIVPS} \i\c{DIVPS}: Packed Single-FP Divide
|
|
|
|
|
|
|
|
\c DIVPS xmmreg,memory ; 0F,5E,/r [KATMAI,SSE]
|
|
|
|
\c DIVPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{DIVPS} divides the packed SP FP numbers in its first operand
|
|
|
|
by the corresponding packed SP FP numbers in its second operand.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insDIVSS} \i\c{DIVSS}: Scalar Single-FP Divide
|
|
|
|
|
|
|
|
\c DIVSS xmmreg,memory ; F3,0F,5E,/r [KATMAI,SSE]
|
|
|
|
\c DIVSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
2002-05-01 05:01:08 +08:00
|
|
|
The \c{DIVSS} instruction divides the lowest SP FP numbers
|
2002-05-01 05:00:33 +08:00
|
|
|
of both operands; the upper three fields are passed through from xmm1.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insEMMS} \i\c{EMMS}: Empty MMX State
|
|
|
|
|
|
|
|
\c EMMS ; 0F 77 [PENT,MMX]
|
|
|
|
|
|
|
|
\c{EMMS} sets the FPU tag word (marking which floating-point
|
|
|
|
registers are available) to all ones, meaning all registers are
|
|
|
|
available for the FPU to use. It should be used after executing MMX
|
|
|
|
instructions and before executing any subsequent floating-point
|
|
|
|
operations.
|
|
|
|
|
|
|
|
\H{insENTER} \i\c{ENTER}: Create Stack Frame
|
|
|
|
|
|
|
|
\c ENTER imm,imm ; C8 iw ib [186]
|
|
|
|
|
|
|
|
\c{ENTER} constructs a stack frame for a high-level language
|
|
|
|
procedure call. The first operand (the \c{iw} in the opcode
|
|
|
|
definition above refers to the first operand) gives the amount of
|
|
|
|
stack space to allocate for local variables; the second (the \c{ib}
|
|
|
|
above) gives the nesting level of the procedure (for languages like
|
|
|
|
Pascal, with nested procedures).
|
|
|
|
|
|
|
|
The function of \c{ENTER}, with a nesting level of zero, is
|
|
|
|
equivalent to
|
|
|
|
|
|
|
|
\c PUSH EBP ; or PUSH BP in 16 bits
|
|
|
|
\c MOV EBP,ESP ; or MOV BP,SP in 16 bits
|
|
|
|
\c SUB ESP,operand1 ; or SUB SP,operand1 in 16 bits
|
|
|
|
|
|
|
|
This creates a stack frame with the procedure parameters accessible
|
|
|
|
upwards from \c{EBP}, and local variables accessible downwards from
|
|
|
|
\c{EBP}.
|
|
|
|
|
|
|
|
With a nesting level of one, the stack frame created is 4 (or 2)
|
|
|
|
bytes bigger, and the value of the final frame pointer \c{EBP} is
|
|
|
|
accessible in memory at \c{[EBP-4]}.
|
|
|
|
|
|
|
|
This allows \c{ENTER}, when called with a nesting level of two, to
|
|
|
|
look at the stack frame described by the \e{previous} value of
|
|
|
|
\c{EBP}, find the frame pointer at offset -4 from that, and push it
|
|
|
|
along with its new frame pointer, so that when a level-two procedure
|
|
|
|
is called from within a level-one procedure, \c{[EBP-4]} holds the
|
|
|
|
frame pointer of the most recent level-one procedure call and
|
|
|
|
\c{[EBP-8]} holds that of the most recent level-two call. And so on,
|
|
|
|
for nesting levels up to 31.
|
|
|
|
|
|
|
|
Stack frames created by \c{ENTER} can be destroyed by the \c{LEAVE}
|
|
|
|
instruction: see \k{insLEAVE}.
|
|
|
|
|
|
|
|
\H{insF2XM1} \i\c{F2XM1}: Calculate 2**X-1
|
|
|
|
|
|
|
|
\c F2XM1 ; D9 F0 [8086,FPU]
|
|
|
|
|
|
|
|
\c{F2XM1} raises 2 to the power of \c{ST0}, subtracts one, and
|
|
|
|
stores the result back into \c{ST0}. The initial contents of \c{ST0}
|
|
|
|
must be a number in the range -1 to +1.
|
|
|
|
|
|
|
|
\H{insFABS} \i\c{FABS}: Floating-Point Absolute Value
|
|
|
|
|
|
|
|
\c FABS ; D9 E1 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FABS} computes the absolute value of \c{ST0}, storing the result
|
|
|
|
back in \c{ST0}.
|
|
|
|
|
|
|
|
\H{insFADD} \i\c{FADD}, \i\c{FADDP}: Floating-Point Addition
|
|
|
|
|
|
|
|
\c FADD mem32 ; D8 /0 [8086,FPU]
|
|
|
|
\c FADD mem64 ; DC /0 [8086,FPU]
|
|
|
|
|
|
|
|
\c FADD fpureg ; D8 C0+r [8086,FPU]
|
|
|
|
\c FADD ST0,fpureg ; D8 C0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FADD TO fpureg ; DC C0+r [8086,FPU]
|
|
|
|
\c FADD fpureg,ST0 ; DC C0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FADDP fpureg ; DE C0+r [8086,FPU]
|
|
|
|
\c FADDP fpureg,ST0 ; DE C0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c{FADD}, given one operand, adds the operand to \c{ST0} and stores
|
|
|
|
the result back in \c{ST0}. If the operand has the \c{TO} modifier,
|
|
|
|
the result is stored in the register given rather than in \c{ST0}.
|
|
|
|
|
|
|
|
\c{FADDP} performs the same function as \c{FADD TO}, but pops the
|
|
|
|
register stack after storing the result.
|
|
|
|
|
|
|
|
The given two-operand forms are synonyms for the one-operand forms.
|
|
|
|
|
|
|
|
\H{insFBLD} \i\c{FBLD}, \i\c{FBSTP}: BCD Floating-Point Load and Store
|
|
|
|
|
|
|
|
\c FBLD mem80 ; DF /4 [8086,FPU]
|
|
|
|
\c FBSTP mem80 ; DF /6 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FBLD} loads an 80-bit (ten-byte) packed binary-coded decimal
|
|
|
|
number from the given memory address, converts it to a real, and
|
|
|
|
pushes it on the register stack. \c{FBSTP} stores the value of
|
|
|
|
\c{ST0}, in packed BCD, at the given address and then pops the
|
|
|
|
register stack.
|
|
|
|
|
|
|
|
\H{insFCHS} \i\c{FCHS}: Floating-Point Change Sign
|
|
|
|
|
|
|
|
\c FCHS ; D9 E0 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FCHS} negates the number in \c{ST0}: negative numbers become
|
|
|
|
positive, and vice versa.
|
|
|
|
|
2002-05-01 05:01:08 +08:00
|
|
|
\H{insFCLEX} \i\c{FCLEX}, \i\c{FNCLEX}: Clear Floating-Point Exceptions
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
\c FCLEX ; 9B DB E2 [8086,FPU]
|
|
|
|
\c FNCLEX ; DB E2 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FCLEX} clears any floating-point exceptions which may be pending.
|
|
|
|
\c{FNCLEX} does the same thing but doesn't wait for previous
|
|
|
|
floating-point operations (including the \e{handling} of pending
|
|
|
|
exceptions) to finish first.
|
|
|
|
|
|
|
|
\H{insFCMOVB} \i\c{FCMOVcc}: Floating-Point Conditional Move
|
|
|
|
|
|
|
|
\c FCMOVB fpureg ; DA C0+r [P6,FPU]
|
|
|
|
\c FCMOVB ST0,fpureg ; DA C0+r [P6,FPU]
|
|
|
|
|
|
|
|
\c FCMOVBE fpureg ; DA D0+r [P6,FPU]
|
|
|
|
\c FCMOVBE ST0,fpureg ; DA D0+r [P6,FPU]
|
|
|
|
|
|
|
|
\c FCMOVE fpureg ; DA C8+r [P6,FPU]
|
|
|
|
\c FCMOVE ST0,fpureg ; DA C8+r [P6,FPU]
|
|
|
|
|
|
|
|
\c FCMOVNB fpureg ; DB C0+r [P6,FPU]
|
|
|
|
\c FCMOVNB ST0,fpureg ; DB C0+r [P6,FPU]
|
|
|
|
|
|
|
|
\c FCMOVNBE fpureg ; DB D0+r [P6,FPU]
|
|
|
|
\c FCMOVNBE ST0,fpureg ; DB D0+r [P6,FPU]
|
|
|
|
|
|
|
|
\c FCMOVNE fpureg ; DB C8+r [P6,FPU]
|
|
|
|
\c FCMOVNE ST0,fpureg ; DB C8+r [P6,FPU]
|
|
|
|
|
|
|
|
\c FCMOVNU fpureg ; DB D8+r [P6,FPU]
|
|
|
|
\c FCMOVNU ST0,fpureg ; DB D8+r [P6,FPU]
|
|
|
|
|
|
|
|
\c FCMOVU fpureg ; DA D8+r [P6,FPU]
|
|
|
|
\c FCMOVU ST0,fpureg ; DA D8+r [P6,FPU]
|
|
|
|
|
|
|
|
The \c{FCMOV} instructions perform conditional move operations: each
|
|
|
|
of them moves the contents of the given register into \c{ST0} if its
|
|
|
|
condition is satisfied, and does nothing if not.
|
|
|
|
|
|
|
|
The conditions are not the same as the standard condition codes used
|
|
|
|
with conditional jump instructions. The conditions \c{B}, \c{BE},
|
|
|
|
\c{NB}, \c{NBE}, \c{E} and \c{NE} are exactly as normal, but none of
|
|
|
|
the other standard ones are supported. Instead, the condition \c{U}
|
|
|
|
and its counterpart \c{NU} are provided; the \c{U} condition is
|
|
|
|
satisfied if the last two floating-point numbers compared were
|
|
|
|
\e{unordered}, i.e. they were not equal but neither one could be
|
|
|
|
said to be greater than the other, for example if they were NaNs.
|
|
|
|
(The flag state which signals this is the setting of the parity
|
|
|
|
flag: so the \c{U} condition is notionally equivalent to \c{PE}, and
|
|
|
|
\c{NU} is equivalent to \c{PO}.)
|
|
|
|
|
|
|
|
The \c{FCMOV} conditions test the main processor's status flags, not
|
|
|
|
the FPU status flags, so using \c{FCMOV} directly after \c{FCOM}
|
|
|
|
will not work. Instead, you should either use \c{FCOMI} which writes
|
|
|
|
directly to the main CPU flags word, or use \c{FSTSW} to extract the
|
|
|
|
FPU flags.
|
|
|
|
|
|
|
|
Although the \c{FCMOV} instructions are flagged \c{P6} above, they
|
|
|
|
may not be supported by all Pentium Pro processors; the \c{CPUID}
|
|
|
|
instruction (\k{insCPUID}) will return a bit which indicates whether
|
|
|
|
conditional moves are supported.
|
|
|
|
|
|
|
|
\H{insFCOM} \i\c{FCOM}, \i\c{FCOMP}, \i\c{FCOMPP}, \i\c{FCOMI}, \i\c{FCOMIP}: Floating-Point Compare
|
|
|
|
|
|
|
|
\c FCOM mem32 ; D8 /2 [8086,FPU]
|
|
|
|
\c FCOM mem64 ; DC /2 [8086,FPU]
|
|
|
|
\c FCOM fpureg ; D8 D0+r [8086,FPU]
|
|
|
|
\c FCOM ST0,fpureg ; D8 D0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FCOMP mem32 ; D8 /3 [8086,FPU]
|
|
|
|
\c FCOMP mem64 ; DC /3 [8086,FPU]
|
|
|
|
\c FCOMP fpureg ; D8 D8+r [8086,FPU]
|
|
|
|
\c FCOMP ST0,fpureg ; D8 D8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FCOMPP ; DE D9 [8086,FPU]
|
|
|
|
|
|
|
|
\c FCOMI fpureg ; DB F0+r [P6,FPU]
|
|
|
|
\c FCOMI ST0,fpureg ; DB F0+r [P6,FPU]
|
|
|
|
|
|
|
|
\c FCOMIP fpureg ; DF F0+r [P6,FPU]
|
|
|
|
\c FCOMIP ST0,fpureg ; DF F0+r [P6,FPU]
|
|
|
|
|
|
|
|
\c{FCOM} compares \c{ST0} with the given operand, and sets the FPU
|
|
|
|
flags accordingly. \c{ST0} is treated as the left-hand side of the
|
|
|
|
comparison, so that the carry flag is set (for a `less-than' result)
|
|
|
|
if \c{ST0} is less than the given operand.
|
|
|
|
|
|
|
|
\c{FCOMP} does the same as \c{FCOM}, but pops the register stack
|
|
|
|
afterwards. \c{FCOMPP} compares \c{ST0} with \c{ST1} and then pops
|
|
|
|
the register stack twice.
|
|
|
|
|
|
|
|
\c{FCOMI} and \c{FCOMIP} work like the corresponding forms of
|
|
|
|
\c{FCOM} and \c{FCOMP}, but write their results directly to the CPU
|
|
|
|
flags register rather than the FPU status word, so they can be
|
|
|
|
immediately followed by conditional jump or conditional move
|
|
|
|
instructions.
|
|
|
|
|
|
|
|
The \c{FCOM} instructions differ from the \c{FUCOM} instructions
|
|
|
|
(\k{insFUCOM}) only in the way they handle quiet NaNs: \c{FUCOM}
|
|
|
|
will handle them silently and set the condition code flags to an
|
|
|
|
`unordered' result, whereas \c{FCOM} will generate an exception.
|
|
|
|
|
|
|
|
\H{insFCOS} \i\c{FCOS}: Cosine
|
|
|
|
|
|
|
|
\c FCOS ; D9 FF [386,FPU]
|
|
|
|
|
|
|
|
\c{FCOS} computes the cosine of \c{ST0} (in radians), and stores the
|
|
|
|
result in \c{ST0}. See also \c{FSINCOS} (\k{insFSIN}).
|
|
|
|
|
|
|
|
\H{insFDECSTP} \i\c{FDECSTP}: Decrement Floating-Point Stack Pointer
|
|
|
|
|
|
|
|
\c FDECSTP ; D9 F6 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FDECSTP} decrements the `top' field in the floating-point status
|
|
|
|
word. This has the effect of rotating the FPU register stack by one,
|
|
|
|
as if the contents of \c{ST7} had been pushed on the stack. See also
|
|
|
|
\c{FINCSTP} (\k{insFINCSTP}).
|
|
|
|
|
|
|
|
\H{insFDISI} \i\c{FxDISI}, \i\c{FxENI}: Disable and Enable Floating-Point Interrupts
|
|
|
|
|
|
|
|
\c FDISI ; 9B DB E1 [8086,FPU]
|
|
|
|
\c FNDISI ; DB E1 [8086,FPU]
|
|
|
|
|
|
|
|
\c FENI ; 9B DB E0 [8086,FPU]
|
|
|
|
\c FNENI ; DB E0 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FDISI} and \c{FENI} disable and enable floating-point interrupts.
|
|
|
|
These instructions are only meaningful on original 8087 processors:
|
|
|
|
the 287 and above treat them as no-operation instructions.
|
|
|
|
|
|
|
|
\c{FNDISI} and \c{FNENI} do the same thing as \c{FDISI} and \c{FENI}
|
|
|
|
respectively, but without waiting for the floating-point processor
|
|
|
|
to finish what it was doing first.
|
|
|
|
|
|
|
|
\H{insFDIV} \i\c{FDIV}, \i\c{FDIVP}, \i\c{FDIVR}, \i\c{FDIVRP}: Floating-Point Division
|
|
|
|
|
|
|
|
\c FDIV mem32 ; D8 /6 [8086,FPU]
|
|
|
|
\c FDIV mem64 ; DC /6 [8086,FPU]
|
|
|
|
|
|
|
|
\c FDIV fpureg ; D8 F0+r [8086,FPU]
|
|
|
|
\c FDIV ST0,fpureg ; D8 F0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FDIV TO fpureg ; DC F8+r [8086,FPU]
|
|
|
|
\c FDIV fpureg,ST0 ; DC F8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FDIVR mem32 ; D8 /7 [8086,FPU]
|
|
|
|
\c FDIVR mem64 ; DC /7 [8086,FPU]
|
|
|
|
|
|
|
|
\c FDIVR fpureg ; D8 F8+r [8086,FPU]
|
|
|
|
\c FDIVR ST0,fpureg ; D8 F8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FDIVR TO fpureg ; DC F0+r [8086,FPU]
|
|
|
|
\c FDIVR fpureg,ST0 ; DC F0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FDIVP fpureg ; DE F8+r [8086,FPU]
|
|
|
|
\c FDIVP fpureg,ST0 ; DE F8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FDIVRP fpureg ; DE F0+r [8086,FPU]
|
|
|
|
\c FDIVRP fpureg,ST0 ; DE F0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c{FDIV} divides \c{ST0} by the given operand and stores the result
|
|
|
|
back in \c{ST0}, unless the \c{TO} qualifier is given, in which case
|
|
|
|
it divides the given operand by \c{ST0} and stores the result in the
|
|
|
|
operand.
|
|
|
|
|
|
|
|
\c{FDIVR} does the same thing, but does the division the other way
|
|
|
|
up: so if \c{TO} is not given, it divides the given operand by
|
|
|
|
\c{ST0} and stores the result in \c{ST0}, whereas if \c{TO} is given
|
|
|
|
it divides \c{ST0} by its operand and stores the result in the
|
|
|
|
operand.
|
|
|
|
|
|
|
|
\c{FDIVP} operates like \c{FDIV TO}, but pops the register stack
|
|
|
|
once it has finished. \c{FDIVRP} operates like \c{FDIVR TO}, but
|
|
|
|
pops the register stack once it has finished.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insFEMMS} \i\c{FEMMS}: Faster Enter/Exit of the MMX or Floating-Point State
|
|
|
|
|
|
|
|
\c FEMMS ; 0F 0E [PENT,3DNOW]
|
|
|
|
|
|
|
|
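\c{FEMMS} is a 3DNow! instruction that can be used in place of \c{EMMS} (\k{insEMMS}) on processors supporting the 3DNow! instruction set. It marks all FPU registers as available, but leaves the contents of the MMX/FP registers undefined, which allows a faster switch between MMX and floating-point code.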
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insFFREE} \i\c{FFREE}: Flag Floating-Point Register as Unused
|
|
|
|
|
|
|
|
\c FFREE fpureg ; DD C0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c{FFREE} marks the given register as being empty.
|
|
|
|
|
|
|
|
\H{insFIADD} \i\c{FIADD}: Floating-Point/Integer Addition
|
|
|
|
|
|
|
|
\c FIADD mem16 ; DE /0 [8086,FPU]
|
|
|
|
\c FIADD mem32 ; DA /0 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FIADD} adds the 16-bit or 32-bit integer stored in the given
|
|
|
|
memory location to \c{ST0}, storing the result in \c{ST0}.
|
|
|
|
|
|
|
|
\H{insFICOM} \i\c{FICOM}, \i\c{FICOMP}: Floating-Point/Integer Compare
|
|
|
|
|
|
|
|
\c FICOM mem16 ; DE /2 [8086,FPU]
|
|
|
|
\c FICOM mem32 ; DA /2 [8086,FPU]
|
|
|
|
|
|
|
|
\c FICOMP mem16 ; DE /3 [8086,FPU]
|
|
|
|
\c FICOMP mem32 ; DA /3 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FICOM} compares \c{ST0} with the 16-bit or 32-bit integer stored
|
|
|
|
in the given memory location, and sets the FPU flags accordingly.
|
|
|
|
\c{FICOMP} does the same, but pops the register stack afterwards.
|
|
|
|
|
|
|
|
\H{insFIDIV} \i\c{FIDIV}, \i\c{FIDIVR}: Floating-Point/Integer Division
|
|
|
|
|
|
|
|
\c FIDIV mem16 ; DE /6 [8086,FPU]
|
|
|
|
\c FIDIV mem32 ; DA /6 [8086,FPU]
|
|
|
|
|
|
|
|
\c FIDIVR mem16 ; DE /7 [8086,FPU]
|
|
|
|
\c FIDIVR mem32 ; DA /7 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FIDIV} divides \c{ST0} by the 16-bit or 32-bit integer stored in
|
|
|
|
the given memory location, and stores the result in \c{ST0}.
|
|
|
|
\c{FIDIVR} does the division the other way up: it divides the
|
|
|
|
integer by \c{ST0}, but still stores the result in \c{ST0}.
|
|
|
|
|
|
|
|
\H{insFILD} \i\c{FILD}, \i\c{FIST}, \i\c{FISTP}: Floating-Point/Integer Conversion
|
|
|
|
|
|
|
|
\c FILD mem16 ; DF /0 [8086,FPU]
|
|
|
|
\c FILD mem32 ; DB /0 [8086,FPU]
|
|
|
|
\c FILD mem64 ; DF /5 [8086,FPU]
|
|
|
|
|
|
|
|
\c FIST mem16 ; DF /2 [8086,FPU]
|
|
|
|
\c FIST mem32 ; DB /2 [8086,FPU]
|
|
|
|
|
|
|
|
\c FISTP mem16 ; DF /3 [8086,FPU]
|
|
|
|
\c FISTP mem32 ; DB /3 [8086,FPU]
|
|
|
|
\c FISTP mem64 ; DF /7 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FILD} loads an integer out of a memory location, converts it to a
|
|
|
|
real, and pushes it on the FPU register stack. \c{FIST} converts
|
|
|
|
\c{ST0} to an integer and stores that in memory; \c{FISTP} does the
|
|
|
|
same as \c{FIST}, but pops the register stack afterwards.
|
|
|
|
|
|
|
|
\H{insFIMUL} \i\c{FIMUL}: Floating-Point/Integer Multiplication
|
|
|
|
|
|
|
|
\c FIMUL mem16 ; DE /1 [8086,FPU]
|
|
|
|
\c FIMUL mem32 ; DA /1 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FIMUL} multiplies \c{ST0} by the 16-bit or 32-bit integer stored
|
|
|
|
in the given memory location, and stores the result in \c{ST0}.
|
|
|
|
|
|
|
|
\H{insFINCSTP} \i\c{FINCSTP}: Increment Floating-Point Stack Pointer
|
|
|
|
|
|
|
|
\c FINCSTP ; D9 F7 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FINCSTP} increments the `top' field in the floating-point status
|
|
|
|
word. This has the effect of rotating the FPU register stack by one,
|
|
|
|
as if the register stack had been popped; however, unlike the
|
|
|
|
popping of the stack performed by many FPU instructions, it does not
|
|
|
|
flag the new \c{ST7} (previously \c{ST0}) as empty. See also
|
|
|
|
\c{FDECSTP} (\k{insFDECSTP}).
|
|
|
|
|
|
|
|
\H{insFINIT} \i\c{FINIT}, \i\c{FNINIT}: Initialise Floating-Point Unit
|
|
|
|
|
|
|
|
\c FINIT ; 9B DB E3 [8086,FPU]
|
|
|
|
\c FNINIT ; DB E3 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FINIT} initialises the FPU to its default state. It flags all
|
|
|
|
registers as empty, though it does not actually change their values.
|
|
|
|
\c{FNINIT} does the same, without first waiting for pending
|
|
|
|
exceptions to clear.
|
|
|
|
|
|
|
|
\H{insFISUB} \i\c{FISUB}, \i\c{FISUBR}: Floating-Point/Integer Subtraction
|
|
|
|
|
|
|
|
\c FISUB mem16 ; DE /4 [8086,FPU]
|
|
|
|
\c FISUB mem32 ; DA /4 [8086,FPU]
|
|
|
|
|
|
|
|
\c FISUBR mem16 ; DE /5 [8086,FPU]
|
|
|
|
\c FISUBR mem32 ; DA /5 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FISUB} subtracts the 16-bit or 32-bit integer stored in the given
|
|
|
|
memory location from \c{ST0}, and stores the result in \c{ST0}.
|
|
|
|
\c{FISUBR} does the subtraction the other way round, i.e. it
|
|
|
|
subtracts \c{ST0} from the given integer, but still stores the
|
|
|
|
result in \c{ST0}.
|
|
|
|
|
|
|
|
\H{insFLD} \i\c{FLD}: Floating-Point Load
|
|
|
|
|
|
|
|
\c FLD mem32 ; D9 /0 [8086,FPU]
|
|
|
|
\c FLD mem64 ; DD /0 [8086,FPU]
|
|
|
|
\c FLD mem80 ; DB /5 [8086,FPU]
|
|
|
|
\c FLD fpureg ; D9 C0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c{FLD} loads a floating-point value out of the given register or
|
|
|
|
memory location, and pushes it on the FPU register stack.
|
|
|
|
|
|
|
|
\H{insFLD1} \i\c{FLDxx}: Floating-Point Load Constants
|
|
|
|
|
|
|
|
\c FLD1 ; D9 E8 [8086,FPU]
|
|
|
|
\c FLDL2E ; D9 EA [8086,FPU]
|
|
|
|
\c FLDL2T ; D9 E9 [8086,FPU]
|
|
|
|
\c FLDLG2 ; D9 EC [8086,FPU]
|
|
|
|
\c FLDLN2 ; D9 ED [8086,FPU]
|
|
|
|
\c FLDPI ; D9 EB [8086,FPU]
|
|
|
|
\c FLDZ ; D9 EE [8086,FPU]
|
|
|
|
|
|
|
|
These instructions push specific standard constants on the FPU
|
|
|
|
register stack. \c{FLD1} pushes the value 1; \c{FLDL2E} pushes the
|
|
|
|
base-2 logarithm of e; \c{FLDL2T} pushes the base-2 log of 10;
|
|
|
|
\c{FLDLG2} pushes the base-10 log of 2; \c{FLDLN2} pushes the base-e
|
|
|
|
log of 2; \c{FLDPI} pushes pi; and \c{FLDZ} pushes zero.
|
|
|
|
|
|
|
|
\H{insFLDCW} \i\c{FLDCW}: Load Floating-Point Control Word
|
|
|
|
|
|
|
|
\c FLDCW mem16 ; D9 /5 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FLDCW} loads a 16-bit value out of memory and stores it into the
|
|
|
|
FPU control word (governing things like the rounding mode, the
|
|
|
|
precision, and the exception masks). See also \c{FSTCW}
|
|
|
|
(\k{insFSTCW}).
|
|
|
|
|
|
|
|
\H{insFLDENV} \i\c{FLDENV}: Load Floating-Point Environment
|
|
|
|
|
|
|
|
\c FLDENV mem ; D9 /4 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FLDENV} loads the FPU operating environment (control word, status
|
|
|
|
word, tag word, instruction pointer, data pointer and last opcode)
|
|
|
|
from memory. The memory area is 14 or 28 bytes long, depending on
|
|
|
|
the CPU mode at the time. See also \c{FSTENV} (\k{insFSTENV}).
|
|
|
|
|
|
|
|
\H{insFMUL} \i\c{FMUL}, \i\c{FMULP}: Floating-Point Multiply
|
|
|
|
|
|
|
|
\c FMUL mem32 ; D8 /1 [8086,FPU]
|
|
|
|
\c FMUL mem64 ; DC /1 [8086,FPU]
|
|
|
|
|
|
|
|
\c FMUL fpureg ; D8 C8+r [8086,FPU]
|
|
|
|
\c FMUL ST0,fpureg ; D8 C8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FMUL TO fpureg ; DC C8+r [8086,FPU]
|
|
|
|
\c FMUL fpureg,ST0 ; DC C8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FMULP fpureg ; DE C8+r [8086,FPU]
|
|
|
|
\c FMULP fpureg,ST0 ; DE C8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c{FMUL} multiplies \c{ST0} by the given operand, and stores the
|
|
|
|
result in \c{ST0}, unless the \c{TO} qualifier is used in which case
|
|
|
|
it stores the result in the operand. \c{FMULP} performs the same
|
|
|
|
operation as \c{FMUL TO}, and then pops the register stack.
|
|
|
|
|
|
|
|
\H{insFNOP} \i\c{FNOP}: Floating-Point No Operation
|
|
|
|
|
|
|
|
\c FNOP ; D9 D0 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FNOP} does nothing.
|
|
|
|
|
|
|
|
\H{insFPATAN} \i\c{FPATAN}, \i\c{FPTAN}: Arctangent and Tangent
|
|
|
|
|
|
|
|
\c FPATAN ; D9 F3 [8086,FPU]
|
|
|
|
\c FPTAN ; D9 F2 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FPATAN} computes the arctangent, in radians, of the result of
|
|
|
|
dividing \c{ST1} by \c{ST0}, stores the result in \c{ST1}, and pops
|
|
|
|
the register stack. It works like the C \c{atan2} function, in that
|
|
|
|
changing the sign of both \c{ST0} and \c{ST1} changes the output
|
|
|
|
value by pi (so it performs true rectangular-to-polar coordinate
|
|
|
|
conversion, with \c{ST1} being the Y coordinate and \c{ST0} being
|
|
|
|
the X coordinate, not merely an arctangent).
|
|
|
|
|
|
|
|
\c{FPTAN} computes the tangent of the value in \c{ST0} (in radians),
|
|
|
|
and stores the result back into \c{ST0}. It then pushes the value 1.0 on the register stack, so that the tangent ends up in \c{ST1}.
|
|
|
|
|
|
|
|
\H{insFPREM} \i\c{FPREM}, \i\c{FPREM1}: Floating-Point Partial Remainder
|
|
|
|
|
|
|
|
\c FPREM ; D9 F8 [8086,FPU]
|
|
|
|
\c FPREM1 ; D9 F5 [386,FPU]
|
|
|
|
|
|
|
|
These instructions both produce the remainder obtained by dividing
|
|
|
|
\c{ST0} by \c{ST1}. This is calculated, notionally, by dividing
|
|
|
|
\c{ST0} by \c{ST1}, rounding the result to an integer, multiplying
|
|
|
|
by \c{ST1} again, and computing the value which would need to be
|
|
|
|
added back on to the result to get back to the original value in
|
|
|
|
\c{ST0}.
|
|
|
|
|
|
|
|
The two instructions differ in the way the notional round-to-integer
|
|
|
|
operation is performed. \c{FPREM} does it by rounding towards zero,
|
|
|
|
so that the remainder it returns always has the same sign as the
|
|
|
|
original value in \c{ST0}; \c{FPREM1} does it by rounding to the
|
|
|
|
nearest integer, so that the remainder always has at most half the
|
|
|
|
magnitude of \c{ST1}.
|
|
|
|
|
|
|
|
Both instructions calculate \e{partial} remainders, meaning that
|
|
|
|
they may not manage to provide the final result, but might leave
|
|
|
|
intermediate results in \c{ST0} instead. If this happens, they will
|
|
|
|
set the C2 flag in the FPU status word; therefore, to calculate a
|
|
|
|
remainder, you should repeatedly execute \c{FPREM} or \c{FPREM1}
|
|
|
|
until C2 becomes clear.
|
|
|
|
|
|
|
|
\H{insFRNDINT} \i\c{FRNDINT}: Floating-Point Round to Integer
|
|
|
|
|
|
|
|
\c FRNDINT ; D9 FC [8086,FPU]
|
|
|
|
|
|
|
|
\c{FRNDINT} rounds the contents of \c{ST0} to an integer, according
|
|
|
|
to the current rounding mode set in the FPU control word, and stores
|
|
|
|
the result back in \c{ST0}.
|
|
|
|
|
|
|
|
\H{insFRSTOR} \i\c{FSAVE}, \i\c{FRSTOR}: Save/Restore Floating-Point State
|
|
|
|
|
|
|
|
\c FSAVE mem ; 9B DD /6 [8086,FPU]
|
|
|
|
\c FNSAVE mem ; DD /6 [8086,FPU]
|
|
|
|
|
|
|
|
\c FRSTOR mem ; DD /4 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FSAVE} saves the entire floating-point unit state, including all
|
|
|
|
the information saved by \c{FSTENV} (\k{insFSTENV}) plus the
|
|
|
|
contents of all the registers, to a 94 or 108 byte area of memory
|
|
|
|
(depending on the CPU mode). \c{FRSTOR} restores the floating-point
|
|
|
|
state from the same area of memory.
|
|
|
|
|
|
|
|
\c{FNSAVE} does the same as \c{FSAVE}, without first waiting for
|
|
|
|
pending floating-point exceptions to clear.
|
|
|
|
|
|
|
|
\H{insFSCALE} \i\c{FSCALE}: Scale Floating-Point Value by Power of Two
|
|
|
|
|
|
|
|
\c FSCALE ; D9 FD [8086,FPU]
|
|
|
|
|
|
|
|
\c{FSCALE} scales a number by a power of two: it rounds \c{ST1}
|
|
|
|
towards zero to obtain an integer, then multiplies \c{ST0} by two to
|
|
|
|
the power of that integer, and stores the result in \c{ST0}.
|
|
|
|
|
|
|
|
\H{insFSETPM} \i\c{FSETPM}: Set Protected Mode
|
|
|
|
|
|
|
|
\c FSETPM ; DB E4 [286,FPU]
|
|
|
|
|
|
|
|
This instruction initialises protected mode on the 287 floating-point
|
|
|
|
coprocessor. It is only meaningful on that processor: the 387 and
|
|
|
|
above treat the instruction as a no-operation.
|
|
|
|
|
|
|
|
\H{insFSIN} \i\c{FSIN}, \i\c{FSINCOS}: Sine and Cosine
|
|
|
|
|
|
|
|
\c FSIN ; D9 FE [386,FPU]
|
|
|
|
\c FSINCOS ; D9 FB [386,FPU]
|
|
|
|
|
|
|
|
\c{FSIN} calculates the sine of \c{ST0} (in radians) and stores the
|
|
|
|
result in \c{ST0}. \c{FSINCOS} does the same, but then pushes the
|
|
|
|
cosine of the same value on the register stack, so that the sine
|
|
|
|
ends up in \c{ST1} and the cosine in \c{ST0}. \c{FSINCOS} is faster
|
|
|
|
than executing \c{FSIN} and \c{FCOS} (see \k{insFCOS}) in
|
|
|
|
succession.
|
|
|
|
|
|
|
|
\H{insFSQRT} \i\c{FSQRT}: Floating-Point Square Root
|
|
|
|
|
|
|
|
\c FSQRT ; D9 FA [8086,FPU]
|
|
|
|
|
|
|
|
\c{FSQRT} calculates the square root of \c{ST0} and stores the
|
|
|
|
result in \c{ST0}.
|
|
|
|
|
|
|
|
\H{insFST} \i\c{FST}, \i\c{FSTP}: Floating-Point Store
|
|
|
|
|
|
|
|
\c FST mem32 ; D9 /2 [8086,FPU]
|
|
|
|
\c FST mem64 ; DD /2 [8086,FPU]
|
|
|
|
\c FST fpureg ; DD D0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FSTP mem32 ; D9 /3 [8086,FPU]
|
|
|
|
\c FSTP mem64 ; DD /3 [8086,FPU]
|
|
|
|
\c FSTP mem80 ; DB /0 [8086,FPU]
|
|
|
|
\c FSTP fpureg ; DD D8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c{FST} stores the value in \c{ST0} into the given memory location
|
|
|
|
or other FPU register. \c{FSTP} does the same, but then pops the
|
|
|
|
register stack.
|
|
|
|
|
|
|
|
\H{insFSTCW} \i\c{FSTCW}: Store Floating-Point Control Word
|
|
|
|
|
|
|
|
\c FSTCW mem16                   ; 9B D9 /7             [8086,FPU]
|
|
|
|
\c FNSTCW mem16                  ; D9 /7                [8086,FPU]
|
|
|
|
|
|
|
|
\c{FSTCW} stores the FPU control word (governing things like the
|
|
|
|
rounding mode, the precision, and the exception masks) into a 2-byte
|
|
|
|
memory area. See also \c{FLDCW} (\k{insFLDCW}).
|
|
|
|
|
|
|
|
\c{FNSTCW} does the same thing as \c{FSTCW}, without first waiting
|
|
|
|
for pending floating-point exceptions to clear.
|
|
|
|
|
|
|
|
\H{insFSTENV} \i\c{FSTENV}: Store Floating-Point Environment
|
|
|
|
|
|
|
|
\c FSTENV mem ; 9B D9 /6 [8086,FPU]
|
|
|
|
\c FNSTENV mem ; D9 /6 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FSTENV} stores the FPU operating environment (control word,
|
|
|
|
status word, tag word, instruction pointer, data pointer and last
|
|
|
|
opcode) into memory. The memory area is 14 or 28 bytes long,
|
|
|
|
depending on the CPU mode at the time. See also \c{FLDENV}
|
|
|
|
(\k{insFLDENV}).
|
|
|
|
|
|
|
|
\c{FNSTENV} does the same thing as \c{FSTENV}, without first waiting
|
|
|
|
for pending floating-point exceptions to clear.
|
|
|
|
|
|
|
|
\H{insFSTSW} \i\c{FSTSW}: Store Floating-Point Status Word
|
|
|
|
|
|
|
|
\c FSTSW mem16                   ; 9B DD /7             [8086,FPU]
|
|
|
|
\c FSTSW AX ; 9B DF E0 [286,FPU]
|
|
|
|
|
|
|
|
\c FNSTSW mem16                  ; DD /7                [8086,FPU]
|
|
|
|
\c FNSTSW AX ; DF E0 [286,FPU]
|
|
|
|
|
|
|
|
\c{FSTSW} stores the FPU status word into \c{AX} or into a 2-byte
|
|
|
|
memory area.
|
|
|
|
|
|
|
|
\c{FNSTSW} does the same thing as \c{FSTSW}, without first waiting
|
|
|
|
for pending floating-point exceptions to clear.
|
|
|
|
|
|
|
|
\H{insFSUB} \i\c{FSUB}, \i\c{FSUBP}, \i\c{FSUBR}, \i\c{FSUBRP}: Floating-Point Subtract
|
|
|
|
|
|
|
|
\c FSUB mem32 ; D8 /4 [8086,FPU]
|
|
|
|
\c FSUB mem64 ; DC /4 [8086,FPU]
|
|
|
|
|
|
|
|
\c FSUB fpureg ; D8 E0+r [8086,FPU]
|
|
|
|
\c FSUB ST0,fpureg ; D8 E0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FSUB TO fpureg ; DC E8+r [8086,FPU]
|
|
|
|
\c FSUB fpureg,ST0 ; DC E8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FSUBR mem32 ; D8 /5 [8086,FPU]
|
|
|
|
\c FSUBR mem64 ; DC /5 [8086,FPU]
|
|
|
|
|
|
|
|
\c FSUBR fpureg ; D8 E8+r [8086,FPU]
|
|
|
|
\c FSUBR ST0,fpureg ; D8 E8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FSUBR TO fpureg ; DC E0+r [8086,FPU]
|
|
|
|
\c FSUBR fpureg,ST0 ; DC E0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FSUBP fpureg ; DE E8+r [8086,FPU]
|
|
|
|
\c FSUBP fpureg,ST0 ; DE E8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c FSUBRP fpureg ; DE E0+r [8086,FPU]
|
|
|
|
\c FSUBRP fpureg,ST0 ; DE E0+r [8086,FPU]
|
|
|
|
|
|
|
|
\c{FSUB} subtracts the given operand from \c{ST0} and stores the
|
|
|
|
result back in \c{ST0}, unless the \c{TO} qualifier is given, in
|
|
|
|
which case it subtracts \c{ST0} from the given operand and stores
|
|
|
|
the result in the operand.
|
|
|
|
|
|
|
|
\c{FSUBR} does the same thing, but does the subtraction the other way
|
|
|
|
round: so if \c{TO} is not given, it subtracts \c{ST0} from the given
|
|
|
|
operand and stores the result in \c{ST0}, whereas if \c{TO} is given
|
|
|
|
it subtracts its operand from \c{ST0} and stores the result in the
|
|
|
|
operand.
|
|
|
|
|
|
|
|
\c{FSUBP} operates like \c{FSUB TO}, but pops the register stack
|
|
|
|
once it has finished. \c{FSUBRP} operates like \c{FSUBR TO}, but
|
|
|
|
pops the register stack once it has finished.
|
|
|
|
|
|
|
|
\H{insFTST} \i\c{FTST}: Test \c{ST0} Against Zero
|
|
|
|
|
|
|
|
\c FTST ; D9 E4 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FTST} compares \c{ST0} with zero and sets the FPU flags
|
|
|
|
accordingly. \c{ST0} is treated as the left-hand side of the
|
|
|
|
comparison, so that a `less-than' result is generated if \c{ST0} is
|
|
|
|
negative.
|
|
|
|
|
|
|
|
\H{insFUCOM} \i\c{FUCOMxx}: Floating-Point Unordered Compare
|
|
|
|
|
|
|
|
\c FUCOM fpureg ; DD E0+r [386,FPU]
|
|
|
|
\c FUCOM ST0,fpureg ; DD E0+r [386,FPU]
|
|
|
|
|
|
|
|
\c FUCOMP fpureg ; DD E8+r [386,FPU]
|
|
|
|
\c FUCOMP ST0,fpureg ; DD E8+r [386,FPU]
|
|
|
|
|
|
|
|
\c FUCOMPP ; DA E9 [386,FPU]
|
|
|
|
|
|
|
|
\c FUCOMI fpureg ; DB E8+r [P6,FPU]
|
|
|
|
\c FUCOMI ST0,fpureg ; DB E8+r [P6,FPU]
|
|
|
|
|
|
|
|
\c FUCOMIP fpureg ; DF E8+r [P6,FPU]
|
|
|
|
\c FUCOMIP ST0,fpureg ; DF E8+r [P6,FPU]
|
|
|
|
|
|
|
|
\c{FUCOM} compares \c{ST0} with the given operand, and sets the FPU
|
|
|
|
flags accordingly. \c{ST0} is treated as the left-hand side of the
|
|
|
|
comparison, so that the carry flag is set (for a `less-than' result)
|
|
|
|
if \c{ST0} is less than the given operand.
|
|
|
|
|
|
|
|
\c{FUCOMP} does the same as \c{FUCOM}, but pops the register stack
|
|
|
|
afterwards. \c{FUCOMPP} compares \c{ST0} with \c{ST1} and then pops
|
|
|
|
the register stack twice.
|
|
|
|
|
|
|
|
\c{FUCOMI} and \c{FUCOMIP} work like the corresponding forms of
|
|
|
|
\c{FUCOM} and \c{FUCOMP}, but write their results directly to the CPU
|
|
|
|
flags register rather than the FPU status word, so they can be
|
|
|
|
immediately followed by conditional jump or conditional move
|
|
|
|
instructions.
|
|
|
|
|
|
|
|
The \c{FUCOM} instructions differ from the \c{FCOM} instructions
|
|
|
|
(\k{insFCOM}) only in the way they handle quiet NaNs: \c{FUCOM} will
|
|
|
|
handle them silently and set the condition code flags to an
|
|
|
|
`unordered' result, whereas \c{FCOM} will generate an exception.
|
|
|
|
|
|
|
|
\H{insFXAM} \i\c{FXAM}: Examine Class of Value in \c{ST0}
|
|
|
|
|
|
|
|
\c FXAM ; D9 E5 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FXAM} sets the FPU flags C3, C2 and C0 depending on the type of
|
|
|
|
value stored in \c{ST0}: 000 (respectively) for an unsupported
|
|
|
|
format, 001 for a NaN, 010 for a normal finite number, 011 for an
|
|
|
|
infinity, 100 for a zero, 101 for an empty register, and 110 for a
|
|
|
|
denormal. It also sets the C1 flag to the sign of the number.
|
|
|
|
|
|
|
|
\H{insFXCH} \i\c{FXCH}: Floating-Point Exchange
|
|
|
|
|
|
|
|
\c FXCH ; D9 C9 [8086,FPU]
|
|
|
|
\c FXCH fpureg ; D9 C8+r [8086,FPU]
|
|
|
|
\c FXCH fpureg,ST0 ; D9 C8+r [8086,FPU]
|
|
|
|
\c FXCH ST0,fpureg ; D9 C8+r [8086,FPU]
|
|
|
|
|
|
|
|
\c{FXCH} exchanges \c{ST0} with a given FPU register. The no-operand
|
|
|
|
form exchanges \c{ST0} with \c{ST1}.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insFXRSTOR} \i\c{FXRSTOR}: Restore FP and MMX(TM) State and
|
|
|
|
Streaming SIMD Extension State
|
|
|
|
|
|
|
|
\c FXRSTOR memory ; 0F,AE,/1 [P6,SSE,FPU]
|
|
|
|
|
|
|
|
The \c{FXRSTOR} instruction reloads the FP and MMX(TM) technology
|
|
|
|
state, and the Streaming SIMD Extension state (environment and registers),
|
|
|
|
from the memory area defined by m512byte. This data should have been
|
|
|
|
written by a previous FXSAVE.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insFXSAVE} \i\c{FXSAVE}: Store FP and MMX(TM) State
|
|
|
|
and Streaming SIMD Extension State
|
|
|
|
|
|
|
|
\c FXSAVE memory ; 0F,AE,/0 [P6,SSE,FPU]
|
|
|
|
|
|
|
|
|
|
|
|
The \c{FXSAVE} instruction writes the current FP and
|
|
|
|
MMX(TM) technology state, and Streaming SIMD Extension state
|
|
|
|
(environment and registers), to the specified destination
|
|
|
|
defined by m512byte. It does this without checking for pending
|
|
|
|
unmasked floating-point exceptions (similar to the operation of
|
|
|
|
FNSAVE). Unlike the FSAVE/FNSAVE instructions, the processor
|
|
|
|
retains the contents of the FP and MMX(TM) technology state and
|
|
|
|
Streaming SIMD Extension state in the processor after the state
|
|
|
|
has been saved. This instruction has been optimized to maximize
|
|
|
|
floating-point save performance.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insFXTRACT} \i\c{FXTRACT}: Extract Exponent and Significand
|
|
|
|
|
|
|
|
\c FXTRACT ; D9 F4 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FXTRACT} separates the number in \c{ST0} into its exponent and
|
|
|
|
significand (mantissa), stores the exponent back into \c{ST0}, and
|
|
|
|
then pushes the significand on the register stack (so that the
|
|
|
|
significand ends up in \c{ST0}, and the exponent in \c{ST1}).
|
|
|
|
|
|
|
|
\H{insFYL2X} \i\c{FYL2X}, \i\c{FYL2XP1}: Compute Y times Log2(X) or Log2(X+1)
|
|
|
|
|
|
|
|
\c FYL2X ; D9 F1 [8086,FPU]
|
|
|
|
\c FYL2XP1 ; D9 F9 [8086,FPU]
|
|
|
|
|
|
|
|
\c{FYL2X} multiplies \c{ST1} by the base-2 logarithm of \c{ST0},
|
|
|
|
stores the result in \c{ST1}, and pops the register stack (so that
|
|
|
|
the result ends up in \c{ST0}). \c{ST0} must be non-zero and
|
|
|
|
positive.
|
|
|
|
|
|
|
|
\c{FYL2XP1} works the same way, but replacing the base-2 log of
|
|
|
|
\c{ST0} with that of \c{ST0} plus one. This time, \c{ST0} must have
|
|
|
|
magnitude no greater than 1 minus half the square root of two.
|
|
|
|
|
|
|
|
\H{insHLT} \i\c{HLT}: Halt Processor
|
|
|
|
|
|
|
|
\c HLT ; F4 [8086]
|
|
|
|
|
|
|
|
\c{HLT} puts the processor into a halted state, where it will
|
|
|
|
perform no more operations until restarted by an interrupt or a
|
|
|
|
reset.
|
|
|
|
|
|
|
|
\H{insIBTS} \i\c{IBTS}: Insert Bit String
|
|
|
|
|
|
|
|
\c IBTS r/m16,reg16 ; o16 0F A7 /r [386,UNDOC]
|
|
|
|
\c IBTS r/m32,reg32 ; o32 0F A7 /r [386,UNDOC]
|
|
|
|
|
|
|
|
No clear documentation seems to be available for this instruction:
|
|
|
|
the best I've been able to find reads `Takes a string of bits from
|
|
|
|
the second operand and puts them in the first operand'. It is
|
|
|
|
present only in early 386 processors, and conflicts with the opcodes
|
|
|
|
for \c{CMPXCHG486}. NASM supports it only for completeness. Its
|
|
|
|
counterpart is \c{XBTS} (see \k{insXBTS}).
|
|
|
|
|
|
|
|
\H{insIDIV} \i\c{IDIV}: Signed Integer Divide
|
|
|
|
|
|
|
|
\c IDIV r/m8 ; F6 /7 [8086]
|
|
|
|
\c IDIV r/m16 ; o16 F7 /7 [8086]
|
|
|
|
\c IDIV r/m32 ; o32 F7 /7 [386]
|
|
|
|
|
|
|
|
\c{IDIV} performs signed integer division. The explicit operand
|
|
|
|
provided is the divisor; the dividend and destination operands are
|
|
|
|
implicit, in the following way:
|
|
|
|
|
|
|
|
\b For \c{IDIV r/m8}, \c{AX} is divided by the given operand; the
|
|
|
|
quotient is stored in \c{AL} and the remainder in \c{AH}.
|
|
|
|
|
|
|
|
\b For \c{IDIV r/m16}, \c{DX:AX} is divided by the given operand; the
|
|
|
|
quotient is stored in \c{AX} and the remainder in \c{DX}.
|
|
|
|
|
|
|
|
\b For \c{IDIV r/m32}, \c{EDX:EAX} is divided by the given operand;
|
|
|
|
the quotient is stored in \c{EAX} and the remainder in \c{EDX}.
|
|
|
|
|
|
|
|
Unsigned integer division is performed by the \c{DIV} instruction:
|
|
|
|
see \k{insDIV}.
|
|
|
|
|
|
|
|
\H{insIMUL} \i\c{IMUL}: Signed Integer Multiply
|
|
|
|
|
|
|
|
\c IMUL r/m8 ; F6 /5 [8086]
|
|
|
|
\c IMUL r/m16 ; o16 F7 /5 [8086]
|
|
|
|
\c IMUL r/m32 ; o32 F7 /5 [386]
|
|
|
|
|
|
|
|
\c IMUL reg16,r/m16 ; o16 0F AF /r [386]
|
|
|
|
\c IMUL reg32,r/m32 ; o32 0F AF /r [386]
|
|
|
|
|
|
|
|
\c IMUL reg16,imm8 ; o16 6B /r ib [286]
|
|
|
|
\c IMUL reg16,imm16 ; o16 69 /r iw [286]
|
|
|
|
\c IMUL reg32,imm8 ; o32 6B /r ib [386]
|
|
|
|
\c IMUL reg32,imm32 ; o32 69 /r id [386]
|
|
|
|
|
|
|
|
\c IMUL reg16,r/m16,imm8 ; o16 6B /r ib [286]
|
|
|
|
\c IMUL reg16,r/m16,imm16 ; o16 69 /r iw [286]
|
|
|
|
\c IMUL reg32,r/m32,imm8 ; o32 6B /r ib [386]
|
|
|
|
\c IMUL reg32,r/m32,imm32 ; o32 69 /r id [386]
|
|
|
|
|
|
|
|
\c{IMUL} performs signed integer multiplication. For the
|
|
|
|
single-operand form, the other operand and destination are implicit,
|
|
|
|
in the following way:
|
|
|
|
|
|
|
|
\b For \c{IMUL r/m8}, \c{AL} is multiplied by the given operand; the
|
|
|
|
product is stored in \c{AX}.
|
|
|
|
|
|
|
|
\b For \c{IMUL r/m16}, \c{AX} is multiplied by the given operand;
|
|
|
|
the product is stored in \c{DX:AX}.
|
|
|
|
|
|
|
|
\b For \c{IMUL r/m32}, \c{EAX} is multiplied by the given operand;
|
|
|
|
the product is stored in \c{EDX:EAX}.
|
|
|
|
|
|
|
|
The two-operand form multiplies its two operands and stores the
|
|
|
|
result in the destination (first) operand. The three-operand form
|
|
|
|
multiplies its last two operands and stores the result in the first
|
|
|
|
operand.
|
|
|
|
|
|
|
|
The two-operand form with an immediate second operand is in fact a shorthand for the three-operand
|
|
|
|
form, as can be seen by examining the opcode descriptions: in the
|
|
|
|
two-operand form, the code \c{/r} takes both its register and
|
|
|
|
\c{r/m} parts from the same operand (the first one).
|
|
|
|
|
|
|
|
In the forms with an 8-bit immediate operand and another longer
|
|
|
|
source operand, the immediate operand is considered to be signed,
|
|
|
|
and is sign-extended to the length of the other source operand. In
|
|
|
|
these cases, the \c{BYTE} qualifier is necessary to force NASM to
|
|
|
|
generate this form of the instruction.
|
|
|
|
|
|
|
|
Unsigned integer multiplication is performed by the \c{MUL}
|
|
|
|
instruction: see \k{insMUL}.
|
|
|
|
|
|
|
|
\H{insIN} \i\c{IN}: Input from I/O Port
|
|
|
|
|
|
|
|
\c IN AL,imm8 ; E4 ib [8086]
|
|
|
|
\c IN AX,imm8 ; o16 E5 ib [8086]
|
|
|
|
\c IN EAX,imm8 ; o32 E5 ib [386]
|
|
|
|
\c IN AL,DX ; EC [8086]
|
|
|
|
\c IN AX,DX ; o16 ED [8086]
|
|
|
|
\c IN EAX,DX ; o32 ED [386]
|
|
|
|
|
|
|
|
\c{IN} reads a byte, word or doubleword from the specified I/O port,
|
|
|
|
and stores it in the given destination register. The port number may
|
|
|
|
be specified as an immediate value if it is between 0 and 255, and
|
|
|
|
otherwise must be stored in \c{DX}. See also \c{OUT} (\k{insOUT}).
|
|
|
|
|
|
|
|
\H{insINC} \i\c{INC}: Increment Integer
|
|
|
|
|
|
|
|
\c INC reg16 ; o16 40+r [8086]
|
|
|
|
\c INC reg32 ; o32 40+r [386]
|
|
|
|
\c INC r/m8 ; FE /0 [8086]
|
|
|
|
\c INC r/m16 ; o16 FF /0 [8086]
|
|
|
|
\c INC r/m32 ; o32 FF /0 [386]
|
|
|
|
|
|
|
|
\c{INC} adds 1 to its operand. It does \e{not} affect the carry
|
|
|
|
flag: to affect the carry flag, use \c{ADD something,1} (see
|
|
|
|
\k{insADD}). See also \c{DEC} (\k{insDEC}).
|
|
|
|
|
|
|
|
\H{insINSB} \i\c{INSB}, \i\c{INSW}, \i\c{INSD}: Input String from I/O Port
|
|
|
|
|
|
|
|
\c INSB ; 6C [186]
|
|
|
|
\c INSW ; o16 6D [186]
|
|
|
|
\c INSD ; o32 6D [386]
|
|
|
|
|
|
|
|
\c{INSB} inputs a byte from the I/O port specified in \c{DX} and
|
|
|
|
stores it at \c{[ES:DI]} or \c{[ES:EDI]}. It then increments or
|
|
|
|
decrements (depending on the direction flag: increments if the flag
|
|
|
|
is clear, decrements if it is set) \c{DI} or \c{EDI}.
|
|
|
|
|
|
|
|
The register used is \c{DI} if the address size is 16 bits, and
|
|
|
|
\c{EDI} if it is 32 bits. If you need to use an address size not
|
|
|
|
equal to the current \c{BITS} setting, you can use an explicit
|
|
|
|
\i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
Segment override prefixes have no effect for this instruction: the
|
|
|
|
use of \c{ES} for the store to \c{[DI]} or \c{[EDI]} cannot be
|
|
|
|
overridden.
|
|
|
|
|
|
|
|
\c{INSW} and \c{INSD} work in the same way, but they input a word or
|
|
|
|
a doubleword instead of a byte, and increment or decrement the
|
|
|
|
addressing register by 2 or 4 instead of 1.
|
|
|
|
|
|
|
|
The \c{REP} prefix may be used to repeat the instruction \c{CX} (or
|
|
|
|
\c{ECX} - again, the address size chooses which) times.
|
|
|
|
|
|
|
|
See also \c{OUTSB}, \c{OUTSW} and \c{OUTSD} (\k{insOUTSB}).
|
|
|
|
|
|
|
|
\H{insINT} \i\c{INT}: Software Interrupt
|
|
|
|
|
|
|
|
\c INT imm8 ; CD ib [8086]
|
|
|
|
|
|
|
|
\c{INT} causes a software interrupt through a specified vector
|
|
|
|
number from 0 to 255.
|
|
|
|
|
|
|
|
The code generated by the \c{INT} instruction is always two bytes
|
|
|
|
long: although there are short forms for some \c{INT} instructions,
|
|
|
|
NASM does not generate them when it sees the \c{INT} mnemonic. In
|
|
|
|
order to generate single-byte breakpoint instructions, use the
|
|
|
|
\c{INT3} or \c{INT1} instructions (see \k{insINT1}) instead.
|
|
|
|
|
|
|
|
\H{insINT1} \i\c{INT3}, \i\c{INT1}, \i\c{ICEBP}, \i\c{INT01}: Breakpoints
|
|
|
|
|
|
|
|
\c INT1 ; F1 [P6]
|
|
|
|
\c ICEBP ; F1 [P6]
|
|
|
|
\c INT01 ; F1 [P6]
|
|
|
|
|
|
|
|
\c INT3 ; CC [8086]
|
|
|
|
|
|
|
|
\c{INT1} and \c{INT3} are short one-byte forms of the instructions
|
|
|
|
\c{INT 1} and \c{INT 3} (see \k{insINT}). They perform a similar
|
|
|
|
function to their longer counterparts, but take up less code space.
|
|
|
|
They are used as breakpoints by debuggers.
|
|
|
|
|
|
|
|
\c{INT1}, and its alternative synonyms \c{INT01} and \c{ICEBP}, is
|
|
|
|
an instruction used by in-circuit emulators (ICEs). It is present,
|
|
|
|
though not documented, on some processors down to the 286, but is
|
|
|
|
only documented for the Pentium Pro. \c{INT3} is the instruction
|
|
|
|
normally used as a breakpoint by debuggers.
|
|
|
|
|
|
|
|
\c{INT3} is not precisely equivalent to \c{INT 3}: the short form,
|
|
|
|
since it is designed to be used as a breakpoint, bypasses the normal
|
|
|
|
IOPL checks in virtual-8086 mode, and also does not go through
|
|
|
|
interrupt redirection.
|
|
|
|
|
|
|
|
\H{insINTO} \i\c{INTO}: Interrupt if Overflow
|
|
|
|
|
|
|
|
\c INTO ; CE [8086]
|
|
|
|
|
|
|
|
\c{INTO} performs an \c{INT 4} software interrupt (see \k{insINT})
|
|
|
|
if and only if the overflow flag is set.
|
|
|
|
|
|
|
|
\H{insINVD} \i\c{INVD}: Invalidate Internal Caches
|
|
|
|
|
|
|
|
\c INVD ; 0F 08 [486]
|
|
|
|
|
|
|
|
\c{INVD} invalidates and empties the processor's internal caches,
|
|
|
|
and causes the processor to instruct external caches to do the same.
|
|
|
|
It does not write the contents of the caches back to memory first:
|
|
|
|
any modified data held in the caches will be lost. To write the data
|
|
|
|
back first, use \c{WBINVD} (\k{insWBINVD}).
|
|
|
|
|
|
|
|
\H{insINVLPG} \i\c{INVLPG}: Invalidate TLB Entry
|
|
|
|
|
|
|
|
\c INVLPG mem                    ; 0F 01 /7             [486]
|
|
|
|
|
|
|
|
\c{INVLPG} invalidates the translation lookaside buffer (TLB) entry
|
|
|
|
associated with the supplied memory address.
|
|
|
|
|
|
|
|
\H{insIRET} \i\c{IRET}, \i\c{IRETW}, \i\c{IRETD}: Return from Interrupt
|
|
|
|
|
|
|
|
\c IRET ; CF [8086]
|
|
|
|
\c IRETW ; o16 CF [8086]
|
|
|
|
\c IRETD ; o32 CF [386]
|
|
|
|
|
|
|
|
\c{IRET} returns from an interrupt (hardware or software) by means
|
|
|
|
of popping \c{IP} (or \c{EIP}), \c{CS} and the flags off the stack
|
|
|
|
and then continuing execution from the new \c{CS:IP}.
|
|
|
|
|
|
|
|
\c{IRETW} pops \c{IP}, \c{CS} and the flags as 2 bytes each, taking
|
|
|
|
6 bytes off the stack in total. \c{IRETD} pops \c{EIP} as 4 bytes,
|
|
|
|
pops a further 4 bytes of which the top two are discarded and the
|
|
|
|
bottom two go into \c{CS}, and pops the flags as 4 bytes as well,
|
|
|
|
taking 12 bytes off the stack.
|
|
|
|
|
|
|
|
\c{IRET} is a shorthand for either \c{IRETW} or \c{IRETD}, depending
|
|
|
|
on the default \c{BITS} setting at the time.
|
|
|
|
|
|
|
|
\H{insJCXZ} \i\c{JCXZ}, \i\c{JECXZ}: Jump if CX/ECX Zero
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\c JCXZ imm ; a16 E3 rb [8086]
|
|
|
|
\c JECXZ imm ; a32 E3 rb [386]
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
\c{JCXZ} performs a short jump (with maximum range 128 bytes) if and
|
|
|
|
only if the contents of the \c{CX} register is 0. \c{JECXZ} does the
|
|
|
|
same thing, but with \c{ECX}.
|
|
|
|
|
|
|
|
\H{insJMP} \i\c{JMP}: Jump
|
|
|
|
|
|
|
|
\c JMP imm ; E9 rw/rd [8086]
|
|
|
|
\c JMP SHORT imm ; EB rb [8086]
|
|
|
|
\c JMP imm:imm16 ; o16 EA iw iw [8086]
|
|
|
|
\c JMP imm:imm32 ; o32 EA id iw [386]
|
|
|
|
\c JMP FAR mem ; o16 FF /5 [8086]
|
|
|
|
\c JMP FAR mem ; o32 FF /5 [386]
|
|
|
|
\c JMP r/m16 ; o16 FF /4 [8086]
|
|
|
|
\c JMP r/m32 ; o32 FF /4 [386]
|
|
|
|
|
|
|
|
\c{JMP} jumps to a given address. The address may be specified as an
|
|
|
|
absolute segment and offset, or as a relative jump within the
|
|
|
|
current segment.
|
|
|
|
|
|
|
|
\c{JMP SHORT imm} has a maximum range of 128 bytes, since the
|
|
|
|
displacement is specified as only 8 bits, but takes up less code
|
|
|
|
space. NASM does not choose when to generate \c{JMP SHORT} for you:
|
|
|
|
you must explicitly code \c{SHORT} every time you want a short jump.
|
|
|
|
|
|
|
|
You can choose between the two immediate \i{far jump} forms (\c{JMP
|
|
|
|
imm:imm}) by the use of the \c{WORD} and \c{DWORD} keywords: \c{JMP
|
|
|
|
WORD 0x1234:0x5678} or \c{JMP DWORD 0x1234:0x56789abc}.
|
|
|
|
|
|
|
|
The \c{JMP FAR mem} forms execute a far jump by loading the
|
|
|
|
destination address out of memory. The address loaded consists of 16
|
|
|
|
or 32 bits of offset (depending on the operand size), and 16 bits of
|
|
|
|
segment. The operand size may be overridden using \c{JMP WORD FAR
|
|
|
|
mem} or \c{JMP DWORD FAR mem}.
|
|
|
|
|
|
|
|
The \c{JMP r/m} forms execute a \i{near jump} (within the same
|
|
|
|
segment), loading the destination address out of memory or out of a
|
|
|
|
register. The keyword \c{NEAR} may be specified, for clarity, in
|
|
|
|
these forms, but is not necessary. Again, operand size can be
|
|
|
|
overridden using \c{JMP WORD mem} or \c{JMP DWORD mem}.
|
|
|
|
|
|
|
|
As a convenience, NASM does not require you to jump to a far symbol
|
|
|
|
by coding the cumbersome \c{JMP SEG routine:routine}, but instead
|
|
|
|
allows the easier synonym \c{JMP FAR routine}.
|
|
|
|
|
|
|
|
The \c{JMP r/m} forms given above are near jumps; NASM will accept
|
|
|
|
the \c{NEAR} keyword (e.g. \c{JMP NEAR [address]}), even though it
|
|
|
|
is not strictly necessary.
|
|
|
|
|
|
|
|
\H{insJcc} \i\c{Jcc}: Conditional Branch
|
|
|
|
|
|
|
|
\c Jcc imm ; 70+cc rb [8086]
|
|
|
|
\c Jcc NEAR imm ; 0F 80+cc rw/rd [386]
|
|
|
|
|
|
|
|
The \i{conditional jump} instructions execute a near (same segment)
|
|
|
|
jump if and only if their conditions are satisfied. For example,
|
|
|
|
\c{JNZ} jumps only if the zero flag is not set.
|
|
|
|
|
|
|
|
The ordinary form of the instructions has only a 128-byte range; the
|
|
|
|
\c{NEAR} form is a 386 extension to the instruction set, and can
|
|
|
|
span the full size of a segment. NASM will not override your choice
|
|
|
|
of jump instruction: if you want \c{Jcc NEAR}, you have to use the
|
|
|
|
\c{NEAR} keyword.
|
|
|
|
|
|
|
|
The \c{SHORT} keyword is allowed on the first form of the
|
|
|
|
instruction, for clarity, but is not necessary.
|
|
|
|
|
|
|
|
\H{insLAHF} \i\c{LAHF}: Load AH from Flags
|
|
|
|
|
|
|
|
\c LAHF ; 9F [8086]
|
|
|
|
|
|
|
|
\c{LAHF} sets the \c{AH} register according to the contents of the
|
|
|
|
low byte of the flags word. See also \c{SAHF} (\k{insSAHF}).
|
|
|
|
|
|
|
|
\H{insLAR} \i\c{LAR}: Load Access Rights
|
|
|
|
|
|
|
|
\c LAR reg16,r/m16 ; o16 0F 02 /r [286,PRIV]
|
|
|
|
\c LAR reg32,r/m32 ; o32 0F 02 /r [286,PRIV]
|
|
|
|
|
|
|
|
\c{LAR} takes the segment selector specified by its source (second)
|
|
|
|
operand, finds the corresponding segment descriptor in the GDT or
|
|
|
|
LDT, and loads the access-rights byte of the descriptor into its
|
|
|
|
destination (first) operand.
|
|
|
|
|
|
|
|
\H{insLDS} \i\c{LDS}, \i\c{LES}, \i\c{LFS}, \i\c{LGS}, \i\c{LSS}: Load Far Pointer
|
|
|
|
|
|
|
|
\c LDS reg16,mem ; o16 C5 /r [8086]
|
|
|
|
\c LDS reg32,mem                 ; o32 C5 /r            [386]
|
|
|
|
|
|
|
|
\c LES reg16,mem ; o16 C4 /r [8086]
|
|
|
|
\c LES reg32,mem                 ; o32 C4 /r            [386]
|
|
|
|
|
|
|
|
\c LFS reg16,mem ; o16 0F B4 /r [386]
|
|
|
|
\c LFS reg32,mem ; o32 0F B4 /r [386]
|
|
|
|
|
|
|
|
\c LGS reg16,mem ; o16 0F B5 /r [386]
|
|
|
|
\c LGS reg32,mem ; o32 0F B5 /r [386]
|
|
|
|
|
|
|
|
\c LSS reg16,mem ; o16 0F B2 /r [386]
|
|
|
|
\c LSS reg32,mem ; o32 0F B2 /r [386]
|
|
|
|
|
|
|
|
These instructions load an entire far pointer (16 or 32 bits of
|
|
|
|
offset, plus 16 bits of segment) out of memory in one go. \c{LDS},
|
|
|
|
for example, loads 16 or 32 bits from the given memory address into
|
|
|
|
the given register (depending on the size of the register), then
|
|
|
|
loads the \e{next} 16 bits from memory into \c{DS}. \c{LES},
|
|
|
|
\c{LFS}, \c{LGS} and \c{LSS} work in the same way but use the other
|
|
|
|
segment registers.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insLDMXCSR} \i\c{LDMXCSR}: Load Streaming SIMD Extension
|
|
|
|
Control/Status
|
|
|
|
|
|
|
|
\c LDMXCSR memory ; 0F,AE,/2 [KATMAI,SSE,SD]
|
|
|
|
|
|
|
|
\c{LDMXCSR} loads the MXCSR control/status register from the given memory location. The MXCSR register is used to enable
|
|
|
|
masked/unmasked exception handling, to set rounding modes, to
|
|
|
|
set flush-to-zero mode, and to view exception status flags.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insLEA} \i\c{LEA}: Load Effective Address
|
|
|
|
|
|
|
|
\c LEA reg16,mem ; o16 8D /r [8086]
|
|
|
|
\c LEA reg32,mem                 ; o32 8D /r            [386]
|
|
|
|
|
|
|
|
\c{LEA}, despite its syntax, does not access memory. It calculates
|
|
|
|
the effective address specified by its second operand as if it were
|
|
|
|
going to load or store data from it, but instead it stores the
|
|
|
|
calculated address into the register specified by its first operand.
|
|
|
|
This can be used to perform quite complex calculations (e.g. \c{LEA
|
|
|
|
EAX,[EBX+ECX*4+100]}) in one instruction.
|
|
|
|
|
|
|
|
\c{LEA}, despite being a purely arithmetic instruction which
|
|
|
|
accesses no memory, still requires square brackets around its second
|
|
|
|
operand, as if it were a memory reference.
|
|
|
|
|
|
|
|
\H{insLEAVE} \i\c{LEAVE}: Destroy Stack Frame
|
|
|
|
|
|
|
|
\c LEAVE ; C9 [186]
|
|
|
|
|
|
|
|
\c{LEAVE} destroys a stack frame of the form created by the
|
|
|
|
\c{ENTER} instruction (see \k{insENTER}). It is functionally
|
|
|
|
equivalent to \c{MOV ESP,EBP} followed by \c{POP EBP} (or \c{MOV
|
|
|
|
SP,BP} followed by \c{POP BP} in 16-bit mode).
|
|
|
|
|
|
|
|
\H{insLGDT} \i\c{LGDT}, \i\c{LIDT}, \i\c{LLDT}: Load Descriptor Tables
|
|
|
|
|
|
|
|
\c LGDT mem ; 0F 01 /2 [286,PRIV]
|
|
|
|
\c LIDT mem ; 0F 01 /3 [286,PRIV]
|
|
|
|
\c LLDT r/m16 ; 0F 00 /2 [286,PRIV]
|
|
|
|
|
|
|
|
\c{LGDT} and \c{LIDT} both take a 6-byte memory area as an operand:
|
|
|
|
they load a 32-bit linear address and a 16-bit size limit from that
|
|
|
|
area (in the opposite order) into the GDTR (global descriptor table
|
|
|
|
register) or IDTR (interrupt descriptor table register). These are
|
|
|
|
the only instructions which directly use \e{linear} addresses,
|
|
|
|
rather than segment/offset pairs.
|
|
|
|
|
|
|
|
\c{LLDT} takes a segment selector as an operand. The processor looks
|
|
|
|
up that selector in the GDT and stores the limit and base address
|
|
|
|
given there into the LDTR (local descriptor table register).
|
|
|
|
|
|
|
|
See also \c{SGDT}, \c{SIDT} and \c{SLDT} (\k{insSGDT}).
|
|
|
|
|
|
|
|
\H{insLMSW} \i\c{LMSW}: Load/Store Machine Status Word
|
|
|
|
|
|
|
|
\c LMSW r/m16 ; 0F 01 /6 [286,PRIV]
|
|
|
|
|
|
|
|
\c{LMSW} loads the bottom four bits of the source operand into the
|
|
|
|
bottom four bits of the \c{CR0} control register (or the Machine
|
|
|
|
Status Word, on 286 processors). See also \c{SMSW} (\k{insSMSW}).
|
|
|
|
|
|
|
|
\H{insLOADALL} \i\c{LOADALL}, \i\c{LOADALL286}: Load Processor State
|
|
|
|
|
|
|
|
\c LOADALL ; 0F 07 [386,UNDOC]
|
|
|
|
\c LOADALL286 ; 0F 05 [286,UNDOC]
|
|
|
|
|
|
|
|
This instruction, in its two different-opcode forms, is apparently
|
|
|
|
supported on most 286 processors, some 386 and possibly some 486.
|
|
|
|
The opcode differs between the 286 and the 386.
|
|
|
|
|
|
|
|
The function of the instruction is to load all information relating
|
|
|
|
to the state of the processor out of a block of memory: on the 286,
|
|
|
|
this block is located implicitly at absolute address \c{0x800}, and
|
|
|
|
on the 386 and 486 it is at \c{[ES:EDI]}.
|
|
|
|
|
|
|
|
\H{insLODSB} \i\c{LODSB}, \i\c{LODSW}, \i\c{LODSD}: Load from String
|
|
|
|
|
|
|
|
\c LODSB ; AC [8086]
|
|
|
|
\c LODSW ; o16 AD [8086]
|
|
|
|
\c LODSD ; o32 AD [386]
|
|
|
|
|
|
|
|
\c{LODSB} loads a byte from \c{[DS:SI]} or \c{[DS:ESI]} into \c{AL}.
|
|
|
|
It then increments or decrements (depending on the direction flag:
|
|
|
|
increments if the flag is clear, decrements if it is set) \c{SI} or
|
|
|
|
\c{ESI}.
|
|
|
|
|
|
|
|
The register used is \c{SI} if the address size is 16 bits, and
|
|
|
|
\c{ESI} if it is 32 bits. If you need to use an address size not
|
|
|
|
equal to the current \c{BITS} setting, you can use an explicit
|
|
|
|
\i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
The segment register used to load from \c{[SI]} or \c{[ESI]} can be
|
|
|
|
overridden by using a segment register name as a prefix (for
|
|
|
|
example, \c{es lodsb}).
|
|
|
|
|
|
|
|
\c{LODSW} and \c{LODSD} work in the same way, but they load a
|
|
|
|
word or a doubleword instead of a byte, and increment or decrement
|
|
|
|
the addressing registers by 2 or 4 instead of 1.
|
|
|
|
|
|
|
|
\H{insLOOP} \i\c{LOOP}, \i\c{LOOPE}, \i\c{LOOPZ}, \i\c{LOOPNE}, \i\c{LOOPNZ}: Loop with Counter
|
|
|
|
|
|
|
|
\c LOOP imm ; E2 rb [8086]
|
|
|
|
\c LOOP imm,CX ; a16 E2 rb [8086]
|
|
|
|
\c LOOP imm,ECX ; a32 E2 rb [386]
|
|
|
|
|
|
|
|
\c LOOPE imm ; E1 rb [8086]
|
|
|
|
\c LOOPE imm,CX ; a16 E1 rb [8086]
|
|
|
|
\c LOOPE imm,ECX ; a32 E1 rb [386]
|
|
|
|
\c LOOPZ imm ; E1 rb [8086]
|
|
|
|
\c LOOPZ imm,CX ; a16 E1 rb [8086]
|
|
|
|
\c LOOPZ imm,ECX ; a32 E1 rb [386]
|
|
|
|
|
|
|
|
\c LOOPNE imm ; E0 rb [8086]
|
|
|
|
\c LOOPNE imm,CX ; a16 E0 rb [8086]
|
|
|
|
\c LOOPNE imm,ECX ; a32 E0 rb [386]
|
|
|
|
\c LOOPNZ imm ; E0 rb [8086]
|
|
|
|
\c LOOPNZ imm,CX ; a16 E0 rb [8086]
|
|
|
|
\c LOOPNZ imm,ECX ; a32 E0 rb [386]
|
|
|
|
|
|
|
|
\c{LOOP} decrements its counter register (either \c{CX} or \c{ECX} -
|
|
|
|
if one is not specified explicitly, the \c{BITS} setting dictates
|
|
|
|
which is used) by one, and if the counter does not become zero as a
|
|
|
|
result of this operation, it jumps to the given label. The jump has
|
|
|
|
a range of 128 bytes.
|
|
|
|
|
|
|
|
\c{LOOPE} (or its synonym \c{LOOPZ}) adds the additional condition
|
|
|
|
that it only jumps if the counter is nonzero \e{and} the zero flag
|
|
|
|
is set. Similarly, \c{LOOPNE} (and \c{LOOPNZ}) jumps only if the
|
|
|
|
counter is nonzero and the zero flag is clear.
|
|
|
|
|
|
|
|
\H{insLSL} \i\c{LSL}: Load Segment Limit
|
|
|
|
|
|
|
|
\c LSL reg16,r/m16 ; o16 0F 03 /r [286,PRIV]
|
|
|
|
\c LSL reg32,r/m32 ; o32 0F 03 /r [286,PRIV]
|
|
|
|
|
|
|
|
\c{LSL} is given a segment selector in its source (second) operand;
|
|
|
|
it computes the segment limit value by loading the segment limit
|
|
|
|
field from the associated segment descriptor in the GDT or LDT.
|
|
|
|
(This involves shifting left by 12 bits if the segment limit is
|
|
|
|
page-granular, and not if it is byte-granular; so you end up with a
|
|
|
|
byte limit in either case.) The segment limit obtained is then
|
|
|
|
loaded into the destination (first) operand.
|
|
|
|
|
|
|
|
\H{insLTR} \i\c{LTR}: Load Task Register
|
|
|
|
|
|
|
|
\c LTR r/m16 ; 0F 00 /3 [286,PRIV]
|
|
|
|
|
|
|
|
\c{LTR} looks up the segment base and limit in the GDT or LDT
|
|
|
|
descriptor specified by the segment selector given as its operand,
|
|
|
|
and loads them into the Task Register.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insMASKMOVQ} \i\c{MASKMOVQ}: Byte Mask Write
|
|
|
|
|
|
|
|
\c MASKMOVQ mmxreg,mmxreg ; 0F,F7,/r [KATMAI,MMX]
|
|
|
|
|
|
|
|
\c{MASKMOVQ} Data is stored from the mm1 register to the location
|
|
|
|
specified by the di/edi register (using DS segment). The size
|
|
|
|
of the store depends on the address-size attribute. The most
|
|
|
|
significant bit in each byte of the mask register mm2 is used
|
|
|
|
to selectively write the data (0 = no write, 1 = write) on a
|
|
|
|
per-byte basis.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insMAXPS} \i\c{MAXPS}: Packed Single-FP Maximum
|
|
|
|
|
|
|
|
\c MAXPS xmmreg,memory ; 0F,5F,/r [KATMAI,SSE]
|
|
|
|
\c MAXPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MAXPS} The MAXPS instruction returns the maximum SP FP numbers
|
|
|
|
from XMM1 and XMM2/Mem. If the values being compared are both
|
|
|
|
zeroes, source2 (xmm2/m128) would be returned. If source2
|
|
|
|
(xmm2/m128) is an sNaN, this sNaN is forwarded unchanged
|
|
|
|
to the destination (i.e., a quieted version of the sNaN
|
|
|
|
is not returned).
|
|
|
|
|
|
|
|
|
|
|
|
\H{insMAXSS} \i\c{MAXSS}: Scalar Single-FP Maximum
|
|
|
|
|
|
|
|
\c MAXSS xmmreg,memory ; F3,0F,5F,/r [KATMAI,SSE]
|
|
|
|
\c MAXSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MAXSS} The MAXSS instruction returns the maximum SP FP number
|
|
|
|
from the lower SP FP numbers of XMM1 and XMM2/Mem; the upper
|
|
|
|
three fields are passed through from xmm1. If the values being
|
|
|
|
compared are both zeroes, source2 (xmm2/m128) will be returned.
|
|
|
|
If source2 (xmm2/m128) is an sNaN, this sNaN is forwarded
|
|
|
|
unchanged to the destination (i.e., a quieted version of the
|
|
|
|
sNaN is not returned).
|
|
|
|
|
|
|
|
|
|
|
|
\H{insMINPS} \i\c{MINPS}: Packed Single-FP Minimum
|
|
|
|
|
|
|
|
\c MINPS xmmreg,memory ; 0F,5D,/r [KATMAI,SSE]
|
|
|
|
\c MINPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MINPS} The MINPS instruction returns the minimum SP FP
|
|
|
|
numbers from XMM1 and XMM2/Mem. If the values being compared
|
|
|
|
are both zeroes, source2 (xmm2/m128) would be returned. If
|
|
|
|
source2 (xmm2/m128) is an sNaN, this sNaN is forwarded unchanged
|
|
|
|
to the destination (i.e., a quieted version of the sNaN is
|
|
|
|
not returned).
|
|
|
|
|
|
|
|
|
|
|
|
\H{insMINSS} \i\c{MINSS}: Scalar Single-FP Minimum
|
|
|
|
|
|
|
|
\c MINSS xmmreg,memory ; F3,0F,5D,/r [KATMAI,SSE]
|
|
|
|
\c MINSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MINSS} The MINSS instruction returns the minimum SP FP number
|
|
|
|
from the lower SP FP numbers of XMM1 and XMM2/Mem; the upper
|
|
|
|
three fields are passed through from xmm1. If the values being
|
|
|
|
compared are both zeroes, source2 (xmm2/m128) would be returned.
|
|
|
|
If source2 (xmm2/m128) is an sNaN, this sNaN is forwarded
|
|
|
|
unchanged to the destination (i.e., a quieted version of the
|
|
|
|
sNaN is not returned).
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insMOV} \i\c{MOV}: Move Data
|
|
|
|
|
|
|
|
\c MOV r/m8,reg8 ; 88 /r [8086]
|
|
|
|
\c MOV r/m16,reg16 ; o16 89 /r [8086]
|
|
|
|
\c MOV r/m32,reg32 ; o32 89 /r [386]
|
|
|
|
\c MOV reg8,r/m8 ; 8A /r [8086]
|
|
|
|
\c MOV reg16,r/m16 ; o16 8B /r [8086]
|
|
|
|
\c MOV reg32,r/m32 ; o32 8B /r [386]
|
|
|
|
|
|
|
|
\c MOV reg8,imm8 ; B0+r ib [8086]
|
|
|
|
\c MOV reg16,imm16 ; o16 B8+r iw [8086]
|
|
|
|
\c MOV reg32,imm32 ; o32 B8+r id [386]
|
|
|
|
\c MOV r/m8,imm8 ; C6 /0 ib [8086]
|
|
|
|
\c MOV r/m16,imm16 ; o16 C7 /0 iw [8086]
|
|
|
|
\c MOV r/m32,imm32 ; o32 C7 /0 id [386]
|
|
|
|
|
|
|
|
\c MOV AL,memoffs8 ; A0 ow/od [8086]
|
|
|
|
\c MOV AX,memoffs16 ; o16 A1 ow/od [8086]
|
|
|
|
\c MOV EAX,memoffs32 ; o32 A1 ow/od [386]
|
|
|
|
\c MOV memoffs8,AL ; A2 ow/od [8086]
|
|
|
|
\c MOV memoffs16,AX ; o16 A3 ow/od [8086]
|
|
|
|
\c MOV memoffs32,EAX ; o32 A3 ow/od [386]
|
|
|
|
|
|
|
|
\c MOV r/m16,segreg ; o16 8C /r [8086]
|
|
|
|
\c MOV r/m32,segreg ; o32 8C /r [386]
|
|
|
|
\c MOV segreg,r/m16 ; o16 8E /r [8086]
|
|
|
|
\c MOV segreg,r/m32 ; o32 8E /r [386]
|
|
|
|
|
|
|
|
\c MOV reg32,CR0/2/3/4 ; 0F 20 /r [386]
|
|
|
|
\c MOV reg32,DR0/1/2/3/6/7 ; 0F 21 /r [386]
|
|
|
|
\c MOV reg32,TR3/4/5/6/7 ; 0F 24 /r [386]
|
|
|
|
\c MOV CR0/2/3/4,reg32 ; 0F 22 /r [386]
|
|
|
|
\c MOV DR0/1/2/3/6/7,reg32 ; 0F 23 /r [386]
|
|
|
|
\c MOV TR3/4/5/6/7,reg32 ; 0F 26 /r [386]
|
|
|
|
|
|
|
|
\c{MOV} copies the contents of its source (second) operand into its
|
|
|
|
destination (first) operand.
|
|
|
|
|
|
|
|
In all forms of the \c{MOV} instruction, the two operands are the
|
|
|
|
same size, except for moving between a segment register and an
|
|
|
|
\c{r/m32} operand. These instructions are treated exactly like the
|
|
|
|
corresponding 16-bit equivalent (so that, for example, \c{MOV
|
|
|
|
DS,EAX} functions identically to \c{MOV DS,AX} but saves a prefix
|
|
|
|
when in 32-bit mode), except that when a segment register is moved
|
|
|
|
into a 32-bit destination, the top two bytes of the result are
|
|
|
|
undefined.
|
|
|
|
|
|
|
|
\c{MOV} may not use \c{CS} as a destination.
|
|
|
|
|
|
|
|
\c{CR4} is only a supported register on the Pentium and above.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insMOVAPS} \i\c{MOVAPS}: Move Aligned Four Packed Single-FP
|
|
|
|
|
|
|
|
\c MOVAPS xmmreg,memory ; 0F,28,/r [KATMAI,SSE]
|
|
|
|
\c MOVAPS memory,xmmreg ; 0F,29,/r [KATMAI,SSE]
|
|
|
|
\c MOVAPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
\c MOVAPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MOVAPS} The linear address corresponds to the address of the
|
|
|
|
least-significant byte of the referenced memory data. When a
|
|
|
|
memory address is indicated, the 16 bytes of data at memory
|
|
|
|
location m128 are loaded or stored. When the register-register
|
|
|
|
form of this operation is used, the content of the 128-bit
|
|
|
|
source register is copied into the 128-bit destination register.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insMOVD} \i\c{MOVD}: Move Doubleword to/from MMX Register
|
|
|
|
|
|
|
|
\c MOVD mmxreg,r/m32 ; 0F 6E /r [PENT,MMX]
|
|
|
|
\c MOVD r/m32,mmxreg ; 0F 7E /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{MOVD} copies 32 bits from its source (second) operand into its
|
|
|
|
destination (first) operand. When the destination is a 64-bit MMX
|
|
|
|
register, the top 32 bits are set to zero.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insMOVHLPS} \i\c{MOVHLPS}: High to Low Packed Single-FP
|
|
|
|
|
|
|
|
\c MOVHLPS xmmreg,xmmreg ; 0F,12,/r [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MOVHLPS} The upper 64-bits of the source register xmm2 are
|
|
|
|
loaded into the lower 64-bits of the 128-bit register xmm1,
|
|
|
|
and the upper 64-bits of xmm1 are left unchanged.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insMOVHPS} \i\c{MOVHPS}: Move High Packed Single-FP
|
|
|
|
|
|
|
|
\c MOVHPS xmmreg,memory ; 0F,16,/r [KATMAI,SSE]
|
|
|
|
\c MOVHPS memory,xmmreg ; 0F,17,/r [KATMAI,SSE]
|
|
|
|
\c MOVHPS xmmreg,xmmreg ; ?? [KATMAI,SSE,ND]
|
|
|
|
|
|
|
|
\c{MOVHPS} The linear address corresponds to the address of the
|
|
|
|
least-significant byte of the referenced memory data. When the
|
|
|
|
load form of this operation is used, m64 is loaded into the
|
|
|
|
upper 64-bits of the 128-bit register xmm, and the lower 64-bits
|
|
|
|
are left unchanged.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insMOVMSKPS} \i\c{MOVMSKPS}: Move Mask To Integer
|
|
|
|
|
|
|
|
\c MOVMSKPS reg32,xmmreg ; 0F,50,/r [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MOVMSKPS} The MOVMSKPS instruction returns to the integer
|
|
|
|
register r32 a 4-bit mask formed of the most significant bits
|
|
|
|
of each SP FP number of its operand.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insMOVNTPS} \i\c{MOVNTPS}: Move Aligned Four Packed Single-FP
|
|
|
|
Non Temporal
|
|
|
|
|
|
|
|
\c MOVNTPS memory,xmmreg ; 0F,2B,/r [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MOVNTPS} The linear address corresponds to the address of the
|
|
|
|
least-significant byte of the referenced memory data. This store
|
|
|
|
instruction minimizes cache pollution.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insMOVNTQ} \i\c{MOVNTQ}: Move 64 Bits Non Temporal
|
|
|
|
|
|
|
|
\c MOVNTQ memory,mmxreg ; 0F,E7,/r [KATMAI,MMX,SM]
|
|
|
|
|
|
|
|
\c{MOVNTQ} The linear address corresponds to the address of the
|
|
|
|
least-significant byte of the referenced memory data. This store
|
|
|
|
instruction minimizes cache pollution.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insMOVQ} \i\c{MOVQ}: Move Quadword to/from MMX Register
|
|
|
|
|
|
|
|
\c MOVQ mmxreg,r/m64 ; 0F 6F /r [PENT,MMX]
|
|
|
|
\c MOVQ r/m64,mmxreg ; 0F 7F /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{MOVQ} copies 64 bits from its source (second) operand into its
|
|
|
|
destination (first) operand.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insMOVSB} \i\c{MOVSB}, \i\c{MOVSW}, \i\c{MOVSD}: Move String
|
|
|
|
|
|
|
|
\c MOVSB ; A4 [8086]
|
|
|
|
\c MOVSW ; o16 A5 [8086]
|
|
|
|
\c MOVSD ; o32 A5 [386]
|
|
|
|
|
|
|
|
\c{MOVSB} copies the byte at \c{[DS:SI]} or \c{[DS:ESI]} to
|
|
|
|
\c{[ES:DI]} or \c{[ES:EDI]}. It then increments or decrements
|
|
|
|
(depending on the direction flag: increments if the flag is clear,
|
|
|
|
decrements if it is set) \c{SI} and \c{DI} (or \c{ESI} and \c{EDI}).
|
|
|
|
|
|
|
|
The registers used are \c{SI} and \c{DI} if the address size is 16
|
|
|
|
bits, and \c{ESI} and \c{EDI} if it is 32 bits. If you need to use
|
|
|
|
an address size not equal to the current \c{BITS} setting, you can
|
|
|
|
use an explicit \i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
The segment register used to load from \c{[SI]} or \c{[ESI]} can be
|
|
|
|
overridden by using a segment register name as a prefix (for
|
|
|
|
example, \c{es movsb}). The use of \c{ES} for the store to \c{[DI]}
|
|
|
|
or \c{[EDI]} cannot be overridden.
|
|
|
|
|
|
|
|
\c{MOVSW} and \c{MOVSD} work in the same way, but they copy a word
|
|
|
|
or a doubleword instead of a byte, and increment or decrement the
|
|
|
|
addressing registers by 2 or 4 instead of 1.
|
|
|
|
|
|
|
|
The \c{REP} prefix may be used to repeat the instruction \c{CX} (or
|
|
|
|
\c{ECX} - again, the address size chooses which) times.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insMOVSS} \i\c{MOVSS}: Move Scalar Single-FP
|
|
|
|
|
|
|
|
\c MOVSS xmmreg,memory ; F3,0F,10,/r [KATMAI,SSE]
|
|
|
|
\c MOVSS memory,xmmreg ; F3,0F,11,/r [KATMAI,SSE]
|
|
|
|
\c MOVSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
\c MOVSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MOVSS} The linear address corresponds to the address of
|
|
|
|
the least-significant byte of the referenced memory data.
|
|
|
|
When a memory address is indicated, the four bytes of data
|
|
|
|
at memory location m32 are loaded or stored. When the load
|
|
|
|
form of this operation is used, the 32 bits from memory are
|
|
|
|
copied into the lower 32 bits of the 128-bit register xmm,
|
|
|
|
the 96 most significant bits being cleared.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insMOVSX} \i\c{MOVSX}, \i\c{MOVZX}: Move Data with Sign or Zero Extend
|
|
|
|
|
|
|
|
\c MOVSX reg16,r/m8 ; o16 0F BE /r [386]
|
|
|
|
\c MOVSX reg32,r/m8 ; o32 0F BE /r [386]
|
|
|
|
\c MOVSX reg32,r/m16 ; o32 0F BF /r [386]
|
|
|
|
|
|
|
|
\c MOVZX reg16,r/m8 ; o16 0F B6 /r [386]
|
|
|
|
\c MOVZX reg32,r/m8 ; o32 0F B6 /r [386]
|
|
|
|
\c MOVZX reg32,r/m16 ; o32 0F B7 /r [386]
|
|
|
|
|
|
|
|
\c{MOVSX} sign-extends its source (second) operand to the length of
|
|
|
|
its destination (first) operand, and copies the result into the
|
|
|
|
destination operand. \c{MOVZX} does the same, but zero-extends
|
|
|
|
rather than sign-extending.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insMOVUPS} \i\c{MOVUPS}: Move Unaligned Four Packed Single-FP
|
|
|
|
|
|
|
|
\c MOVUPS xmmreg,memory ; 0F,10,/r [KATMAI,SSE]
|
|
|
|
\c MOVUPS memory,xmmreg ; 0F,11,/r [KATMAI,SSE]
|
|
|
|
\c MOVUPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
\c MOVUPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MOVUPS} The linear address corresponds to the address of the
|
|
|
|
least-significant byte of the referenced memory data. When a
|
|
|
|
memory address is indicated, the 16 bytes of data at memory
|
|
|
|
location m128 are loaded to the 128-bit multimedia register
|
|
|
|
xmm or stored from the 128-bit multimedia register xmm. When
|
|
|
|
the register-register form of this operation is used, the content
|
|
|
|
of the 128-bit source register is copied into 128-bit register
|
|
|
|
xmm. No assumption is made about alignment.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insMUL} \i\c{MUL}: Unsigned Integer Multiply
|
|
|
|
|
|
|
|
\c MUL r/m8 ; F6 /4 [8086]
|
|
|
|
\c MUL r/m16 ; o16 F7 /4 [8086]
|
|
|
|
\c MUL r/m32 ; o32 F7 /4 [386]
|
|
|
|
|
|
|
|
\c{MUL} performs unsigned integer multiplication. The other operand
|
|
|
|
to the multiplication, and the destination operand, are implicit, in
|
|
|
|
the following way:
|
|
|
|
|
|
|
|
\b For \c{MUL r/m8}, \c{AL} is multiplied by the given operand; the
|
|
|
|
product is stored in \c{AX}.
|
|
|
|
|
|
|
|
\b For \c{MUL r/m16}, \c{AX} is multiplied by the given operand;
|
|
|
|
the product is stored in \c{DX:AX}.
|
|
|
|
|
|
|
|
\b For \c{MUL r/m32}, \c{EAX} is multiplied by the given operand;
|
|
|
|
the product is stored in \c{EDX:EAX}.
|
|
|
|
|
|
|
|
Signed integer multiplication is performed by the \c{IMUL}
|
|
|
|
instruction: see \k{insIMUL}.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insMULPS} \i\c{MULPS}: Packed Single-FP Multiply
|
|
|
|
|
|
|
|
\c MULPS xmmreg,memory ; 0F,59,/r [KATMAI,SSE]
|
|
|
|
\c MULPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
|
|
|
|
\c{MULPS} The MULPS instructions multiply the packed SP FP
|
|
|
|
numbers of both their operands.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insMULSS} \i\c{MULSS}: Scalar Single-FP Multiply
|
|
|
|
|
|
|
|
|
|
|
|
\c MULSS xmmreg,memory ; F3,0F,59,/r [KATMAI,SSE]
|
|
|
|
\c MULSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{MULSS} The MULSS instructions multiply the lowest SP FP
|
|
|
|
numbers of both their operands; the upper three fields
|
|
|
|
are passed through from xmm1.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insNEG} \i\c{NEG}, \i\c{NOT}: Two's and One's Complement
|
|
|
|
|
|
|
|
\c NEG r/m8 ; F6 /3 [8086]
|
|
|
|
\c NEG r/m16 ; o16 F7 /3 [8086]
|
|
|
|
\c NEG r/m32 ; o32 F7 /3 [386]
|
|
|
|
|
|
|
|
\c NOT r/m8 ; F6 /2 [8086]
|
|
|
|
\c NOT r/m16 ; o16 F7 /2 [8086]
|
|
|
|
\c NOT r/m32 ; o32 F7 /2 [386]
|
|
|
|
|
|
|
|
\c{NEG} replaces the contents of its operand by the two's complement
|
|
|
|
negation (invert all the bits and then add one) of the original
|
|
|
|
value. \c{NOT}, similarly, performs one's complement (inverts all
|
|
|
|
the bits).
|
|
|
|
|
|
|
|
\H{insNOP} \i\c{NOP}: No Operation
|
|
|
|
|
|
|
|
\c NOP ; 90 [8086]
|
|
|
|
|
|
|
|
\c{NOP} performs no operation. Its opcode is the same as that
|
|
|
|
generated by \c{XCHG AX,AX} or \c{XCHG EAX,EAX} (depending on the
|
|
|
|
processor mode; see \k{insXCHG}).
|
|
|
|
|
|
|
|
\H{insOR} \i\c{OR}: Bitwise OR
|
|
|
|
|
|
|
|
\c OR r/m8,reg8 ; 08 /r [8086]
|
|
|
|
\c OR r/m16,reg16 ; o16 09 /r [8086]
|
|
|
|
\c OR r/m32,reg32 ; o32 09 /r [386]
|
|
|
|
|
|
|
|
\c OR reg8,r/m8 ; 0A /r [8086]
|
|
|
|
\c OR reg16,r/m16 ; o16 0B /r [8086]
|
|
|
|
\c OR reg32,r/m32 ; o32 0B /r [386]
|
|
|
|
|
|
|
|
\c OR r/m8,imm8 ; 80 /1 ib [8086]
|
|
|
|
\c OR r/m16,imm16 ; o16 81 /1 iw [8086]
|
|
|
|
\c OR r/m32,imm32 ; o32 81 /1 id [386]
|
|
|
|
|
|
|
|
\c OR r/m16,imm8 ; o16 83 /1 ib [8086]
|
|
|
|
\c OR r/m32,imm8 ; o32 83 /1 ib [386]
|
|
|
|
|
|
|
|
\c OR AL,imm8 ; 0C ib [8086]
|
|
|
|
\c OR AX,imm16 ; o16 0D iw [8086]
|
|
|
|
\c OR EAX,imm32 ; o32 0D id [386]
|
|
|
|
|
|
|
|
\c{OR} performs a bitwise OR operation between its two operands
|
|
|
|
(i.e. each bit of the result is 1 if and only if at least one of the
|
|
|
|
corresponding bits of the two inputs was 1), and stores the result
|
|
|
|
in the destination (first) operand.
|
|
|
|
|
|
|
|
In the forms with an 8-bit immediate second operand and a longer
|
|
|
|
first operand, the second operand is considered to be signed, and is
|
|
|
|
sign-extended to the length of the first operand. In these cases,
|
|
|
|
the \c{BYTE} qualifier is necessary to force NASM to generate this
|
|
|
|
form of the instruction.
|
|
|
|
|
|
|
|
The MMX instruction \c{POR} (see \k{insPOR}) performs the same
|
|
|
|
operation on the 64-bit MMX registers.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insORPS} \i\c{ORPS}: Bit-wise Logical OR for Single-FP Data
|
|
|
|
|
|
|
|
\c ORPS xmmreg,memory ; 0F,56,/r [KATMAI,SSE]
|
|
|
|
\c ORPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{ORPS} The ORPS instructions return a bit-wise logical
|
|
|
|
OR between xmm1 and xmm2/mem.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insOUT} \i\c{OUT}: Output Data to I/O Port
|
|
|
|
|
|
|
|
\c OUT imm8,AL ; E6 ib [8086]
|
|
|
|
\c OUT imm8,AX ; o16 E7 ib [8086]
|
|
|
|
\c OUT imm8,EAX ; o32 E7 ib [386]
|
|
|
|
\c OUT DX,AL ; EE [8086]
|
|
|
|
\c OUT DX,AX ; o16 EF [8086]
|
|
|
|
\c OUT DX,EAX ; o32 EF [386]
|
|
|
|
|
2002-05-01 05:01:08 +08:00
|
|
|
\c{OUT} writes the contents of the given source register to the
|
2002-05-01 04:52:49 +08:00
|
|
|
specified I/O port. The port number may be specified as an immediate
|
|
|
|
value if it is between 0 and 255, and otherwise must be stored in
|
|
|
|
\c{DX}. See also \c{IN} (\k{insIN}).
|
|
|
|
|
|
|
|
\H{insOUTSB} \i\c{OUTSB}, \i\c{OUTSW}, \i\c{OUTSD}: Output String to I/O Port
|
|
|
|
|
|
|
|
\c OUTSB ; 6E [186]
|
|
|
|
|
|
|
|
\c OUTSW ; o16 6F [186]
|
|
|
|
|
|
|
|
\c OUTSD ; o32 6F [386]
|
|
|
|
|
|
|
|
\c{OUTSB} loads a byte from \c{[DS:SI]} or \c{[DS:ESI]} and writes
|
|
|
|
it to the I/O port specified in \c{DX}. It then increments or
|
|
|
|
decrements (depending on the direction flag: increments if the flag
|
|
|
|
is clear, decrements if it is set) \c{SI} or \c{ESI}.
|
|
|
|
|
|
|
|
The register used is \c{SI} if the address size is 16 bits, and
|
|
|
|
\c{ESI} if it is 32 bits. If you need to use an address size not
|
|
|
|
equal to the current \c{BITS} setting, you can use an explicit
|
|
|
|
\i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
The segment register used to load from \c{[SI]} or \c{[ESI]} can be
|
|
|
|
overridden by using a segment register name as a prefix (for
|
|
|
|
example, \c{es outsb}).
|
|
|
|
|
|
|
|
\c{OUTSW} and \c{OUTSD} work in the same way, but they output a
|
|
|
|
word or a doubleword instead of a byte, and increment or decrement
|
|
|
|
the addressing registers by 2 or 4 instead of 1.
|
|
|
|
|
|
|
|
The \c{REP} prefix may be used to repeat the instruction \c{CX} (or
|
|
|
|
\c{ECX} - again, the address size chooses which) times.
|
|
|
|
|
|
|
|
\H{insPACKSSDW} \i\c{PACKSSDW}, \i\c{PACKSSWB}, \i\c{PACKUSWB}: Pack Data
|
|
|
|
|
|
|
|
\c PACKSSDW mmxreg,r/m64 ; 0F 6B /r [PENT,MMX]
|
|
|
|
\c PACKSSWB mmxreg,r/m64 ; 0F 63 /r [PENT,MMX]
|
|
|
|
\c PACKUSWB mmxreg,r/m64 ; 0F 67 /r [PENT,MMX]
|
|
|
|
|
|
|
|
All these instructions start by forming a notional 128-bit word by
|
|
|
|
placing the source (second) operand on the left of the destination
|
|
|
|
(first) operand. \c{PACKSSDW} then splits this 128-bit word into
|
|
|
|
four doublewords, converts each to a word, and loads them side by
|
|
|
|
side into the destination register; \c{PACKSSWB} and \c{PACKUSWB}
|
|
|
|
both split the 128-bit word into eight words, convert each to a
|
|
|
|
byte, and load \e{those} side by side into the destination
|
|
|
|
register.
|
|
|
|
|
|
|
|
\c{PACKSSDW} and \c{PACKSSWB} perform signed saturation when
|
|
|
|
reducing the length of numbers: if the number is too large to fit
|
|
|
|
into the reduced space, they replace it by the largest signed number
|
|
|
|
(\c{7FFFh} or \c{7Fh}) that \e{will} fit, and if it is too small
|
|
|
|
then they replace it by the smallest signed number (\c{8000h} or
|
|
|
|
\c{80h}) that will fit. \c{PACKUSWB} performs unsigned saturation:
|
|
|
|
it treats its input as signed, and clips it to the range of an
|
|
|
|
unsigned byte (\c{00h} to \c{0FFh}).
|
|
|
|
|
|
|
|
\H{insPADDB} \i\c{PADDxx}: MMX Packed Addition
|
|
|
|
|
|
|
|
\c PADDB mmxreg,r/m64 ; 0F FC /r [PENT,MMX]
|
|
|
|
\c PADDW mmxreg,r/m64 ; 0F FD /r [PENT,MMX]
|
|
|
|
\c PADDD mmxreg,r/m64 ; 0F FE /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c PADDSB mmxreg,r/m64 ; 0F EC /r [PENT,MMX]
|
|
|
|
\c PADDSW mmxreg,r/m64 ; 0F ED /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c PADDUSB mmxreg,r/m64 ; 0F DC /r [PENT,MMX]
|
|
|
|
\c PADDUSW mmxreg,r/m64 ; 0F DD /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{PADDxx} all perform packed addition between their two 64-bit
|
|
|
|
operands, storing the result in the destination (first) operand. The
|
|
|
|
\c{PADDxB} forms treat the 64-bit operands as vectors of eight
|
|
|
|
bytes, and add each byte individually; \c{PADDxW} treat the operands
|
|
|
|
as vectors of four words; and \c{PADDD} treats its operands as
|
|
|
|
vectors of two doublewords.
|
|
|
|
|
|
|
|
\c{PADDSB} and \c{PADDSW} perform signed saturation on the sum of
|
|
|
|
each pair of bytes or words: if the result of an addition is too
|
|
|
|
large or too small to fit into a signed byte or word result, it is
|
|
|
|
clipped (saturated) to the largest or smallest value which \e{will}
|
|
|
|
fit. \c{PADDUSB} and \c{PADDUSW} similarly perform unsigned
|
|
|
|
saturation, clipping to \c{0FFh} or \c{0FFFFh} if the result is
|
|
|
|
larger than that.
|
|
|
|
|
|
|
|
\H{insPADDSIW} \i\c{PADDSIW}: MMX Packed Addition to Implicit
|
|
|
|
Destination
|
|
|
|
|
|
|
|
\c PADDSIW mmxreg,r/m64 ; 0F 51 /r [CYRIX,MMX]
|
|
|
|
|
|
|
|
\c{PADDSIW}, specific to the Cyrix extensions to the MMX instruction
|
|
|
|
set, performs the same function as \c{PADDSW}, except that the
|
|
|
|
result is not placed in the register specified by the first operand,
|
|
|
|
but instead in the register whose number differs from the first
|
|
|
|
operand only in the last bit. So \c{PADDSIW MM0,MM2} would put the
|
|
|
|
result in \c{MM1}, but \c{PADDSIW MM1,MM2} would put the result in
|
|
|
|
\c{MM0}.
|
|
|
|
|
|
|
|
\H{insPAND} \i\c{PAND}, \i\c{PANDN}: MMX Bitwise AND and AND-NOT
|
|
|
|
|
|
|
|
\c PAND mmxreg,r/m64 ; 0F DB /r [PENT,MMX]
|
|
|
|
\c PANDN mmxreg,r/m64 ; 0F DF /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{PAND} performs a bitwise AND operation between its two operands
|
|
|
|
(i.e. each bit of the result is 1 if and only if the corresponding
|
|
|
|
bits of the two inputs were both 1), and stores the result in the
|
|
|
|
destination (first) operand.
|
|
|
|
|
|
|
|
\c{PANDN} performs the same operation, but performs a one's
|
|
|
|
complement operation on the destination (first) operand first.
|
|
|
|
|
|
|
|
\H{insPAVEB} \i\c{PAVEB}: MMX Packed Average
|
|
|
|
|
|
|
|
\c PAVEB mmxreg,r/m64 ; 0F 50 /r [CYRIX,MMX]
|
|
|
|
|
|
|
|
\c{PAVEB}, specific to the Cyrix MMX extensions, treats its two
|
|
|
|
operands as vectors of eight unsigned bytes, and calculates the
|
|
|
|
average of the corresponding bytes in the operands. The resulting
|
|
|
|
vector of eight averages is stored in the first operand.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insPAVGB} \i\c{PAVGB}: Packed Average
|
|
|
|
|
|
|
|
\c PAVGB mmxreg,mmxreg ; 0F,E0, /r [KATMAI,MMX]
|
|
|
|
\c PAVGB mmxreg,memory ; 0F,E0, /r [KATMAI,MMX,SM]
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPAVGW} \i\c{PAVGW}: Packed Average
|
|
|
|
|
|
|
|
\c PAVGW mmxreg,mmxreg ; ?? [KATMAI,MMX]
|
|
|
|
\c PAVGW mmxreg,memory ; ?? [KATMAI,MMX,SM]
|
|
|
|
|
|
|
|
\c{PAVGB} and \c{PAVGW} add the unsigned data elements
|
|
|
|
of the source operand to the unsigned data elements of the
|
|
|
|
destination register, along with a carry-in. The results of
|
|
|
|
the add are then each independently right-shifted by one bit
|
|
|
|
position. The high order bits of each element are filled with
|
|
|
|
the carry bits of the corresponding sum. The destination operand
|
|
|
|
is an MMX register. The source operand can either
|
|
|
|
be an MMX register or a 64-bit memory operand.
|
|
|
|
The PAVGB instruction operates on packed unsigned bytes, and
|
|
|
|
the PAVGW instruction operates on packed unsigned words.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPAVGUSB} \i\c{PAVGUSB}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PAVGUSB mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PAVGUSB mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insPCMPEQB} \i\c{PCMPxx}: MMX Packed Comparison
|
|
|
|
|
|
|
|
\c PCMPEQB mmxreg,r/m64 ; 0F 74 /r [PENT,MMX]
|
|
|
|
\c PCMPEQW mmxreg,r/m64 ; 0F 75 /r [PENT,MMX]
|
|
|
|
\c PCMPEQD mmxreg,r/m64 ; 0F 76 /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c PCMPGTB mmxreg,r/m64 ; 0F 64 /r [PENT,MMX]
|
|
|
|
\c PCMPGTW mmxreg,r/m64 ; 0F 65 /r [PENT,MMX]
|
|
|
|
\c PCMPGTD mmxreg,r/m64 ; 0F 66 /r [PENT,MMX]
|
|
|
|
|
|
|
|
The \c{PCMPxx} instructions all treat their operands as vectors of
|
|
|
|
bytes, words, or doublewords; corresponding elements of the source
|
|
|
|
and destination are compared, and the corresponding element of the
|
|
|
|
destination (first) operand is set to all zeros or all ones
|
|
|
|
depending on the result of the comparison.
|
|
|
|
|
|
|
|
\c{PCMPxxB} treats the operands as vectors of eight bytes,
|
|
|
|
\c{PCMPxxW} treats them as vectors of four words, and \c{PCMPxxD} as
|
|
|
|
two doublewords.
|
|
|
|
|
|
|
|
\c{PCMPEQx} sets the corresponding element of the destination
|
|
|
|
operand to all ones if the two elements compared are equal;
|
|
|
|
\c{PCMPGTx} sets the destination element to all ones if the element
|
|
|
|
of the first (destination) operand is greater (treated as a signed
|
|
|
|
integer) than that of the second (source) operand.
|
|
|
|
|
|
|
|
\H{insPDISTIB} \i\c{PDISTIB}: MMX Packed Distance and Accumulate
|
|
|
|
with Implied Register
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\c PDISTIB mmxreg,mem64 ; 0F 54 /r [CYRIX,MMX]
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
\c{PDISTIB}, specific to the Cyrix MMX extensions, treats its two
|
|
|
|
input operands as vectors of eight unsigned bytes. For each byte
|
|
|
|
position, it finds the absolute difference between the bytes in that
|
|
|
|
position in the two input operands, and adds that value to the byte
|
|
|
|
in the same position in the implied output register. The addition is
|
|
|
|
saturated to an unsigned byte in the same way as \c{PADDUSB}.
|
|
|
|
|
|
|
|
The implied output register is found in the same way as \c{PADDSIW}
|
|
|
|
(\k{insPADDSIW}).
|
|
|
|
|
|
|
|
Note that \c{PDISTIB} cannot take a register as its second source
|
|
|
|
operand.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insPEXTRW} \i\c{PEXTRW}: Extract Word
|
|
|
|
|
|
|
|
\c PEXTRW reg32,mmxreg,immediate ; 0F,C5,/r,ib [KATMAI,MMX,SB,AR2]
|
|
|
|
|
|
|
|
\c{PEXTRW} moves the word in the MMX register (selected by the
|
|
|
|
two least significant bits of imm8) to the lower half of a 32-bit
|
|
|
|
integer register.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPF2ID} \i\c{PF2ID}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PF2ID mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PF2ID mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFACC} \i\c{PFACC}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFACC mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFACC mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFADD} \i\c{PFADD}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFADD mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFADD mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFCMPEQ} \i\c{PFCMPEQ}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFCMPEQ mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFCMPEQ mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFCMPGE} \i\c{PFCMPGE}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFCMPGE mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFCMPGE mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFCMPGT} \i\c{PFCMPGT}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFCMPGT mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFCMPGT mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFMAX} \i\c{PFMAX}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFMAX mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFMAX mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFMIN} \i\c{PFMIN}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFMIN mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFMIN mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFMUL} \i\c{PFMUL}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFMUL mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFMUL mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFRCP} \i\c{PFRCP}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFRCP mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFRCP mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFRCPIT1} \i\c{PFRCPIT1}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFRCPIT1 mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFRCPIT1 mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFRCPIT2} \i\c{PFRCPIT2}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFRCPIT2 mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFRCPIT2 mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFRSQIT1} \i\c{PFRSQIT1}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFRSQIT1 mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFRSQIT1 mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFRSQRT} \i\c{PFRSQRT}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFRSQRT mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFRSQRT mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFSUB} \i\c{PFSUB}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFSUB mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFSUB mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPFSUBR} \i\c{PFSUBR}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PFSUBR mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PFSUBR mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPI2FD} \i\c{PI2FD}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PI2FD mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PI2FD mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPINSRW} \i\c{PINSRW}: Insert Word
|
|
|
|
|
|
|
|
\c PINSRW mmxreg,reg16,immediate ;0F,C4,/r,ib [KATMAI,MMX,SB,AR2]
|
|
|
|
\c PINSRW mmxreg,reg32,immediate ; ?? [KATMAI,MMX,SB,AR2,ND]
|
|
|
|
\c PINSRW mmxreg,memory,immediate ; ?? [KATMAI,MMX,SB,AR2]
|
|
|
|
\c PINSRW mmxreg,memory|bits16,immediate ; ?? [KATMAI,MMX,SB,AR2,ND]
|
|
|
|
|
|
|
|
\c{PINSRW} loads a word from the lower half
|
|
|
|
of a 32-bit integer register (or from memory) and inserts it in
|
|
|
|
the MM destination register, at a position defined by the two
|
|
|
|
least significant bits of the imm8 constant. The insertion is
|
|
|
|
done in such a way that the three other words from the
|
|
|
|
destination register are left untouched.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insPMACHRIW} \i\c{PMACHRIW}: MMX Packed Multiply and Accumulate
|
|
|
|
with Rounding
|
|
|
|
|
|
|
|
\c PMACHRIW mmxreg,mem64 ; 0F 5E /r [CYRIX,MMX]
|
|
|
|
|
|
|
|
\c{PMACHRIW} acts almost identically to \c{PMULHRIW}
|
|
|
|
(\k{insPMULHRW}), but instead of \e{storing} its result in the
|
|
|
|
implied destination register, it \e{adds} its result, as four packed
|
|
|
|
words, to the implied destination register. No saturation is done:
|
|
|
|
the addition can wrap around.
|
|
|
|
|
|
|
|
Note that \c{PMACHRIW} cannot take a register as its second source
|
|
|
|
operand.
|
|
|
|
|
|
|
|
\H{insPMADDWD} \i\c{PMADDWD}: MMX Packed Multiply and Add
|
|
|
|
|
|
|
|
\c PMADDWD mmxreg,r/m64 ; 0F F5 /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{PMADDWD} treats its two inputs as vectors of four signed words.
|
|
|
|
It multiplies corresponding elements of the two operands, giving
|
|
|
|
four signed doubleword results. The top two of these are added and
|
|
|
|
placed in the top 32 bits of the destination (first) operand; the
|
|
|
|
bottom two are added and placed in the bottom 32 bits.
|
|
|
|
|
|
|
|
\H{insPMAGW} \i\c{PMAGW}: MMX Packed Magnitude
|
|
|
|
|
|
|
|
\c PMAGW mmxreg,r/m64 ; 0F 52 /r [CYRIX,MMX]
|
|
|
|
|
|
|
|
\c{PMAGW}, specific to the Cyrix MMX extensions, treats both its
|
|
|
|
operands as vectors of four signed words. It compares the absolute
|
|
|
|
values of the words in corresponding positions, and sets each word
|
|
|
|
of the destination (first) operand to whichever of the two words in
|
|
|
|
that position had the larger absolute value.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insPMAXSW} \i\c{PMAXSW}: Packed Signed Integer Word Maximum
|
|
|
|
|
|
|
|
\c PMAXSW mmxreg,mmxreg ; 0F,EE, /r [KATMAI,MMX]
|
|
|
|
\c PMAXSW mmxreg,memory ; ?? [KATMAI,MMX,SM]
|
|
|
|
|
|
|
|
\c{PMAXSW} returns, for each of the four word positions, the larger
|
|
|
|
of the corresponding signed words in MM1 and MM2/Mem.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPMAXUB} \i\c{PMAXUB}: Packed Unsigned Integer Byte Maximum
|
|
|
|
|
|
|
|
\c PMAXUB mmxreg,mmxreg ; 0F,DE, /r [KATMAI,MMX]
|
|
|
|
\c PMAXUB mmxreg,memory ; ?? [KATMAI,MMX,SM]
|
|
|
|
|
|
|
|
\c{PMAXUB} returns, for each of the eight byte positions, the larger
|
|
|
|
of the corresponding unsigned bytes in MM1 and MM2/Mem.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPMINSW} \i\c{PMINSW}: Packed Signed Integer Word Minimum
|
|
|
|
|
|
|
|
\c PMINSW mmxreg,mmxreg ; 0F,EA, /r [KATMAI,MMX]
|
|
|
|
\c PMINSW mmxreg,memory ; ?? [KATMAI,MMX,SM]
|
|
|
|
|
|
|
|
\c{PMINSW} returns, for each of the four word positions, the smaller
|
|
|
|
of the corresponding signed words in MM1 and MM2/Mem.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPMINUB} \i\c{PMINUB}: Packed Unsigned Integer Byte Minimum
|
|
|
|
|
|
|
|
\c PMINUB mmxreg,mmxreg ; 0F,DA, /r [KATMAI,MMX]
|
|
|
|
\c PMINUB mmxreg,memory ; ?? [KATMAI,MMX,SM]
|
|
|
|
|
|
|
|
\c{PMINUB} returns, for each of the eight byte positions, the smaller
|
|
|
|
of the corresponding unsigned bytes in MM1 and MM2/Mem.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPMOVMSKB} \i\c{PMOVMSKB}: Move Byte Mask To Integer
|
|
|
|
|
|
|
|
\c PMOVMSKB reg32,mmxreg ; 0F,D7,/r [KATMAI,MMX]
|
|
|
|
|
|
|
|
\c{PMOVMSKB} returns an 8-bit mask
|
|
|
|
formed of the most significant bits of each byte of its
|
|
|
|
source operand.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insPMULHRW} \i\c{PMULHRW}, \i\c{PMULHRIW}: MMX Packed Multiply
|
|
|
|
High with Rounding
|
|
|
|
|
|
|
|
\c PMULHRW mmxreg,r/m64 ; 0F 59 /r [CYRIX,MMX]
|
|
|
|
\c PMULHRIW mmxreg,r/m64 ; 0F 5D /r [CYRIX,MMX]
|
|
|
|
|
|
|
|
These instructions, specific to the Cyrix MMX extensions, treat
|
|
|
|
their operands as vectors of four signed words. Words in
|
|
|
|
corresponding positions are multiplied, to give a 32-bit value in
|
|
|
|
which bits 30 and 31 are guaranteed equal. Bits 30 to 15 of this
|
|
|
|
value (bit mask \c{0x7FFF8000}) are taken and stored in the
|
|
|
|
corresponding position of the destination operand, after first
|
|
|
|
rounding the low bit (equivalent to adding \c{0x4000} before
|
|
|
|
extracting bits 30 to 15).
|
|
|
|
|
|
|
|
For \c{PMULHRW}, the destination operand is the first operand; for
|
|
|
|
\c{PMULHRIW} the destination operand is implied by the first operand
|
|
|
|
in the manner of \c{PADDSIW} (\k{insPADDSIW}).
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insPMULHRWA} \i\c{PMULHRWA}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PMULHRWA mmxreg,memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
\c PMULHRWA mmxreg,mmxreg ; ?? [PENT,3DNOW]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPMULHUW} \i\c{PMULHUW}: Packed Multiply High Unsigned
|
|
|
|
|
|
|
|
\c PMULHUW mmxreg,mmxreg ; 0F,E4,/r [KATMAI,MMX]
|
|
|
|
\c PMULHUW mmxreg,memory ; ?? [KATMAI,MMX,SM]
|
|
|
|
|
|
|
|
\c{PMULHUW} multiplies the four unsigned
|
|
|
|
words in the destination operand with the four unsigned words
|
|
|
|
in the source operand. The high-order 16 bits of the 32-bit
|
|
|
|
intermediate results are written to the destination operand.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insPMULHW} \i\c{PMULHW}, \i\c{PMULLW}: MMX Packed Multiply
|
|
|
|
|
|
|
|
\c PMULHW mmxreg,r/m64 ; 0F E5 /r [PENT,MMX]
|
|
|
|
\c PMULLW mmxreg,r/m64 ; 0F D5 /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{PMULxW} treats its two inputs as vectors of four signed words. It
|
|
|
|
multiplies corresponding elements of the two operands, giving four
|
|
|
|
signed doubleword results.
|
|
|
|
|
|
|
|
\c{PMULHW} then stores the top 16 bits of each doubleword in the
|
|
|
|
destination (first) operand; \c{PMULLW} stores the bottom 16 bits of
|
|
|
|
each doubleword in the destination operand.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insPMVccZB} \i\c{PMVccZB}: MMX Packed Conditional Move
|
|
|
|
|
|
|
|
\c PMVZB mmxreg,mem64 ; 0F 58 /r [CYRIX,MMX]
|
|
|
|
\c PMVNZB mmxreg,mem64 ; 0F 5A /r [CYRIX,MMX]
|
|
|
|
\c PMVLZB mmxreg,mem64 ; 0F 5B /r [CYRIX,MMX]
|
|
|
|
\c PMVGEZB mmxreg,mem64 ; 0F 5C /r [CYRIX,MMX]
|
|
|
|
|
|
|
|
These instructions, specific to the Cyrix MMX extensions, perform
|
|
|
|
parallel conditional moves. The two input operands are treated as
|
|
|
|
vectors of eight bytes. Each byte of the destination (first) operand
|
|
|
|
is either written from the corresponding byte of the source (second)
|
|
|
|
operand, or left alone, depending on the value of the byte in the
|
|
|
|
\e{implied} operand (specified in the same way as \c{PADDSIW}, in
|
|
|
|
\k{insPADDSIW}).
|
|
|
|
|
|
|
|
\c{PMVZB} performs each move if the corresponding byte in the
|
|
|
|
implied operand is zero. \c{PMVNZB} moves if the byte is non-zero.
|
|
|
|
\c{PMVLZB} moves if the byte is less than zero, and \c{PMVGEZB}
|
|
|
|
moves if the byte is greater than or equal to zero.
|
|
|
|
|
|
|
|
Note that these instructions cannot take a register as their second
|
|
|
|
source operand.
|
|
|
|
|
|
|
|
\H{insPOP} \i\c{POP}: Pop Data from Stack
|
|
|
|
|
|
|
|
\c POP reg16 ; o16 58+r [8086]
|
|
|
|
\c POP reg32 ; o32 58+r [386]
|
|
|
|
|
|
|
|
\c POP r/m16 ; o16 8F /0 [8086]
|
|
|
|
\c POP r/m32 ; o32 8F /0 [386]
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\c POP CS ; 0F [8086,UNDOC]
|
2002-05-01 04:52:49 +08:00
|
|
|
\c POP DS ; 1F [8086]
|
|
|
|
\c POP ES ; 07 [8086]
|
|
|
|
\c POP SS ; 17 [8086]
|
|
|
|
\c POP FS ; 0F A1 [386]
|
|
|
|
\c POP GS ; 0F A9 [386]
|
|
|
|
|
|
|
|
\c{POP} loads a value from the stack (from \c{[SS:SP]} or
|
|
|
|
\c{[SS:ESP]}) and then increments the stack pointer.
|
|
|
|
|
|
|
|
The address-size attribute of the instruction determines whether
|
|
|
|
\c{SP} or \c{ESP} is used as the stack pointer: to deliberately
|
|
|
|
override the default given by the \c{BITS} setting, you can use an
|
|
|
|
\i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
The operand-size attribute of the instruction determines whether the
|
|
|
|
stack pointer is incremented by 2 or 4: this means that segment
|
|
|
|
register pops in \c{BITS 32} mode will pop 4 bytes off the stack and
|
|
|
|
discard the upper two of them. If you need to override that, you can
|
|
|
|
use an \i\c{o16} or \i\c{o32} prefix.
|
|
|
|
|
|
|
|
The above opcode listings give two forms for general-purpose
|
|
|
|
register pop instructions: for example, \c{POP BX} has the two forms
|
|
|
|
\c{5B} and \c{8F C3}. NASM will always generate the shorter form
|
|
|
|
when given \c{POP BX}. NDISASM will disassemble both.
|
|
|
|
|
|
|
|
\c{POP CS} is not a documented instruction, and is not supported on
|
|
|
|
any processor above the 8086 (since they use \c{0Fh} as an opcode
|
|
|
|
prefix for instruction set extensions). However, at least some 8086
|
|
|
|
processors do support it, and so NASM generates it for completeness.
|
|
|
|
|
|
|
|
\H{insPOPA} \i\c{POPAx}: Pop All General-Purpose Registers
|
|
|
|
|
|
|
|
\c POPA ; 61 [186]
|
|
|
|
\c POPAW ; o16 61 [186]
|
|
|
|
\c POPAD ; o32 61 [386]
|
|
|
|
|
|
|
|
\c{POPAW} pops a word from the stack into each of, successively,
|
|
|
|
\c{DI}, \c{SI}, \c{BP}, nothing (it discards a word from the stack
|
|
|
|
which was a placeholder for \c{SP}), \c{BX}, \c{DX}, \c{CX} and
|
|
|
|
\c{AX}. It is intended to reverse the operation of \c{PUSHAW} (see
|
|
|
|
\k{insPUSHA}), but it ignores the value for \c{SP} that was pushed
|
|
|
|
on the stack by \c{PUSHAW}.
|
|
|
|
|
|
|
|
\c{POPAD} pops twice as much data, and places the results in
|
|
|
|
\c{EDI}, \c{ESI}, \c{EBP}, nothing (placeholder for \c{ESP}),
|
|
|
|
\c{EBX}, \c{EDX}, \c{ECX} and \c{EAX}. It reverses the operation of
|
|
|
|
\c{PUSHAD}.
|
|
|
|
|
|
|
|
\c{POPA} is an alias mnemonic for either \c{POPAW} or \c{POPAD},
|
|
|
|
depending on the current \c{BITS} setting.
|
|
|
|
|
|
|
|
Note that the registers are popped in reverse order of their numeric
|
|
|
|
values in opcodes (see \k{iref-rv}).
|
|
|
|
|
|
|
|
\H{insPOPF} \i\c{POPFx}: Pop Flags Register
|
|
|
|
|
|
|
|
\c POPF ; 9D [8086]
|
|
|
|
\c POPFW ; o16 9D [8086]
|
|
|
|
\c POPFD ; o32 9D [386]
|
|
|
|
|
|
|
|
\c{POPFW} pops a word from the stack and stores it in the bottom 16
|
|
|
|
bits of the flags register (or the whole flags register, on
|
|
|
|
processors below a 386). \c{POPFD} pops a doubleword and stores it
|
|
|
|
in the entire flags register.
|
|
|
|
|
|
|
|
\c{POPF} is an alias mnemonic for either \c{POPFW} or \c{POPFD},
|
|
|
|
depending on the current \c{BITS} setting.
|
|
|
|
|
|
|
|
See also \c{PUSHF} (\k{insPUSHF}).
|
|
|
|
|
|
|
|
\H{insPOR} \i\c{POR}: MMX Bitwise OR
|
|
|
|
|
|
|
|
\c POR mmxreg,r/m64 ; 0F EB /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{POR} performs a bitwise OR operation between its two operands
|
|
|
|
(i.e. each bit of the result is 1 if and only if at least one of the
|
|
|
|
corresponding bits of the two inputs was 1), and stores the result
|
|
|
|
in the destination (first) operand.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insPREFETCHNTA} \i\c{PREFETCHNTA}: Prefetch
|
|
|
|
|
|
|
|
\c PREFETCHNTA memory ; 0F,18,/0 [KATMAI]
|
|
|
|
|
|
|
|
\c{PREFETCHNTA} moves the data specified by address closer to the
|
|
|
|
processor using the nta hint.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPREFETCHT0} \i\c{PREFETCHT0}: Prefetch
|
|
|
|
|
|
|
|
\c PREFETCHT0 memory ; 0F,18,/1 [KATMAI]
|
|
|
|
|
|
|
|
\c{PREFETCHT0} moves the data specified by address closer to the
|
|
|
|
processor using the t0 hint.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPREFETCHT1} \i\c{PREFETCHT1}: Prefetch
|
|
|
|
|
|
|
|
\c PREFETCHT1 memory ; 0F,18,/2 [KATMAI]
|
|
|
|
|
|
|
|
\c{PREFETCHT1} moves the data specified by address closer to the
|
|
|
|
processor using the t1 hint.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPREFETCHT2} \i\c{PREFETCHT2}: Prefetch
|
|
|
|
|
|
|
|
\c PREFETCHT2 memory ; 0F,18,/3 [KATMAI]
|
|
|
|
|
|
|
|
\c{PREFETCHT2} moves the data specified by address closer to the
|
|
|
|
processor using the t2 hint.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPREFETCH} \i\c{PREFETCH}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PREFETCH memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPREFETCHW} \i\c{PREFETCHW}: 3dnow instruction (duh!)
|
|
|
|
|
|
|
|
\c PREFETCHW memory ; ?? [PENT,3DNOW,SM]
|
|
|
|
|
|
|
|
3dnow instruction (duh!)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPSADBW} \i\c{PSADBW}: Packed Sum of Absolute Differences
|
|
|
|
|
|
|
|
\c PSADBW mmxreg,mmxreg ; 0F,F6, /r [KATMAI,MMX]
|
|
|
|
\c PSADBW mmxreg,memory ; ?? [KATMAI,MMX,SM]
|
|
|
|
|
|
|
|
\c{PSADBW} computes the absolute value of
|
|
|
|
the difference of unsigned bytes for mm1 and mm2/m64. These
|
|
|
|
differences are then summed to produce a word result in the lower
|
|
|
|
16-bit field; the upper three words are cleared. The destination
|
|
|
|
operand is an MMX register. The source operand can
|
|
|
|
either be an MMX register or a 64-bit memory operand.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insPSHUFW} \i\c{PSHUFW}: Packed Shuffle Word
|
|
|
|
|
|
|
|
\c PSHUFW mmxreg,mmxreg,immediate ; 0F,70,/r,ib [KATMAI,MMX,SB,AR2]
|
|
|
|
\c PSHUFW mmxreg,memory,immediate ; ?? [KATMAI,MMX,SM2,SB,AR2]
|
|
|
|
|
|
|
|
\c{PSHUFW} uses the imm8 operand to select
|
|
|
|
which of the four words in MM2/Mem will be placed in each of the
|
|
|
|
words in MM1. Bits 1 and 0 of imm8 encode the source for
|
|
|
|
destination word 0 (MM1[15-0]), bits 3 and 2 encode for word 1,
|
|
|
|
bits 5 and 4 encode for word 2, and bits 7 and 6 encode for
|
|
|
|
word 3 (MM1[63-48]). Similarly, the two-bit encoding represents
|
|
|
|
which source word is to be used, e.g., a binary encoding of 10
|
|
|
|
indicates that source word 2 (MM2/Mem[47-32]) will be used.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insPSLLD} \i\c{PSLLx}, \i\c{PSRLx}, \i\c{PSRAx}: MMX Bit Shifts
|
|
|
|
|
|
|
|
\c PSLLW mmxreg,r/m64 ; 0F F1 /r [PENT,MMX]
|
|
|
|
\c PSLLW mmxreg,imm8 ; 0F 71 /6 ib [PENT,MMX]
|
|
|
|
|
|
|
|
\c PSLLD mmxreg,r/m64 ; 0F F2 /r [PENT,MMX]
|
|
|
|
\c PSLLD mmxreg,imm8 ; 0F 72 /6 ib [PENT,MMX]
|
|
|
|
|
|
|
|
\c PSLLQ mmxreg,r/m64 ; 0F F3 /r [PENT,MMX]
|
|
|
|
\c PSLLQ mmxreg,imm8 ; 0F 73 /6 ib [PENT,MMX]
|
|
|
|
|
|
|
|
\c PSRAW mmxreg,r/m64 ; 0F E1 /r [PENT,MMX]
|
|
|
|
\c PSRAW mmxreg,imm8 ; 0F 71 /4 ib [PENT,MMX]
|
|
|
|
|
|
|
|
\c PSRAD mmxreg,r/m64 ; 0F E2 /r [PENT,MMX]
|
|
|
|
\c PSRAD mmxreg,imm8 ; 0F 72 /4 ib [PENT,MMX]
|
|
|
|
|
|
|
|
\c PSRLW mmxreg,r/m64 ; 0F D1 /r [PENT,MMX]
|
|
|
|
\c PSRLW mmxreg,imm8 ; 0F 71 /2 ib [PENT,MMX]
|
|
|
|
|
|
|
|
\c PSRLD mmxreg,r/m64 ; 0F D2 /r [PENT,MMX]
|
|
|
|
\c PSRLD mmxreg,imm8 ; 0F 72 /2 ib [PENT,MMX]
|
|
|
|
|
|
|
|
\c PSRLQ mmxreg,r/m64 ; 0F D3 /r [PENT,MMX]
|
|
|
|
\c PSRLQ mmxreg,imm8 ; 0F 73 /2 ib [PENT,MMX]
|
|
|
|
|
|
|
|
\c{PSxxQ} perform simple bit shifts on the 64-bit MMX registers: the
|
|
|
|
destination (first) operand is shifted left or right by the number of
|
|
|
|
bits given in the source (second) operand, and the vacated bits are
|
|
|
|
filled in with zeros (for a logical shift) or copies of the original
|
|
|
|
sign bit (for an arithmetic right shift).
|
|
|
|
|
|
|
|
\c{PSxxW} and \c{PSxxD} perform packed bit shifts: the destination
|
|
|
|
operand is treated as a vector of four words or two doublewords, and
|
|
|
|
each element is shifted individually, so bits shifted out of one
|
|
|
|
element do not interfere with empty bits coming into the next.
|
|
|
|
|
|
|
|
\c{PSLLx} and \c{PSRLx} perform logical shifts: the vacated bits at
|
|
|
|
one end of the shifted number are filled with zeros. \c{PSRAx}
|
|
|
|
performs an arithmetic right shift: the vacated bits at the top of
|
|
|
|
the shifted number are filled with copies of the original top (sign)
|
|
|
|
bit.
|
|
|
|
|
|
|
|
\H{insPSUBB} \i\c{PSUBxx}: MMX Packed Subtraction
|
|
|
|
|
|
|
|
\c PSUBB mmxreg,r/m64 ; 0F F8 /r [PENT,MMX]
|
|
|
|
\c PSUBW mmxreg,r/m64 ; 0F F9 /r [PENT,MMX]
|
|
|
|
\c PSUBD mmxreg,r/m64 ; 0F FA /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c PSUBSB mmxreg,r/m64 ; 0F E8 /r [PENT,MMX]
|
|
|
|
\c PSUBSW mmxreg,r/m64 ; 0F E9 /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c PSUBUSB mmxreg,r/m64 ; 0F D8 /r [PENT,MMX]
|
|
|
|
\c PSUBUSW mmxreg,r/m64 ; 0F D9 /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{PSUBxx} all perform packed subtraction between their two 64-bit
|
|
|
|
operands, storing the result in the destination (first) operand. The
|
|
|
|
\c{PSUBxB} forms treat the 64-bit operands as vectors of eight
|
|
|
|
bytes, and subtract each byte individually; \c{PSUBxW} treat the operands
|
|
|
|
as vectors of four words; and \c{PSUBD} treats its operands as
|
|
|
|
vectors of two doublewords.
|
|
|
|
|
|
|
|
In all cases, the elements of the operand on the right are
|
|
|
|
subtracted from the corresponding elements of the operand on the
|
|
|
|
left, not the other way round.
|
|
|
|
|
|
|
|
\c{PSUBSB} and \c{PSUBSW} perform signed saturation on the difference of
|
|
|
|
each pair of bytes or words: if the result of a subtraction is too
|
|
|
|
large or too small to fit into a signed byte or word result, it is
|
|
|
|
clipped (saturated) to the largest or smallest value which \e{will}
|
|
|
|
fit. \c{PSUBUSB} and \c{PSUBUSW} similarly perform unsigned
|
|
|
|
saturation, clipping to zero if the result would otherwise be
|
|
|
|
negative.
|
|
|
|
|
|
|
|
\H{insPSUBSIW} \i\c{PSUBSIW}: MMX Packed Subtract with Saturation to
|
|
|
|
Implied Destination
|
|
|
|
|
|
|
|
\c PSUBSIW mmxreg,r/m64 ; 0F 55 /r [CYRIX,MMX]
|
|
|
|
|
|
|
|
\c{PSUBSIW}, specific to the Cyrix extensions to the MMX instruction
|
|
|
|
set, performs the same function as \c{PSUBSW}, except that the
|
|
|
|
result is not placed in the register specified by the first operand,
|
|
|
|
but instead in the implied destination register, specified as for
|
|
|
|
\c{PADDSIW} (\k{insPADDSIW}).
|
|
|
|
|
|
|
|
\H{insPUNPCKHBW} \i\c{PUNPCKxxx}: Unpack Data
|
|
|
|
|
|
|
|
\c PUNPCKHBW mmxreg,r/m64 ; 0F 68 /r [PENT,MMX]
|
|
|
|
\c PUNPCKHWD mmxreg,r/m64 ; 0F 69 /r [PENT,MMX]
|
|
|
|
\c PUNPCKHDQ mmxreg,r/m64 ; 0F 6A /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c PUNPCKLBW mmxreg,r/m64 ; 0F 60 /r [PENT,MMX]
|
|
|
|
\c PUNPCKLWD mmxreg,r/m64 ; 0F 61 /r [PENT,MMX]
|
|
|
|
\c PUNPCKLDQ mmxreg,r/m64 ; 0F 62 /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{PUNPCKxxx} all treat their operands as vectors, and produce a new
|
|
|
|
vector generated by interleaving elements from the two inputs. The
|
|
|
|
\c{PUNPCKHxx} instructions start by throwing away the bottom half of
|
|
|
|
each input operand, and the \c{PUNPCKLxx} instructions throw away
|
|
|
|
the top half.
|
|
|
|
|
|
|
|
The remaining elements, totalling 64 bits, are then interleaved into
|
|
|
|
the destination, alternating elements from the second (source)
|
|
|
|
operand and the first (destination) operand: so the leftmost element
|
|
|
|
in the result always comes from the second operand, and the
|
|
|
|
rightmost from the destination.
|
|
|
|
|
|
|
|
\c{PUNPCKxBW} works a byte at a time, \c{PUNPCKxWD} a word at a
|
|
|
|
time, and \c{PUNPCKxDQ} a doubleword at a time.
|
|
|
|
|
|
|
|
So, for example, if the first operand held \c{0x7A6A5A4A3A2A1A0A}
|
|
|
|
and the second held \c{0x7B6B5B4B3B2B1B0B}, then:
|
|
|
|
|
|
|
|
\b \c{PUNPCKHBW} would return \c{0x7B7A6B6A5B5A4B4A}.
|
|
|
|
|
|
|
|
\b \c{PUNPCKHWD} would return \c{0x7B6B7A6A5B4B5A4A}.
|
|
|
|
|
|
|
|
\b \c{PUNPCKHDQ} would return \c{0x7B6B5B4B7A6A5A4A}.
|
|
|
|
|
|
|
|
\b \c{PUNPCKLBW} would return \c{0x3B3A2B2A1B1A0B0A}.
|
|
|
|
|
|
|
|
\b \c{PUNPCKLWD} would return \c{0x3B2B3A2A1B0B1A0A}.
|
|
|
|
|
|
|
|
\b \c{PUNPCKLDQ} would return \c{0x3B2B1B0B3A2A1A0A}.
|
|
|
|
|
|
|
|
\H{insPUSH} \i\c{PUSH}: Push Data on Stack
|
|
|
|
|
|
|
|
\c PUSH reg16 ; o16 50+r [8086]
|
|
|
|
\c PUSH reg32 ; o32 50+r [386]
|
|
|
|
|
|
|
|
\c PUSH r/m16 ; o16 FF /6 [8086]
|
|
|
|
\c PUSH r/m32 ; o32 FF /6 [386]
|
|
|
|
|
|
|
|
\c PUSH CS ; 0E [8086]
|
|
|
|
\c PUSH DS ; 1E [8086]
|
|
|
|
\c PUSH ES ; 06 [8086]
|
|
|
|
\c PUSH SS ; 16 [8086]
|
|
|
|
\c PUSH FS ; 0F A0 [386]
|
|
|
|
\c PUSH GS ; 0F A8 [386]
|
|
|
|
|
|
|
|
\c PUSH imm8 ; 6A ib [186]
|
|
|
|
\c PUSH imm16 ; o16 68 iw [186]
|
|
|
|
\c PUSH imm32 ; o32 68 id [386]
|
|
|
|
|
|
|
|
\c{PUSH} decrements the stack pointer (\c{SP} or \c{ESP}) by 2 or 4,
|
|
|
|
and then stores the given value at \c{[SS:SP]} or \c{[SS:ESP]}.
|
|
|
|
|
|
|
|
The address-size attribute of the instruction determines whether
|
|
|
|
\c{SP} or \c{ESP} is used as the stack pointer: to deliberately
|
|
|
|
override the default given by the \c{BITS} setting, you can use an
|
|
|
|
\i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
The operand-size attribute of the instruction determines whether the
|
|
|
|
stack pointer is decremented by 2 or 4: this means that segment
|
|
|
|
register pushes in \c{BITS 32} mode will push 4 bytes on the stack,
|
|
|
|
of which the upper two are undefined. If you need to override that,
|
|
|
|
you can use an \i\c{o16} or \i\c{o32} prefix.
|
|
|
|
|
|
|
|
The above opcode listings give two forms for general-purpose
|
|
|
|
\i{register push} instructions: for example, \c{PUSH BX} has the two
|
|
|
|
forms \c{53} and \c{FF F3}. NASM will always generate the shorter
|
|
|
|
form when given \c{PUSH BX}. NDISASM will disassemble both.
|
|
|
|
|
|
|
|
Unlike the undocumented and barely supported \c{POP CS}, \c{PUSH CS}
|
|
|
|
is a perfectly valid and sensible instruction, supported on all
|
|
|
|
processors.
|
|
|
|
|
|
|
|
The instruction \c{PUSH SP} may be used to distinguish an 8086 from
|
|
|
|
later processors: on an 8086, the value of \c{SP} stored is the
|
|
|
|
value it has \e{after} the push instruction, whereas on later
|
|
|
|
processors it is the value \e{before} the push instruction.
|
|
|
|
|
|
|
|
\H{insPUSHA} \i\c{PUSHAx}: Push All General-Purpose Registers
|
|
|
|
|
|
|
|
\c PUSHA ; 60 [186]
|
|
|
|
\c PUSHAD ; o32 60 [386]
|
|
|
|
\c PUSHAW ; o16 60 [186]
|
|
|
|
|
|
|
|
\c{PUSHAW} pushes, in succession, \c{AX}, \c{CX}, \c{DX}, \c{BX},
|
|
|
|
\c{SP}, \c{BP}, \c{SI} and \c{DI} on the stack, decrementing the
|
|
|
|
stack pointer by a total of 16.
|
|
|
|
|
|
|
|
\c{PUSHAD} pushes, in succession, \c{EAX}, \c{ECX}, \c{EDX},
|
|
|
|
\c{EBX}, \c{ESP}, \c{EBP}, \c{ESI} and \c{EDI} on the stack,
|
|
|
|
decrementing the stack pointer by a total of 32.
|
|
|
|
|
|
|
|
In both cases, the value of \c{SP} or \c{ESP} pushed is its
|
|
|
|
\e{original} value, as it had before the instruction was executed.
|
|
|
|
|
|
|
|
\c{PUSHA} is an alias mnemonic for either \c{PUSHAW} or \c{PUSHAD},
|
|
|
|
depending on the current \c{BITS} setting.
|
|
|
|
|
|
|
|
Note that the registers are pushed in order of their numeric values
|
|
|
|
in opcodes (see \k{iref-rv}).
|
|
|
|
|
|
|
|
See also \c{POPA} (\k{insPOPA}).
|
|
|
|
|
|
|
|
\H{insPUSHF} \i\c{PUSHFx}: Push Flags Register
|
|
|
|
|
|
|
|
\c PUSHF                         ; 9C                   [8086]
|
|
|
|
\c PUSHFD ; o32 9C [386]
|
|
|
|
\c PUSHFW                        ; o16 9C               [8086]
|
|
|
|
|
|
|
|
\c{PUSHFW} pushes the bottom 16 bits of the flags register (or the
|
|
|
|
whole flags register, on processors below a 386) onto the stack.
|
|
|
|
\c{PUSHFD} pushes the entire 32-bit flags register onto the
|
|
|
|
stack.
|
|
|
|
|
|
|
|
\c{PUSHF} is an alias mnemonic for either \c{PUSHFW} or \c{PUSHFD},
|
|
|
|
depending on the current \c{BITS} setting.
|
|
|
|
|
|
|
|
See also \c{POPF} (\k{insPOPF}).
|
|
|
|
|
|
|
|
\H{insPXOR} \i\c{PXOR}: MMX Bitwise XOR
|
|
|
|
|
|
|
|
\c PXOR mmxreg,r/m64 ; 0F EF /r [PENT,MMX]
|
|
|
|
|
|
|
|
\c{PXOR} performs a bitwise XOR operation between its two operands
|
|
|
|
(i.e. each bit of the result is 1 if and only if exactly one of the
|
|
|
|
corresponding bits of the two inputs was 1), and stores the result
|
|
|
|
in the destination (first) operand.
|
|
|
|
|
|
|
|
\H{insRCL} \i\c{RCL}, \i\c{RCR}: Bitwise Rotate through Carry Bit
|
|
|
|
|
|
|
|
\c RCL r/m8,1 ; D0 /2 [8086]
|
|
|
|
\c RCL r/m8,CL ; D2 /2 [8086]
|
|
|
|
\c RCL r/m8,imm8 ; C0 /2 ib [286]
|
|
|
|
\c RCL r/m16,1 ; o16 D1 /2 [8086]
|
|
|
|
\c RCL r/m16,CL ; o16 D3 /2 [8086]
|
|
|
|
\c RCL r/m16,imm8 ; o16 C1 /2 ib [286]
|
|
|
|
\c RCL r/m32,1 ; o32 D1 /2 [386]
|
|
|
|
\c RCL r/m32,CL ; o32 D3 /2 [386]
|
|
|
|
\c RCL r/m32,imm8 ; o32 C1 /2 ib [386]
|
|
|
|
|
|
|
|
\c RCR r/m8,1 ; D0 /3 [8086]
|
|
|
|
\c RCR r/m8,CL ; D2 /3 [8086]
|
|
|
|
\c RCR r/m8,imm8 ; C0 /3 ib [286]
|
|
|
|
\c RCR r/m16,1 ; o16 D1 /3 [8086]
|
|
|
|
\c RCR r/m16,CL ; o16 D3 /3 [8086]
|
|
|
|
\c RCR r/m16,imm8 ; o16 C1 /3 ib [286]
|
|
|
|
\c RCR r/m32,1 ; o32 D1 /3 [386]
|
|
|
|
\c RCR r/m32,CL ; o32 D3 /3 [386]
|
|
|
|
\c RCR r/m32,imm8 ; o32 C1 /3 ib [386]
|
|
|
|
|
|
|
|
\c{RCL} and \c{RCR} perform a 9-bit, 17-bit or 33-bit bitwise
|
|
|
|
rotation operation, involving the given source/destination (first)
|
|
|
|
operand and the carry bit. Thus, for example, in the operation
|
|
|
|
\c{RCL AL,1}, a 9-bit rotation is performed in which \c{AL} is
|
|
|
|
shifted left by 1, the top bit of \c{AL} moves into the carry flag,
|
|
|
|
and the original value of the carry flag is placed in the low bit of
|
|
|
|
\c{AL}.
|
|
|
|
|
|
|
|
The number of bits to rotate by is given by the second operand. Only
|
|
|
|
the bottom five bits of the rotation count are considered by
|
|
|
|
processors above the 8086.
|
|
|
|
|
|
|
|
You can force the longer (286 and upwards, beginning with a \c{C1}
|
|
|
|
byte) form of \c{RCL foo,1} by using a \c{BYTE} prefix: \c{RCL
|
|
|
|
foo,BYTE 1}. Similarly with \c{RCR}.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insRCPPS} \i\c{RCPPS}: Packed Single-FP Reciprocal
|
|
|
|
|
|
|
|
\c RCPPS xmmreg,memory ; 0F,53,/r [KATMAI,SSE]
|
|
|
|
\c RCPPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{RCPPS} returns an approximation of the reciprocal of the
|
|
|
|
SP FP numbers from xmm2/m128. The maximum error for this
|
|
|
|
approximation is: |Error| <= 1.5 x 2^-12
|
|
|
|
|
|
|
|
|
|
|
|
\H{insRCPSS} \i\c{RCPSS}: Scalar Single-FP Reciprocal
|
|
|
|
|
|
|
|
\c RCPSS xmmreg,memory ; F3,0F,53,/r [KATMAI,SSE]
|
|
|
|
\c RCPSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{RCPSS} returns an approximation of the reciprocal of the
|
|
|
|
lower SP FP number from xmm2/m32; the upper three fields are
|
|
|
|
passed through from xmm1. The maximum error for this
|
|
|
|
approximation is: |Error| <= 1.5 x 2^-12
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insRDMSR} \i\c{RDMSR}: Read Model-Specific Registers
|
|
|
|
|
|
|
|
\c RDMSR ; 0F 32 [PENT]
|
|
|
|
|
|
|
|
\c{RDMSR} reads the processor Model-Specific Register (MSR) whose
|
|
|
|
index is stored in \c{ECX}, and stores the result in \c{EDX:EAX}.
|
|
|
|
See also \c{WRMSR} (\k{insWRMSR}).
|
|
|
|
|
|
|
|
\H{insRDPMC} \i\c{RDPMC}: Read Performance-Monitoring Counters
|
|
|
|
|
|
|
|
\c RDPMC ; 0F 33 [P6]
|
|
|
|
|
|
|
|
\c{RDPMC} reads the processor performance-monitoring counter whose
|
|
|
|
index is stored in \c{ECX}, and stores the result in \c{EDX:EAX}.
|
|
|
|
|
|
|
|
\H{insRDTSC} \i\c{RDTSC}: Read Time-Stamp Counter
|
|
|
|
|
|
|
|
\c RDTSC ; 0F 31 [PENT]
|
|
|
|
|
|
|
|
\c{RDTSC} reads the processor's time-stamp counter into \c{EDX:EAX}.
|
|
|
|
|
|
|
|
\H{insRET} \i\c{RET}, \i\c{RETF}, \i\c{RETN}: Return from Procedure Call
|
|
|
|
|
|
|
|
\c RET ; C3 [8086]
|
|
|
|
\c RET imm16 ; C2 iw [8086]
|
|
|
|
|
|
|
|
\c RETF ; CB [8086]
|
|
|
|
\c RETF imm16 ; CA iw [8086]
|
|
|
|
|
|
|
|
\c RETN ; C3 [8086]
|
|
|
|
\c RETN imm16 ; C2 iw [8086]
|
|
|
|
|
|
|
|
\c{RET}, and its exact synonym \c{RETN}, pop \c{IP} or \c{EIP} from
|
|
|
|
the stack and transfer control to the new address. Optionally, if a
|
|
|
|
numeric operand is provided, they increment the stack pointer
|
|
|
|
by a further \c{imm16} bytes after popping the return address.
|
|
|
|
|
|
|
|
\c{RETF} executes a far return: after popping \c{IP}/\c{EIP}, it
|
|
|
|
then pops \c{CS}, and \e{then} increments the stack pointer by the
|
|
|
|
optional argument if present.
|
|
|
|
|
|
|
|
\H{insROL} \i\c{ROL}, \i\c{ROR}: Bitwise Rotate
|
|
|
|
|
|
|
|
\c ROL r/m8,1 ; D0 /0 [8086]
|
|
|
|
\c ROL r/m8,CL ; D2 /0 [8086]
|
|
|
|
\c ROL r/m8,imm8 ; C0 /0 ib [286]
|
|
|
|
\c ROL r/m16,1 ; o16 D1 /0 [8086]
|
|
|
|
\c ROL r/m16,CL ; o16 D3 /0 [8086]
|
|
|
|
\c ROL r/m16,imm8 ; o16 C1 /0 ib [286]
|
|
|
|
\c ROL r/m32,1 ; o32 D1 /0 [386]
|
|
|
|
\c ROL r/m32,CL ; o32 D3 /0 [386]
|
|
|
|
\c ROL r/m32,imm8 ; o32 C1 /0 ib [386]
|
|
|
|
|
|
|
|
\c ROR r/m8,1 ; D0 /1 [8086]
|
|
|
|
\c ROR r/m8,CL ; D2 /1 [8086]
|
|
|
|
\c ROR r/m8,imm8 ; C0 /1 ib [286]
|
|
|
|
\c ROR r/m16,1 ; o16 D1 /1 [8086]
|
|
|
|
\c ROR r/m16,CL ; o16 D3 /1 [8086]
|
|
|
|
\c ROR r/m16,imm8 ; o16 C1 /1 ib [286]
|
|
|
|
\c ROR r/m32,1 ; o32 D1 /1 [386]
|
|
|
|
\c ROR r/m32,CL ; o32 D3 /1 [386]
|
|
|
|
\c ROR r/m32,imm8 ; o32 C1 /1 ib [386]
|
|
|
|
|
|
|
|
\c{ROL} and \c{ROR} perform a bitwise rotation operation on the given
|
|
|
|
source/destination (first) operand. Thus, for example, in the
|
|
|
|
operation \c{ROL AL,1}, an 8-bit rotation is performed in which
|
|
|
|
\c{AL} is shifted left by 1 and the original top bit of \c{AL} moves
|
|
|
|
round into the low bit.
|
|
|
|
|
|
|
|
The number of bits to rotate by is given by the second operand. Only
|
|
|
|
the bottom five bits (whatever the operand size) of
|
|
|
|
the rotation count are considered by processors above the 8086.
|
|
|
|
|
|
|
|
You can force the longer (286 and upwards, beginning with a \c{C1}
|
|
|
|
byte) form of \c{ROL foo,1} by using a \c{BYTE} prefix: \c{ROL
|
|
|
|
foo,BYTE 1}. Similarly with \c{ROR}.
|
|
|
|
|
|
|
|
\H{insRSM} \i\c{RSM}: Resume from System-Management Mode
|
|
|
|
|
|
|
|
\c RSM ; 0F AA [PENT]
|
|
|
|
|
|
|
|
\c{RSM} returns the processor to its normal operating mode when it
|
|
|
|
was in System-Management Mode.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insRSQRTPS} \i\c{RSQRTPS}: Packed Single-FP Square Root Reciprocal
|
|
|
|
|
|
|
|
\c RSQRTPS xmmreg,memory ; 0F,52,/r [KATMAI,SSE]
|
|
|
|
\c RSQRTPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{RSQRTPS} returns an approximation of the reciprocal
|
|
|
|
of the square root of the SP FP numbers from xmm2/m128. The
|
|
|
|
maximum error for this approximation is: |Error| <= 1.5 x 2^-12
|
|
|
|
|
|
|
|
|
|
|
|
\H{insRSQRTSS} \i\c{RSQRTSS}: Scalar Single-FP Square Root Reciprocal
|
|
|
|
|
|
|
|
\c RSQRTSS xmmreg,memory ; F3,0F,52,/r [KATMAI,SSE]
|
|
|
|
\c RSQRTSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{RSQRTSS} returns an approximation of the reciprocal
|
|
|
|
of the square root of the lowest SP FP number from xmm2/m32;
|
|
|
|
the upper three fields are passed through from xmm1. The maximum
|
|
|
|
error for this approximation is: |Error| <= 1.5 x 2^-12
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insSAHF} \i\c{SAHF}: Store AH to Flags
|
|
|
|
|
|
|
|
\c SAHF ; 9E [8086]
|
|
|
|
|
|
|
|
\c{SAHF} sets the low byte of the flags word according to the
|
|
|
|
contents of the \c{AH} register. See also \c{LAHF} (\k{insLAHF}).
|
|
|
|
|
|
|
|
\H{insSAL} \i\c{SAL}, \i\c{SAR}: Bitwise Arithmetic Shifts
|
|
|
|
|
|
|
|
\c SAL r/m8,1 ; D0 /4 [8086]
|
|
|
|
\c SAL r/m8,CL ; D2 /4 [8086]
|
|
|
|
\c SAL r/m8,imm8 ; C0 /4 ib [286]
|
|
|
|
\c SAL r/m16,1 ; o16 D1 /4 [8086]
|
|
|
|
\c SAL r/m16,CL ; o16 D3 /4 [8086]
|
|
|
|
\c SAL r/m16,imm8 ; o16 C1 /4 ib [286]
|
|
|
|
\c SAL r/m32,1 ; o32 D1 /4 [386]
|
|
|
|
\c SAL r/m32,CL ; o32 D3 /4 [386]
|
|
|
|
\c SAL r/m32,imm8 ; o32 C1 /4 ib [386]
|
|
|
|
|
|
|
|
\c SAR r/m8,1                     ; D0 /7                [8086]
|
|
|
|
\c SAR r/m8,CL                    ; D2 /7                [8086]
|
|
|
|
\c SAR r/m8,imm8                  ; C0 /7 ib             [286]
|
|
|
|
\c SAR r/m16,1                    ; o16 D1 /7            [8086]
|
|
|
|
\c SAR r/m16,CL                   ; o16 D3 /7            [8086]
|
|
|
|
\c SAR r/m16,imm8                 ; o16 C1 /7 ib         [286]
|
|
|
|
\c SAR r/m32,1                    ; o32 D1 /7            [386]
|
|
|
|
\c SAR r/m32,CL                   ; o32 D3 /7            [386]
|
|
|
|
\c SAR r/m32,imm8                 ; o32 C1 /7 ib         [386]
|
|
|
|
|
|
|
|
\c{SAL} and \c{SAR} perform an arithmetic shift operation on the given
|
|
|
|
source/destination (first) operand. The vacated bits are filled with
|
|
|
|
zero for \c{SAL}, and with copies of the original high bit of the
|
|
|
|
source operand for \c{SAR}.
|
|
|
|
|
|
|
|
\c{SAL} is a synonym for \c{SHL} (see \k{insSHL}). NASM will
|
|
|
|
assemble either one to the same code, but NDISASM will always
|
|
|
|
disassemble that code as \c{SHL}.
|
|
|
|
|
|
|
|
The number of bits to shift by is given by the second operand. Only
|
|
|
|
the bottom five bits (whatever the operand size) of
|
|
|
|
the shift count are considered by processors above the 8086.
|
|
|
|
|
|
|
|
You can force the longer (286 and upwards, beginning with a \c{C1}
|
|
|
|
byte) form of \c{SAL foo,1} by using a \c{BYTE} prefix: \c{SAL
|
|
|
|
foo,BYTE 1}. Similarly with \c{SAR}.
|
|
|
|
|
|
|
|
\H{insSALC} \i\c{SALC}: Set AL from Carry Flag
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\c SALC ; D6 [8086,UNDOC]
|
2002-05-01 04:52:49 +08:00
|
|
|
|
|
|
|
\c{SALC} is an early undocumented instruction similar in concept to
|
|
|
|
\c{SETcc} (\k{insSETcc}). Its function is to set \c{AL} to zero if
|
|
|
|
the carry flag is clear, or to \c{0xFF} if it is set.
|
|
|
|
|
|
|
|
\H{insSBB} \i\c{SBB}: Subtract with Borrow
|
|
|
|
|
|
|
|
\c SBB r/m8,reg8 ; 18 /r [8086]
|
|
|
|
\c SBB r/m16,reg16 ; o16 19 /r [8086]
|
|
|
|
\c SBB r/m32,reg32 ; o32 19 /r [386]
|
|
|
|
|
|
|
|
\c SBB reg8,r/m8 ; 1A /r [8086]
|
|
|
|
\c SBB reg16,r/m16 ; o16 1B /r [8086]
|
|
|
|
\c SBB reg32,r/m32 ; o32 1B /r [386]
|
|
|
|
|
|
|
|
\c SBB r/m8,imm8 ; 80 /3 ib [8086]
|
|
|
|
\c SBB r/m16,imm16 ; o16 81 /3 iw [8086]
|
|
|
|
\c SBB r/m32,imm32 ; o32 81 /3 id [386]
|
|
|
|
|
|
|
|
\c SBB r/m16,imm8 ; o16 83 /3 ib [8086]
|
|
|
|
\c SBB r/m32,imm8                ; o32 83 /3 ib         [386]
|
|
|
|
|
|
|
|
\c SBB AL,imm8 ; 1C ib [8086]
|
|
|
|
\c SBB AX,imm16 ; o16 1D iw [8086]
|
|
|
|
\c SBB EAX,imm32 ; o32 1D id [386]
|
|
|
|
|
|
|
|
\c{SBB} performs integer subtraction: it subtracts its second
|
|
|
|
operand, plus the value of the carry flag, from its first, and
|
|
|
|
leaves the result in its destination (first) operand. The flags are
|
|
|
|
set according to the result of the operation: in particular, the
|
|
|
|
carry flag is affected and can be used by a subsequent \c{SBB}
|
|
|
|
instruction.
|
|
|
|
|
|
|
|
In the forms with an 8-bit immediate second operand and a longer
|
|
|
|
first operand, the second operand is considered to be signed, and is
|
|
|
|
sign-extended to the length of the first operand. In these cases,
|
|
|
|
the \c{BYTE} qualifier is necessary to force NASM to generate this
|
|
|
|
form of the instruction.
|
|
|
|
|
|
|
|
To subtract one number from another without also subtracting the
|
|
|
|
contents of the carry flag, use \c{SUB} (\k{insSUB}).
|
|
|
|
|
|
|
|
\H{insSCASB} \i\c{SCASB}, \i\c{SCASW}, \i\c{SCASD}: Scan String
|
|
|
|
|
|
|
|
\c SCASB ; AE [8086]
|
|
|
|
\c SCASW ; o16 AF [8086]
|
|
|
|
\c SCASD ; o32 AF [386]
|
|
|
|
|
|
|
|
\c{SCASB} compares the byte in \c{AL} with the byte at \c{[ES:DI]}
|
|
|
|
or \c{[ES:EDI]}, and sets the flags accordingly. It then increments
|
|
|
|
or decrements (depending on the direction flag: increments if the
|
|
|
|
flag is clear, decrements if it is set) \c{DI} (or \c{EDI}).
|
|
|
|
|
|
|
|
The register used is \c{DI} if the address size is 16 bits, and
|
|
|
|
\c{EDI} if it is 32 bits. If you need to use an address size not
|
|
|
|
equal to the current \c{BITS} setting, you can use an explicit
|
|
|
|
\i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
Segment override prefixes have no effect for this instruction: the
|
|
|
|
use of \c{ES} for the load from \c{[DI]} or \c{[EDI]} cannot be
|
|
|
|
overridden.
|
|
|
|
|
|
|
|
\c{SCASW} and \c{SCASD} work in the same way, but they compare a
|
|
|
|
word to \c{AX} or a doubleword to \c{EAX} instead of a byte to
|
|
|
|
\c{AL}, and increment or decrement the addressing registers by 2 or
|
|
|
|
4 instead of 1.
|
|
|
|
|
|
|
|
The \c{REPE} and \c{REPNE} prefixes (equivalently, \c{REPZ} and
|
|
|
|
\c{REPNZ}) may be used to repeat the instruction up to \c{CX} (or
|
|
|
|
\c{ECX} - again, the address size chooses which) times until the
|
|
|
|
first unequal or equal byte is found.
|
|
|
|
|
|
|
|
\H{insSETcc} \i\c{SETcc}: Set Register from Condition
|
|
|
|
|
|
|
|
\c SETcc r/m8 ; 0F 90+cc /2 [386]
|
|
|
|
|
|
|
|
\c{SETcc} sets the given 8-bit operand to zero if its condition is
|
|
|
|
not satisfied, and to 1 if it is.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insSFENCE} \i\c{SFENCE}: Store Fence
|
|
|
|
|
|
|
|
\c SFENCE 0,0,0 ; 0F AE /7 [KATMAI]
|
|
|
|
|
|
|
|
\c{SFENCE} Weakly ordered memory types can enable higher
|
|
|
|
performance through such techniques as out-of-order issue,
|
|
|
|
write-combining, and write-collapsing. Memory ordering issues
|
|
|
|
can arise between a producer and a consumer of data and there
|
|
|
|
are a number of common usage models which may be affected by
|
|
|
|
weakly ordered stores:
|
|
|
|
1. library functions, which use weakly ordered memory
|
|
|
|
to write results
|
|
|
|
2. compiler-generated code, which also benefit from writing
|
|
|
|
weakly-ordered results
|
|
|
|
3. hand-written code
|
|
|
|
The degree to which a consumer of data knows that the data is
|
|
|
|
weakly ordered can vary for these cases. As a result, the SFENCE
|
|
|
|
instruction provides a performance-efficient way of ensuring
|
|
|
|
ordering between routines that produce weakly-ordered results
|
|
|
|
and routines that consume this data. The SFENCE is ordered with
|
|
|
|
respect to stores and other SFENCE instructions.
|
|
|
|
SFENCE uses the following ModRM encoding:
|
|
|
|
Mod (7:6) = 11B
|
|
|
|
Reg/Opcode (5:3) = 111B
|
|
|
|
R/M (2:0) = 000B
|
|
|
|
All other ModRM encodings are defined to be reserved, and use
|
|
|
|
of these encodings risks incompatibility with future processors.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insSGDT} \i\c{SGDT}, \i\c{SIDT}, \i\c{SLDT}: Store Descriptor Table Pointers
|
|
|
|
|
|
|
|
\c SGDT mem ; 0F 01 /0 [286,PRIV]
|
|
|
|
\c SIDT mem ; 0F 01 /1 [286,PRIV]
|
|
|
|
\c SLDT r/m16 ; 0F 00 /0 [286,PRIV]
|
|
|
|
|
|
|
|
\c{SGDT} and \c{SIDT} both take a 6-byte memory area as an operand:
|
|
|
|
they store the contents of the GDTR (global descriptor table
|
|
|
|
register) or IDTR (interrupt descriptor table register) into that
|
|
|
|
area as a 32-bit linear address and a 16-bit size limit from that
|
|
|
|
area (in that order). These are the only instructions which directly
|
|
|
|
use \e{linear} addresses, rather than segment/offset pairs.
|
|
|
|
|
|
|
|
\c{SLDT} stores the segment selector corresponding to the LDT (local
|
|
|
|
descriptor table) into the given operand.
|
|
|
|
|
|
|
|
See also \c{LGDT}, \c{LIDT} and \c{LLDT} (\k{insLGDT}).
|
|
|
|
|
|
|
|
\H{insSHL} \i\c{SHL}, \i\c{SHR}: Bitwise Logical Shifts
|
|
|
|
|
|
|
|
\c SHL r/m8,1 ; D0 /4 [8086]
|
|
|
|
\c SHL r/m8,CL ; D2 /4 [8086]
|
|
|
|
\c SHL r/m8,imm8 ; C0 /4 ib [286]
|
|
|
|
\c SHL r/m16,1 ; o16 D1 /4 [8086]
|
|
|
|
\c SHL r/m16,CL ; o16 D3 /4 [8086]
|
|
|
|
\c SHL r/m16,imm8 ; o16 C1 /4 ib [286]
|
|
|
|
\c SHL r/m32,1 ; o32 D1 /4 [386]
|
|
|
|
\c SHL r/m32,CL ; o32 D3 /4 [386]
|
|
|
|
\c SHL r/m32,imm8 ; o32 C1 /4 ib [386]
|
|
|
|
|
|
|
|
\c SHR r/m8,1 ; D0 /5 [8086]
|
|
|
|
\c SHR r/m8,CL ; D2 /5 [8086]
|
|
|
|
\c SHR r/m8,imm8 ; C0 /5 ib [286]
|
|
|
|
\c SHR r/m16,1 ; o16 D1 /5 [8086]
|
|
|
|
\c SHR r/m16,CL ; o16 D3 /5 [8086]
|
|
|
|
\c SHR r/m16,imm8 ; o16 C1 /5 ib [286]
|
|
|
|
\c SHR r/m32,1 ; o32 D1 /5 [386]
|
|
|
|
\c SHR r/m32,CL ; o32 D3 /5 [386]
|
|
|
|
\c SHR r/m32,imm8 ; o32 C1 /5 ib [386]
|
|
|
|
|
|
|
|
\c{SHL} and \c{SHR} perform a logical shift operation on the given
|
|
|
|
source/destination (first) operand. The vacated bits are filled with
|
|
|
|
zero.
|
|
|
|
|
|
|
|
A synonym for \c{SHL} is \c{SAL} (see \k{insSAL}). NASM will
|
|
|
|
assemble either one to the same code, but NDISASM will always
|
|
|
|
disassemble that code as \c{SHL}.
|
|
|
|
|
|
|
|
The number of bits to shift by is given by the second operand. Only
|
|
|
|
the bottom five bits (whatever the operand size) of
|
|
|
|
the shift count are considered by processors above the 8086.
|
|
|
|
|
|
|
|
You can force the longer (286 and upwards, beginning with a \c{C1}
|
|
|
|
byte) form of \c{SHL foo,1} by using a \c{BYTE} prefix: \c{SHL
|
|
|
|
foo,BYTE 1}. Similarly with \c{SHR}.
|
|
|
|
|
|
|
|
\H{insSHLD} \i\c{SHLD}, \i\c{SHRD}: Bitwise Double-Precision Shifts
|
|
|
|
|
|
|
|
\c SHLD r/m16,reg16,imm8 ; o16 0F A4 /r ib [386]
|
|
|
|
\c SHLD r/m32,reg32,imm8         ; o32 0F A4 /r ib      [386]
|
|
|
|
\c SHLD r/m16,reg16,CL ; o16 0F A5 /r [386]
|
|
|
|
\c SHLD r/m32,reg32,CL           ; o32 0F A5 /r         [386]
|
|
|
|
|
|
|
|
\c SHRD r/m16,reg16,imm8 ; o16 0F AC /r ib [386]
|
|
|
|
\c SHRD r/m32,reg32,imm8 ; o32 0F AC /r ib [386]
|
|
|
|
\c SHRD r/m16,reg16,CL ; o16 0F AD /r [386]
|
|
|
|
\c SHRD r/m32,reg32,CL ; o32 0F AD /r [386]
|
|
|
|
|
|
|
|
\c{SHLD} performs a double-precision left shift. It notionally places
|
|
|
|
its second operand to the right of its first, then shifts the entire
|
|
|
|
bit string thus generated to the left by a number of bits specified
|
|
|
|
in the third operand. It then updates only the \e{first} operand
|
|
|
|
according to the result of this. The second operand is not modified.
|
|
|
|
|
|
|
|
\c{SHRD} performs the corresponding right shift: it notionally
|
|
|
|
places the second operand to the \e{left} of the first, shifts the
|
|
|
|
whole bit string right, and updates only the first operand.
|
|
|
|
|
|
|
|
For example, if \c{EAX} holds \c{0x01234567} and \c{EBX} holds
|
|
|
|
\c{0x89ABCDEF}, then the instruction \c{SHLD EAX,EBX,4} would update
|
|
|
|
\c{EAX} to hold \c{0x12345678}. Under the same conditions, \c{SHRD
|
|
|
|
EAX,EBX,4} would update \c{EAX} to hold \c{0xF0123456}.
|
|
|
|
|
|
|
|
The number of bits to shift by is given by the third operand. Only
|
|
|
|
the bottom 5 bits of the shift count are considered.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insSHUFPS} \i\c{SHUFPS}: Shuffle Single-FP
|
|
|
|
|
|
|
|
\c SHUFPS xmmreg,memory,immediate ; 0F,C6,/r, ib [KATMAI,SSE,SB,AR2]
|
|
|
|
\c SHUFPS xmmreg,xmmreg,immediate ; ?? [KATMAI,SSE,SB,AR2]
|
|
|
|
|
|
|
|
\c{SHUFPS} The SHUFPS instruction is able to shuffle any of the
|
|
|
|
four SP FP numbers from xmm1 to the lower two destination fields;
|
|
|
|
the upper two destination fields are generated from a shuffle of
|
|
|
|
any of the four SP FP numbers from xmm2/m128.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insSMI} \i\c{SMI}: System Management Interrupt
|
|
|
|
|
|
|
|
\c SMI ; F1 [386,UNDOC]
|
|
|
|
|
|
|
|
This is an opcode apparently supported by some AMD processors (which
|
|
|
|
is why it can generate the same opcode as \c{INT1}), and places the
|
|
|
|
machine into system-management mode, a special debugging mode.
|
|
|
|
|
|
|
|
\H{insSMSW} \i\c{SMSW}: Store Machine Status Word
|
|
|
|
|
|
|
|
\c SMSW r/m16 ; 0F 01 /4 [286,PRIV]
|
|
|
|
|
|
|
|
\c{SMSW} stores the bottom half of the \c{CR0} control register (or
|
|
|
|
the Machine Status Word, on 286 processors) into the destination
|
|
|
|
operand. See also \c{LMSW} (\k{insLMSW}).
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insSQRTPS} \i\c{SQRTPS}: Packed Single-FP Square Root
|
|
|
|
|
|
|
|
\c SQRTPS xmmreg,memory ; 0F,51,/r [KATMAI,SSE]
|
|
|
|
\c SQRTPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{SQRTPS} The SQRTPS instruction returns the square root of
|
|
|
|
the packed SP FP numbers from xmm2/m128.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insSQRTSS} \i\c{SQRTSS}: Scalar Single-FP Square Root
|
|
|
|
|
|
|
|
\c SQRTSS xmmreg,memory ; F3,0F,51,/r [KATMAI,SSE]
|
|
|
|
\c SQRTSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{SQRTSS} returns the square root of the lowest SP FP number of
|
|
|
|
its source operand.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insSTC} \i\c{STC}, \i\c{STD}, \i\c{STI}: Set Flags
|
|
|
|
|
|
|
|
\c STC ; F9 [8086]
|
|
|
|
\c STD ; FD [8086]
|
|
|
|
\c STI ; FB [8086]
|
|
|
|
|
|
|
|
These instructions set various flags. \c{STC} sets the carry flag;
|
|
|
|
\c{STD} sets the direction flag; and \c{STI} sets the interrupt flag
|
|
|
|
(thus enabling interrupts).
|
|
|
|
|
|
|
|
To clear the carry, direction, or interrupt flags, use the \c{CLC},
|
|
|
|
\c{CLD} and \c{CLI} instructions (\k{insCLC}). To invert the carry
|
|
|
|
flag, use \c{CMC} (\k{insCMC}).
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insSTMXCSR} \i\c{STMXCSR}: Store Streaming SIMD Extension
|
|
|
|
Control/Status
|
|
|
|
|
|
|
|
\c STMXCSR memory ; 0F,AE,/3 [KATMAI,SSE,SD]
|
|
|
|
|
|
|
|
\c{STMXCSR} The MXCSR control/status register is used to enable
|
|
|
|
masked/unmasked exception handling, to set rounding modes,
|
|
|
|
to set flush-to-zero mode, and to view exception status flags.
|
|
|
|
Refer to LDMXCSR for a description of the format of MXCSR.
|
|
|
|
The linear address corresponds to the address of the
|
|
|
|
least-significant byte of the referenced memory data.
|
|
|
|
The reserved bits in the MXCSR are stored as zeroes.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insSTOSB} \i\c{STOSB}, \i\c{STOSW}, \i\c{STOSD}: Store Byte to String
|
|
|
|
|
|
|
|
\c STOSB ; AA [8086]
|
|
|
|
\c STOSW ; o16 AB [8086]
|
|
|
|
\c STOSD ; o32 AB [386]
|
|
|
|
|
|
|
|
\c{STOSB} stores the byte in \c{AL} at \c{[ES:DI]} or \c{[ES:EDI]},
|
|
|
|
without affecting the flags. It then increments or decrements
|
|
|
|
(depending on the direction flag: increments if the flag is clear,
|
|
|
|
decrements if it is set) \c{DI} (or \c{EDI}).
|
|
|
|
|
|
|
|
The register used is \c{DI} if the address size is 16 bits, and
|
|
|
|
\c{EDI} if it is 32 bits. If you need to use an address size not
|
|
|
|
equal to the current \c{BITS} setting, you can use an explicit
|
|
|
|
\i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
Segment override prefixes have no effect for this instruction: the
|
|
|
|
use of \c{ES} for the store to \c{[DI]} or \c{[EDI]} cannot be
|
|
|
|
overridden.
|
|
|
|
|
|
|
|
\c{STOSW} and \c{STOSD} work in the same way, but they store the
|
|
|
|
word in \c{AX} or the doubleword in \c{EAX} instead of the byte in
|
|
|
|
\c{AL}, and increment or decrement the addressing registers by 2 or
|
|
|
|
4 instead of 1.
|
|
|
|
|
|
|
|
The \c{REP} prefix may be used to repeat the instruction \c{CX} (or
|
|
|
|
\c{ECX} - again, the address size chooses which) times.
|
|
|
|
|
|
|
|
\H{insSTR} \i\c{STR}: Store Task Register
|
|
|
|
|
|
|
|
\c STR r/m16 ; 0F 00 /1 [286,PRIV]
|
|
|
|
|
|
|
|
\c{STR} stores the segment selector corresponding to the contents of
|
|
|
|
the Task Register into its operand.
|
|
|
|
|
|
|
|
\H{insSUB} \i\c{SUB}: Subtract Integers
|
|
|
|
|
|
|
|
\c SUB r/m8,reg8 ; 28 /r [8086]
|
|
|
|
\c SUB r/m16,reg16 ; o16 29 /r [8086]
|
|
|
|
\c SUB r/m32,reg32 ; o32 29 /r [386]
|
|
|
|
|
|
|
|
\c SUB reg8,r/m8 ; 2A /r [8086]
|
|
|
|
\c SUB reg16,r/m16 ; o16 2B /r [8086]
|
|
|
|
\c SUB reg32,r/m32 ; o32 2B /r [386]
|
|
|
|
|
|
|
|
\c SUB r/m8,imm8 ; 80 /5 ib [8086]
|
|
|
|
\c SUB r/m16,imm16 ; o16 81 /5 iw [8086]
|
|
|
|
\c SUB r/m32,imm32 ; o32 81 /5 id [386]
|
|
|
|
|
|
|
|
\c SUB r/m16,imm8 ; o16 83 /5 ib [8086]
|
|
|
|
\c SUB r/m32,imm8 ; o32 83 /5 ib [386]
|
|
|
|
|
|
|
|
\c SUB AL,imm8 ; 2C ib [8086]
|
|
|
|
\c SUB AX,imm16 ; o16 2D iw [8086]
|
|
|
|
\c SUB EAX,imm32 ; o32 2D id [386]
|
|
|
|
|
|
|
|
\c{SUB} performs integer subtraction: it subtracts its second
|
|
|
|
operand from its first, and leaves the result in its destination
|
|
|
|
(first) operand. The flags are set according to the result of the
|
|
|
|
operation: in particular, the carry flag is affected and can be used
|
|
|
|
by a subsequent \c{SBB} instruction (\k{insSBB}).
|
|
|
|
|
|
|
|
In the forms with an 8-bit immediate second operand and a longer
|
|
|
|
first operand, the second operand is considered to be signed, and is
|
|
|
|
sign-extended to the length of the first operand. In these cases,
|
|
|
|
the \c{BYTE} qualifier is necessary to force NASM to generate this
|
|
|
|
form of the instruction.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insSUBPS} \i\c{SUBPS}: Packed Single-FP Subtract
|
|
|
|
|
|
|
|
\c SUBPS xmmreg,memory ; 0F,5C,/r [KATMAI,SSE]
|
|
|
|
\c SUBPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{SUBPS} The SUBPS instruction subtracts the packed SP FP
|
|
|
|
numbers of both their operands.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insSUBSS} \i\c{SUBSS}: Scalar Single-FP Subtract
|
|
|
|
|
|
|
|
\c SUBSS xmmreg,memory ; F3,0F,5C, /r [KATMAI,SSE]
|
|
|
|
\c SUBSS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{SUBSS} The SUBSS instruction subtracts the lower SP FP
|
|
|
|
numbers of both their operands.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insTEST} \i\c{TEST}: Test Bits (notional bitwise AND)
|
|
|
|
|
|
|
|
\c TEST r/m8,reg8 ; 84 /r [8086]
|
|
|
|
\c TEST r/m16,reg16 ; o16 85 /r [8086]
|
|
|
|
\c TEST r/m32,reg32 ; o32 85 /r [386]
|
|
|
|
|
|
|
|
\c TEST r/m8,imm8 ; F6 /7 ib [8086]
|
|
|
|
\c TEST r/m16,imm16 ; o16 F7 /7 iw [8086]
|
|
|
|
\c TEST r/m32,imm32 ; o32 F7 /7 id [386]
|
|
|
|
|
|
|
|
\c TEST AL,imm8 ; A8 ib [8086]
|
|
|
|
\c TEST AX,imm16 ; o16 A9 iw [8086]
|
|
|
|
\c TEST EAX,imm32 ; o32 A9 id [386]
|
|
|
|
|
|
|
|
\c{TEST} performs a `mental' bitwise AND of its two operands, and
|
|
|
|
affects the flags as if the operation had taken place, but does not
|
|
|
|
store the result of the operation anywhere.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
\H{insUCOMISS} \i\c{UCOMISS}: Unordered Scalar Single-FP compare
|
|
|
|
and set EFLAGS
|
|
|
|
|
|
|
|
\c UCOMISS xmmreg,memory ; 0F,2E,/r [KATMAI,SSE]
|
|
|
|
\c UCOMISS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{UCOMISS} The UCOMISS instructions compare the two lowest scalar
|
|
|
|
SP FP numbers, and set the ZF,PF,CF bits in the EFLAGS register
|
|
|
|
as described above. In addition, the OF, SF, and AF bits in the
|
|
|
|
EFLAGS register are zeroed out. The unordered predicate is
|
|
|
|
returned if either source operand is a NaN (qNaN or sNaN).
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insUMOV} \i\c{UMOV}: User Move Data
|
|
|
|
|
|
|
|
\c UMOV r/m8,reg8 ; 0F 10 /r [386,UNDOC]
|
|
|
|
\c UMOV r/m16,reg16 ; o16 0F 11 /r [386,UNDOC]
|
|
|
|
\c UMOV r/m32,reg32 ; o32 0F 11 /r [386,UNDOC]
|
|
|
|
|
|
|
|
\c UMOV reg8,r/m8 ; 0F 12 /r [386,UNDOC]
|
|
|
|
\c UMOV reg16,r/m16 ; o16 0F 13 /r [386,UNDOC]
|
|
|
|
\c UMOV reg32,r/m32 ; o32 0F 13 /r [386,UNDOC]
|
|
|
|
|
|
|
|
This undocumented instruction is used by in-circuit emulators to
|
|
|
|
access user memory (as opposed to host memory). It is used just like
|
|
|
|
an ordinary memory/register or register/register \c{MOV}
|
|
|
|
instruction, but accesses user space.
|
|
|
|
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
\H{insUNPCKHPS} \i\c{UNPCKHPS}: Unpack High Packed Single-FP Data
|
|
|
|
|
|
|
|
\c UNPCKHPS xmmreg,memory ; 0F,15,/r [KATMAI,SSE]
|
|
|
|
\c UNPCKHPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{UNPCKHPS} The UNPCKHPS instruction performs an interleaved
|
|
|
|
unpack of the high-order data elements of XMM1 and XMM2/Mem.
|
|
|
|
It ignores the lower half of the sources.
|
|
|
|
|
|
|
|
|
|
|
|
\H{insUNPCKLPS} \i\c{UNPCKLPS}: Unpack Low Packed Single-FP Data
|
|
|
|
|
|
|
|
\c UNPCKLPS xmmreg,memory ; 0F,14,/r [KATMAI,SSE]
|
|
|
|
\c UNPCKLPS xmmreg,xmmreg ; ?? [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{UNPCKLPS} The UNPCKLPS instruction performs an interleaved
|
|
|
|
unpack of the low-order data elements of XMM1 and XMM2/Mem.
|
|
|
|
It ignores the upper half of the sources.
|
|
|
|
|
|
|
|
|
2002-05-01 04:52:49 +08:00
|
|
|
\H{insVERR} \i\c{VERR}, \i\c{VERW}: Verify Segment Readability/Writability
|
|
|
|
|
|
|
|
\c VERR r/m16 ; 0F 00 /4 [286,PRIV]
|
|
|
|
|
|
|
|
\c VERW r/m16 ; 0F 00 /5 [286,PRIV]
|
|
|
|
|
|
|
|
\c{VERR} sets the zero flag if the segment specified by the selector
|
|
|
|
in its operand can be read from at the current privilege level.
|
|
|
|
\c{VERW} sets the zero flag if the segment can be written.
|
|
|
|
|
|
|
|
\H{insWAIT} \i\c{WAIT}: Wait for Floating-Point Processor
|
|
|
|
|
|
|
|
\c WAIT ; 9B [8086]
|
|
|
|
|
|
|
|
\c{WAIT}, on 8086 systems with a separate 8087 FPU, waits for the
|
|
|
|
FPU to have finished any operation it is engaged in before
|
|
|
|
continuing main processor operations, so that (for example) an FPU
|
|
|
|
store to main memory can be guaranteed to have completed before the
|
|
|
|
CPU tries to read the result back out.
|
|
|
|
|
|
|
|
On higher processors, \c{WAIT} is unnecessary for this purpose, and
|
|
|
|
it has the alternative purpose of ensuring that any pending unmasked
|
|
|
|
FPU exceptions have happened before execution continues.
|
|
|
|
|
|
|
|
\H{insWBINVD} \i\c{WBINVD}: Write Back and Invalidate Cache
|
|
|
|
|
|
|
|
\c WBINVD ; 0F 09 [486]
|
|
|
|
|
|
|
|
\c{WBINVD} invalidates and empties the processor's internal caches,
|
|
|
|
and causes the processor to instruct external caches to do the same.
|
|
|
|
It writes the contents of the caches back to memory first, so no
|
|
|
|
data is lost. To flush the caches quickly without bothering to write
|
|
|
|
the data back first, use \c{INVD} (\k{insINVD}).
|
|
|
|
|
|
|
|
\H{insWRMSR} \i\c{WRMSR}: Write Model-Specific Registers
|
|
|
|
|
|
|
|
\c WRMSR ; 0F 30 [PENT]
|
|
|
|
|
|
|
|
\c{WRMSR} writes the value in \c{EDX:EAX} to the processor
|
|
|
|
Model-Specific Register (MSR) whose index is stored in \c{ECX}. See
|
|
|
|
also \c{RDMSR} (\k{insRDMSR}).
|
|
|
|
|
|
|
|
\H{insXADD} \i\c{XADD}: Exchange and Add
|
|
|
|
|
|
|
|
\c XADD r/m8,reg8 ; 0F C0 /r [486]
|
|
|
|
\c XADD r/m16,reg16 ; o16 0F C1 /r [486]
|
|
|
|
\c XADD r/m32,reg32 ; o32 0F C1 /r [486]
|
|
|
|
|
|
|
|
\c{XADD} exchanges the values in its two operands, and then adds
|
|
|
|
them together and writes the result into the destination (first)
|
|
|
|
operand. This instruction can be used with a \c{LOCK} prefix for
|
|
|
|
multi-processor synchronisation purposes.
|
|
|
|
|
|
|
|
\H{insXBTS} \i\c{XBTS}: Extract Bit String
|
|
|
|
|
|
|
|
\c XBTS reg16,r/m16 ; o16 0F A6 /r [386,UNDOC]
|
|
|
|
\c XBTS reg32,r/m32 ; o32 0F A6 /r [386,UNDOC]
|
|
|
|
|
|
|
|
No clear documentation seems to be available for this instruction:
|
|
|
|
the best I've been able to find reads `Takes a string of bits from
|
|
|
|
the first operand and puts them in the second operand'. It is
|
|
|
|
present only in early 386 processors, and conflicts with the opcodes
|
|
|
|
for \c{CMPXCHG486}. NASM supports it only for completeness. Its
|
|
|
|
counterpart is \c{IBTS} (see \k{insIBTS}).
|
|
|
|
|
|
|
|
\H{insXCHG} \i\c{XCHG}: Exchange
|
|
|
|
|
|
|
|
\c XCHG reg8,r/m8 ; 86 /r [8086]
|
|
|
|
\c XCHG reg16,r/m16 ; o16 87 /r [8086]
|
|
|
|
\c XCHG reg32,r/m32 ; o32 87 /r [386]
|
|
|
|
|
|
|
|
\c XCHG r/m8,reg8 ; 86 /r [8086]
|
|
|
|
\c XCHG r/m16,reg16 ; o16 87 /r [8086]
|
|
|
|
\c XCHG r/m32,reg32 ; o32 87 /r [386]
|
|
|
|
|
|
|
|
\c XCHG AX,reg16 ; o16 90+r [8086]
|
|
|
|
\c XCHG EAX,reg32 ; o32 90+r [386]
|
|
|
|
\c XCHG reg16,AX ; o16 90+r [8086]
|
|
|
|
\c XCHG reg32,EAX ; o32 90+r [386]
|
|
|
|
|
|
|
|
\c{XCHG} exchanges the values in its two operands. It can be used
|
|
|
|
with a \c{LOCK} prefix for purposes of multi-processor
|
|
|
|
synchronisation.
|
|
|
|
|
|
|
|
\c{XCHG AX,AX} or \c{XCHG EAX,EAX} (depending on the \c{BITS}
|
|
|
|
setting) generates the opcode \c{90h}, and so is a synonym for
|
|
|
|
\c{NOP} (\k{insNOP}).
|
|
|
|
|
|
|
|
\H{insXLATB} \i\c{XLATB}: Translate Byte in Lookup Table
|
|
|
|
|
|
|
|
\c XLATB ; D7 [8086]
|
|
|
|
|
|
|
|
\c{XLATB} adds the value in \c{AL}, treated as an unsigned byte, to
|
|
|
|
\c{BX} or \c{EBX}, and loads the byte from the resulting address (in
|
|
|
|
the segment specified by \c{DS}) back into \c{AL}.
|
|
|
|
|
|
|
|
The base register used is \c{BX} if the address size is 16 bits, and
|
|
|
|
\c{EBX} if it is 32 bits. If you need to use an address size not
|
|
|
|
equal to the current \c{BITS} setting, you can use an explicit
|
|
|
|
\i\c{a16} or \i\c{a32} prefix.
|
|
|
|
|
|
|
|
The segment register used to load from \c{[BX+AL]} or \c{[EBX+AL]}
|
|
|
|
can be overridden by using a segment register name as a prefix (for
|
|
|
|
example, \c{es xlatb}).
|
|
|
|
|
|
|
|
\H{insXOR} \i\c{XOR}: Bitwise Exclusive OR
|
|
|
|
|
|
|
|
\c XOR r/m8,reg8 ; 30 /r [8086]
|
|
|
|
\c XOR r/m16,reg16 ; o16 31 /r [8086]
|
|
|
|
\c XOR r/m32,reg32 ; o32 31 /r [386]
|
|
|
|
|
|
|
|
\c XOR reg8,r/m8 ; 32 /r [8086]
|
|
|
|
\c XOR reg16,r/m16 ; o16 33 /r [8086]
|
|
|
|
\c XOR reg32,r/m32 ; o32 33 /r [386]
|
|
|
|
|
|
|
|
\c XOR r/m8,imm8 ; 80 /6 ib [8086]
|
|
|
|
\c XOR r/m16,imm16 ; o16 81 /6 iw [8086]
|
|
|
|
\c XOR r/m32,imm32 ; o32 81 /6 id [386]
|
|
|
|
|
|
|
|
\c XOR r/m16,imm8 ; o16 83 /6 ib [8086]
|
|
|
|
\c XOR r/m32,imm8 ; o32 83 /6 ib [386]
|
|
|
|
|
|
|
|
\c XOR AL,imm8 ; 34 ib [8086]
|
|
|
|
\c XOR AX,imm16 ; o16 35 iw [8086]
|
|
|
|
\c XOR EAX,imm32 ; o32 35 id [386]
|
|
|
|
|
|
|
|
\c{XOR} performs a bitwise XOR operation between its two operands
|
|
|
|
(i.e. each bit of the result is 1 if and only if exactly one of the
|
|
|
|
corresponding bits of the two inputs was 1), and stores the result
|
|
|
|
in the destination (first) operand.
|
|
|
|
|
|
|
|
In the forms with an 8-bit immediate second operand and a longer
|
|
|
|
first operand, the second operand is considered to be signed, and is
|
|
|
|
sign-extended to the length of the first operand. In these cases,
|
|
|
|
the \c{BYTE} qualifier is necessary to force NASM to generate this
|
|
|
|
form of the instruction.
|
|
|
|
|
|
|
|
The MMX instruction \c{PXOR} (see \k{insPXOR}) performs the same
|
|
|
|
operation on the 64-bit MMX registers.
|
2002-05-01 05:00:33 +08:00
|
|
|
|
|
|
|
|
|
|
|
\H{insXORPS} \i\c{XORPS}: Bit-wise Logical Xor for Single-FP Data
|
|
|
|
|
|
|
|
\c XORPS xmmreg,memory ; 0F,57,/r [KATMAI,SSE]
|
|
|
|
\c XORPS xmmreg,xmmreg ; 0F,57,/r [KATMAI,SSE]
|
|
|
|
|
|
|
|
\c{XORPS} returns a bit-wise logical XOR
|
|
|
|
between XMM1 and XMM2/Mem.
|