Merge branch 'master' of ssh://repo.or.cz/srv/git/nasm

2025-04-12 18:40:23 +08:00 · 2018-02-22 14:53:46 -08:00 · 2018-02-22 14:53:46 -08:00 · 281f5bd92c
commit 281f5bd92c
parent 6686fc627e 4dbf3a96a4
16 changed files with 528 additions and 288 deletions
--- a/aclocal.m4
+++ b/aclocal.m4
@ -172,3 +172,30 @@ AC_DEFUN(_PA_ADD_HEADER,

 AC_DEFUN(PA_ADD_HEADERS,
 [m4_map_args_w([$1],[_PA_ADD_HEADER(],[)])])
+
+dnl --------------------------------------------------------------------------
+dnl PA_CHECK_BAD_STDC_INLINE
+dnl
+dnl Some versions of gcc seem to apply -Wmissing-prototypes to C99
+dnl inline functions, which means we need to use GNU inline syntax
+dnl --------------------------------------------------------------------------
+AC_DEFUN(PA_CHECK_BAD_STDC_INLINE,
+[AC_MSG_CHECKING([if $CC supports C99 external inlines])
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
+AC_INCLUDES_DEFAULT
+
+/* Don't mistake GNU inlines for c99 */
+#ifdef __GNUC_GNU_INLINE__
+# error "Using gnu inline standard"
+#endif
+
+inline int foo(int x)
+{
+	return x+1;
+}
+ ])],
+ [AC_MSG_RESULT([yes])
+  AC_DEFINE(HAVE_STDC_INLINE, 1,
+    [Define to 1 if your compiler supports C99 extern inline])],
+ [AC_MSG_RESULT([no])
+  PA_ADD_CFLAGS([-fgnu89-inline])])])
--- a/asm/assemble.c
+++ b/asm/assemble.c
@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
 *
- *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
@ -1379,7 +1379,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
            length++;
        } else if ((ins->rex & REX_L) &&
                   !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
-                   iflag_ffs(&cpu) >= IF_X86_64) {
+                   iflag_cpu_level_ok(&cpu, IF_X86_64)) {
            /* LOCK-as-REX.R */
            assert_no_prefix(ins, PPS_LOCK);
            lockcheck = false;  /* Already errored, no need for warning */
--- a/asm/directiv.c
+++ b/asm/directiv.c
@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
 *
- *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
@ -56,78 +56,90 @@
 #include "labels.h"
 #include "iflag.h"

-static iflag_t get_cpu(char *value)
+struct cpunames {
+    const char *name;
+    unsigned int level;
+    /* Eventually a table of features */
+};
+
+static iflag_t get_cpu(const char *value)
 {
    iflag_t r;
+    const struct cpunames *cpu;
+    static const struct cpunames cpunames[] = {
+        { "8086", IF_8086 },
+        { "186",  IF_186  },
+        { "286",  IF_286  },
+        { "386",  IF_386  },
+        { "486",  IF_486  },
+        { "586",  IF_PENT },
+        { "pentium", IF_PENT },
+        { "pentiummmx", IF_PENT },
+        { "686",  IF_P6 },
+        { "p6",   IF_P6 },
+        { "ppro", IF_P6 },
+        { "pentiumpro", IF_P6 },
+        { "p2", IF_P6 },        /* +MMX */
+        { "pentiumii", IF_P6 },
+        { "p3", IF_KATMAI },
+        { "katmai", IF_KATMAI },
+        { "p4", IF_WILLAMETTE },
+        { "willamette", IF_WILLAMETTE },
+        { "prescott", IF_PRESCOTT },
+        { "x64", IF_X86_64 },
+        { "x86-64", IF_X86_64 },
+        { "ia64", IF_IA64 },
+        { "ia-64", IF_IA64 },
+        { "itanium", IF_IA64 },
+        { "itanic", IF_IA64 },
+        { "merced", IF_IA64 },
+        { "any", IF_PLEVEL },
+        { "default", IF_PLEVEL },
+        { "all", IF_PLEVEL },
+        { NULL, IF_PLEVEL }     /* Error and final default entry */
+    };

-    iflag_clear_all(&r);
-
-    if (!strcmp(value, "8086"))
-        iflag_set(&r, IF_8086);
-    else if (!strcmp(value, "186"))
-        iflag_set(&r, IF_186);
-    else if (!strcmp(value, "286"))
-        iflag_set(&r, IF_286);
-    else if (!strcmp(value, "386"))
-        iflag_set(&r, IF_386);
-    else if (!strcmp(value, "486"))
-        iflag_set(&r, IF_486);
-    else if (!strcmp(value, "586") ||
-             !nasm_stricmp(value, "pentium"))
-        iflag_set(&r, IF_PENT);
-    else if (!strcmp(value, "686")              ||
-             !nasm_stricmp(value, "ppro")       ||
-             !nasm_stricmp(value, "pentiumpro") ||
-             !nasm_stricmp(value, "p2"))
-        iflag_set(&r, IF_P6);
-    else if (!nasm_stricmp(value, "p3") ||
-             !nasm_stricmp(value, "katmai"))
-        iflag_set(&r, IF_KATMAI);
-    else if (!nasm_stricmp(value, "p4") ||   /* is this right? -- jrc */
-             !nasm_stricmp(value, "willamette"))
-        iflag_set(&r, IF_WILLAMETTE);
-    else if (!nasm_stricmp(value, "prescott"))
-        iflag_set(&r, IF_PRESCOTT);
-    else if (!nasm_stricmp(value, "x64") ||
-             !nasm_stricmp(value, "x86-64"))
-        iflag_set(&r, IF_X86_64);
-    else if (!nasm_stricmp(value, "ia64")   ||
-             !nasm_stricmp(value, "ia-64")  ||
-             !nasm_stricmp(value, "itanium")||
-             !nasm_stricmp(value, "itanic") ||
-             !nasm_stricmp(value, "merced"))
-        iflag_set(&r, IF_IA64);
-    else {
-        iflag_set(&r, IF_PLEVEL);
-        nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
-                   "unknown 'cpu' type");
+    for (cpu = cpunames; cpu->name; cpu++) {
+        if (!strcmp(value, cpu->name))
+            break;
    }
+
+    if (!cpu->name) {
+        nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
+                   "unknown 'cpu' type '%s'", value);
+    }
+
+    iflag_set_cpu(&r, cpu->level);
    return r;
 }

-static int get_bits(char *value)
+static int get_bits(const char *value)
 {
-    int i;
+    int i = atoi(value);

-    if ((i = atoi(value)) == 16)
-        return i;               /* set for a 16-bit segment */
-    else if (i == 32) {
-        if (iflag_ffs(&cpu) < IF_386) {
+    switch (i) {
+    case 16:
+        break;                  /* Always safe */
+    case 32:
+        if (!iflag_cpu_level_ok(&cpu, IF_386)) {
            nasm_error(ERR_NONFATAL,
-                         "cannot specify 32-bit segment on processor below a 386");
+                       "cannot specify 32-bit segment on processor below a 386");
            i = 16;
        }
-    } else if (i == 64) {
-        if (iflag_ffs(&cpu) < IF_X86_64) {
+        break;
+    case 64:
+        if (!iflag_cpu_level_ok(&cpu, IF_X86_64)) {
            nasm_error(ERR_NONFATAL,
-                         "cannot specify 64-bit segment on processor below an x86-64");
+                       "cannot specify 64-bit segment on processor below an x86-64");
            i = 16;
        }
-    } else {
+        break;
+    default:
        nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
-                     "`%s' is not a valid segment size; must be 16, 32 or 64",
-                     value);
+                   "`%s' is not a valid segment size; must be 16, 32 or 64",
+                   value);
        i = 16;
+        break;
    }
    return i;
 }
--- a/asm/nasm.c
+++ b/asm/nasm.c
@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
 *
- *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
@ -86,6 +86,11 @@ static void usage(void);
 static bool using_debug_info, opt_verbose_info;
 static const char *debug_format;

+#ifndef ABORT_ON_PANIC
+# define ABORT_ON_PANIC 0
+#endif
+static bool abort_on_panic = ABORT_ON_PANIC;
+
 bool tasm_compatible_mode = false;
 int pass0, passn;
 static int pass1, pass2;	/* XXX: Get rid of these, they are redundant */
@ -323,8 +328,8 @@ int main(int argc, char **argv)

    timestamp();

-    iflag_set(&cpu, IF_PLEVEL);
-    iflag_set(&cmd_cpu, IF_PLEVEL);
+    iflag_set_default_cpu(&cpu);
+    iflag_set_default_cpu(&cmd_cpu);

    pass0 = 0;
    want_usage = terminate_after_phase = false;
@ -690,19 +695,25 @@ static char *quote_for_wmake(const char *str)
    return os;
 }

-struct textargs {
-    const char *label;
-    int value;
-};
-
 enum text_options {
+    OPT_BOGUS,
+    OPT_VERSION,
+    OPT_ABORT_ON_PANIC,
    OPT_PREFIX,
    OPT_POSTFIX
 };
+struct textargs {
+    const char *label;
+    enum text_options opt;
+    bool need_arg;
+};
 static const struct textargs textopts[] = {
-    {"prefix", OPT_PREFIX},
-    {"postfix", OPT_POSTFIX},
-    {NULL, 0}
+    {"v", OPT_VERSION, false},
+    {"version", OPT_VERSION, false},
+    {"abort-on-panic", OPT_ABORT_ON_PANIC, false},
+    {"prefix", OPT_PREFIX, true},
+    {"postfix", OPT_POSTFIX, true},
+    {NULL, OPT_BOGUS, false}
 };

 static void show_version(void)
@ -1022,61 +1033,49 @@ static bool process_arg(char *p, char *q, int pass)

        case '-':
            {
-                int s;
+                const struct textargs *tx;

                if (p[2] == 0) {        /* -- => stop processing options */
-                    stopoptions = 1;
+                    stopoptions = true;
                    break;
                }

-                if (!nasm_stricmp(p, "--v"))
-                    show_version();
-
-                if (!nasm_stricmp(p, "--version"))
-                    show_version();
-
-                for (s = 0; textopts[s].label; s++) {
-                    if (!nasm_stricmp(p + 2, textopts[s].label)) {
+                for (tx = textopts; tx->label; tx++) {
+                    if (!nasm_stricmp(p + 2, tx->label))
                        break;
-                    }
                }

-                switch (s) {
-                case OPT_PREFIX:
-                case OPT_POSTFIX:
-                    {
-                        if (!q) {
-                            nasm_error(ERR_NONFATAL | ERR_NOFILE |
-                                         ERR_USAGE,
-                                         "option `--%s' requires an argument",
-                                         p + 2);
-                            break;
-                        } else {
-                            advance = 1, param = q;
-                        }
-
-                        switch (s) {
-                        case OPT_PREFIX:
-                            if (pass == 2)
-                                strlcpy(lprefix, param, PREFIX_MAX);
-                            break;
-                        case OPT_POSTFIX:
-                            if (pass == 2)
-                                strlcpy(lpostfix, param, POSTFIX_MAX);
-                            break;
-                        default:
-                            panic();
-                            break;
-                        }
-                        break;
-                    }
-
-                default:
-                    {
+                if (tx->need_arg) {
+                    if (!q) {
                        nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
-                                     "unrecognised option `--%s'", p + 2);
+                                   "option `--%s' requires an argument",
+                                   p + 2);
                        break;
                    }
+                    advance = true;
+                }
+
+                switch (tx->opt) {
+                case OPT_VERSION:
+                    show_version();
+                    break;
+                case OPT_ABORT_ON_PANIC:
+                    abort_on_panic = true;
+                    break;
+                case OPT_PREFIX:
+                    if (pass == 2)
+                        strlcpy(lprefix, q, PREFIX_MAX);
+                    break;
+                case OPT_POSTFIX:
+                    if (pass == 2)
+                        strlcpy(lpostfix, q, POSTFIX_MAX);
+                    break;
+                case OPT_BOGUS:
+                    nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                               "unrecognized option `--%s'", p + 2);
+                    break;
+                default:
+                    panic();
                }
                break;
            }
@ -1289,8 +1288,21 @@ static void assemble_file(const char *fname, StrList **depend_ptr)
    uint64_t prev_offset_changed;
    unsigned int stall_count = 0; /* Make sure we make forward progress... */

-    if (cmd_sb == 32 && iflag_ffs(&cmd_cpu) < IF_386)
-	nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+    switch (cmd_sb) {
+    case 16:
+        break;
+    case 32:
+        if (!iflag_cpu_level_ok(&cmd_cpu, IF_386))
+            nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+        break;
+    case 64:
+        if (!iflag_cpu_level_ok(&cmd_cpu, IF_X86_64))
+            nasm_fatal(0, "command line: 64-bit segment size requires a higher cpu");
+        break;
+    default:
+        panic();
+        break;
+    }

    pass_max = prev_offset_changed = (INT_MAX >> 1) + 2; /* Almost unlimited */
    for (passn = 1; pass0 <= 2; passn++) {
@ -1812,9 +1824,10 @@ static void nasm_verror_common(int severity, const char *fmt, va_list args)
        break;                  /* placate silly compilers */
    case ERR_PANIC:
        fflush(NULL);
-#ifdef ABORT_ON_PANIC
-        abort();                /* halt, catch fire, dump core/stop debugger */
-#endif
+
+        if (abort_on_panic)
+            abort();		/* halt, catch fire, dump core/stop debugger */
+
        if (ofile) {
            fclose(ofile);
            remove(outname);
--- a/configure.ac
+++ b/configure.ac
@ -287,6 +287,13 @@ PA_ARG_ENABLED([werror],
  PA_ADD_CFLAGS([-Werror=vla])]
 )

+dnl
+dnl On some versions of gcc, -Werror=missing-prototypes causes problems
+dnl with C99-style external inlines.  Test this *after* adding the -Werror
+dnl options.
+dnl
+PA_CHECK_BAD_STDC_INLINE
+
 dnl
 dnl support ccache
 dnl
--- a/doc/changes.src
+++ b/doc/changes.src
@ -7,10 +7,18 @@
 The NASM 2 series supports x86-64, and is the production version of NASM
 since 2007.

+\S{cl-2.13.04} Version 2.13.04
+
+\b Added \c{-W}, \c{-D}, and \c{-Q} suffix aliases for \c{RET}
+   instructions so the operand sizes of these instructions can be
+   encoded without using \c{o16}, \c{o32} or \c{o64}.
+
 \S{cl-2.13.03} Version 2.13.03

 \b Added AVX and AVX512 \c{VAES*} and \c{VPCLMULQDQ} instructions.

+\b Fixed missing dwarf record in x32 ELF output format.
+
 \S{cl-2.13.02} Version 2.13.02

 \b Fix false positive in testing of numeric overflows.
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@ -1,6 +1,6 @@
 \# --------------------------------------------------------------------------
 \#
-\#   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+\#   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 \#   See the file AUTHORS included with the NASM distribution for
 \#   the specific copyright holders.
 \#
@ -7704,10 +7704,15 @@ platforms pass arguments in registers rather than on the stack.
 Furthermore, 64-bit platforms use SSE2 by default for floating point.
 Please see the ABI documentation for your platform.

-64-bit platforms differ in the sizes of the fundamental datatypes, not
-just from 32-bit platforms but from each other.  If a specific size
-data type is desired, it is probably best to use the types defined in
-the Standard C header \c{<inttypes.h>}.
+64-bit platforms differ in the sizes of the C/C++ fundamental
+datatypes, not just from 32-bit platforms but from each other.  If a
+specific size data type is desired, it is probably best to use the
+types defined in the standard C header \c{<inttypes.h>}.
+
+All known 64-bit platforms except some embedded platforms require that
+the stack is 16-byte aligned at the entry to a function.  In order to
+enforce that, the stack pointer (\c{RSP}) needs to be aligned on an
+\e{odd} multiple of 8 bytes before the \c{CALL} instruction.

 In 64-bit mode, the default instruction size is still 32 bits.  When
 loading a value into a 32-bit register (but not an 8- or 16-bit
@ -7755,12 +7760,30 @@ immediate as \c{DWORD}:

 The length of these instructions are 10, 5 and 7 bytes, respectively.

+If optimization is enabled and NASM can determine at assembly time
+that a shorter instruction will suffice, the shorter instruction will
+be emitted unless of course \c{STRICT QWORD} or \c{STRICT DWORD} is
+specified (see \k{strict}):
+
+\c      mov rax,1               ; Assembles as "mov eax,1" (5 bytes)
+\c      mov rax,strict qword 1  ; Full 10-byte instruction
+\c      mov rax,strict dword 1  ; 7-byte instruction
+\c      mov rax,symbol          ; 10 bytes, not known at assembly time
+\c      lea rax,[rel symbol]    ; 7 bytes, usually preferred by the ABI
+
+Note that \c{lea rax,[rel symbol]} is position-independent, whereas
+\c{mov rax,symbol} is not.  Most ABIs prefer or even require
+position-independent code in 64-bit mode.  However, the \c{MOV}
+instruction is able to reference a symbol anywhere in the 64-bit
+address space, whereas \c{LEA} is only able to access a symbol within
+2 GB of the instruction itself (see below).
+
 The only instructions which take a full \I{64-bit displacement}64-bit
 \e{displacement} is loading or storing, using \c{MOV}, \c{AL}, \c{AX},
 \c{EAX} or \c{RAX} (but no other registers) to an absolute 64-bit address.
 Since this is a relatively rarely used instruction (64-bit code generally uses
 relative addressing), the programmer has to explicitly declare the
-displacement size as \c{QWORD}:
+displacement size as \c{ABS QWORD}:

 \c      default abs
 \c
@ -7797,9 +7820,11 @@ calls, and thus are available for use by the function without saving.
 Integer return values are passed in \c{RAX} and \c{RDX}, in that order.

 Floating point is done using SSE registers, except for \c{long
-double}.  Floating-point arguments are passed in \c{XMM0} to \c{XMM7};
-return is \c{XMM0} and \c{XMM1}.  \c{long double} are passed on the
-stack, and returned in \c{ST0} and \c{ST1}.
+double}, which is 80 bits (\c{TWORD}) on most platforms (Android is
+one exception; there \c{long double} is 64 bits and treated the same
+as \c{double}.)  Floating-point arguments are passed in \c{XMM0} to
+\c{XMM7}; return is \c{XMM0} and \c{XMM1}.  \c{long double} are passed
+on the stack, and returned in \c{ST0} and \c{ST1}.

 All SSE and x87 registers are destroyed by function calls.

--- a/include/compiler.h
+++ b/include/compiler.h
@ -214,15 +214,20 @@ size_t strnlen(const char *s, size_t maxlen);
 /*
 * Hack to support external-linkage inline functions
 */
-#ifdef __GNUC__
-# ifdef __GNUC_STDC_INLINE__
-#  define HAVE_STDC_INLINE
-# else
-#  define HAVE_GNU_INLINE
-# endif
-#elif defined(__STDC_VERSION__)
-# if __STDC_VERSION__ >= 199901L
-#  define HAVE_STDC_INLINE
+#ifndef HAVE_STDC_INLINE
+# ifdef __GNUC__
+#  ifdef __GNUC_STDC_INLINE__
+#   define HAVE_STDC_INLINE
+#  else
+#   define HAVE_GNU_INLINE
+#  endif
+# elif defined(__GNUC_GNU_INLINE__)
+/* Some other compiler implementing only GNU inline semantics? */
+#   define HAVE_GNU_INLINE
+# elif defined(__STDC_VERSION__)
+#  if __STDC_VERSION__ >= 199901L
+#   define HAVE_STDC_INLINE
+#  endif
 # endif
 #endif

@ -230,6 +235,7 @@ size_t strnlen(const char *s, size_t maxlen);
 # define extern_inline inline
 #elif defined(HAVE_GNU_INLINE)
 # define extern_inline extern inline
+# define inline_prototypes
 #else
 # define inline_prototypes
 #endif
--- a/include/iflag.h
+++ b/include/iflag.h
@ -1,30 +1,28 @@
 #ifndef NASM_IFLAG_H
 #define NASM_IFLAG_H

-#include <string.h>
-
 #include "compiler.h"
 #include "ilog2.h"
+
+#include <string.h>
+
 #include "iflaggen.h"

 #define IF_GENBIT(bit)          (UINT32_C(1) << (bit))

-static inline unsigned int iflag_test(const iflag_t *f, unsigned int bit)
+static inline bool iflag_test(const iflag_t *f, unsigned int bit)
 {
-    unsigned int index = bit / 32;
-    return f->field[index] & (UINT32_C(1) << (bit - (index * 32)));
+    return !!(f->field[bit >> 5] & IF_GENBIT(bit & 31));
 }

 static inline void iflag_set(iflag_t *f, unsigned int bit)
 {
-    unsigned int index = bit / 32;
-    f->field[index] |= (UINT32_C(1) << (bit - (index * 32)));
+    f->field[bit >> 5] |= IF_GENBIT(bit & 31);
 }

 static inline void iflag_clear(iflag_t *f, unsigned int bit)
 {
-    unsigned int index = bit / 32;
-    f->field[index] &= ~(UINT32_C(1) << (bit - (index * 32)));
+    f->field[bit >> 5] &= ~IF_GENBIT(bit & 31);
 }

 static inline void iflag_clear_all(iflag_t *f)
@ -34,39 +32,21 @@ static inline void iflag_clear_all(iflag_t *f)

 static inline void iflag_set_all(iflag_t *f)
 {
-     memset(f, 0xff, sizeof(*f));
+     memset(f, ~0, sizeof(*f));
 }

+#define iflag_for_each_field(v) for ((v) = 0; (v) < IF_FIELD_COUNT; (v)++)
+
 static inline int iflag_cmp(const iflag_t *a, const iflag_t *b)
 {
    int i;

-    for (i = sizeof(a->field) / sizeof(a->field[0]) - 1; i >= 0; i--) {
+    /* This is intentionally a reverse loop! */
+    for (i = IF_FIELD_COUNT-1; i >= 0; i--) {
        if (a->field[i] == b->field[i])
            continue;

-        return (a->field[i] > b->field[i]) ? 1 : -1;
-    }
-
-    return 0;
-}
-
-static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
-{
-    if (a->field[3] < b->field[3])
-        return -1;
-    else if (a->field[3] > b->field[3])
-        return 1;
-    return 0;
-}
-
-static inline unsigned int iflag_ffs(const iflag_t *a)
-{
-    unsigned int i;
-
-    for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++) {
-        if (a->field[i])
-            return ilog2_32(a->field[i]) + (i * 32);
+        return (int)(a->field[i] - b->field[i]);
    }

    return 0;
@ -78,7 +58,7 @@ static inline unsigned int iflag_ffs(const iflag_t *a)
        unsigned int i;                                                 \
        iflag_t res;                                                    \
                                                                        \
-        for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++)    \
+        iflag_for_each_field(i)                                         \
            res.field[i] = a->field[i] op b->field[i];                  \
                                                                        \
        return res;                                                     \
@ -86,13 +66,6 @@ static inline unsigned int iflag_ffs(const iflag_t *a)

 IF_GEN_HELPER(xor, ^)

-
-/* Use this helper to test instruction template flags */
-#define itemp_has(itemp, bit)   iflag_test(&insns_flags[(itemp)->iflag_idx], bit)
-
-
-/* Maximum processor level at moment */
-#define IF_PLEVEL               IF_IA64
 /* Some helpers which are to work with predefined masks */
 #define IF_SMASK        \
    (IF_GENBIT(IF_SB)  |\
@ -118,23 +91,67 @@ IF_GEN_HELPER(xor, ^)
 #define itemp_arg(itemp)        _itemp_arg((itemp)->iflag_idx)
 #define itemp_armask(itemp)     _itemp_armask((itemp)->iflag_idx)

+/*
+ * IF_8086 is the first CPU level flag and IF_PLEVEL the last
+ */
+#if IF_8086 & 31
+#error "IF_8086 must be on a uint32_t boundary"
+#endif
+#define IF_PLEVEL               IF_IA64
+#define IF_CPU_FIELD	       (IF_8086 >> 5)
+#define IF_CPU_LEVEL_MASK      ((IF_GENBIT(IF_PLEVEL & 31) << 1) - 1)
+
+/*
+ * IF_PRIV is the first instruction filtering flag
+ */
+#if IF_PRIV & 31
+#error "IF_PRIV must be on a uint32_t boundary"
+#endif
+#define IF_FEATURE_FIELD	(IF_PRIV >> 5)
+
+static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
+{
+    return (int)(a->field[IF_CPU_FIELD] - b->field[IF_CPU_FIELD]);
+}
+
+static inline uint32_t _iflag_cpu_level(const iflag_t *a)
+{
+    return a->field[IF_CPU_FIELD] & IF_CPU_LEVEL_MASK;
+}
+
 static inline int iflag_cmp_cpu_level(const iflag_t *a, const iflag_t *b)
 {
-    iflag_t v1 = *a;
-    iflag_t v2 = *b;
+    uint32_t aa = _iflag_cpu_level(a);
+    uint32_t bb = _iflag_cpu_level(b);

-    iflag_clear(&v1, IF_CYRIX);
-    iflag_clear(&v1, IF_AMD);
+    return (int)(aa - bb);
+}

-    iflag_clear(&v2, IF_CYRIX);
-    iflag_clear(&v2, IF_AMD);
+/* Returns true if the CPU level is at least a certain value */
+static inline bool iflag_cpu_level_ok(const iflag_t *a, unsigned int bit)
+{
+    return _iflag_cpu_level(a) >= IF_GENBIT(bit & 31);
+}

-    if (v1.field[3] < v2.field[3])
-        return -1;
-    else if (v1.field[3] > v2.field[3])
-        return 1;
+static inline void iflag_set_all_features(iflag_t *a)
+{
+    size_t i;

-    return 0;
+    for (i = IF_FEATURE_FIELD; i < IF_CPU_FIELD; i++)
+        a->field[i] = ~UINT32_C(0);
+}
+
+static inline void iflag_set_cpu(iflag_t *a, unsigned int cpu)
+{
+    a->field[0] = 0;     /* Not applicable to the CPU type */
+    iflag_set_all_features(a);    /* All feature masking bits set for now */
+    a->field[IF_CPU_FIELD] &= ~IF_CPU_LEVEL_MASK;
+    iflag_set(a, cpu);
+}
+
+static inline void iflag_set_default_cpu(iflag_t *a)
+{
+    iflag_set_cpu(a, IF_PLEVEL);
 }

 static inline iflag_t _iflag_pfmask(const iflag_t *a)
--- a/include/insns.h
+++ b/include/insns.h
@ -23,6 +23,12 @@ struct itemplate {
    uint32_t        iflag_idx;          /* some flags referenced by index */
 };

+/* Use this helper to test instruction template flags */
+static inline bool itemp_has(const struct itemplate *itemp, unsigned int bit)
+{
+    return iflag_test(&insns_flags[itemp->iflag_idx], bit);
+}
+
 /* Disassembler table structure */

 /*
--- a/include/nasmlib.h
+++ b/include/nasmlib.h
@ -189,11 +189,9 @@ int64_t readnum(char *str, bool *error);
 int64_t readstrnum(char *str, int length, bool *warn);

 /*
- * seg_init: Initialise the segment-number allocator.
 * seg_alloc: allocate a hitherto unused segment number.
 */
-void pure_func seg_init(void);
-int32_t pure_func seg_alloc(void);
+int32_t seg_alloc(void);

 /*
 * Add/replace or remove an extension to the end of a filename
--- a/output/outelf.c
+++ b/output/outelf.c
@ -3273,6 +3273,9 @@ static void dwarf_generate(void)
    if (is_elf32()) {
        WRITELONG(pbuf,0);  /* null  beginning offset */
        WRITELONG(pbuf,0);  /* null  ending offset */
+    } else if (is_elfx32()) {
+        WRITELONG(pbuf,0);  /* null  beginning offset */
+        WRITELONG(pbuf,0);  /* null  ending offset */
    } else {
        nasm_assert(is_elf64());
        WRITEDLONG(pbuf,0);  /* null  beginning offset */
--- a/test/ret.asm
+++ b/test/ret.asm
@ -0,0 +1,56 @@
+	;; All the flavors of RET
+%ifndef ERROR
+ %define ERROR 0
+%endif
+
+
+	bits 16
+
+	ret
+	retn
+	retf
+	retw
+	retnw
+	retfw
+	retd
+	retnd
+	retfd
+%if ERROR
+	retq
+	retnq
+	retfq
+%endif
+
+	bits 32
+
+	ret
+	retn
+	retf
+	retw
+	retnw
+	retfw
+	retd
+	retnd
+	retfd
+%if ERROR
+	retq
+	retnq
+	retfq
+%endif
+
+	bits 64
+
+	ret
+	retn
+	retf		; Probably should have been RETFQ, but: legacy...
+	retw
+	retnw
+	retfw
+%if ERROR
+	retd
+	retnd
+%endif
+	retfd
+	retq
+	retnq
+	retfq
--- a/test/vaesenc.asm
+++ b/test/vaesenc.asm
@ -0,0 +1,22 @@
+;; BR 3392454, 3392460
+
+	bits 64
+	aesenc xmm0,xmm4
+	vaesenc zmm0,zmm0,zmm4
+	vpclmullqlqdq zmm1,zmm1,zmm5
+	vpclmulqdq zmm0, zmm1, zmm2, 0
+	vaesenclast zmm0, zmm1, zmm2
+
+	bits 32
+	aesenc xmm0,xmm4
+	vaesenc zmm0,zmm0,zmm4
+	vpclmullqlqdq zmm1,zmm1,zmm5
+	vpclmulqdq zmm0, zmm1, zmm2, 0
+	vaesenclast zmm0, zmm1, zmm2
+
+	bits 16
+	aesenc xmm0,xmm4
+	vaesenc zmm0,zmm0,zmm4
+	vpclmullqlqdq zmm1,zmm1,zmm5
+	vpclmulqdq zmm0, zmm1, zmm2, 0
+	vaesenclast zmm0, zmm1, zmm2
--- a/x86/insns-iflags.ph
+++ b/x86/insns-iflags.ph
@ -64,109 +64,117 @@
 # for a set of flags, so be careful moving bits (and
 # don't forget to update C code generation then).
 #
+# Round the counter behind a scalar reference up to the next multiple
+# of 32 (a value that is already a multiple of 32 is left unchanged).
+# The counter is updated in place and the same reference is returned,
+# so the caller can dereference the result directly.
+sub dword_align($) {
+    my $ref = shift;
+
+    ${$ref} += 31;
+    ${$ref} &= ~31;
+
+    return $ref;
+}
+
+my $f = 0;
 my %insns_flag_bit = (
    #
    # dword bound, index 0 - specific flags
    #
-    "SM"                => [  0, "Size match"],
-    "SM2"               => [  1, "Size match first two operands"],
-    "SB"                => [  2, "Unsized operands can't be non-byte"],
-    "SW"                => [  3, "Unsized operands can't be non-word"],
-    "SD"                => [  4, "Unsized operands can't be non-dword"],
-    "SQ"                => [  5, "Unsized operands can't be non-qword"],
-    "SO"                => [  6, "Unsized operands can't be non-oword"],
-    "SY"                => [  7, "Unsized operands can't be non-yword"],
-    "SZ"                => [  8, "Unsized operands can't be non-zword"],
-    "SIZE"              => [  9, "Unsized operands must match the bitsize"],
-    "SX"                => [ 10, "Unsized operands not allowed"],
-    "AR0"               => [ 11, "SB, SW, SD applies to argument 0"],
-    "AR1"               => [ 12, "SB, SW, SD applies to argument 1"],
-    "AR2"               => [ 13, "SB, SW, SD applies to argument 2"],
-    "AR3"               => [ 14, "SB, SW, SD applies to argument 3"],
-    "AR4"               => [ 15, "SB, SW, SD applies to argument 4"],
-    "OPT"               => [ 16, "Optimizing assembly only"],
+    "SM"                => [$f++, "Size match"],
+    "SM2"               => [$f++, "Size match first two operands"],
+    "SB"                => [$f++, "Unsized operands can't be non-byte"],
+    "SW"                => [$f++, "Unsized operands can't be non-word"],
+    "SD"                => [$f++, "Unsized operands can't be non-dword"],
+    "SQ"                => [$f++, "Unsized operands can't be non-qword"],
+    "SO"                => [$f++, "Unsized operands can't be non-oword"],
+    "SY"                => [$f++, "Unsized operands can't be non-yword"],
+    "SZ"                => [$f++, "Unsized operands can't be non-zword"],
+    "SIZE"              => [$f++, "Unsized operands must match the bitsize"],
+    "SX"                => [$f++, "Unsized operands not allowed"],
+    "AR0"               => [$f++, "SB, SW, SD applies to argument 0"],
+    "AR1"               => [$f++, "SB, SW, SD applies to argument 1"],
+    "AR2"               => [$f++, "SB, SW, SD applies to argument 2"],
+    "AR3"               => [$f++, "SB, SW, SD applies to argument 3"],
+    "AR4"               => [$f++, "SB, SW, SD applies to argument 4"],
+    "OPT"               => [$f++, "Optimizing assembly only"],

    #
-    # dword bound, index 1 - instruction filtering flags
+    # dword bound - instruction filtering flags
    #
-    "PRIV"              => [ 32, "Privileged instruction"],
-    "SMM"               => [ 33, "Only valid in SMM"],
-    "PROT"              => [ 34, "Protected mode only"],
-    "LOCK"              => [ 35, "Lockable if operand 0 is memory"],
-    "NOLONG"            => [ 36, "Not available in long mode"],
-    "LONG"              => [ 37, "Long mode"],
-    "NOHLE"             => [ 38, "HLE prefixes forbidden"],
-    "MIB"               => [ 39, "disassemble with split EA"],
-    "BND"               => [ 40, "BND (0xF2) prefix available"],
-    "UNDOC"             => [ 41, "Undocumented"],
-    "HLE"               => [ 42, "HLE prefixed"],
-    "FPU"               => [ 43, "FPU"],
-    "MMX"               => [ 44, "MMX"],
-    "3DNOW"             => [ 45, "3DNow!"],
-    "SSE"               => [ 46, "SSE (KNI, MMX2)"],
-    "SSE2"              => [ 47, "SSE2"],
-    "SSE3"              => [ 48, "SSE3 (PNI)"],
-    "VMX"               => [ 49, "VMX"],
-    "SSSE3"             => [ 50, "SSSE3"],
-    "SSE4A"             => [ 51, "AMD SSE4a"],
-    "SSE41"             => [ 52, "SSE4.1"],
-    "SSE42"             => [ 53, "SSE4.2"],
-    "SSE5"              => [ 54, "SSE5"],
-    "AVX"               => [ 55, "AVX (128b)"],
-    "AVX2"              => [ 56, "AVX2 (256b)"],
-    "FMA"               => [ 57, ""],
-    "BMI1"              => [ 58, ""],
-    "BMI2"              => [ 59, ""],
-    "TBM"               => [ 60, ""],
-    "RTM"               => [ 61, ""],
-    "INVPCID"           => [ 62, ""],
+    "PRIV"              => [${dword_align(\$f)}++, "Privileged instruction"],
+    "SMM"               => [$f++, "Only valid in SMM"],
+    "PROT"              => [$f++, "Protected mode only"],
+    "LOCK"              => [$f++, "Lockable if operand 0 is memory"],
+    "NOLONG"            => [$f++, "Not available in long mode"],
+    "LONG"              => [$f++, "Long mode"],
+    "NOHLE"             => [$f++, "HLE prefixes forbidden"],
+    "MIB"               => [$f++, "disassemble with split EA"],
+    "BND"               => [$f++, "BND (0xF2) prefix available"],
+    "UNDOC"             => [$f++, "Undocumented"],
+    "HLE"               => [$f++, "HLE prefixed"],
+    "FPU"               => [$f++, "FPU"],
+    "MMX"               => [$f++, "MMX"],
+    "3DNOW"             => [$f++, "3DNow!"],
+    "SSE"               => [$f++, "SSE (KNI, MMX2)"],
+    "SSE2"              => [$f++, "SSE2"],
+    "SSE3"              => [$f++, "SSE3 (PNI)"],
+    "VMX"               => [$f++, "VMX"],
+    "SSSE3"             => [$f++, "SSSE3"],
+    "SSE4A"             => [$f++, "AMD SSE4a"],
+    "SSE41"             => [$f++, "SSE4.1"],
+    "SSE42"             => [$f++, "SSE4.2"],
+    "SSE5"              => [$f++, "SSE5"],
+    "AVX"               => [$f++, "AVX  (256-bit floating point)"],
+    "AVX2"              => [$f++, "AVX2 (256-bit integer)"],
+    "FMA"               => [$f++, ""],
+    "BMI1"              => [$f++, ""],
+    "BMI2"              => [$f++, ""],
+    "TBM"               => [$f++, ""],
+    "RTM"               => [$f++, ""],
+    "INVPCID"           => [$f++, ""],
+    "AVX512"            => [$f++, "AVX-512F (512-bit base architecture)"],
+    "AVX512CD"          => [$f++, "AVX-512 Conflict Detection"],
+    "AVX512ER"          => [$f++, "AVX-512 Exponential and Reciprocal"],
+    "AVX512PF"          => [$f++, "AVX-512 Prefetch"],
+    "MPX"               => [$f++, "MPX"],
+    "SHA"               => [$f++, "SHA"],
+    "PREFETCHWT1"       => [$f++, "PREFETCHWT1"],
+    "AVX512VL"          => [$f++, "AVX-512 Vector Length Orthogonality"],
+    "AVX512DQ"          => [$f++, "AVX-512 Dword and Qword"],
+    "AVX512BW"          => [$f++, "AVX-512 Byte and Word"],
+    "AVX512IFMA"        => [$f++, "AVX-512 IFMA instructions"],
+    "AVX512VBMI"        => [$f++, "AVX-512 VBMI instructions"],
+    "AES"               => [$f++, "AES instructions"],
+    "VAES"              => [$f++, "AES AVX instructions"],
+    "VPCLMULQDQ"        => [$f++, "Carry-Less Multiplication extention"],
+
+    # Put these last
+    "OBSOLETE"          => [$f++, "Instruction removed from architecture"],
+    "VEX"               => [$f++, "VEX or XOP encoded instruction"],
+    "EVEX"              => [$f++, "EVEX encoded instruction"],

    #
-    # dword bound, index 2 - instruction filtering flags
-    #
-    "AVX512"            => [ 64, "AVX-512F (512b)"],
-    "AVX512CD"          => [ 65, "AVX-512 Conflict Detection"],
-    "AVX512ER"          => [ 66, "AVX-512 Exponential and Reciprocal"],
-    "AVX512PF"          => [ 67, "AVX-512 Prefetch"],
-    "MPX"               => [ 68	,"MPX"],
-    "SHA"               => [ 69	,"SHA"],
-    "PREFETCHWT1"       => [ 70	,"PREFETCHWT1"],
-    "AVX512VL"          => [ 71, "AVX-512 Vector Length Orthogonality"],
-    "AVX512DQ"          => [ 72, "AVX-512 Dword and Qword"],
-    "AVX512BW"          => [ 73, "AVX-512 Byte and Word"],
-    "AVX512IFMA"        => [ 74, "AVX-512 IFMA instructions"],
-    "AVX512VBMI"        => [ 75, "AVX-512 VBMI instructions"],
-    "OBSOLETE"          => [ 93, "Instruction removed from architecture"],
-    "VEX"               => [ 94, "VEX or XOP encoded instruction"],
-    "EVEX"              => [ 95, "EVEX encoded instruction"],
-    "AES"               => [ 96, "AES instructions"],
-    "VAES"              => [ 97, "AES AVX instructions"],
-    "VPCLMULQDQ"        => [ 98, "Carry-Less Multiplication extention"],
-
-    #
-    # dword bound, cpu type flags
+    # dword bound - cpu type flags
    #
    # The CYRIX and AMD flags should have the highest bit values; the
    # disassembler selection algorithm depends on it.
    #
-    "8086"              => [128, "8086"],
-    "186"               => [129, "186+"],
-    "286"               => [130, "286+"],
-    "386"               => [131, "386+"],
-    "486"               => [132, "486+"],
-    "PENT"              => [133, "Pentium"],
-    "P6"                => [134, "P6"],
-    "KATMAI"            => [135, "Katmai"],
-    "WILLAMETTE"        => [136, "Willamette"],
-    "PRESCOTT"          => [137, "Prescott"],
-    "X86_64"            => [138, "x86-64 (long or legacy mode)"],
-    "NEHALEM"           => [139, "Nehalem"],
-    "WESTMERE"          => [140, "Westmere"],
-    "SANDYBRIDGE"       => [141, "Sandy Bridge"],
-    "FUTURE"            => [142, "Future processor (not yet disclosed)"],
-    "IA64"              => [143, "IA64 (in x86 mode)"],
-    "CYRIX"             => [144, "Cyrix-specific"],
-    "AMD"               => [145, "AMD-specific"],
+    "8086"              => [${dword_align(\$f)}++, "8086"],
+    "186"               => [$f++, "186+"],
+    "286"               => [$f++, "286+"],
+    "386"               => [$f++, "386+"],
+    "486"               => [$f++, "486+"],
+    "PENT"              => [$f++, "Pentium"],
+    "P6"                => [$f++, "P6"],
+    "KATMAI"            => [$f++, "Katmai"],
+    "WILLAMETTE"        => [$f++, "Willamette"],
+    "PRESCOTT"          => [$f++, "Prescott"],
+    "X86_64"            => [$f++, "x86-64 (long or legacy mode)"],
+    "NEHALEM"           => [$f++, "Nehalem"],
+    "WESTMERE"          => [$f++, "Westmere"],
+    "SANDYBRIDGE"       => [$f++, "Sandy Bridge"],
+    "FUTURE"            => [$f++, "Future processor (not yet disclosed)"],
+    "IA64"              => [$f++, "IA64 (in x86 mode)"],
+
+    # Put these last: CYRIX and AMD must keep the highest bit values
+    "CYRIX"             => [$f++, "Cyrix-specific"],
+    "AMD"               => [$f++, "AMD-specific"],
 );

 my %insns_flag_hash = ();
@ -176,9 +184,9 @@ my $iflag_words;
 sub get_flag_words() {
    my $max = -1;

-    foreach my $key (keys(%insns_flag_bit)) {
-	if (${$insns_flag_bit{$key}}[0] > $max) {
-	    $max = ${$insns_flag_bit{$key}}[0];
+    foreach my $vp (values(%insns_flag_bit)) {
+	if ($vp->[0] > $max) {
+	    $max = $vp->[0];
 	}
    }

@ -218,14 +226,28 @@ sub write_iflaggen_h() {
    print N "#ifndef NASM_IFLAGGEN_H\n";
    print N "#define NASM_IFLAGGEN_H 1\n\n";

-    foreach my $key (sort { $insns_flag_bit{$a}[0] <=> $insns_flag_bit{$b}[0] } keys(%insns_flag_bit)) {
+    my @flagnames = keys(%insns_flag_bit);
+    @flagnames = sort {
+	$insns_flag_bit{$a}->[0] <=> $insns_flag_bit{$b}->[0]
+    } @flagnames;
+    my $next = 0;
+    foreach my $key (@flagnames) {
+	my $v = $insns_flag_bit{$key};
+	if ($v->[0] > $next) {
+	    printf N "%-31s /* %-64s */\n", '',
+		($next != $v->[0]-1) ?
+		sprintf("%d...%d unused", $next, $v->[0]-1) :
+		sprintf("%d unused", $next);
+	}
        print N sprintf("#define IF_%-16s %3d /* %-64s */\n",
-            $key, $insns_flag_bit{$key}[0], $insns_flag_bit{$key}[1]);
+			$key, $v->[0], $v->[1]);
+	$next = $v->[0] + 1;
    }

    print N "\n";
+    printf N "#define IF_FIELD_COUNT %d\n", $iflag_words;
    print N "typedef struct {\n";
-    printf N "    uint32_t field[%d];\n", $iflag_words;
+    print N "    uint32_t field[IF_FIELD_COUNT];\n";
    print N "} iflag_t;\n";

    print N "\n";
--- a/x86/insns.dat
+++ b/x86/insns.dat
@ -1,6 +1,6 @@
 ;; --------------------------------------------------------------------------
 ;;
-;;   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+;;   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 ;;   See the file AUTHORS included with the NASM distribution for
 ;;   the specific copyright holders.
 ;;
@ -1129,6 +1129,24 @@ RETF		void				[	cb]					8086
 RETF		imm				[i:	ca iw]					8086,SW
 RETN		void				[	c3]					8086,BND
 RETN		imm				[i:	c2 iw]					8086,SW,BND
+RETW		void				[	o16 c3]					8086,BND
+RETW		imm				[i:	o16 c2 iw]				8086,SW,BND
+RETFW		void				[	o16 cb]					8086
+RETFW		imm				[i:	o16 ca iw]				8086,SW
+RETNW		void				[	o16 c3]					8086,BND
+RETNW		imm				[i:	o16 c2 iw]				8086,SW,BND
+RETD		void				[	o32 c3]					8086,BND,NOLONG
+RETD		imm				[i:	o32 c2 iw]				8086,SW,BND,NOLONG
+RETFD		void				[	o32 cb]					8086
+RETFD		imm				[i:	o32 ca iw]				8086,SW
+RETND		void				[	o32 c3]					8086,BND,NOLONG
+RETND		imm				[i:	o32 c2 iw]				8086,SW,BND,NOLONG
+RETQ		void				[	o64nw c3]				X64,BND
+RETQ		imm				[i:	o64nw c2 iw]				X64,SW,BND
+RETFQ		void				[	o64 cb]					X64
+RETFQ		imm				[i:	o64 ca iw]				X64,SW
+RETNQ		void				[	o64nw c3]				X64,BND
+RETNQ		imm				[i:	o64nw c2 iw]				X64,SW,BND

 ROL		rm8,unity			[m-:	d0 /0]					8086
 ROL		rm8,reg_cl			[m-:	d2 /0]					8086