nasm/parser.c

/* parser.c   source line parser for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>

#include "nasm.h"
#include "nasmlib.h"
#include "parser.h"
#include "float.h"

#include "names.c"


static long reg_flags[] = {	       /* sizes and special flags */
    0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
    REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
    REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
    REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
    REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
    MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
    REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
    FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
    REG_TREG
};

enum {				       /* special tokens */
    S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
    S_TWORD, S_WORD
};

static char *special_names[] = {       /* and the actual text */
    "byte", "dword", "far", "long", "near", "qword", "short", "to",
    "tword", "word"
};

static char *prefix_names[] = {
    "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
    "repnz", "repz", "times"
};

/*
 * Evaluator datatype. Expressions, within the evaluator, are
 * stored as an array of these beasts, terminated by a record with
 * type==0. Mostly, it's a vector type: each type denotes some kind
 * of a component, and the value denotes the multiple of that
 * component present in the expression. The exception is the WRT
 * type, whose `value' field denotes the segment to which the
 * expression is relative. These segments will be segment-base
 * types, i.e. either odd segment values or SEG_ABS types. So it is
 * still valid to assume that anything with a `value' field of zero
 * is insignificant.
 */
typedef struct {
    long type;			       /* a register, or EXPR_xxx */
    long value;			       /* must be >= 32 bits */
} expr;

static void eval_reset(void);
static expr *evaluate(int);

/*
 * ASSUMPTION MADE HERE. The number of distinct register names
 * (i.e. possible "type" fields for an expr structure) does not
 * exceed 126.
 */
#define EXPR_SIMPLE 126
#define EXPR_WRT 127
#define EXPR_SEGBASE 128

static int is_reloc(expr *);
static int is_simple(expr *);
static int is_really_simple (expr *);
static long reloc_value(expr *);
static long reloc_seg(expr *);
static long reloc_wrt(expr *);

enum {				       /* token types, other than chars */
    TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM,
    TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL,
    TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT,
    TOKEN_FLOAT
};

struct tokenval {
    long t_integer, t_inttwo;
    char *t_charptr;
};

static char tempstorage[1024], *q;
static int bsi (char *string, char **array, int size);/* binary search */

static int nexttoken (void);
static int is_comma_next (void);

static char *bufptr;
static int i;
static struct tokenval tokval;
static lfunc labelfunc;
static efunc error;
static char *label;
static struct ofmt *outfmt;

static long seg, ofs;

static int forward;

insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
		  char *buffer, insn *result, struct ofmt *output,
		  efunc errfunc) {
    int operand;
    int critical;

    forward = result->forw_ref = FALSE;
    q = tempstorage;
    bufptr = buffer;
    labelfunc = lookup_label;
    outfmt = output;
    error = errfunc;
    seg = segment;
    ofs = offset;
    label = "";

    i = nexttoken();

    result->eops = NULL;	       /* must do this, whatever happens */

    if (i==0) {			       /* blank line - ignore */
	result->label = NULL;	       /* so, no label on it */
	result->opcode = -1;	       /* and no instruction either */
	return result;
    }
    if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
	(i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
	error (ERR_NONFATAL, "label or instruction expected"
	       " at start of line");
	result->label = NULL;
	result->opcode = -1;
	return result;
    }

    if (i == TOKEN_ID) {	       /* there's a label here */
	label = result->label = tokval.t_charptr;
	i = nexttoken();
	if (i == ':') {		       /* skip over the optional colon */
	    i = nexttoken();
	}
    } else			       /* no label; so, moving swiftly on */
	result->label = NULL;

    if (i==0) {
	result->opcode = -1;	       /* this line contains just a label */
	return result;
    }

    result->nprefix = 0;
    result->times = 1;

    while (i == TOKEN_PREFIX ||
	   (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
	/*
	 * Handle special case: the TIMES prefix.
	 */
	if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
	    expr *value;

	    i = nexttoken();
	    eval_reset();
	    value = evaluate (pass);
	    if (!value) {	       /* but, error in evaluator */
		result->opcode = -1;   /* unrecoverable parse error: */
		return result;	       /* ignore this instruction */
	    }
	    if (!is_simple (value)) {
		error (ERR_NONFATAL,
		       "non-constant argument supplied to TIMES");
		result->times = 1;
	    } else
		result->times = value->value;
	} else {
	    if (result->nprefix == MAXPREFIX)
		error (ERR_NONFATAL,
		       "instruction has more than %d prefixes", MAXPREFIX);
	    else
		result->prefixes[result->nprefix++] = tokval.t_integer;
	    i = nexttoken();
	}
    }

    if (i != TOKEN_INSN) {
	error (ERR_NONFATAL, "parser: instruction expected");
	result->opcode = -1;
	return result;
    }

    result->opcode = tokval.t_integer;
    result->condition = tokval.t_inttwo;

    /*
     * RESB, RESW and RESD cannot be satisfied with incorrectly
     * evaluated operands, since the correct values _must_ be known
     * on the first pass. Hence, even in pass one, we set the
     * `critical' flag on calling evaluate(), so that it will bomb
     * out on undefined symbols. Nasty, but there's nothing we can
     * do about it.
     *
     * For the moment, EQU has the same difficulty, so we'll
     * include that.
     */
    if (result->opcode == I_RESB ||
	result->opcode == I_RESW ||
	result->opcode == I_RESD ||
	result->opcode == I_RESQ ||
	result->opcode == I_REST ||
	result->opcode == I_EQU)
	critical = pass;
    else
	critical = (pass==2 ? 2 : 0);

    if (result->opcode == I_DB ||
	result->opcode == I_DW ||
	result->opcode == I_DD ||
	result->opcode == I_DQ ||
	result->opcode == I_DT) {
	extop *eop, **tail = &result->eops;
	int oper_num = 0;

	/*
	 * Begin to read the DB/DW/DD/DQ/DT operands.
	 */
	while (1) {
	    i = nexttoken();
	    if (i == 0)
		break;
	    eop = *tail = nasm_malloc(sizeof(extop));
	    tail = &eop->next;
	    eop->next = NULL;
	    eop->type = EOT_NOTHING;
	    oper_num++;

	    if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
		eop->type = EOT_DB_STRING;
		eop->stringval = tokval.t_charptr;
		eop->stringlen = tokval.t_inttwo;
		i = nexttoken();       /* eat the comma */
		continue;
	    }

	    if (i == TOKEN_FLOAT || i == '-') {
		long sign = +1L;

		if (i == '-') {
		    char *save = bufptr;
		    i = nexttoken();
		    sign = -1L;
		    if (i != TOKEN_FLOAT) {
			bufptr = save;
			i = '-';
		    }
		}

		if (i == TOKEN_FLOAT) {
		    eop->type = EOT_DB_STRING;
		    eop->stringval = q;
		    if (result->opcode == I_DD)
			eop->stringlen = 4;
		    else if (result->opcode == I_DQ)
			eop->stringlen = 8;
		    else if (result->opcode == I_DT)
		    eop->stringlen = 10;
		    else {
			error(ERR_NONFATAL, "floating-point constant"
			      " encountered in `D%c' instruction",
			      result->opcode == I_DW ? 'W' : 'B');
			eop->type = EOT_NOTHING;
		    }
		    q += eop->stringlen;
		    if (!float_const (tokval.t_charptr, sign,
				      (unsigned char *)eop->stringval,
				      eop->stringlen, error))
			eop->type = EOT_NOTHING;
		    i = nexttoken();       /* eat the comma */
		    continue;
		}
	    }

	    /* anything else */ {
		expr *value;
		eval_reset();
		value = evaluate (critical);
		if (!value) {	       /* but, error in evaluator */
		    result->opcode = -1;/* unrecoverable parse error: */
		    return result;     /* ignore this instruction */
		}
		if (is_reloc(value)) {
		    eop->type = EOT_DB_NUMBER;
		    eop->offset = reloc_value(value);
		    eop->segment = reloc_seg(value);
		    eop->wrt = reloc_wrt(value);
		} else {
		    error (ERR_NONFATAL,
			   "`%s' operand %d: expression is not simple"
			   " or relocatable",
			   insn_names[result->opcode], oper_num);
		}
	    }
	}
	return result;
    }

    /* right. Now we begin to parse the operands. There may be up to three
     * of these, separated by commas, and terminated by a zero token. */

    for (operand = 0; operand < 3; operand++) {
	expr *seg, *value;	       /* used most of the time */
	int mref;		       /* is this going to be a memory ref? */

	result->oprs[operand].addr_size = 0;/* have to zero this whatever */
	i = nexttoken();
	if (i == 0) break;	       /* end of operands: get out of here */
	result->oprs[operand].type = 0;   /* so far, no override */
	while (i == TOKEN_SPECIAL)	{/* size specifiers */
	    switch ((int)tokval.t_integer) {
	      case S_BYTE:
		result->oprs[operand].type |= BITS8;
		break;
	      case S_WORD:
		result->oprs[operand].type |= BITS16;
		break;
	      case S_DWORD:
	      case S_LONG:
		result->oprs[operand].type |= BITS32;
		break;
	      case S_QWORD:
		result->oprs[operand].type |= BITS64;
		break;
	      case S_TWORD:
		result->oprs[operand].type |= BITS80;
		break;
	      case S_TO:
		result->oprs[operand].type |= TO;
		break;
	      case S_FAR:
		result->oprs[operand].type |= FAR;
		break;
	      case S_NEAR:
		result->oprs[operand].type |= NEAR;
		break;
	      case S_SHORT:
		result->oprs[operand].type |= SHORT;
		break;
	    }
	    i = nexttoken();
	}

	if (i == '[') {		       /* memory reference */
	    i = nexttoken();
	    mref = TRUE;
	    if (i == TOKEN_SPECIAL) {  /* check for address size override */
		switch ((int)tokval.t_integer) {
		  case S_WORD:
		    result->oprs[operand].addr_size = 16;
		    break;
		  case S_DWORD:
		  case S_LONG:
		    result->oprs[operand].addr_size = 32;
		    break;
		  default:
		    error (ERR_NONFATAL, "invalid size specification in"
			   " effective address");
		}
		i = nexttoken();
	    }
	} else 			       /* immediate operand, or register */
	    mref = FALSE;

	eval_reset();

	value = evaluate (critical);
	if (forward)
	    result->forw_ref = TRUE;
	if (!value) {		       /* error in evaluator */
	    result->opcode = -1;       /* unrecoverable parse error: */
	    return result;	       /* ignore this instruction */
	}
	if (i == ':' && mref) {	       /* it was seg:offset */
	    seg = value;	       /* so shift this into the segment */
	    i = nexttoken();	       /* then skip the colon */
	    if (i == TOKEN_SPECIAL) {  /* another check for size override */
		switch ((int)tokval.t_integer) {
		  case S_WORD:
		    result->oprs[operand].addr_size = 16;
		    break;
		  case S_DWORD:
		  case S_LONG:
		    result->oprs[operand].addr_size = 32;
		    break;
		  default:
		    error (ERR_NONFATAL, "invalid size specification in"
			   " effective address");
		}
		i = nexttoken();
	    }
	    value = evaluate (critical);
	    if (forward)
		result->forw_ref = TRUE;
	    /* and get the offset */
	    if (!value) {	       /* but, error in evaluator */
		result->opcode = -1;   /* unrecoverable parse error: */
		return result;	       /* ignore this instruction */
	    }
	} else seg = NULL;
	if (mref) {		       /* find ] at the end */
	    if (i != ']') {
		error (ERR_NONFATAL, "parser: expecting ]");
		do {		       /* error recovery again */
		    i = nexttoken();
		} while (i != 0 && i != ',');
	    } else		       /* we got the required ] */
		i = nexttoken();
	} else {		       /* immediate operand */
	    if (i != 0 && i != ',' && i != ':') {
		error (ERR_NONFATAL, "comma or end of line expected");
		do {		       /* error recovery */
		    i = nexttoken();
		} while (i != 0 && i != ',');
	    } else if (i == ':') {
		result->oprs[operand].type |= COLON;
	    }
	}

	/* now convert the exprs returned from evaluate() into operand
	 * descriptions... */

	if (mref) {		       /* it's a memory reference */
	    expr *e = value;
	    int b, i, s;	       /* basereg, indexreg, scale */
	    long o;		       /* offset */

	    if (seg) {		       /* segment override */
		if (seg[1].type!=0 || seg->value!=1 ||
		    REG_SREG & ~reg_flags[seg->type])
		    error (ERR_NONFATAL, "invalid segment override");
		else if (result->nprefix == MAXPREFIX)
		    error (ERR_NONFATAL,
			   "instruction has more than %d prefixes",
			   MAXPREFIX);
		else
		    result->prefixes[result->nprefix++] = seg->type;
	    }

	    b = i = -1, o = s = 0;

	    if (e->type < EXPR_SIMPLE) {   /* this bit's a register */
		if (e->value == 1) /* in fact it can be basereg */
		    b = e->type;
		else	       /* no, it has to be indexreg */
		    i = e->type, s = e->value;
		e++;
	    }
	    if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
		if (e->value != 1) {   /* it has to be indexreg */
		    if (i != -1) {     /* but it can't be */
			error(ERR_NONFATAL, "invalid effective address");
			result->opcode = -1;
			return result;
		    } else
			i = e->type, s = e->value;
		} else {	       /* it can be basereg */
		    if (b != -1)       /* or can it? */
			i = e->type, s = 1;
		    else
			b = e->type;
		}
		e++;
	    }
	    if (e->type != 0) {	       /* is there an offset? */
		if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
		    error (ERR_NONFATAL, "invalid effective address");
		    result->opcode = -1;
		    return result;
		} else {
		    if (e->type == EXPR_SIMPLE) {
			o = e->value;
			e++;
		    }
		    if (e->type == EXPR_WRT) {
			result->oprs[operand].wrt = e->value;
			e++;
		    } else
			result->oprs[operand].wrt = NO_SEG;
		    /*
		     * Look for a segment base type.
		     */
		    if (e->type && e->type < EXPR_SEGBASE) {
			error (ERR_NONFATAL, "invalid effective address");
			result->opcode = -1;
			return result;
		    }
		    while (e->type && e->value == 0)
			e++;
		    if (e->type && e->value != 1) {
			error (ERR_NONFATAL, "invalid effective address");
			result->opcode = -1;
			return result;
		    }
		    if (e->type) {
			result->oprs[operand].segment = e->type-EXPR_SEGBASE;
			e++;
		    } else
			result->oprs[operand].segment = NO_SEG;
		    while (e->type && e->value == 0)
			e++;
		    if (e->type) {
			error (ERR_NONFATAL, "invalid effective address");
			result->opcode = -1;
			return result;
		    }
		}
	    } else {
		o = 0;
		result->oprs[operand].wrt = NO_SEG;
		result->oprs[operand].segment = NO_SEG;
	    }

	    if (e->type != 0) {    /* there'd better be nothing left! */
		error (ERR_NONFATAL, "invalid effective address");
		result->opcode = -1;
		return result;
	    }

	    result->oprs[operand].type |= MEMORY;
	    if (b==-1 && (i==-1 || s==0))
		result->oprs[operand].type |= MEM_OFFS;
	    result->oprs[operand].basereg = b;
	    result->oprs[operand].indexreg = i;
	    result->oprs[operand].scale = s;
	    result->oprs[operand].offset = o;
	} else {		       /* it's not a memory reference */
	    if (is_reloc(value)) {     /* it's immediate */
		result->oprs[operand].type |= IMMEDIATE;
		result->oprs[operand].offset = reloc_value(value);
		result->oprs[operand].segment = reloc_seg(value);
		result->oprs[operand].wrt = reloc_wrt(value);
		if (is_simple(value) && reloc_value(value)==1)
		    result->oprs[operand].type |= UNITY;
	    } else {	       /* it's a register */
		if (value->type>=EXPR_SIMPLE || value->value!=1) {
		    error (ERR_NONFATAL, "invalid operand type");
		    result->opcode = -1;
		    return result;
		}
		/* clear overrides, except TO which applies to FPU regs */
		result->oprs[operand].type &= TO;
		result->oprs[operand].type |= REGISTER;
		result->oprs[operand].type |= reg_flags[value->type];
		result->oprs[operand].basereg = value->type;
	    }
	}
    }

    result->operands = operand;       /* set operand count */

    while (operand<3)		       /* clear remaining operands */
	result->oprs[operand++].type = 0;

    /*
     * Transform RESW, RESD, RESQ, REST into RESB.
     */
    switch (result->opcode) {
      case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
      case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
      case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
      case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
    }

    return result;
}

static int is_comma_next (void) {
    char *p;

    p = bufptr;
    while (isspace(*p)) p++;
    return (*p == ',' || *p == ';' || !*p);
}

/* isidstart matches any character that may start an identifier, and isidchar
 * matches any character that may appear at places other than the start of an
 * identifier. E.g. a period may only appear at the start of an identifier
 * (for local labels), whereas a number may appear anywhere *but* at the
 * start. */

#define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' )
#define isidchar(c)  ( isidstart(c) || isdigit(c) || (c)=='$' || (c)=='#' \
                                                  || (c)=='@' || (c)=='~' )

/* Ditto for numeric constants. */

#define isnumstart(c)  ( isdigit(c) || (c)=='$' )
#define isnumchar(c)   ( isalnum(c) )

/* This returns the numeric value of a given 'digit'. */

#define numvalue(c)  ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')

/*
 * This tokeniser routine has only one side effect, that of
 * updating `bufptr'. Hence by saving `bufptr', lookahead may be
 * performed.
 */

static int nexttoken (void) {
    char ourcopy[256], *r, *s;

    while (isspace(*bufptr)) bufptr++;
    if (!*bufptr) return 0;

    /* we have a token; either an id, a number or a char */
    if (isidstart(*bufptr) ||
	(*bufptr == '$' && isidstart(bufptr[1]))) {
	/* now we've got an identifier */
	int i;
	int is_sym = FALSE;

	if (*bufptr == '$') {
	    is_sym = TRUE;
	    bufptr++;
	}

 	tokval.t_charptr = q;
	*q++ = *bufptr++;
	while (isidchar(*bufptr)) *q++ = *bufptr++;
	*q++ = '\0';
	for (s=tokval.t_charptr, r=ourcopy; *s; s++)
	    *r++ = tolower (*s);
	*r = '\0';
	if (is_sym)
	    return TOKEN_ID;	       /* bypass all other checks */
	/* right, so we have an identifier sitting in temp storage. now,
	 * is it actually a register or instruction name, or what? */
	if ((tokval.t_integer=bsi(ourcopy, reg_names,
				  elements(reg_names)))>=0)
	    return TOKEN_REG;
	if ((tokval.t_integer=bsi(ourcopy, insn_names,
				  elements(insn_names)))>=0)
	    return TOKEN_INSN;
	for (i=0; i<elements(icn); i++)
	    if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
		char *p = ourcopy + strlen(icn[i]);
		tokval.t_integer = ico[i];
		if ((tokval.t_inttwo=bsi(p, conditions,
					 elements(conditions)))>=0)
		    return TOKEN_INSN;
	    }
	if ((tokval.t_integer=bsi(ourcopy, prefix_names,
				  elements(prefix_names)))>=0) {
	    tokval.t_integer += PREFIX_ENUM_START;
	    return TOKEN_PREFIX;
	}
	if ((tokval.t_integer=bsi(ourcopy, special_names,
				  elements(special_names)))>=0)
	    return TOKEN_SPECIAL;
	if (!strcmp(ourcopy, "seg"))
	    return TOKEN_SEG;
	if (!strcmp(ourcopy, "wrt"))
	    return TOKEN_WRT;
	return TOKEN_ID;
    } else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
	/*
	 * It's a $ sign with no following hex number; this must
	 * mean it's a Here token ($), evaluating to the current
	 * assembly location, or a Base token ($$), evaluating to
	 * the base of the current segment.
	 */
	bufptr++;
	if (*bufptr == '$') {
	    bufptr++;
	    return TOKEN_BASE;
	}
	return TOKEN_HERE;
    } else if (isnumstart(*bufptr)) {	       /* now we've got a number */
	char *r = q;
	int rn_error;

	*q++ = *bufptr++;
	while (isnumchar(*bufptr)) {
	    *q++ = *bufptr++;
	}
	if (*bufptr == '.') {
	    /*
	     * a floating point constant
	     */
	    *q++ = *bufptr++;
	    while (isnumchar(*bufptr)) {
		*q++ = *bufptr++;
	    }
	    *q++ = '\0';
	    tokval.t_charptr = r;
	    return TOKEN_FLOAT;
	}
	*q++ = '\0';
	tokval.t_integer = readnum(r, &rn_error);
	if (rn_error)
	    return TOKEN_ERRNUM;       /* some malformation occurred */
	tokval.t_charptr = NULL;
	return TOKEN_NUM;
    } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */
    	char quote = *bufptr++, *r;
	r = tokval.t_charptr = bufptr;
	while (*bufptr && *bufptr != quote) bufptr++;
	tokval.t_inttwo = bufptr - r;      /* store full version */
	if (!*bufptr)
	    return TOKEN_ERRNUM;       /* unmatched quotes */
	tokval.t_integer = 0;
	r = bufptr++;		       /* skip over final quote */
	while (quote != *--r) {
	    tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
	}
	return TOKEN_NUM;
    } else if (*bufptr == ';') {       /* a comment has happened - stay */
	return 0;
    } else if ((*bufptr == '>' || *bufptr == '<' ||
		*bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) {
	bufptr += 2;
	return (bufptr[-2] == '>' ? TOKEN_SHR :
		bufptr[-2] == '<' ? TOKEN_SHL :
		bufptr[-2] == '/' ? TOKEN_SDIV :
		TOKEN_SMOD);
    } else			       /* just an ordinary char */
    	return (unsigned char) (*bufptr++);
}

/* return index of "string" in "array", or -1 if no match. */
static int bsi (char *string, char **array, int size) {
    int i = -1, j = size;	       /* always, i < index < j */
    while (j-i >= 2) {
	int k = (i+j)/2;
	int l = strcmp(string, array[k]);
	if (l<0)		       /* it's in the first half */
	    j = k;
	else if (l>0)		       /* it's in the second half */
	    i = k;
	else			       /* we've got it :) */
	    return k;
    }
    return -1;			       /* we haven't got it :( */
}

void cleanup_insn (insn *i) {
    extop *e;

    while (i->eops) {
	e = i->eops;
	i->eops = i->eops->next;
	nasm_free (e);
    }
}

/* ------------- Evaluator begins here ------------------ */

static expr exprtempstorage[1024], *tempptr;   /* store exprs in here */

/*
 * Add two vector datatypes. We have some bizarre behaviour on far-
 * absolute segment types: we preserve them during addition _only_
 * if one of the segments is a truly pure scalar.
 */
static expr *add_vectors(expr *p, expr *q) {
    expr *r = tempptr;
    int preserve;

    preserve = is_really_simple(p) || is_really_simple(q);

    while (p->type && q->type &&
	   p->type < EXPR_SEGBASE+SEG_ABS &&
	   q->type < EXPR_SEGBASE+SEG_ABS)
    	if (p->type > q->type) {
	    tempptr->type = q->type;
	    tempptr->value = q->value;
	    tempptr++, q++;
	} else if (p->type < q->type) {
	    tempptr->type = p->type;
	    tempptr->value = p->value;
	    tempptr++, p++;
	} else {		       /* *p and *q have same type */
	    tempptr->type = p->type;
	    tempptr->value = p->value + q->value;
	    tempptr++, p++, q++;
	}
    while (p->type &&
	   (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) {
	tempptr->type = p->type;
	tempptr->value = p->value;
	tempptr++, p++;
    }
    while (q->type &&
	   (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) {
	tempptr->type = q->type;
	tempptr->value = q->value;
	tempptr++, q++;
    }
    (tempptr++)->type = 0;

    return r;
}

/*
 * Multiply a vector by a scalar. Strip far-absolute segment part
 * if present.
 */
static expr *scalar_mult(expr *vect, long scalar) {
    expr *p = vect;

    while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) {
	p->value = scalar * (p->value);
	p++;
    }
    p->type = 0;

    return vect;
}

static expr *scalarvect (long scalar) {
    expr *p = tempptr;
    tempptr->type = EXPR_SIMPLE;
    tempptr->value = scalar;
    tempptr++;
    tempptr->type = 0;
    tempptr++;
    return p;
}

/*
 * Return TRUE if the argument is a simple scalar. (Or a far-
 * absolute, which counts.)
 */
static int is_simple (expr *vect) {
    while (vect->type && !vect->value)
    	vect++;
    if (!vect->type)
	return 1;
    if (vect->type != EXPR_SIMPLE)
	return 0;
    do {
	vect++;
    } while (vect->type && !vect->value);
    if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0;
    return 1;
}

/*
 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
 * absolute.
 */
static int is_really_simple (expr *vect) {
    while (vect->type && !vect->value)
    	vect++;
    if (!vect->type)
	return 1;
    if (vect->type != EXPR_SIMPLE)
	return 0;
    do {
	vect++;
    } while (vect->type && !vect->value);
    if (vect->type) return 0;
    return 1;
}

/*
 * Return TRUE if the argument is relocatable (i.e. a simple
 * scalar, plus at most one segment-base, plus possibly a WRT).
 */
static int is_reloc (expr *vect) {
    while (vect->type && !vect->value)
    	vect++;
    if (!vect->type)
	return 1;
    if (vect->type < EXPR_SIMPLE)
	return 0;
    if (vect->type == EXPR_SIMPLE) {
	do {
	    vect++;
	} while (vect->type && !vect->value);
	if (!vect->type)
	    return 1;
    }
    do {
	vect++;
    } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
    if (!vect->type)
	return 1;
    return 1;
}

/*
 * Return the scalar part of a relocatable vector. (Including
 * simple scalar vectors - those qualify as relocatable.)
 */
static long reloc_value (expr *vect) {
    while (vect->type && !vect->value)
    	vect++;
    if (!vect->type) return 0;
    if (vect->type == EXPR_SIMPLE)
	return vect->value;
    else
	return 0;
}

/*
 * Return the segment number of a relocatable vector, or NO_SEG for
 * simple scalars.
 */
static long reloc_seg (expr *vect) {
    while (vect->type && (vect->type == EXPR_WRT || !vect->value))
    	vect++;
    if (vect->type == EXPR_SIMPLE) {
	do {
	    vect++;
	} while (vect->type && (vect->type == EXPR_WRT || !vect->value));
    }
    if (!vect->type)
	return NO_SEG;
    else
	return vect->type - EXPR_SEGBASE;
}

/*
 * Return the WRT segment number of a relocatable vector, or NO_SEG
 * if no WRT part is present.
 */
static long reloc_wrt (expr *vect) {
    while (vect->type && vect->type < EXPR_WRT)
    	vect++;
    if (vect->type == EXPR_WRT) {
	return vect->value;
    } else
	return NO_SEG;
}

static void eval_reset(void) {
    tempptr = exprtempstorage;	       /* initialise temporary storage */
}

/*
 * The SEG operator: calculate the segment part of a relocatable
 * value. Return NULL, as usual, if an error occurs. Report the
 * error too.
 */
static expr *segment_part (expr *e) {
    long seg;

    if (!is_reloc(e)) {
	error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
	return NULL;
    }

    seg = reloc_seg(e);
    if (seg == NO_SEG) {
	error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
	return NULL;
    } else if (seg & SEG_ABS)
	return scalarvect(seg & ~SEG_ABS);
    else {
	expr *f = tempptr++;
	tempptr++->type = 0;
	f->type = EXPR_SEGBASE+outfmt->segbase(seg+1);
	f->value = 1;
	return f;
    }
}

/*
 * Recursive-descent parser. Called with a single boolean operand,
 * which is TRUE if the evaluation is critical (i.e. unresolved
 * symbols are an error condition). Must update the global `i' to
 * reflect the token after the parsed string. May return NULL.
 *
 * evaluate() should report its own errors: on return it is assumed
 * that if NULL has been returned, the error has already been
 * reported.
 */

/*
 * Grammar parsed is:
 *
 * expr  : expr0 [ WRT expr6 ]
 * expr0 : expr1 [ {|} expr1]
 * expr1 : expr2 [ {^} expr2]
 * expr2 : expr3 [ {&} expr3]
 * expr3 : expr4 [ {<<,>>} expr4...]
 * expr4 : expr5 [ {+,-} expr5...]
 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
 * expr6 : { ~,+,-,SEG } expr6
 *       | (expr0)
 *       | symbol
 *       | $
 *       | number
 */

static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
static expr *expr4(int), *expr5(int), *expr6(int);

static expr *expr0(int critical) {
    expr *e, *f;

    e = expr1(critical);
    if (!e)
	return NULL;
    while (i == '|') {
	i = nexttoken();
	f = expr1(critical);
	if (!f)
	    return NULL;
	if (!is_simple(e) || !is_simple(f)) {
	    error(ERR_NONFATAL, "`|' operator may only be applied to"
		  " scalar values");
	}
	e = scalarvect (reloc_value(e) | reloc_value(f));
    }
    return e;
}

static expr *expr1(int critical) {
    expr *e, *f;

    e = expr2(critical);
    if (!e)
	return NULL;
    while (i == '^') {
	i = nexttoken();
	f = expr2(critical);
	if (!f)
	    return NULL;
	if (!is_simple(e) || !is_simple(f)) {
	    error(ERR_NONFATAL, "`^' operator may only be applied to"
		  " scalar values");
	}
	e = scalarvect (reloc_value(e) ^ reloc_value(f));
    }
    return e;
}

static expr *expr2(int critical) {
    expr *e, *f;

    e = expr3(critical);
    if (!e)
	return NULL;
    while (i == '&') {
	i = nexttoken();
	f = expr3(critical);
	if (!f)
	    return NULL;
	if (!is_simple(e) || !is_simple(f)) {
	    error(ERR_NONFATAL, "`&' operator may only be applied to"
		  " scalar values");
	}
	e = scalarvect (reloc_value(e) & reloc_value(f));
    }
    return e;
}

static expr *expr3(int critical) {
    expr *e, *f;

    e = expr4(critical);
    if (!e)
	return NULL;
    while (i == TOKEN_SHL || i == TOKEN_SHR) {
	int j = i;
	i = nexttoken();
	f = expr4(critical);
	if (!f)
	    return NULL;
	if (!is_simple(e) || !is_simple(f)) {
	    error(ERR_NONFATAL, "shift operator may only be applied to"
		  " scalar values");
	}
	switch (j) {
	  case TOKEN_SHL:
	    e = scalarvect (reloc_value(e) << reloc_value(f));
	    break;
	  case TOKEN_SHR:
	    e = scalarvect (((unsigned long)reloc_value(e)) >>
			    reloc_value(f));
	    break;
	}
    }
    return e;
}

static expr *expr4(int critical) {
    expr *e, *f;

    e = expr5(critical);
    if (!e)
	return NULL;
    while (i == '+' || i == '-') {
	int j = i;
	i = nexttoken();
	f = expr5(critical);
	if (!f)
	    return NULL;
	switch (j) {
	  case '+':
	    e = add_vectors (e, f);
	    break;
	  case '-':
	    e = add_vectors (e, scalar_mult(f, -1L));
	    break;
	}
    }
    return e;
}

static expr *expr5(int critical) {
    expr *e, *f;

    e = expr6(critical);
    if (!e)
	return NULL;
    while (i == '*' || i == '/' || i == '*' ||
	   i == TOKEN_SDIV || i == TOKEN_SMOD) {
	int j = i;
	i = nexttoken();
	f = expr6(critical);
	if (!f)
	    return NULL;
	if (j != '*' && (!is_simple(e) || !is_simple(f))) {
	    error(ERR_NONFATAL, "division operator may only be applied to"
		  " scalar values");
	    return NULL;
	}
	if (j != '*' && reloc_value(f) == 0) {
	    error(ERR_NONFATAL, "division by zero");
	    return NULL;
	}
	switch (j) {
	  case '*':
	    if (is_simple(e))
		e = scalar_mult (f, reloc_value(e));
	    else if (is_simple(f))
		e = scalar_mult (e, reloc_value(f));
	    else {
		error(ERR_NONFATAL, "unable to multiply two "
		      "non-scalar objects");
		return NULL;
	    }
	    break;
	  case '/':
	    e = scalarvect (((unsigned long)reloc_value(e)) /
			    ((unsigned long)reloc_value(f)));
	    break;
	  case '%':
	    e = scalarvect (((unsigned long)reloc_value(e)) %
			    ((unsigned long)reloc_value(f)));
	    break;
	  case TOKEN_SDIV:
	    e = scalarvect (((signed long)reloc_value(e)) /
			    ((signed long)reloc_value(f)));
	    break;
	  case TOKEN_SMOD:
	    e = scalarvect (((signed long)reloc_value(e)) %
			    ((signed long)reloc_value(f)));
	    break;
	}
    }
    return e;
}

static expr *expr6(int critical) {
    expr *e;
    long label_seg, label_ofs;

    if (i == '-') {
	i = nexttoken();
	e = expr6(critical);
	if (!e)
	    return NULL;
	return scalar_mult (e, -1L);
    } else if (i == '+') {
	i = nexttoken();
	return expr6(critical);
    } else if (i == '~') {
	i = nexttoken();
	e = expr6(critical);
	if (!e)
	    return NULL;
	if (!is_simple(e)) {
	    error(ERR_NONFATAL, "`~' operator may only be applied to"
		  " scalar values");
	    return NULL;
	}
	return scalarvect(~reloc_value(e));
    } else if (i == TOKEN_SEG) {
	i = nexttoken();
	e = expr6(critical);
	if (!e)
	    return NULL;
	return segment_part(e);
    } else if (i == '(') {
	i = nexttoken();
	e = expr0(critical);
	if (!e)
	    return NULL;
	if (i != ')') {
	    error(ERR_NONFATAL, "expecting `)'");
	    return NULL;
	}
	i = nexttoken();
	return e;
    } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID ||
	       i == TOKEN_HERE || i == TOKEN_BASE) {
	e = tempptr;
	switch (i) {
	  case TOKEN_NUM:
	    e->type = EXPR_SIMPLE;
	    e->value = tokval.t_integer;
	    break;
	  case TOKEN_REG:
	    e->type = tokval.t_integer;
	    e->value = 1;
	    break;
	  case TOKEN_ID:
	  case TOKEN_HERE:
	  case TOKEN_BASE:
	    /*
	     * Since the whole line is parsed before the label it
	     * defines is given to the label manager, we have
	     * problems with lines such as
	     *
	     *   end: TIMES 512-(end-start) DB 0
	     *
	     * where `end' is not known on pass one, despite not
	     * really being a forward reference, and due to
	     * criticality it is _needed_. Hence we check our label
	     * against the currently defined one, and do our own
	     * resolution of it if we have to.
	     */
	    if (i == TOKEN_BASE) {
		label_seg = seg;
		label_ofs = 0;
	    } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) {
		label_seg = seg;
		label_ofs = ofs;
	    } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
		if (critical == 2) {
		    error (ERR_NONFATAL, "symbol `%s' undefined",
			   tokval.t_charptr);
		    return NULL;
		} else if (critical == 1) {
		    error (ERR_NONFATAL, "symbol `%s' not defined before use",
			   tokval.t_charptr);
		    return NULL;
		} else {
		    forward = TRUE;
		    label_seg = seg;
		    label_ofs = ofs;
		}
	    }
	    e->type = EXPR_SIMPLE;
	    e->value = label_ofs;
	    if (label_seg!=NO_SEG) {
		tempptr++;
		tempptr->type = EXPR_SEGBASE + label_seg;
		tempptr->value = 1;
	    }
	    break;
	}
	tempptr++;
	tempptr->type = 0;
	tempptr++;
	i = nexttoken();
	return e;
    } else {
	error(ERR_NONFATAL, "expression syntax error");
	return NULL;
    }
}

static expr *evaluate (int critical) {
    expr *e;
    expr *f = NULL;

    e = expr0 (critical);
    if (!e)
	return NULL;

    if (i == TOKEN_WRT) {
	if (!is_reloc(e)) {
	    error(ERR_NONFATAL, "invalid left-hand operand to WRT");
	    return NULL;
	}
	i = nexttoken();	       /* eat the WRT */
	f = expr6 (critical);
	if (!f)
	    return NULL;
    }
    e = scalar_mult (e, 1L);	       /* strip far-absolute segment part */
    if (f) {
	expr *g = tempptr++;
	tempptr++->type = 0;
	g->type = EXPR_WRT;
	if (!is_reloc(f)) {
	    error(ERR_NONFATAL, "invalid right-hand operand to WRT");
	    return NULL;
	}
	g->value = reloc_seg(f);
	if (g->value == NO_SEG)
	    g->value = reloc_value(f) | SEG_ABS;
	else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
	    error(ERR_NONFATAL, "invalid right-hand operand to WRT");
	    return NULL;
	}
	e = add_vectors (e, g);
    }
    return e;
}