mirror of
https://github.com/netwide-assembler/nasm.git
synced 2024-11-27 08:10:07 +08:00
preprocessor: major cleanups; inline text into Token
Major cleanups of the preprocessor. In particular, the block-allocation of Token is pretty ridiculous since nearly every token requires a text allocation anyway. Change the definition of Token so that only very long tokens (48+ characters on 64-bit systems) need to be stored out of line. If malloc() preserves alignment (XXX: glibc doesn't) then this means that each Token will fit in a cache line. Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
parent
f7dbdb2e13
commit
8571f06061
1583
asm/preproc.c
1583
asm/preproc.c
File diff suppressed because it is too large
Load Diff
@ -83,7 +83,7 @@ void stdscan_cleanup(void)
|
||||
nasm_free(stdscan_tempstorage);
|
||||
}
|
||||
|
||||
static char *stdscan_copy(char *p, int len)
|
||||
static char *stdscan_copy(const char *p, int len)
|
||||
{
|
||||
char *text;
|
||||
|
||||
@ -124,7 +124,7 @@ static int stdscan_handle_brace(struct tokenval *tv)
|
||||
|
||||
int stdscan(void *private_data, struct tokenval *tv)
|
||||
{
|
||||
char ourcopy[MAX_KEYWORD + 1], *r, *s;
|
||||
const char *r;
|
||||
|
||||
(void)private_data; /* Don't warn that this parameter is unused */
|
||||
|
||||
@ -156,13 +156,7 @@ int stdscan(void *private_data, struct tokenval *tv)
|
||||
if (is_sym || stdscan_bufptr - r > MAX_KEYWORD)
|
||||
return tv->t_type = TOKEN_ID; /* bypass all other checks */
|
||||
|
||||
for (s = tv->t_charptr, r = ourcopy; *s; s++)
|
||||
*r++ = nasm_tolower(*s);
|
||||
*r = '\0';
|
||||
/* right, so we have an identifier sitting in temp storage. now,
|
||||
* is it actually a register or instruction name, or what? */
|
||||
token_type = nasm_token_hash(ourcopy, tv);
|
||||
|
||||
token_type = nasm_token_hash(tv->t_charptr, tv);
|
||||
if (unlikely(tv->t_flag & TFLAG_WARN)) {
|
||||
/*!
|
||||
*!ptr [on] non-NASM keyword used in other assemblers
|
||||
@ -293,14 +287,8 @@ int stdscan(void *private_data, struct tokenval *tv)
|
||||
|
||||
stdscan_bufptr++; /* skip closing brace */
|
||||
|
||||
for (s = tv->t_charptr, r = ourcopy; *s; s++)
|
||||
*r++ = nasm_tolower(*s);
|
||||
*r = '\0';
|
||||
|
||||
/* right, so we have a decorator sitting in temp storage. */
|
||||
nasm_token_hash(ourcopy, tv);
|
||||
|
||||
/* handle tokens inside braces */
|
||||
nasm_token_hash(tv->t_charptr, tv);
|
||||
return stdscan_handle_brace(tv);
|
||||
} else if (*stdscan_bufptr == ';') {
|
||||
/* a comment has happened - stay */
|
||||
@ -332,8 +320,13 @@ int stdscan(void *private_data, struct tokenval *tv)
|
||||
stdscan_bufptr += 2;
|
||||
return tv->t_type = TOKEN_NE;
|
||||
} else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') {
|
||||
if (stdscan_bufptr[2] == '>') {
|
||||
stdscan_bufptr += 3;
|
||||
return tv->t_type = TOKEN_LEG;
|
||||
} else {
|
||||
stdscan_bufptr += 2;
|
||||
return tv->t_type = TOKEN_LE;
|
||||
}
|
||||
} else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') {
|
||||
stdscan_bufptr += 2;
|
||||
return tv->t_type = TOKEN_GE;
|
||||
|
@ -149,3 +149,45 @@ z
|
||||
evex
|
||||
vex3
|
||||
vex2
|
||||
|
||||
# Multi-character operators. Used in ppscan().
|
||||
% TOKEN_SHR, 0, 0, 0
|
||||
>>
|
||||
|
||||
% TOKEN_SAR, 0, 0, 0
|
||||
>>>
|
||||
|
||||
% TOKEN_SHL, 0, 0, 0
|
||||
<<
|
||||
<<<
|
||||
|
||||
% TOKEN_SDIV, 0, 0, 0
|
||||
//
|
||||
|
||||
% TOKEN_SMOD, 0, 0, 0
|
||||
%%
|
||||
|
||||
% TOKEN_EQ, 0, 0, 0
|
||||
==
|
||||
|
||||
% TOKEN_NE, 0, 0, 0
|
||||
!=
|
||||
<>
|
||||
|
||||
% TOKEN_LE, 0, 0, 0
|
||||
<=
|
||||
|
||||
% TOKEN_GE, 0, 0, 0
|
||||
>=
|
||||
|
||||
% TOKEN_LEG, 0, 0, 0
|
||||
<=>
|
||||
|
||||
% TOKEN_DBL_AND, 0, 0, 0
|
||||
&&
|
||||
|
||||
% TOKEN_DBL_OR, 0, 0, 0
|
||||
||
|
||||
|
||||
% TOKEN_DBL_XOR, 0, 0, 0
|
||||
^^
|
||||
|
@ -129,9 +129,10 @@ close(RD);
|
||||
#
|
||||
open(TD, '<', $tokens_dat) or die "$0: cannot open $tokens_dat: $!\n";
|
||||
while (defined($line = <TD>)) {
|
||||
$line =~ s/\s*(|\#.*)$//;
|
||||
if ($line =~ /^\%\s+(.*)$/) {
|
||||
$pattern = $1;
|
||||
} elsif ($line =~ /^([\?\@\.a-z0-9_-]+)/) {
|
||||
} elsif ($line =~ /^(\S+)/) {
|
||||
$token = $1;
|
||||
|
||||
if (defined($tokens{$token})) {
|
||||
@ -257,19 +258,23 @@ if ($output eq 'h') {
|
||||
print " };\n";
|
||||
|
||||
print " uint32_t k1, k2;\n";
|
||||
print " uint64_t crc;\n";
|
||||
print " size_t len;\n";
|
||||
# For correct overflow behavior, "ix" should be unsigned of the same
|
||||
# width as the hash arrays.
|
||||
print " uint16_t ix;\n";
|
||||
print " const struct tokendata *data;\n";
|
||||
printf " char lcbuf[%d];\n", $max_len+1;
|
||||
print " const char *p = token;\n";
|
||||
print " char c, *q = lcbuf;\n";
|
||||
print " size_t len = 0;\n";
|
||||
printf " uint64_t crc = UINT64_C(0x%08x%08x);\n", $$sv[0], $$sv[1];
|
||||
print "\n";
|
||||
print " len = strlen(token);\n";
|
||||
print " if (unlikely(len > $max_len))\n";
|
||||
print " while ((c = *p++)) {\n";
|
||||
printf " if (++len > %d)\n", $max_len;
|
||||
print " goto notfound;\n";
|
||||
print " *q++ = c = nasm_tolower(c);\n";
|
||||
print " crc = crc64_byte(crc, c);\n";
|
||||
print " };\n";
|
||||
print "\n";
|
||||
printf " crc = crc64b(UINT64_C(0x%08x%08x), token, len);\n",
|
||||
$$sv[0], $$sv[1];
|
||||
print " k1 = (uint32_t)crc;\n";
|
||||
print " k2 = (uint32_t)(crc >> 32);\n";
|
||||
print "\n";
|
||||
@ -278,7 +283,9 @@ if ($output eq 'h') {
|
||||
print " goto notfound;\n";
|
||||
print "\n";
|
||||
print " data = &tokendata[ix];\n";
|
||||
print " if (data->len != len || memcmp(data->string, token, len))\n";
|
||||
print " if (data->len != len)\n";
|
||||
print " goto notfound;\n";
|
||||
print " if (memcmp(data->string, lcbuf, len))\n";
|
||||
print " goto notfound;\n";
|
||||
print "\n";
|
||||
print " tv->t_integer = data->num;\n";
|
||||
|
@ -73,6 +73,12 @@ uint64_t crc64b(uint64_t crc, const void *data, size_t len);
|
||||
uint64_t crc64ib(uint64_t crc, const void *data, size_t len);
|
||||
#define CRC64_INIT UINT64_C(0xffffffffffffffff)
|
||||
|
||||
static inline uint64_t crc64_byte(uint64_t crc, uint8_t v)
|
||||
{
|
||||
extern const uint64_t crc64_tab[256];
|
||||
return crc64_tab[(uint8_t)(v ^ crc)] ^ (crc >> 8);
|
||||
}
|
||||
|
||||
void **hash_find(struct hash_table *head, const char *string,
|
||||
struct hash_insert *insert);
|
||||
void **hash_findb(struct hash_table *head, const void *key, size_t keylen,
|
||||
|
@ -35,7 +35,7 @@
|
||||
#include "nctype.h"
|
||||
#include "hashtbl.h"
|
||||
|
||||
static const uint64_t crc64_tab[256] = {
|
||||
const uint64_t crc64_tab[256] = {
|
||||
UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979),
|
||||
UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b),
|
||||
UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6),
|
||||
@ -170,9 +170,8 @@ uint64_t crc64(uint64_t crc, const char *str)
|
||||
{
|
||||
uint8_t c;
|
||||
|
||||
while ((c = *str++) != 0) {
|
||||
crc = crc64_tab[(uint8_t)crc ^ c] ^ (crc >> 8);
|
||||
}
|
||||
while ((c = *str++) != 0)
|
||||
crc = crc64_byte(crc, c);
|
||||
|
||||
return crc;
|
||||
}
|
||||
@ -181,9 +180,8 @@ uint64_t crc64i(uint64_t crc, const char *str)
|
||||
{
|
||||
uint8_t c;
|
||||
|
||||
while ((c = *str++) != 0) {
|
||||
crc = crc64_tab[(uint8_t)crc ^ nasm_tolower(c)] ^ (crc >> 8);
|
||||
}
|
||||
while ((c = *str++) != 0)
|
||||
crc = crc64_byte(crc, nasm_tolower(c));
|
||||
|
||||
return crc;
|
||||
}
|
||||
@ -192,9 +190,8 @@ uint64_t crc64b(uint64_t crc, const void *data, size_t len)
|
||||
{
|
||||
const uint8_t *str = data;
|
||||
|
||||
while (len--) {
|
||||
crc = crc64_tab[(uint8_t)crc ^ *str++] ^ (crc >> 8);
|
||||
}
|
||||
while (len--)
|
||||
crc = crc64_byte(crc, *str++);
|
||||
|
||||
return crc;
|
||||
}
|
||||
@ -203,9 +200,8 @@ uint64_t crc64ib(uint64_t crc, const void *data, size_t len)
|
||||
{
|
||||
const uint8_t *str = data;
|
||||
|
||||
while (len--) {
|
||||
crc = crc64_tab[(uint8_t)crc ^ nasm_tolower(*str++)] ^ (crc >> 8);
|
||||
}
|
||||
while (len--)
|
||||
crc = crc64_byte(crc, nasm_tolower(*str++));
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user