gas: consolidate whitespace recognition

Let's extend lex_type[] to also cover whitespace, then having a simple
macro to uniformly recognize both blanks and tabs (and \r when it's not
EOL) as such.

In macro.c use sb_skip_white() as appropriate, instead of open-coding
it.
This commit is contained in:
Jan Beulich 2025-02-03 11:48:55 +01:00
parent a450dd002f
commit e8efdd32b5
7 changed files with 43 additions and 51 deletions

View File

@ -250,7 +250,7 @@ get_mri_string (int terminator, int *len)
&& ! is_end_of_line[(unsigned char) *input_line_pointer])
++input_line_pointer;
s = input_line_pointer;
while (s > ret && (s[-1] == ' ' || s[-1] == '\t'))
while (s > ret && is_whitespace (s[-1]))
--s;
}

View File

@ -1432,7 +1432,7 @@ operand (expressionS *expressionP, enum expr_mode mode)
created. Doing it here saves lines of code. */
clean_up_expression (expressionP);
SKIP_ALL_WHITESPACE (); /* -> 1st char after operand. */
know (*input_line_pointer != ' ');
know (!is_whitespace (*input_line_pointer));
/* The PA port needs this information. */
if (expressionP->X_add_symbol)
@ -1854,7 +1854,7 @@ expr (int rankarg, /* Larger # is higher rank. */
retval = operand (resultP, mode);
/* operand () gobbles spaces. */
know (*input_line_pointer != ' ');
know (!is_whitespace (*input_line_pointer));
op_left = operatorf (&op_chars);
while (op_left != O_illegal && op_rank[(int) op_left] > rank)
@ -1876,7 +1876,7 @@ expr (int rankarg, /* Larger # is higher rank. */
right.X_op_symbol = NULL;
}
know (*input_line_pointer != ' ');
know (!is_whitespace (*input_line_pointer));
if (op_left == O_index)
{

View File

@ -1152,7 +1152,7 @@ debugging_pseudo (list_info_type *list ATTRIBUTE_UNUSED, const char *line)
in_debug = false;
#endif
while (ISSPACE (*line))
while (is_whitespace (*line))
line++;
if (*line != '.')

View File

@ -29,10 +29,8 @@
/* The routines in this file handle macro definition and expansion.
They are called by gas. */
#define ISWHITE(x) ((x) == ' ' || (x) == '\t')
#define ISSEP(x) \
((x) == ' ' || (x) == '\t' || (x) == ',' || (x) == '"' || (x) == ';' \
(is_whitespace (x) || (x) == ',' || (x) == '"' || (x) == ';' \
|| (x) == ')' || (x) == '(' \
|| ((flag_macro_alternate || flag_mri) && ((x) == '<' || (x) == '>')))
@ -139,8 +137,7 @@ buffer_and_nest (const char *from, const char *to, sb *ptr,
if (! LABELS_WITHOUT_COLONS)
{
/* Skip leading whitespace. */
while (i < ptr->len && ISWHITE (ptr->ptr[i]))
i++;
i = sb_skip_white (i, ptr);
}
for (;;)
@ -154,8 +151,7 @@ buffer_and_nest (const char *from, const char *to, sb *ptr,
if (i < ptr->len && is_name_ender (ptr->ptr[i]))
i++;
/* Skip whitespace. */
while (i < ptr->len && ISWHITE (ptr->ptr[i]))
i++;
i = sb_skip_white (i, ptr);
/* Check for the colon. */
if (i >= ptr->len || ptr->ptr[i] != ':')
{
@ -174,8 +170,7 @@ buffer_and_nest (const char *from, const char *to, sb *ptr,
}
/* Skip trailing whitespace. */
while (i < ptr->len && ISWHITE (ptr->ptr[i]))
i++;
i = sb_skip_white (i, ptr);
if (i < ptr->len && (ptr->ptr[i] == '.'
|| NO_PSEUDO_DOT
@ -424,9 +419,7 @@ get_any_string (size_t idx, sb *in, sb *out)
*in_br = '\0';
while (idx < in->len
&& (*in_br
|| (in->ptr[idx] != ' '
&& in->ptr[idx] != '\t'))
&& (*in_br || !is_whitespace (in->ptr[idx]))
&& in->ptr[idx] != ','
&& (in->ptr[idx] != '<'
|| (! flag_macro_alternate && ! flag_mri)))
@ -916,7 +909,7 @@ macro_expand_body (sb *in, sb *out, formal_entry *formals,
if (! macro
|| src + 5 >= in->len
|| strncasecmp (in->ptr + src, "LOCAL", 5) != 0
|| ! ISWHITE (in->ptr[src + 5])
|| ! is_whitespace (in->ptr[src + 5])
/* PR 11507: Skip keyword LOCAL if it is found inside a quoted string. */
|| inquote)
{
@ -1069,9 +1062,7 @@ macro_expand (size_t idx, sb *in, macro_entry *m, sb *out)
/* The Microtec assembler ignores this if followed by a white space.
(Macro invocation with empty extension) */
idx++;
if ( idx < in->len
&& in->ptr[idx] != ' '
&& in->ptr[idx] != '\t')
if (idx < in->len && !is_whitespace (in->ptr[idx]))
{
formal_entry *n = new_formal ();
@ -1192,7 +1183,7 @@ macro_expand (size_t idx, sb *in, macro_entry *m, sb *out)
{
if (idx < in->len && in->ptr[idx] == ',')
++idx;
if (idx < in->len && ISWHITE (in->ptr[idx]))
if (idx < in->len && is_whitespace (in->ptr[idx]))
break;
}
}

View File

@ -76,6 +76,12 @@ bool input_from_string = false;
die horribly;
#endif
#ifndef CR_EOL
#define LEX_CR LEX_WHITE
#else
#define LEX_CR 0
#endif
#ifndef LEX_AT
#define LEX_AT 0
#endif
@ -112,9 +118,9 @@ die horribly;
/* Used by is_... macros. our ctype[]. */
char lex_type[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @ABCDEFGHIJKLMNO */
0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, LEX_CR, 0, 0, /* @ABCDEFGHIJKLMNO */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* PQRSTUVWXYZ[\]^_ */
0, 0, 0, LEX_HASH, LEX_DOLLAR, LEX_PCT, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, /* _!"#$%&'()*+,-./ */
8, 0, 0, LEX_HASH, LEX_DOLLAR, LEX_PCT, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, /* _!"#$%&'()*+,-./ */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, LEX_QM, /* 0123456789:;<=>? */
LEX_AT, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* @ABCDEFGHIJKLMNO */
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, LEX_BR, 0, LEX_BR, 0, 3, /* PQRSTUVWXYZ[\]^_ */
@ -1068,11 +1074,11 @@ read_a_source_file (const char *name)
if (*rest == ':')
++rest;
if (*rest == ' ' || *rest == '\t')
if (is_whitespace (*rest))
++rest;
if ((strncasecmp (rest, "EQU", 3) == 0
|| strncasecmp (rest, "SET", 3) == 0)
&& (rest[3] == ' ' || rest[3] == '\t'))
&& is_whitespace (rest[3]))
{
input_line_pointer = rest + 3;
equals (line_start,
@ -1080,8 +1086,7 @@ read_a_source_file (const char *name)
continue;
}
if (strncasecmp (rest, "MACRO", 5) == 0
&& (rest[5] == ' '
|| rest[5] == '\t'
&& (is_whitespace (rest[5])
|| is_end_of_line[(unsigned char) rest[5]]))
mri_line_macro = 1;
}
@ -1117,7 +1122,7 @@ read_a_source_file (const char *name)
level. */
do
nul_char = next_char = *input_line_pointer++;
while (next_char == '\t' || next_char == ' ' || next_char == '\f');
while (is_whitespace (next_char) || next_char == '\f');
/* C is the 1st significant character.
Input_line_pointer points after that character. */
@ -1146,12 +1151,12 @@ read_a_source_file (const char *name)
if (*rest == ':')
++rest;
if (*rest == ' ' || *rest == '\t')
if (is_whitespace (*rest))
++rest;
if ((strncasecmp (rest, "EQU", 3) == 0
|| strncasecmp (rest, "SET", 3) == 0)
&& (rest[3] == ' ' || rest[3] == '\t'))
&& is_whitespace (rest[3]))
{
input_line_pointer = rest + 3;
equals (s, 1);
@ -1169,7 +1174,7 @@ read_a_source_file (const char *name)
SKIP_WHITESPACE ();
}
else if ((next_char == '=' && *rest == '=')
|| ((next_char == ' ' || next_char == '\t')
|| (is_whitespace (next_char)
&& rest[0] == '='
&& rest[1] == '='))
{
@ -1177,7 +1182,7 @@ read_a_source_file (const char *name)
demand_empty_rest_of_line ();
}
else if ((next_char == '='
|| ((next_char == ' ' || next_char == '\t')
|| (is_whitespace (next_char)
&& *rest == '='))
#ifdef TC_EQUAL_IN_INSN
&& !TC_EQUAL_IN_INSN (next_char, s)
@ -1284,7 +1289,7 @@ read_a_source_file (const char *name)
/* The following skip of whitespace is compulsory.
A well shaped space is sometimes all that separates
keyword from operands. */
if (next_char == ' ' || next_char == '\t')
if (is_whitespace (next_char))
input_line_pointer++;
/* Input_line is restored.
@ -1497,7 +1502,7 @@ mri_comment_field (char *stopcp)
know (flag_m68k_mri);
for (s = input_line_pointer;
((!is_end_of_line[(unsigned char) *s] && *s != ' ' && *s != '\t')
((!is_end_of_line[(unsigned char) *s] && !is_whitespace (*s))
|| inquote);
s++)
{
@ -6321,7 +6326,7 @@ equals (char *sym_name, int reassign)
if (reassign < 0 && *input_line_pointer == '=')
input_line_pointer++;
while (*input_line_pointer == ' ' || *input_line_pointer == '\t')
while (is_whitespace (*input_line_pointer))
input_line_pointer++;
if (flag_mri)
@ -6495,8 +6500,7 @@ s_include (int arg ATTRIBUTE_UNUSED)
SKIP_WHITESPACE ();
i = 0;
while (!is_end_of_line[(unsigned char) *input_line_pointer]
&& *input_line_pointer != ' '
&& *input_line_pointer != '\t')
&& !is_whitespace (*input_line_pointer))
{
obstack_1grow (&notes, *input_line_pointer);
++input_line_pointer;

View File

@ -29,17 +29,18 @@ extern bool input_from_string;
#ifdef PERMIT_WHITESPACE
#define SKIP_WHITESPACE() \
((*input_line_pointer == ' ') ? ++input_line_pointer : 0)
(is_whitespace (*input_line_pointer) ? ++input_line_pointer : 0)
#define SKIP_ALL_WHITESPACE() \
while (*input_line_pointer == ' ') ++input_line_pointer
while (is_whitespace (*input_line_pointer)) ++input_line_pointer
#else
#define SKIP_WHITESPACE() know (*input_line_pointer != ' ' )
#define SKIP_WHITESPACE() know (!is_whitespace (*input_line_pointer))
#define SKIP_ALL_WHITESPACE() SKIP_WHITESPACE()
#endif
#define LEX_NAME (1) /* may continue a name */
#define LEX_NAME (1) /* may continue a name */
#define LEX_BEGIN_NAME (2) /* may begin a name */
#define LEX_END_NAME (4) /* ends a name */
#define LEX_WHITE (8) /* whitespace */
#define is_name_beginner(c) \
( lex_type[(unsigned char) (c)] & LEX_BEGIN_NAME )
@ -47,6 +48,8 @@ extern bool input_from_string;
( lex_type[(unsigned char) (c)] & LEX_NAME )
#define is_name_ender(c) \
( lex_type[(unsigned char) (c)] & LEX_END_NAME )
#define is_whitespace(c) \
( lex_type[(unsigned char) (c)] & LEX_WHITE )
/* The distinction of "line" and "statement" sadly is blurred by unhelpful
naming of e.g. the underlying array. Most users really mean "end of

View File

@ -215,9 +215,7 @@ sb_terminate (sb *in)
size_t
sb_skip_white (size_t idx, sb *ptr)
{
while (idx < ptr->len
&& (ptr->ptr[idx] == ' '
|| ptr->ptr[idx] == '\t'))
while (idx < ptr->len && is_whitespace (ptr->ptr[idx]))
idx++;
return idx;
}
@ -229,18 +227,14 @@ sb_skip_white (size_t idx, sb *ptr)
size_t
sb_skip_comma (size_t idx, sb *ptr)
{
while (idx < ptr->len
&& (ptr->ptr[idx] == ' '
|| ptr->ptr[idx] == '\t'))
while (idx < ptr->len && is_whitespace (ptr->ptr[idx]))
idx++;
if (idx < ptr->len
&& ptr->ptr[idx] == ',')
idx++;
while (idx < ptr->len
&& (ptr->ptr[idx] == ' '
|| ptr->ptr[idx] == '\t'))
while (idx < ptr->len && is_whitespace (ptr->ptr[idx]))
idx++;
return idx;