mirror of
https://git.openldap.org/openldap/openldap.git
synced 2025-01-30 13:30:57 +08:00
433 lines
11 KiB
C
433 lines
11 KiB
C
/*++
|
|
/* NAME
|
|
/* tok_class 3
|
|
/* SUMMARY
|
|
/* token classification
|
|
/* PACKAGE
|
|
/* unproto
|
|
/* SYNOPSIS
|
|
/* #include "token.h"
|
|
/*
|
|
/* void tok_unget(t)
|
|
/* struct token *t;
|
|
/*
|
|
/* struct token *tok_class()
|
|
/* DESCRIPTION
|
|
/* tok_class() collects single and composite tokens, and
|
|
/* recognizes keywords.
|
|
/* At present, the only composite tokens are ()-delimited,
|
|
/* comma-separated lists, and non-whitespace tokens with attached
|
|
/* whitespace or comment tokens.
|
|
/*
|
|
/* Source transformations are: __DATE__ and __TIME__ are rewritten
|
|
/* to string constants with the current date and time, respectively.
|
|
/* Multiple string constants are concatenated. Optionally, "void *"
|
|
/* is mapped to "char *", and plain "void" to "int".
|
|
/*
|
|
/* tok_unget() implements an arbitrary amount of token pushback.
|
|
/* Only tokens obtained through tok_class() should be given to
|
|
/* tok_unget(). This function accepts a list of tokens in
|
|
/* last-read-first order.
|
|
/* DIAGNOSTICS
|
|
/* The code complains if input terminates in the middle of a list.
|
|
/* BUGS
|
|
/* Does not preserve white space at the beginning of a list element
|
|
/* or after the end of a list.
|
|
/* AUTHOR(S)
|
|
/* Wietse Venema
|
|
/* Eindhoven University of Technology
|
|
/* Department of Mathematics and Computer Science
|
|
/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
|
|
/* LAST MODIFICATION
|
|
/* 92/01/15 21:53:02
|
|
/* VERSION/RELEASE
|
|
/* 1.4
|
|
/*--*/
|
|
|
|
static char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02";
|
|
|
|
/* C library */
|
|
|
|
#include <stdio.h>
|
|
|
|
extern char *strcpy();
|
|
extern long time();
|
|
extern char *ctime();
|
|
|
|
/* Application-specific stuff */
|
|
|
|
#include "error.h"
|
|
#include "vstring.h"
|
|
#include "token.h"
|
|
#include "symbol.h"
|
|
|
|
/*
 * Forward declarations of the file-local helpers, listed in the order in
 * which they are defined below. Old-style (parameterless) declarations are
 * used throughout this file.
 */

static void tok_space_append();
static struct token *tok_list();
static void tok_list_struct();
static void tok_strcat();

#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
static void tok_void();                 /* rewrite void keyword */
#endif

static void tok_time();
static void tok_date();
static void tok_list_append();

/*
 * Token push-back storage: head of the chain of tokens that were handed to
 * tok_unget() and that tok_class() returns before reading new input.
 */

static struct token *tok_buf = 0;
|
|
|
|
/*
 * TOK_PREPEND - add token to LIFO queue, return head
 *
 * Links token t in front of list and then makes list refer to t. The value
 * of the whole expression is the new head of the queue. Both arguments and
 * the expansion itself are parenthesized so that arbitrary pointer
 * expressions (for example &tok) can be passed safely. The list argument
 * must be a modifiable lvalue, and each argument is evaluated twice, so
 * arguments with side effects should be avoided.
 */

#define TOK_PREPEND(list,t)     ((t)->next = (list), (list) = (t))
|
|
|
|
/* tok_space_append - append trailing space except at start of or after list */
|
|
|
|
static void tok_space_append(list, t)
|
|
register struct token *list;
|
|
register struct token *t;
|
|
{
|
|
|
|
/*
|
|
* The head/tail fields of a token do triple duty. They are used to keep
|
|
* track of the members that make up a (list); to keep track of the
|
|
* non-blank tokens that make up one list member; and, finally, to tack
|
|
* whitespace and comment tokens onto the non-blank tokens that make up
|
|
* one list member.
|
|
*
|
|
* Within a (list), white space and comment tokens are always tacked onto
|
|
* the non-blank tokens to avoid parsing complications later on. For this
|
|
* reason, blanks and comments at the beginning of a list member are
|
|
* discarded because there is no token to tack them onto. (Well, we could
|
|
* start each list member with a dummy token, but that would mess up the
|
|
* whole unprototyper).
|
|
*
|
|
* Blanks or comments that follow a (list) are discarded, because the
|
|
* head/tail fields of a (list) are already being used for other
|
|
* purposes.
|
|
*
|
|
* Newlines within a (list) are discarded because they can mess up the
|
|
* output when we rewrite function headers. The output routines will
|
|
* regenerate discarded newlines, anyway.
|
|
*/
|
|
|
|
if (list == 0 || list->tokno == TOK_LIST) {
|
|
tok_free(t);
|
|
} else {
|
|
tok_list_append(list, t);
|
|
}
|
|
}
|
|
|
|
/* tok_class - discriminate single tokens, keywords, and composite tokens */
|
|
|
|
struct token *tok_class()
|
|
{
|
|
register struct token *t;
|
|
register struct symbol *s;
|
|
|
|
/*
|
|
* Use push-back token, if available. Push-back tokens are already
|
|
* canonical and can be passed on to the caller without further
|
|
* inspection.
|
|
*/
|
|
|
|
if (t = tok_buf) {
|
|
tok_buf = t->next;
|
|
t->next = 0;
|
|
return (t);
|
|
}
|
|
/* Read a new token and canonicalize it. */
|
|
|
|
if (t = tok_get()) {
|
|
switch (t->tokno) {
|
|
case '(': /* beginning of list */
|
|
t = tok_list(t);
|
|
break;
|
|
case TOK_WORD: /* look up keyword */
|
|
if ((s = sym_find(t->vstr->str))) {
|
|
switch (s->type) {
|
|
case TOK_TIME: /* map __TIME__ to string */
|
|
tok_time(t);
|
|
tok_strcat(t); /* look for more strings */
|
|
break;
|
|
case TOK_DATE: /* map __DATE__ to string */
|
|
tok_date(t);
|
|
tok_strcat(t); /* look for more strings */
|
|
break;
|
|
#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
|
|
case TOK_VOID: /* optionally map void types */
|
|
tok_void(t);
|
|
break;
|
|
#endif
|
|
default: /* other keyword */
|
|
t->tokno = s->type;
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
case '"': /* string, look for more */
|
|
tok_strcat(t);
|
|
break;
|
|
}
|
|
}
|
|
return (t);
|
|
}
|
|
|
|
/* tok_list - collect ()-delimited, comma-separated list of tokens */
|
|
|
|
static struct token *tok_list(t)
|
|
struct token *t;
|
|
{
|
|
register struct token *list = tok_alloc();
|
|
char *filename;
|
|
int lineno;
|
|
|
|
/* Save context of '(' for diagnostics. */
|
|
|
|
filename = t->path;
|
|
lineno = t->line;
|
|
|
|
list->tokno = TOK_LIST;
|
|
list->head = list->tail = t;
|
|
list->path = t->path;
|
|
list->line = t->line;
|
|
#ifdef DEBUG
|
|
strcpy(list->vstr->str, "LIST");
|
|
#endif
|
|
|
|
/*
|
|
* Read until the matching ')' is found, accounting for structured stuff
|
|
* (enclosed by '{' and '}' tokens). Break the list up at each ',' token,
|
|
* and try to preserve as much whitespace as possible. Newlines are
|
|
* discarded so that they will not mess up the layout when we rewrite
|
|
* argument lists. The output routines will regenerate discarded
|
|
* newlines.
|
|
*/
|
|
|
|
while (t = tok_class()) { /* skip blanks */
|
|
switch (t->tokno) {
|
|
case ')': /* end of list */
|
|
tok_list_append(list, t);
|
|
return (list);
|
|
case '{': /* struct/union type */
|
|
tok_list_struct(list->tail, t);
|
|
break;
|
|
case TOK_WSPACE: /* preserve trailing blanks */
|
|
tok_space_append(list->tail->tail, t); /* except after list */
|
|
break;
|
|
case '\n': /* fix newlines later */
|
|
tok_free(t);
|
|
break;
|
|
case ',': /* list separator */
|
|
tok_list_append(list, t);
|
|
break;
|
|
default: /* other */
|
|
tok_list_append(list->tail, t);
|
|
break;
|
|
}
|
|
}
|
|
error_where(filename, lineno, "unmatched '('");
|
|
return (list); /* do not waste any data */
|
|
}
|
|
|
|
/* tok_list_struct - collect structured type info within list */
|
|
|
|
static void tok_list_struct(list, t)
|
|
register struct token *list;
|
|
register struct token *t;
|
|
{
|
|
char *filename;
|
|
int lineno;
|
|
|
|
/*
|
|
* Save context of '{' for diagnostics. This routine is called by the one
|
|
* that collects list members. If the '}' is not found, the list
|
|
* collector will not see the closing ')' either.
|
|
*/
|
|
|
|
filename = t->path;
|
|
lineno = t->line;
|
|
|
|
tok_list_append(list, t);
|
|
|
|
/*
|
|
* Collect tokens until the matching '}' is found. Try to preserve as
|
|
* much whitespace as possible. Newlines are discarded so that they do
|
|
* not interfere when rewriting argument lists. The output routines will
|
|
* regenerate discarded newlines.
|
|
*/
|
|
|
|
while (t = tok_class()) {
|
|
switch (t->tokno) {
|
|
case TOK_WSPACE: /* preserve trailing blanks */
|
|
tok_space_append(list->tail, t); /* except after list */
|
|
break;
|
|
case '\n': /* fix newlines later */
|
|
tok_free(t);
|
|
break;
|
|
case '{': /* recurse */
|
|
tok_list_struct(list, t);
|
|
break;
|
|
case '}': /* done */
|
|
tok_list_append(list, t);
|
|
return;
|
|
default: /* other */
|
|
tok_list_append(list, t);
|
|
break;
|
|
}
|
|
}
|
|
error_where(filename, lineno, "unmatched '{'");
|
|
}
|
|
|
|
/* tok_strcat - concatenate multiple string constants */
|
|
|
|
static void tok_strcat(t1)
|
|
register struct token *t1;
|
|
{
|
|
register struct token *t2;
|
|
register struct token *lookahead = 0;
|
|
|
|
/*
|
|
* Read ahead past whitespace, comments and newlines. If we find a string
|
|
* token, concatenate it with the previous one and push back the
|
|
* intervening tokens (thus preserving as much information as possible).
|
|
* If we find something else, push back all lookahead tokens.
|
|
*/
|
|
|
|
#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; }
|
|
|
|
while (t2 = tok_class()) {
|
|
switch (t2->tokno) {
|
|
case TOK_WSPACE: /* read past comments/blanks */
|
|
case '\n': /* read past newlines */
|
|
TOK_PREPEND(lookahead, t2);
|
|
break;
|
|
case '"': /* concatenate string tokens */
|
|
if (vs_strcpy(t1->vstr,
|
|
t1->vstr->str + strlen(t1->vstr->str) - 1,
|
|
t2->vstr->str + 1) == 0)
|
|
fatal("out of memory");
|
|
tok_free(t2);
|
|
PUSHBACK_AND_RETURN;
|
|
default: /* something else, push back */
|
|
tok_unget(t2);
|
|
PUSHBACK_AND_RETURN;
|
|
}
|
|
}
|
|
PUSHBACK_AND_RETURN; /* hit EOF */
|
|
}
|
|
|
|
#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
|
|
|
|
/* tok_void - support for compilers that have problems with "void" */
|
|
|
|
static void tok_void(t)
|
|
register struct token *t;
|
|
{
|
|
register struct token *t2;
|
|
register struct token *lookahead = 0;
|
|
|
|
/*
|
|
* Look ahead beyond whitespace, comments and newlines until we see a '*'
|
|
* token. If one is found, replace "void" by "char". If we find something
|
|
* else, and if "void" should always be mapped, replace "void" by "int".
|
|
* Always push back the lookahead tokens.
|
|
*
|
|
* XXX The code also replaces the (void) argument list; this must be
|
|
* accounted for later on. The alternative would be to add (in unproto.c)
|
|
* TOK_VOID cases all over the place and that would be too error-prone.
|
|
*/
|
|
|
|
#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; }
|
|
|
|
while (t2 = tok_class()) {
|
|
switch (TOK_PREPEND(lookahead, t2)->tokno) {
|
|
case TOK_WSPACE: /* read past comments/blanks */
|
|
case '\n': /* read past newline */
|
|
break;
|
|
case '*': /* "void *" -> "char *" */
|
|
if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0)
|
|
fatal("out of memory");
|
|
PUSHBACK_AND_RETURN;
|
|
default:
|
|
#ifdef MAP_VOID /* plain "void" -> "int" */
|
|
if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0)
|
|
fatal("out of memory");
|
|
#endif
|
|
PUSHBACK_AND_RETURN;
|
|
}
|
|
}
|
|
PUSHBACK_AND_RETURN; /* hit EOF */
|
|
}
|
|
|
|
#endif
|
|
|
|
/* tok_time - rewrite __TIME__ to "hh:mm:ss" string constant */
|
|
|
|
static void tok_time(t)
|
|
struct token *t;
|
|
{
|
|
long now;
|
|
char *cp;
|
|
char buf[BUFSIZ];
|
|
|
|
/*
|
|
* Using sprintf() to select parts of a string is gross, but this should
|
|
* be fast enough.
|
|
*/
|
|
|
|
(void) time(&now);
|
|
cp = ctime(&now);
|
|
sprintf(buf, "\"%.8s\"", cp + 11);
|
|
if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0)
|
|
fatal("out of memory");
|
|
t->tokno = buf[0];
|
|
}
|
|
|
|
/* tok_date - rewrite __DATE__ to "Mmm dd yyyy" string constant */
|
|
|
|
static void tok_date(t)
|
|
struct token *t;
|
|
{
|
|
long now;
|
|
char *cp;
|
|
char buf[BUFSIZ];
|
|
|
|
/*
|
|
* Using sprintf() to select parts of a string is gross, but this should
|
|
* be fast enough.
|
|
*/
|
|
|
|
(void) time(&now);
|
|
cp = ctime(&now);
|
|
sprintf(buf, "\"%.3s %.2s %.4s\"", cp + 4, cp + 8, cp + 20);
|
|
if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0)
|
|
fatal("out of memory");
|
|
t->tokno = buf[0];
|
|
}
|
|
|
|
/* tok_unget - push back one or more possibly composite tokens */
|
|
|
|
void tok_unget(t)
|
|
register struct token *t;
|
|
{
|
|
register struct token *next;
|
|
|
|
do {
|
|
next = t->next;
|
|
TOK_PREPEND(tok_buf, t);
|
|
} while (t = next);
|
|
}
|
|
|
|
/* tok_list_append - append data to list */
|
|
|
|
static void tok_list_append(h, t)
|
|
struct token *h;
|
|
struct token *t;
|
|
{
|
|
if (h->head == 0) {
|
|
h->head = h->tail = t;
|
|
} else {
|
|
h->tail->next = t;
|
|
h->tail = t;
|
|
}
|
|
}
|