/* Copyright (C) 1998, 1999 Cygnus Solutions This file is part of libgcj. This software is copyrighted work licensed under the terms of the Libgcj License. Please consult the file "LIBGCJ_LICENSE" for details. */ package java.io; /** * @author Warren Levy * @date October 25, 1998. */ /* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3 * "The Java Language Specification", ISBN 0-201-63451-1 * plus online API docs for JDK 1.2 beta from http://www.javasoft.com. * Status: Believed complete and correct. */ public class StreamTokenizer { /* A constant indicating that the end of the stream has been read. */ public static final int TT_EOF = -1; /* A constant indicating that the end of the line has been read. */ public static final int TT_EOL = '\n'; /* A constant indicating that a number token has been read. */ public static final int TT_NUMBER = -2; /* A constant indicating that a word token has been read. */ public static final int TT_WORD = -3; /* Contains the type of the token read resulting from a call to nextToken. */ public int ttype; /* The String associated with word and string tokens. */ public String sval; /* The numeric value associated with number tokens. */ public double nval; /* Indicates whether end-of-line is recognized as a token. */ private boolean eolSignificant = false; /* Indicates whether word tokens are automatically made lower case. */ private boolean lowerCase = false; /* Indicates whether C++ style comments are recognized and skipped. */ private boolean slashSlash = false; /* Indicates whether C style comments are recognized and skipped. */ private boolean slashStar = false; /* Attribute tables of each byte from 0x00 to 0xFF. */ private boolean[] whitespace; private boolean[] alphabetic; private boolean[] numeric; private boolean[] quote; private boolean[] comment; /* The Reader associated with this class. */ private PushbackReader in; /* Indicates if a token has been pushed back. */ private boolean pushedBack = false; /* Contains the current line number of the reader. */ private int lineNumber = 1; // Deprecated in JDK 1.1. public StreamTokenizer(InputStream is) { this(new InputStreamReader(is)); } public StreamTokenizer(Reader r) { in = new PushbackReader(r); whitespace = new boolean[256]; alphabetic = new boolean[256]; numeric = new boolean[256]; quote = new boolean[256]; comment = new boolean[256]; for (int i = 0; i < 256; i++) resetChar(i); whitespaceChars(0x00, 0x20); wordChars('A', 'Z'); wordChars('a', 'z'); wordChars(0xA0, 0xFF); commentChar('/'); quoteChar('\''); quoteChar('"'); parseNumbers(); } public void commentChar(int ch) { if (ch >= 0 && ch <= 255) comment[ch] = true; } public void eolIsSignificant(boolean flag) { eolSignificant = flag; } public int lineno() { return lineNumber; } public void lowerCaseMode(boolean flag) { lowerCase = flag; } private boolean isWhitespace(int ch) { if (ch >= 0 && ch <= 255) return whitespace[ch]; return false; } private boolean isAlphabetic(int ch) { if (ch >= 0 && ch <= 255) return alphabetic[ch]; else if (ch > 255) return true; return false; } private boolean isNumeric(int ch) { if (ch >= 0 && ch <= 255) return numeric[ch]; return false; } private boolean isQuote(int ch) { if (ch >= 0 && ch <= 255) return quote[ch]; return false; } private boolean isComment(int ch) { if (ch >= 0 && ch <= 255) return comment[ch]; return false; } public int nextToken() throws IOException { if (pushedBack) { pushedBack = false; return ttype; } sval = null; int ch; // Skip whitespace. Deal with EOL along the way. while (isWhitespace(ch = in.read())) if (ch == '\n' || ch == '\r') { lineNumber++; // Throw away \n if in combination with \r. if (ch == '\r' && (ch = in.read()) != '\n') in.unread(ch); if (eolSignificant) return (ttype = TT_EOL); } if (ch == TT_EOF) ttype = TT_EOF; else if (isNumeric(ch)) { if (ch == '-') { // Read ahead to see if this is an ordinary '-' rather than numeric. ch = in.read(); in.unread(ch); if (isNumeric(ch) && ch != '-') ch = '-'; else return (ttype = '-'); } StringBuffer tokbuf = new StringBuffer(); tokbuf.append((char) ch); int decCount = 0; while (isNumeric(ch = in.read()) && ch != '-') if (ch == '.' && decCount++ > 0) break; else tokbuf.append((char) ch); in.unread(ch); ttype = TT_NUMBER; nval = Double.valueOf(tokbuf.toString()).doubleValue(); } else if (isAlphabetic(ch)) { StringBuffer tokbuf = new StringBuffer(); tokbuf.append((char) ch); while (isAlphabetic(ch = in.read()) || isNumeric(ch)) tokbuf.append((char) ch); in.unread(ch); ttype = TT_WORD; sval = tokbuf.toString(); if (lowerCase) sval.toLowerCase(); } else if (isComment(ch)) { while ((ch = in.read()) != '\n' && ch != '\r' && ch != TT_EOF) ; in.unread(ch); return nextToken(); // Recursive, but not too deep in normal cases. } else if (isQuote(ch)) { ttype = ch; StringBuffer tokbuf = new StringBuffer(); while ((ch = in.read()) != ttype && ch != '\n' && ch != '\r' && ch != TT_EOF) { if (ch == '\\') switch (ch = in.read()) { case 'a': ch = 0x7; break; case 'b': ch = '\b'; break; case 'f': ch = 0xC; break; case 'n': ch = '\n'; break; case 'r': ch = '\r'; break; case 't': ch = '\t'; break; case 'v': ch = 0xB; break; case '\"': case '\'': case '\\': break; default: int ch1, nextch; if ((nextch = ch1 = ch) >= '0' && ch <= '7') { ch -= '0'; if ((nextch = in.read()) >= '0' && nextch <= '7') { ch = ch * 8 + nextch - '0'; if ((nextch = in.read()) >= '0' && nextch <= '7' && ch1 >= '0' && ch1 <= '3') { ch = ch * 8 + nextch - '0'; nextch = in.read(); } } } in.unread(nextch); } tokbuf.append((char) ch); } // Throw away matching quote char. if (ch != ttype) in.unread(ch); sval = tokbuf.toString(); } else { if (ch == '/') if ((ch = in.read()) == '/' && slashSlash) { while ((ch = in.read()) != '\n' && ch != '\r' && ch != TT_EOF) ; in.unread(ch); return nextToken(); // Recursive, but not too deep in normal cases } else if (ch == '*' && slashStar) { while (true) { ch = in.read(); if (ch == '*') if ((ch = in.read()) == '/') break; else in.unread(ch); else if (ch == '\n' || ch == '\r') { lineNumber++; if (ch == '\r' && (ch = in.read()) != '\n') in.unread(ch); } else if (ch == TT_EOF) { in.unread(ch); break; } } return nextToken(); // Recursive, but not too deep in normal cases } else { in.unread(ch); ch = '/'; } ttype = ch; } return ttype; } private void resetChar(int ch) { whitespace[ch] = alphabetic[ch] = numeric[ch] = quote[ch] = comment[ch] = false; } public void ordinaryChar(int ch) { if (ch >= 0 && ch <= 255) resetChar(ch); } public void ordinaryChars(int low, int hi) { if (low < 0) low = 0; if (hi > 255) hi = 255; for (int i = low; i <= hi; i++) resetChar(i); } public void parseNumbers() { for (int i = 0; i <= 9; i++) numeric['0' + i] = true; numeric['.'] = true; numeric['-'] = true; } public void pushBack() { // pushBack may cause the lineno method to return an incorrect value // if lineno is called before the next call to nextToken. pushedBack = true; } public void quoteChar(int ch) { if (ch >= 0 && ch <= 255) quote[ch] = true; } public void resetSyntax() { ordinaryChars(0x00, 0xFF); } public void slashSlashComments(boolean flag) { slashSlash = flag; } public void slashStarComments(boolean flag) { slashStar = flag; } public String toString() { String tempstr; if (ttype == TT_EOF) tempstr = "EOF"; else if (ttype == TT_EOL) tempstr = "EOL"; else if (ttype == TT_WORD) tempstr = sval; else if (ttype == TT_NUMBER) tempstr = "n=" + Double.toString(nval); else // must be an ordinary char. tempstr = "\'" + (new Character((char) ttype)).toString() + "\'"; return "Token[" + tempstr + "], line " + Integer.toString(lineno()); } public void whitespaceChars(int low, int hi) { if (low < 0) low = 0; if (hi > 255) hi = 255; for (int i = low; i <= hi; i++) whitespace[i] = true; } public void wordChars(int low, int hi) { if (low < 0) low = 0; if (hi > 255) hi = 255; for (int i = low; i <= hi; i++) alphabetic[i] = true; } }