1
2
3
19
20 package org.xwt.js;
21 import java.io.*;
22
23
24 class Lexer implements Tokens {
25
26
27 public static void main(String[] s) throws Exception {
28 Lexer l = new Lexer(new InputStreamReader(System.in), "stdin", 0);
29 int tok = 0;
30 while((tok = l.getToken()) != -1) System.out.println(codeToString[tok]);
31 }
32
33
34 protected int op;
35
36
37 protected int mostRecentlyReadToken;
38
39
40 protected Number number = null;
41
42
43 protected String string = null;
44
45
46 private int line = 0;
47
48
49 protected int parserLine = 0;
50
51
52 protected int col = 0;
53
54
55 protected String sourceName;
56
57 private SmartReader in;
/**
 * Creates a lexer reading characters from r.
 *
 * @param r          the character stream to tokenize
 * @param sourceName name used as the prefix of error messages
 * @param line       initial value of both the line counter and parserLine
 * @throws IOException declared on the signature; the visible body only wraps the reader
 */
public Lexer(Reader r, String sourceName, int line) throws IOException {
    in = new SmartReader(r);
    this.sourceName = sourceName;
    this.parserLine = this.line = line;
}
64
65
66
67
68 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
69 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
70 private static int xDigitToInt(int c) {
71 if ('0' <= c && c <= '9') return c - '0';
72 else if ('a' <= c && c <= 'f') return c - ('a' - 10);
73 else if ('A' <= c && c <= 'F') return c - ('A' - 10);
74 else return -1;
75 }
76
77
78
79
80 private int getKeyword(String name) throws IOException {
81
82 case "if": return IF;
83 case "lt": return LT;
84 case "gt": return GT;
85 case "in": return IN;
86 case "do": return DO;
87 case "and": return AND;
88 case "or": return OR;
89 case "for": return FOR;
90 case "int": return RESERVED;
91 case "new": return RESERVED;
92 case "try": return TRY;
93 case "var": return VAR;
94 case "byte": return RESERVED;
95 case "case": return CASE;
96 case "char": return RESERVED;
97 case "else": return ELSE;
98 case "enum": return RESERVED;
99 case "goto": return RESERVED;
100 case "long": return RESERVED;
101 case "null": return NULL;
102 case "true": return TRUE;
103 case "with": return RESERVED;
104 case "void": return RESERVED;
105 case "class": return RESERVED;
106 case "break": return BREAK;
107 case "while": return WHILE;
108 case "false": return FALSE;
109 case "const": return RESERVED;
110 case "final": return RESERVED;
111 case "super": return RESERVED;
112 case "throw": return THROW;
113 case "catch": return CATCH;
114 case "class": return RESERVED;
115 case "delete": return RESERVED;
116 case "return": return RETURN;
117 case "throws": return RESERVED;
118 case "double": return RESERVED;
119 case "assert": return ASSERT;
120 case "public": return RESERVED;
121 case "switch": return SWITCH;
122 case "typeof": return TYPEOF;
123 case "package": return RESERVED;
124 case "default": return DEFAULT;
125 case "finally": return FINALLY;
126 case "boolean": return RESERVED;
127 case "private": return RESERVED;
128 case "extends": return RESERVED;
129 case "abstract": return RESERVED;
130 case "continue": return CONTINUE;
131 case "debugger": return RESERVED;
132 case "function": return FUNCTION;
133 case "volatile": return RESERVED;
134 case "interface": return RESERVED;
135 case "protected": return RESERVED;
136 case "transient": return RESERVED;
137 case "implements": return RESERVED;
138 case "instanceof": return RESERVED;
139 case "synchronized": return RESERVED;
140 //#end
141 return -1;
142 }
143
144 private int getIdentifier(int c) throws IOException {
145 in.startString();
146 while (Character.isJavaIdentifierPart((char)(c = in.read())));
147 in.unread();
148 String str = in.getString();
149 int result = getKeyword(str);
150 if (result == RESERVED) throw new LexerException("The reserved word \"" + str + "\" is not permitted in XWT scripts");
151 if (result != -1) return result;
152 this.string = str.intern();
153 return NAME;
154 }
155
156 private int getNumber(int c) throws IOException {
157 int base = 10;
158 in.startString();
159 double dval = Double.NaN;
160 long longval = 0;
161 boolean isInteger = true;
162
163 // figure out what base we're using
164 if (c == '0') {
165 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
166 else if (isDigit(c)) base = 8;
167 }
168
169 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
170 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
171 isInteger = false;
172 if (c == '.') do { c = in.read(); } while (isDigit(c));
173 if (c == 'e' || c == 'E') {
174 c = in.read();
175 if (c == '+' || c == '-') c = in.read();
176 if (!isDigit(c)) throw new LexerException("float listeral did not have an exponent value");
177 do { c = in.read(); } while (isDigit(c));
178 }
179 }
180 in.unread();
181
182 String numString = in.getString();
183 if (base == 10 && !isInteger) {
184 try { dval = (Double.valueOf(numString)).doubleValue(); }
185 catch (NumberFormatException ex) { throw new LexerException("invalid numeric literal: \"" + numString + "\""); }
186 } else {
187 if (isInteger) {
188 longval = Long.parseLong(numString, base);
189 dval = (double)longval;
190 } else {
191 dval = Double.parseDouble(numString);
192 longval = (long) dval;
193 if (longval == dval) isInteger = true;
194 }
195 }
196
197 if (!isInteger) this.number = JS.N(dval);
198 else this.number = JS.N(longval);
199 return NUMBER;
200 }
201
202 private int getString(int c) throws IOException {
203 StringBuffer stringBuf = null;
204 int quoteChar = c;
205 int val = 0;
206 c = in.read();
207 in.startString(); // start after the first "
208 while(c != quoteChar) {
209 if (c == '\n' || c == -1) throw new LexerException("unterminated string literal");
210 if (c == '\\') {
211 if (stringBuf == null) {
212 in.unread(); // Don't include the backslash
213 stringBuf = new StringBuffer(in.getString());
214 in.read();
215 }
216 switch (c = in.read()) {
217 case 'b': c = '\b'; break;
218 case 'f': c = '\f'; break;
219 case 'n': c = '\n'; break;
220 case 'r': c = '\r'; break;
221 case 't': c = '\t'; break;
222 case 'v': c = '\u000B'; break;
223 case '\\': c = '\\'; break;
224 case 'u': {
225 int v = 0;
226 for(int i=0; i<4; i++) {
227 int ci = in.read();
228 if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F')))
229 throw new LexerException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence");
230 v = (v << 8) | Integer.parseInt(ci + "", 16);
231 }
232 c = (char)v;
233 break;
234 }
235 default:
236 // just use the character that was escaped
237 break;
238 }
239 }
240 if (stringBuf != null) stringBuf.append((char) c);
241 c = in.read();
242 }
243 if (stringBuf != null) this.string = stringBuf.toString().intern();
244 else {
245 in.unread(); // miss the trailing "
246 this.string = in.getString().intern();
247 in.read();
248 }
249 return STRING;
250 }
251
252 private int _getToken() throws IOException {
253 int c;
254 do { c = in.read(); } while (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\n' );
255 if (c == -1) return -1;
256 if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
257 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
258 if (c == '"' || c == '\'') return getString(c);
259 switch (c) {
260 case ';': return SEMI;
261 case '[': return LB;
262 case ']': return RB;
263 case '{': return LC;
264 case '}': return RC;
265 case '(': return LP;
266 case ')': return RP;
267 case ',': return COMMA;
268 case '?': return HOOK;
269 case ':': return !in.match(':') ? COLON : in.match('=') ? GRAMMAR : le(":: is not a valid token");
270 case '.': return DOT;
271 case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
272 case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
273 case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
274 case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
275 case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
276 case '%': return in.match('=') ? ASSIGN_MOD : MOD;
277 case '~': return BITNOT;
278 case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
279 case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
280 case '*': return in.match('=') ? ASSIGN_MUL : MUL;
281 case '<': return !in.match('<') ? (in.match('=') ? LE : LT) : in.match('=') ? ASSIGN_LSH : LSH;
282 case '>': return !in.match('>') ? (in.match('=') ? GE : GT) :
283 in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH);
284 case '/':
285 if (in.match('=')) return ASSIGN_DIV;
286 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
287 if (!in.match('*')) return DIV;
288 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
289 if (c == '\n' || c != '/' || !in.match('*')) continue;
290 if (in.match('/')) return getToken();
291 throw new LexerException("nested comments are not permitted");
292 }
293 if (c == -1) throw new LexerException("unterminated comment");
294 return getToken(); // `goto retry'
295 default: throw new LexerException("illegal character: \'" + ((char)c) + "\'");
296 }
297 }
298
299 private int le(String s) throws LexerException { if (true) throw new LexerException(s); return 0; }
300
301 // SmartReader ////////////////////////////////////////////////////////////////
302
303 /** a Reader that tracks line numbers and can push back tokens */
304 private class SmartReader {
305 PushbackReader reader = null;
306 int lastread = -1;
307
308 public SmartReader(Reader r) { reader = new PushbackReader(r); }
309 public void unread() throws IOException { unread((char)lastread); }
310 public void unread(char c) throws IOException {
311 reader.unread(c);
312 if(c == '\n') col = -1;
313 else col--;
314 if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
315 }
316 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
317 public int peek() throws IOException {
318 int peeked = reader.read();
319 if (peeked != -1) reader.unread((char)peeked);
320 return peeked;
321 }
322 public int read() throws IOException {
323 lastread = reader.read();
324 if (accumulator != null) accumulator.append((char)lastread);
325 if (lastread != '\n' && lastread != '\r') col++;
326 if (lastread == '\n') {
327 // col is -1 if we just unread a newline, this is sort of ugly
328 if (col != -1) parserLine = ++line;
329 col = 0;
330 }
331 return lastread;
332 }
333
334 // FEATURE: could be much more efficient
335 StringBuffer accumulator = null;
336 public void startString() {
337 accumulator = new StringBuffer();
338 accumulator.append((char)lastread);
339 }
340 public String getString() throws IOException {
341 String ret = accumulator.toString().intern();
342 accumulator = null;
343 return ret;
344 }
345 }
346
347
348 // Token PushBack code ////////////////////////////////////////////////////////////
349
350 private int pushBackDepth = 0;
351 private int[] pushBackInts = new int[10];
352 private Object[] pushBackObjects = new Object[10];
353
354 /** push back a token */
355 public final void pushBackToken(int op, Object obj) {
356 if (pushBackDepth >= pushBackInts.length - 1) {
357 int[] newInts = new int[pushBackInts.length * 2];
358 System.arraycopy(pushBackInts, 0, newInts, 0, pushBackInts.length);
359 pushBackInts = newInts;
360 Object[] newObjects = new Object[pushBackObjects.length * 2];
361 System.arraycopy(pushBackObjects, 0, newObjects, 0, pushBackObjects.length);
362 pushBackObjects = newObjects;
363 }
364 pushBackInts[pushBackDepth] = op;
365 pushBackObjects[pushBackDepth] = obj;
366 pushBackDepth++;
367 }
368
369 /** push back the most recently read token */
370 public final void pushBackToken() { pushBackToken(op, number != null ? (Object)number : (Object)string); }
371
372 /** read a token but leave it in the stream */
373 public final int peekToken() throws IOException {
374 int ret = getToken();
375 pushBackToken();
376 return ret;
377 }
378
379 /** read a token */
380 public final int getToken() throws IOException {
381 number = null;
382 string = null;
383 if (pushBackDepth == 0) {
384 mostRecentlyReadToken = op;
385 return op = _getToken();
386 }
387 pushBackDepth--;
388 op = pushBackInts[pushBackDepth];
389 if (pushBackObjects[pushBackDepth] != null) {
390 number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null;
391 string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null;
392 }
393 return op;
394 }
395
396 class LexerException extends IOException {
397 public LexerException(String s) { super(sourceName + ":" + line + "," + col + ": " + s); }
398 }
399 }
400