1
2
3
4
5
6
7
8 package org.xwt.util;
9
10 import java.io.Reader;
11 import java.io.IOException;
12 import java.io.EOFException;
13
14
50 public abstract class XML
51 {
52
53
54
55
56 public static final int BUFFER_SIZE = 255;
57
58
59 private static final Queue elements = new Queue(30);
60
61 private static final char[] single_amp = new char[] { '&' };
62 private static final char[] single_apos = new char[] { '\'' };
63 private static final char[] single_gt = new char[] { '>' };
64 private static final char[] single_lt = new char[] { '<' };
65 private static final char[] single_quot = new char[] { '"' };
66
67 private int line;
68 private int col;
69
70 private Reader in;
71 private char[] buf;
72 private int off;
73 private int len;
74
75 private Element current;
76
77
78 private char[] singlechar = new char[1];
79
80
/** Creates a parser whose read buffer initially holds {@link #BUFFER_SIZE} characters. */
public XML() { this(BUFFER_SIZE); }

/**
 * Creates a parser with the given initial read-buffer capacity.
 *
 * @param bSize initial capacity of the read buffer in characters; must be at least 1
 * @throws IllegalArgumentException if {@code bSize} is less than 1
 */
public XML(int bSize) {
    // The buffer grows by doubling its own length, so a zero-length buffer could
    // never grow and every read would be issued with no room to store characters.
    if (bSize < 1) throw new IllegalArgumentException("buffer size must be at least 1, got " + bSize);
    buf = new char[bSize];

    // Reuse a pooled element when one is available, otherwise start with a fresh one.
    current = (Element)elements.remove(false);
    if (current == null) current = new Element();
}
89
90
91
92 public int getLine() { return line; }
93
94
95 public int getCol() { return col; }
96
97
102 public final void parse(Reader reader) throws IOException, Exn {
103 in = reader;
104 off = len = 0;
105 line = col = 1;
106
107 clear();
108
109 try {
110
111 while (true) {
112 if (!buffer(1)) {
113 if (current.qName == null) break;
114 throw new Exn("reached eof without closing <"+current.qName+"> element", Exn.WFC, getLine(), getCol());
115 }
116
117 if (buf[off] == '<') readTag();
118 readChars(current.qName != null);
119 }
120 } finally { clear(); }
121 }
122
123
124 private final void clear() {
125 for (Element last = current; current.parent != null; ) {
126 current = current.parent;
127 last.clear();
128 elements.append(last);
129 }
130 current.clear();
131 }
132
133
134 private final void readTag() throws IOException, Exn {
135
136 boolean starttag = true;
137
138
139 boolean endtag = false;
140
141
142
143
144 int prefix = -1;
145
146 int namelen = 0;
147
148 col++; off++; len--;
149 if (!buffer(1)) throw new EOFException("Unexpected EOF processing element tag");
150
151
152 char s = buf[off];
153 if (s == '!') {
154
155 col++; off++; len--;
156 if (!buffer(4)) throw new EOFException("Unexpected EOF processing <! element");
157
158 boolean bad = false;
159 switch (buf[off]) {
160 case '-':
161 if (buf[off+1] != '-') { bad = true; break; }
162 col += 2; off += 2; len -= 2;
163
164
165 readChars(false, "-->", false);
166 col += 3; off += 3; len -= 3;
167 break;
168
169
170
171 case 'A':
172 if (!buffer(7)
173 || buf[off+1] != 'T' || buf[off+2] != 'T' || buf[off+3] != 'L'
174 || buf[off+4] != 'I' || buf[off+5] != 'S' || buf[off+6] != 'T') {
175 bad = true; break;
176 }
177 col += 7; off += 7; len -= 7;
178
179
180 readChars(false, ">", true);
181 col++; off++; len--;
182 break;
183 case 'D':
184 if (!buffer(7)
185 || buf[off+1] != 'O' || buf[off+2] != 'C' || buf[off+3] != 'T'
186 || buf[off+4] != 'Y' || buf[off+5] != 'P' || buf[off+6] != 'E') {
187 bad = true; break;
188 }
189 col += 7; off += 7; len -= 7;
190
191
192 readChars(false, ">", true);
193 col++; off++; len--;
194 break;
195 case 'E':
196 if (!buffer(7)) {
197 bad = true;
198 } else if (buf[off+1] == 'L' && buf[off+2] == 'E' && buf[off+3] == 'M'
199 && buf[off+4] == 'E' && buf[off+5] == 'N' && buf[off+6] == 'T') {
200
201 readChars(false, ">", true);
202 col++; off++; len--;
203
204 } else if (buf[off+1] == 'N' && buf[off+2] == 'T' && buf[off+3] == 'I'
205 && buf[off+4] == 'T' && buf[off+5] == 'Y') {
206
207 readChars(false, ">", true);
208 col++; off++; len--;
209
210 } else {
211 bad = true;
212 }
213 break;
214
215 case 'N':
216 if (!buffer(8)
217 || buf[off+1] != 'O' || buf[off+2] != 'T' || buf[off+3] != 'A' || buf[off+4] != 'T'
218 || buf[off+5] != 'I' || buf[off+6] != 'O' || buf[off+7] != 'N') {
219 bad = true; break;
220 }
221 col += 8; off += 8; len -= 8;
222
223 readChars(false, ">", true);
224 col++; off++; len--;
225
226 break;
227 default: bad = true;
228 }
229
230 if (bad) throw new Exn("element tag start character is invalid", Exn.MARKUP, getLine(), getCol());
231
232 } else if (s == '?') {
233
234 col++; off++; len--;
235 readChars(false, "?>", true);
236 if (!buffer(2)) throw new EOFException("Unexpected EOF at end of Processing Instruction");
237 col += 2; off += 2; len -= 2;
238
239 } else if (s == '[') {
240 if (!buffer(7)
241 || buf[off+1] != 'C' || buf[off+2] != 'D' || buf[off+3] != 'A'
242 || buf[off+4] != 'T' || buf[off+5] != 'A' || buf[off+6] != '[') {
243 col++; off--; len++;
244
245 readChars(false, "]]>", false);
246 } else {
247 col += 7; off += 7; len -=7;
248
249 readChars(true, "]]>", false);
250 }
251 col += 3; off += 3; len -= 3;
252 } else {
253 if (s == '/') {
254
255 starttag = false;
256 endtag = true;
257
258 col++; off++; len--;
259 if (!buffer(1)) throw new EOFException("Unexpected EOF processing end tag");
260 s = buf[off];
261 }
262
263 if (!Name(s)) throw new Exn("invalid starting character in element name", Exn.MARKUP, getLine(), getCol());
264
265
266 for (namelen = 0; ; namelen++) {
267 if (!buffer(namelen+1)) throw new EOFException("Unexpected EOF in element tag name");
268
269 s = buf[off+namelen];
270
271 if (S(s) || s == '>') {
272 break;
273 } else if (s == '/') {
274 endtag = true;
275 break;
276 } else if (s == ':' && namelen > 0 && prefix < 1) {
277
278 prefix = namelen;
279 } else if (!NameChar(s)) {
280 throw new Exn("element name contains invalid character", Exn.MARKUP, getLine(), getCol());
281 }
282 }
283
284
285 if (namelen < 1) throw new Exn("element name is null", Exn.MARKUP, getLine(), getCol());
286
287
288 String qName = new String(buf, off, namelen);
289
290 col += namelen; off += namelen; len -= namelen;
291
292 if (starttag) {
293
294
295 if (current.qName != null) {
296 Element next = (Element)elements.remove(false);
297 if (next == null) next = new Element();
298
299 next.parent = current;
300 current = next;
301 }
302
303 current.qName = qName;
304
305 if (prefix > 0) {
306 current.prefix = current.qName.substring(0, prefix);
307 current.localName = current.qName.substring(prefix+1);
308 } else {
309 current.prefix = null;
310 current.localName = current.qName;
311 }
312
313
314 readWhitespace();
315 if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 1");
316 while (buf[off] != '/' && buf[off] != '>') {
317 readAttribute();
318 if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 2");
319 readWhitespace();
320 }
321
322
323 current.uri = current.getUri(current.getPrefix());
324 if (current.getUri().equals("") && current.getPrefix() != null)
325 current.addError(new Exn("undefined prefix '"+current.getPrefix()+"'", Exn.NC, getLine(), getCol()));
326
327 } else {
328
329 if (!qName.equals(current.getQName())) throw new Exn(
330 "end tag </"+qName+"> does not line up with start tag <"+current.getQName()+">", Exn.WFC, getLine(), getCol()
331 );
332 }
333
334
335 readWhitespace();
336
337
338 if (!buffer(1)) throw new EOFException("Unexpected EOF before end of tag");
339 if (buf[off] == '/') {
340 endtag = true;
341 off++; len--; col++;
342 }
343 if (!buffer(1)) throw new EOFException("Unexpected EOF before end of endtag");
344 if (buf[off] == '>') {
345 off++; len--; col++;
346 } else {
347 throw new Exn("missing '>' character from element '"+qName+"'", Exn.MARKUP, getLine(), getCol());
348 }
349
350
351 if (starttag) startElement(current);
352 if (endtag) {
353 endElement(current);
354
355
356 if (current.getParent() == null) {
357
358 current.clear();
359 } else {
360 Element last = current;
361 current = current.parent;
362 last.clear();
363 elements.append(last);
364 }
365 }
366 }
367 }
368
369
370 private final void readAttribute() throws IOException, Exn {
371 int ref = 0;
372 int prefix = 0;
373 String n, v, p, u;
374 n = v = p = u = null;
375 char s;
376
377
378 for (ref= 0; ; ref++) {
379 if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in read attribute loop part 1");
380
381 s = buf[off+ref];
382
383 if (s == '=' || S(s)) {
384 break;
385 } else if (s == ':' && ref > 0 && prefix < 1) {
386
387 prefix = ref+1;
388 } else if (!NameChar(s)) {
389 throw new Exn("attribute name contains invalid characters", Exn.MARKUP, getLine(), getCol());
390 }
391 }
392
393
394 if (prefix > 0) {
395 p = new String(buf, off, prefix-1);
396 col += prefix; off += prefix; len -= prefix; ref -= prefix;
397 }
398 n = new String(buf, off, ref);
399 col += ref; off += ref; len -= ref;
400
401
402 readWhitespace();
403 if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider");
404 if (buf[off] != '=') throw new Exn("attribute name not followed by '=' sign", Exn.MARKUP, getLine(), getCol());
405
406 col++; off++; len--;
407 readWhitespace();
408
409 if (!buffer(1)) throw new EOFException("Unexpected EOF after attribute '=' divider");
410
411 char wrap;
412 if (buf[off] == '\'' || buf[off] == '"') {
413 wrap = buf[off];
414 } else {
415 throw new Exn("attribute '"+n+"' must have attribute wrapped in ' or \"", Exn.MARKUP, getLine(), getCol());
416 }
417 col++; off++; len--;
418
419
420 attval: for (ref = 0; ; ref++) {
421 if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in attribute value");
422
423 if (buf[off+ref] == wrap) {
424 break attval;
425 } else if (buf[off+ref] == '<') {
426 throw new Exn("attribute value for '"+n+"' must not contain '<'", Exn.WFC, getLine(), getCol());
427 }
428 }
429
430 v = new String(buf, off, ref);
431 col += ref; off += ref; len -= ref;
432
433
434 col++; off++; len--;
435
436
437 if (p != null && p.equals("xmlns")) {
438 current.addUri(n, v);
439 } else if (n.equals("xmlns")) {
440 if (current.getUri().equals("")) {
441 current.addUri("", v);
442 } else {
443 current.addError(new Exn("default namespace definition repeated", Exn.NC, getLine(), getCol()));
444 }
445 } else {
446
447 u = current.getUri(p);
448 if (p != null && u.equals("")) current.addError(new Exn("undefined attribute prefix '"+p+"'", Exn.NC, getLine(), getCol()));
449
450
451 for (int i=0; current.len > i; i++) if (n.equals(current.getAttrKey(i)) && u.equals(current.getAttrUri(i))) throw new Exn(
452 "attribute name '"+n+"' may not appear more than once in the same element tag", Exn.WFC, getLine(), getCol()
453 );
454
455 current.addAttr(n, v, u);
456 }
457 }
458
459
460 private final void readEntity() throws IOException, Exn {
461 off++; len--;
462 if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity");
463
464 boolean unknown = false;
465 switch (buf[off]) {
466 case '#':
467 off++; len--;
468
469 int radix;
470 if (buf[off] == 'x') { off++; len--; radix = 16; } else { radix = 10; }
471 int c = 0;
472
473
474
475 findchar: while (true) {
476 if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity");
477 int d = Character.digit(buf[off], radix);
478 if (d == -1) {
479 if (buf[off] != ';') throw new Exn("illegal characters in entity reference", Exn.WFC, getLine(), getCol());
480 off++; len--; col++;
481 break findchar;
482 }
483 c = (c * radix) + d;
484
485 off++; len--;
486 }
487
488 singlechar[0] = Character.forDigit(c, radix);
489 characters(singlechar, 0, 1);
490 break;
491
492 case 'a':
493 if (buffer(4) && buf[off+1] == 'm' && buf[off+2] == 'p' && buf[off+3] == ';') {
494 characters(single_amp, 0, 1);
495 off += 4; len -= 4; col++;
496 } else if (buffer(5) && buf[off+1] == 'p' && buf[off+2] == 'o' && buf[off+3] == 's' && buf[off+4] == ';') {
497 characters(single_apos, 0, 1);
498 off += 5; len -= 5; col++;
499 } else {
500 unknown = true;
501 }
502 break;
503
504 case 'g':
505 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
506 characters(single_gt, 0, 1);
507 off += 3; len -= 3; col++;
508 } else {
509 unknown = true;
510 }
511 break;
512
513 case 'l':
514 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
515 characters(single_lt, 0, 1);
516 off += 3; len -= 3; col++;
517 } else {
518 unknown = true;
519 }
520 break;
521
522 case 'q':
523 if (buffer(5) && buf[off+1] == 'u' && buf[off+2] == 'o' && buf[off+3] == 't' && buf[off+4] == ';') {
524 characters(single_quot, 0, 1);
525 off += 5; len -= 5; col++;
526 } else {
527 unknown = true;
528 }
529 break;
530
531
532 }
533
534 if (unknown) throw new Exn("unknown entity (<!ENTITY> not supported)", Exn.WFC, getLine(), getCol());
535 }
536
537
538 private final void readChars(boolean p, String match, boolean entities) throws IOException, Exn {
539 int ref;
540 char[] end = match.toCharArray();
541
542 for (boolean more = true; more;) {
543 if (!buffer(1)) return;
544
545 buf: for (ref = 0; ref < len; ref++) {
546 switch (buf[off+ref]) {
547 case '\r':
548
549 buf[off+ref] = '\n'; ref++;
550 if (p) characters(buf, off, ref);
551 off += ref; len -= ref; ref = -1;
552 line++; col = 1;
553
554
555 if (!buffer(1)) return;
556 if (buf[off] == '\n') { off++; len--; }
557 break;
558
559 case '\n':
560 ref++;
561 if (p) characters(buf, off, ref);
562 off += ref; len -= ref; ref = -1;
563 line++; col = 1;
564 break;
565
566 case '&':
567 if (entities) {
568 if (p) {
569 if (ref > 0) characters(buf, off, ref);
570 off += ref; len -= ref; ref = -1;
571 readEntity();
572 }
573 break;
574 }
575
576 default:
577 if (!buffer(ref+end.length)) continue buf;
578 for (int i=0; end.length > i; i++) if (end[i] != buf[off+ref+i]) continue buf;
579 more = false;
580 break buf;
581 }
582 }
583
584 if (p && ref > 0) characters(buf, off, ref);
585 off += ref; len -= ref; col += ref;
586 }
587 }
588
589
593 private final void readChars(boolean p) throws IOException, Exn {
594 int ref;
595
596 for (boolean more = true; more;) {
597 if (!buffer(1)) return;
598
599 buf: for (ref = 0; ref < len; ref++) {
600 switch (buf[off+ref]) {
601 case '\r':
602
603 buf[off+ref] = '\n'; ref++;
604 if (p) characters(buf, off, ref);
605 off += ref; len -= ref; ref = -1;
606 line++; col = 1;
607
608
609 if (!buffer(1)) return;
610 if (buf[off] == '\n') { off++; len--; }
611 break;
612
613 case '\n':
614 ref++;
615 if (p) characters(buf, off, ref);
616 off += ref; len -= ref; ref = -1;
617 line++; col = 1;
618 break;
619
620 case '&':
621 if (p) {
622 if (ref > 0) characters(buf, off, ref);
623 off += ref; len -= ref; ref = -1;
624 readEntity();
625 }
626 break;
627
628 case '<':
629 more = false;
630 break buf;
631 }
632 }
633
634 if (p && ref > 0) characters(buf, off, ref);
635 off += ref; len -= ref; col += ref;
636 }
637 }
638
639
640 private final void readWhitespace() throws IOException, Exn {
641 int ref;
642
643 for (boolean more = true; more;) {
644 if (!buffer(1)) return;
645
646 buf: for (ref = 0; ref < len; ref++) {
647 switch (buf[off+ref]) {
648 case '\r':
649
650 buf[off+ref] = '\n';
651 whitespace(buf, off, ++ref);
652 off += ref; len -= ref; ref = -1;
653 line++; col = 1;
654
655
656 if (!buffer(1)) return;
657 if (buf[off] == '\n') { off++; len--; }
658 break;
659
660 case '\n':
661 whitespace(buf, off, ++ref);
662 off += ref; len -= ref; ref = -1;
663 line++; col = 1;
664 break;
665
666 case ' ':
667 case '\t':
668 break;
669
670 default:
671 more = false;
672 break buf;
673 }
674 }
675
676 off += ref; len -= ref; col += ref;
677 }
678 }
679
680
686 private final boolean buffer(int min) throws IOException {
687 if (len > min) return true;
688
689 if (buf.length - (off+len) >= min) {
690
691 } else if (off >= min) {
692
693 System.arraycopy(buf, off, buf, 0, len);
694 off = 0;
695 } else {
696
697 char[] newbuf = new char[buf.length * 2];
698 System.arraycopy(buf, off, newbuf, 0, len);
699 buf = newbuf;
700 off = 0;
701 }
702
703 while (min > len) {
704 int newlen = in.read(buf, off+len, buf.length-(off+len));
705 if (newlen < 0) return false;
706 len += newlen;
707 }
708
709 return true;
710 }
711
712
713
714
715
716
717
723 public abstract void startElement(Element e) throws Exn;
724
725
736 public abstract void characters(char[] ch, int start, int length) throws Exn, IOException;
737
738
739 public abstract void whitespace(char[] ch, int start, int length) throws Exn, IOException;
740
741
742 public abstract void endElement(Element e) throws Exn, IOException;
743
744
745
746
747
748
749
757 public static final class Element {
758
759 private static final int DEFAULT_ATTR_SIZE = 10;
760
761 protected Element parent = null;
762
763 protected String uri = null;
764 protected String localName = null;
765 protected String qName = null;
766 protected String prefix = null;
767
768 protected Hash urimap = new Hash(3,3);
769
770 protected String[] keys = new String[DEFAULT_ATTR_SIZE];
771 protected String[] vals = new String[DEFAULT_ATTR_SIZE];
772 protected String[] uris = new String[DEFAULT_ATTR_SIZE];
773 protected int len = 0;
774
775 protected Exn[] errors = new Exn[] {};
776
777
778
779 public Element getParent() { return parent; }
780
781
782 public String getQName() { return qName; }
783
784
785 public String getLocalName() { return localName; }
786
787
788 public String getPrefix() { return prefix; }
789
790
791 public String getUri() { return getUri(prefix); }
792
793
794 public String getUri(String p) {
795 String ret = null;
796 for (Element e = this; e != null && ret == null; e = e.getParent()) {
797 ret = (String)e.urimap.get(p == null ? "" : p);
798 }
799 return ret == null ? "" : ret;
800 }
801
802
803 public String getAttrKey(int pos) { return len > pos ? keys[pos] : null; }
804
805
806 public String getAttrVal(int pos) { return len > pos ? vals[pos] : null; }
807
808
809 public String getAttrUri(int pos) { return len > pos ? uris[pos] : null; }
810
811
812 public int getAttrLen() { return len; }
813
814
815 public Exn[] getErrors() { return errors; }
816
817
818 protected Element() { }
819
820
821 protected void addUri(String name, String value) {
822 urimap.put(name, value);
823 }
824
825
826 protected void addAttr(String key, String val, String uri) {
827 if (len == keys.length) {
828
829 String[] newkeys = new String[keys.length*2];
830 String[] newvals = new String[vals.length*2];
831 String[] newuris = new String[uris.length*2];
832 System.arraycopy(keys, 0, newkeys, 0, keys.length);
833 System.arraycopy(vals, 0, newvals, 0, vals.length);
834 System.arraycopy(uris, 0, newuris, 0, uris.length);
835 keys = newkeys; vals = newvals; uris = newuris;
836 }
837
838 keys[len] = key;
839 vals[len] = val;
840 uris[len] = uri;
841 len++;
842 }
843
844
845 protected void addError(Exn e) {
846
847 Exn[] newe = new Exn[errors.length+1];
848 System.arraycopy(errors, 0, newe, 0, errors.length);
849 newe[errors.length] = e;
850 errors = newe;
851 }
852
853
854 protected void clear() {
855 parent = null;
856 uri = localName = qName = prefix = null;
857 urimap.clear();
858
859 if (keys.length != vals.length || vals.length != uris.length) {
860 keys = new String[DEFAULT_ATTR_SIZE];
861 vals = new String[DEFAULT_ATTR_SIZE];
862 uris = new String[DEFAULT_ATTR_SIZE];
863 } else {
864 for (int i=0; keys.length > i; i++) { keys[i] = null; vals[i] = null; uris[i] = null; };
865 }
866 len = 0;
867
868 errors = new Exn[] {};
869 }
870 }
871
872
873 public static class Exn extends Exception {
874
875 public static final int MARKUP = 1;
876
877
878 public static final int WFC = 2;
879
880
881 public static final int NC = 3;
882
883
884 public static final int SCHEMA = 4;
885
886 private String error;
887 private int type;
888 private int line;
889 private int col;
890
891 public Exn(String e) { this(e, MARKUP, -1, -1); }
892
893 public Exn(String e, int type, int line, int col) {
894 this.error = e;
895 this.type = type;
896 this.line = line;
897 this.col = col;
898 }
899
900 public int getType() { return this.type; }
901 public int getLine() { return this.line; }
902 public int getCol() { return this.col; }
903 public String getMessage() { return this.error + (line >= 0 && col >= 0 ? " at " + line + ":" + col: ""); }
904 }
905
906
907
908
909
910
911
912
913
914 private static final boolean Name(char c) {
915 return BaseCharAscii(c) || c == '_' || c == ':' || Letter(c);
916 }
917
918
919 private static final boolean NameChar(char c) {
920 return BaseCharAscii(c) || c == '.' || c == '-' || c == '_' || c == ':'
921 || Digit(c) || Letter(c) || Extender(c);
922 }
923
924
925 private static final boolean Letter(char c) {
926 return BaseChar(c) || Ideographic(c);
927 }
928
929
930 private static final boolean BaseCharAscii(char c) {
931 return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A');
932 }
933
934
935 private static final boolean Char(char c) {
936
937 return c == '\u0009' || c == '\r' || c == '\n'
938 || (c >= '\u0020' && c <= '\uD7FF')
939 || (c >= '\uE000' && c <= '\uFFFD');
940 }
941
942
943 private static final boolean BaseChar(char c) {
944 return BaseCharAscii(c) || (c >= '\u00C0' && c <= '\u00D6')
945 || (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131')
946 || (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E')
947 || (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5')
948 || (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1')
949 || (c == '\u0386') || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
950 || (c >= '\u038E' && c <= '\u03A1') || (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6')
951 || (c == '\u03DA') || (c == '\u03DC') || (c == '\u03DE')
952 || (c == '\u03E0')
953 || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F')
954 || (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4')
955 || (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB')
956 || (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556')
957 || (c == '\u0559')
958 || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2')
959 || (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7')
960 || (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3')
961 || (c == '\u06D5')
962 || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939')
963 || (c == '\u093D')
964 || (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990')
965 || (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0')
966 || (c == '\u09B2')
967 || (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DF' && c <= '\u09E1') || (c >= '\u09F0' && c <= '\u09F1')
968 || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || (c >= '\u0A13' && c <= '\u0A28')
969 || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || (c >= '\u0A35' && c <= '\u0A36')
970 || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C')
971 || (c == '\u0A5E')
972 || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B')
973 || (c == '\u0A8D')
974 || (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0')
975 || (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9')
976 || (c == '\u0ABD')
977 || (c == '\u0AE0')
978 || (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28')
979 || (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39')
980 || (c == '\u0B3D')
981 || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || (c >= '\u0B85' && c <= '\u0B8A')
982 || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || (c >= '\u0B99' && c <= '\u0B9A')
983 || (c == '\u0B9C')
984 || (c >= '\u0B9E' && c <= '\u0B9F') || (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA')
985 || (c >= '\u0BAE' && c <= '\u0BB5') || (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C')
986 || (c >= '\u0C0E' && c <= '\u0C10') || (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33')
987 || (c >= '\u0C35' && c <= '\u0C39') || (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C')
988 || (c >= '\u0C8E' && c <= '\u0C90') || (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3')
989 || (c >= '\u0CB5' && c <= '\u0CB9')
990 || (c == '\u0CDE')
991 || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || (c >= '\u0D0E' && c <= '\u0D10')
992 || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || (c >= '\u0D60' && c <= '\u0D61')
993 || (c >= '\u0E01' && c <= '\u0E2E')
994 || (c == '\u0E30')
995 || (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82')
996 || (c == '\u0E84')
997 || (c >= '\u0E87' && c <= '\u0E88')
998 || (c == '\u0E8A')
999 || (c == '\u0E8D')
1000 || (c >= '\u0E94' && c <= '\u0E97') || (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3')
1001 || (c == '\u0EA5')
1002 || (c == '\u0EA7')
1003 || (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE')
1004 || (c == '\u0EB0')
1005 || (c >= '\u0EB2' && c <= '\u0EB3')
1006 || (c == '\u0EBD')
1007 || (c >= '\u0EC0' && c <= '\u0EC4') || (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69')
1008 || (c >= '\u10A0' && c <= '\u10C5') || (c >= '\u10D0' && c <= '\u10F6')
1009 || (c == '\u1100')
1010 || (c >= '\u1102' && c <= '\u1103') || (c >= '\u1105' && c <= '\u1107')
1011 || (c == '\u1109')
1012 || (c >= '\u110B' && c <= '\u110C') || (c >= '\u110E' && c <= '\u1112')
1013 || (c == '\u113C')
1014 || (c == '\u113E')
1015 || (c == '\u1140')
1016 || (c == '\u114C')
1017 || (c == '\u114E')
1018 || (c == '\u1150')
1019 || (c >= '\u1154' && c <= '\u1155')
1020 || (c == '\u1159')
1021 || (c >= '\u115F' && c <= '\u1161')
1022 || (c == '\u1163')
1023 || (c == '\u1165')
1024 || (c == '\u1167')
1025 || (c == '\u1169')
1026 || (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173')
1027 || (c == '\u1175')
1028 || (c == '\u119E')
1029 || (c == '\u11A8')
1030 || (c == '\u11AB')
1031 || (c >= '\u11AE' && c <= '\u11AF') || (c >= '\u11B7' && c <= '\u11B8')
1032 || (c == '\u11BA')
1033 || (c >= '\u11BC' && c <= '\u11C2')
1034 || (c == '\u11EB')
1035 || (c == '\u11F0')
1036 || (c == '\u11F9')
1037 || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || (c >= '\u1F00' && c <= '\u1F15')
1038 || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || (c >= '\u1F48' && c <= '\u1F4D')
1039 || (c >= '\u1F50' && c <= '\u1F57')
1040 || (c == '\u1F59')
1041 || (c == '\u1F5B')
1042 || (c == '\u1F5D')
1043 || (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC')
1044 || (c == '\u1FBE')
1045 || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || (c >= '\u1FD0' && c <= '\u1FD3')
1046 || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || (c >= '\u1FF2' && c <= '\u1FF4')
1047 || (c >= '\u1FF6' && c <= '\u1FFC')
1048 || (c == '\u2126')
1049 || (c >= '\u212A' && c <= '\u212B')
1050 || (c == '\u212E')
1051 || (c >= '\u2180' && c <= '\u2182') || (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA')
1052 || (c >= '\u3105' && c <= '\u312C') || (c >= '\uAC00' && c <= '\uD7A3');
1053 }
1054
1055
1056 private static final boolean Ideographic(char c) {
1057 return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
1058 }
1059
1060
1061
1122
1123
1124 private static final boolean Digit(char c) {
1125 return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9')
1126 || (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F')
1127 || (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF')
1128 || (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F')
1129 || (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
1130 }
1131
1132
1133 private static final boolean Extender(char c) {
1134 return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387'
1135 || c == '\u0640' || c == '\u0E46' || c == '\u0EC6' || c == '\u3005'
1136 || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE');
1137 }
1138
1139
1140 private static final boolean S(char c) {
1141 return c == '\u0020' || c == '\u0009' || c == '\r' || c == '\n';
1142 }
1143 }
1144