001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org
002 *
003 * Copyright 2000-2011, Michael Meyling <mime@qedeq.org>.
004 *
005 * "Hilbert II" is free software; you can redistribute
006 * it and/or modify it under the terms of the GNU General Public
007 * License as published by the Free Software Foundation; either
008 * version 2 of the License, or (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 * GNU General Public License for more details.
014 */
015
016 package org.qedeq.base.io;
017
018 import java.io.File;
019 import java.io.IOException;
020 import java.io.InputStream;
021 import java.io.Reader;
022 import java.math.BigInteger;
023
024 import org.qedeq.base.utility.StringUtility;
025
026
027 /**
028 * This class provides convenient methods for parsing input.
029 *
030 * @author Michael Meyling
031 */
032 public class TextInput extends InputStream {
033
034 /** Char marking end of data. */
035 public static final int EOF = -1;
036
037 /** Char marking end of input line. */
038 // public final static char CR = '\n'; // LATER mime 20050613: delete if running on all platforms
039 public static final char CR = '\012';
040
041 /** String for marking current reading position. */
042 private static final String MARKER = "#####";
043
044 /** Holds the data. */
045 private final StringBuffer source;
046
047 /** Current line number (starting with 0). */
048 private int lineNumber = 0;
049
050 /** Current column (starting with 0). */
051 private int column = 0;
052
053 /** Current reading position (starting with 0). */
054 private int position = 0;
055
056 /** Maximum int value as BigInteger. */
057 private BigInteger maxIntValue = BigInteger.valueOf(Integer.MAX_VALUE);
058
059 /**
060 * Constructor using <code>Reader</code> source.
061 *
062 * @param reader Data source. This reader will be closed (even if reading fails).
063 * @throws IOException Reading failed.
064 * @throws NullPointerException Argument was a null pointer.
065 */
066 public TextInput(final Reader reader) throws IOException {
067 try {
068 if (reader == null) {
069 throw new NullPointerException(
070 "no null pointer as argument accepted");
071 }
072 this.source = new StringBuffer();
073 // TODO mime 20080307: optimize reading
074 int c;
075 while (-1 != (c = reader.read())) {
076 this.source.append((char) c);
077 }
078 } finally {
079 IoUtility.close(reader);
080 }
081 }
082
083 /**
084 * Constructor using <code>StringBuffer</code> source.
085 *
086 * @param source data source
087 * @throws NullPointerException Argument was a null pointer.
088 */
089 public TextInput(final StringBuffer source) {
090 if (source == null) {
091 throw new NullPointerException(
092 "no null pointer as argument accepted");
093 }
094 this.source = source;
095 }
096
097 /**
098 * Constructor using <code>String</code> source.
099 *
100 * @param source data source
101 * @throws NullPointerException Argument was a null pointer.
102 */
103 public TextInput(final String source) {
104 if (source == null) {
105 throw new NullPointerException(
106 "no null pointer as argument accepted");
107 }
108 this.source = new StringBuffer(source);
109 }
110
111
112 /**
113 * Constructor using <code>FILE</code> source.
114 *
115 * @param file Data source.
116 * @param encoding Take this encoding for file.
117 * @throws IOException File reading failed.
118 * @throws NullPointerException One argument was a null pointer.
119 */
120 public TextInput(final File file, final String encoding) throws IOException {
121 if (file == null) {
122 throw new NullPointerException(
123 "no null pointer as argument accepted");
124 }
125 this.source = new StringBuffer();
126 IoUtility.loadFile(file, source, encoding);
127 }
128
129 /**
130 * Reads a single character and increments the reading position
131 * by one. If no characters are left, <code>-1</code> is returned.
132 * Otherwise a cast to <code>char</code> gives the character read.
133 *
134 * @return Character read, if there are no more chars
135 * <code>-1</code> is returned.
136 */
137 public final int read() {
138 if (position >= source.length()) {
139 return EOF;
140 }
141 if (getChar() == CR) {
142 lineNumber++;
143 column = 0;
144 } else {
145 column++;
146 }
147 return source.charAt(position++);
148 }
149
150 /**
151 * Decrements the reading position by one and reads a single character.
152 * If no characters are left, <code>-1</code> is returned.
153 * Otherwise a cast to <code>char</code> gives the character read.
154 *
155 * @return Character read, if there are no more chars
156 * <code>-1</code> is returned.
157 */
158 public final int readInverse() {
159 if (position <= 0) {
160 return -1;
161 }
162 final char c = source.charAt(--position);
163 if (c == CR) {
164 lineNumber--;
165 int pos = source.lastIndexOf("" + CR, position - 1);
166 if (pos < 0) {
167 column = position;
168 } else {
169 column = position - 1 - pos;
170 }
171 } else {
172 column--;
173 if (column < 0) {
174 throw new IllegalStateException("column less then 0");
175 }
176 }
177 return c;
178 }
179
180 /**
181 * Reads a given amount of characters and increments the reading position
182 * accordingly.
183 *
184 * @param number amount of characters to read
185 * @return string read
186 */
187 public final String readString(final int number) {
188 final StringBuffer result = new StringBuffer(number);
189 for (int i = 0; i < number; i++) {
190 final int c = read();
191 if (c != -1) {
192 result.append((char) c);
193 } else {
194 break;
195 }
196 }
197 return result.toString();
198 }
199
200 /**
201 * Skips a given amount of characters and increments the reading position
202 * accordingly.
203 *
204 * @param number Amount of characters to read
205 */
206 public final void forward(final int number) {
207 for (int i = 0; i < number; i++) {
208 final int c = read();
209 if (c == -1) {
210 break;
211 }
212 }
213 }
214
215 /**
216 * Skips until a given keyword is reached. The position afterwards is at the start
217 * of the keyword or at the end of the text (if the keyword is not found).
218 *
219 * @param search Look for this keyword.
220 * @return Was the keyword found?
221 */
222 public final boolean forward(final String search) {
223 final int pos = source.indexOf(search, position);
224 if (pos < 0) {
225 setPosition(getMaximumPosition());
226 return false;
227 }
228 setPosition(pos);
229 return true;
230 }
231
232 /**
233 * Reads a single character and does not change the reading
234 * position. If no characters are left, <code>-1</code> is returned.
235 * Otherwise a cast to <code>char</code> gives the character read.
236 *
237 * @return Character read at current position, if there are no more chars
238 * <code>-1</code> is returned
239 */
240 public final int getChar() {
241 if (position >= source.length()) {
242 return -1;
243 }
244 return source.charAt(position);
245 }
246
247 /**
248 * Reads a single character and does not change the reading
249 * position. If offset addition leads out of the source,
250 * <code>-1</code> is returned. Otherwise a cast to <code>char</code>
251 * gives the character read.
252 *
253 * @param skip Offset from current reading position. Maybe negative.
254 * @return Character read, if position is out of scope
255 * <code>-1</code> is returned.
256 */
257 public final int getChar(final int skip) {
258 if (position + skip < 0 || position + skip >= source.length()) {
259 return -1;
260 }
261 return source.charAt(position + skip);
262 }
263
264
265 /**
266 * Reads a substring. Doesn't change reading position.
267 *
268 * @param from Absolute reading position.
269 * @param to Read to this position.
270 * @return Resulting string.
271 * @throws StringIndexOutOfBoundsException If from > to.
272 */
273 public final String getSubstring(final int from, final int to) {
274 final int l = source.length();
275 final int f = (from < 0 ? 0 : (from > l ? l : from));
276 final int t = (to < 0 ? 0 : (to > l ? l : to));
277 return source.substring(f, t);
278 }
279
280 /**
281 * Get complete input source. Doesn't change reading position.
282 *
283 * @return Complete input string.
284 */
285 public final String asString() {
286 return source.toString();
287 }
288
289 /**
290 * Replace given interval with given string.
291 * If the current reading position is in the interval it is set
292 * to the end of the interval.
293 *
294 * @param from Absolute reading position.
295 * @param to Read to this position.
296 * @param replacement Replacement for interval.
297 */
298 public final void replace(final int from, final int to, final String replacement) {
299 source.replace(from, to, replacement);
300 if (position > from && position < to) {
301 setPosition(from + replacement.length());
302 } else if (position > from) { // correct row (and column) information
303 setPosition(position - to + from + replacement.length());
304 }
305 }
306
307 /**
308 * Skips white space, beginning from reading position.
309 * Changes reading position to next non white space
310 * character.
311 */
312 public final void skipWhiteSpace() {
313 while (!isEmpty() && Character.isWhitespace((char) getChar())) {
314 read();
315 }
316 }
317
318 /**
319 * Skips white space, beginning from reading position.
320 * Changes reading position to next non white space
321 * character.
322 */
323 public final void skipWhiteSpaceInverse() {
324 while (getPosition() > 0 && Character.isWhitespace((char) getChar(-1))) {
325 readInverse();
326 }
327 }
328
329 /**
330 * Skip current position back to beginning of an XML tag.
331 * This is mainly something like <code><tagName</code>.
332 *
333 * @throws IllegalArgumentException No begin of XML tag found.
334 */
335 public final void skipBackToBeginOfXmlTag() {
336 if ('<' == getChar()) {
337 return;
338 }
339 boolean quoted = false;
340 do {
341 if (-1 == readInverse()) {
342 throw new IllegalArgumentException("begin of xml tag not found");
343 }
344 if ('\"' == getChar()) {
345 quoted = !quoted;
346 }
347 } while (quoted || '<' != getChar());
348 }
349
350 /**
351 * Skip forward to end of line.
352 */
353 public final void skipToEndOfLine() {
354 int c = 0;
355 do {
356 c = read();
357 } while (!isEmpty() && c != CR);
358 }
359
360 /**
361 * Skip current position forward to end of an XML tag.
362 * This is mainly something like <code>></code>. Quoted data is skipped.
363 *
364 * @throws IllegalArgumentException No end of XML tag found.
365 */
366 public final void skipForwardToEndOfXmlTag() {
367 if ('>' == getChar()) {
368 return;
369 }
370 boolean quoted = false;
371 while ('>' != getChar()) {
372 if ('\"' == getChar()) {
373 quoted = !quoted;
374 }
375 if (!quoted) {
376 if (-1 == read()) {
377 throw new IllegalArgumentException("end of xml tag not found");
378 }
379 }
380 }
381 read(); // skip '>'
382 }
383
384 /**
385 * Reads tag or attribute name out of XML stream. Whitespace is skipped and
386 * characters are read till "=" or ">" or whitespace is found.
387 *
388 * @return Name of tag or attribute.
389 * @throws IllegalArgumentException Next non white space character is "="
390 * or ">".
391 */
392 public final String readNextXmlName() {
393 skipWhiteSpace();
394 if (isEmpty() || '=' == getChar() || '>' == getChar()) {
395 throw new IllegalArgumentException(
396 "begin of attribute expected");
397 }
398 StringBuffer buffer = new StringBuffer();
399 while (!isEmpty() && '=' != getChar() && '>' != getChar()
400 && !Character.isWhitespace((char) getChar())) {
401 buffer.append((char) read());
402 }
403 return buffer.toString();
404 }
405
406 /**
407 * Reads attribute value out of XML stream. Whitespace is skipped and an "="
408 * is expected to follow. Again whitespace is skipped. If no quotation mark follows
409 * characters are read till whitespace or ">" occurs. Otherwise data is
410 * read till an ending quotation mark comes.
411 *
412 * @return Value read.
413 * @throws IllegalArgumentException Following had not one of the following forms:
414 * <pre>
415 * = "value"
416 * </pre>
417 * <pre>
418 * = value
419 * </pre>
420 */
421 public final String readNextAttributeValue() {
422 skipWhiteSpace();
423 if (isEmpty() || '=' != getChar()) {
424 throw new IllegalArgumentException(
425 "\"=\" expected");
426 }
427 read(); // read =
428 skipWhiteSpace();
429 if (isEmpty() || '>' == getChar()) {
430 throw new IllegalArgumentException(
431 "attribute value expected");
432 }
433 StringBuffer buffer = new StringBuffer();
434 if ('\"' == getChar()) {
435 read(); // read "
436 while (!isEmpty() && '\"' != getChar()) {
437 buffer.append((char) read());
438 }
439 if ('\"' != getChar()) {
440 throw new IllegalArgumentException("\" expected");
441 }
442 read(); // read "
443 } else {
444 while (!isEmpty() && '>' != getChar()
445 && !Character.isWhitespace((char) getChar())) {
446 buffer.append((char) read());
447 }
448 }
449 return StringUtility.unescapeXml(buffer.toString());
450 }
451
452 /**
453 * Is there no data left for reading?
454 *
455 * @return is all data read?
456 */
457 public final boolean isEmpty() {
458 return position >= source.length();
459 }
460
461 /**
462 * Is there no data left for reading after skipping?
463 *
464 * @param skip Add this number to current position.
465 * @return Is data empty at that new position?
466 */
467 public final boolean isEmpty(final int skip) {
468 return position + skip >= source.length();
469 }
470
471 /**
472 * Reads the next string containing only letters or digits,
473 * leading whitespace is skipped.
474 * Changes reading position.
475 *
476 * @return read string
477 * @throws IllegalArgumentException if no such characters could
478 * be found
479 */
480 public final String readLetterDigitString() {
481 skipWhiteSpace();
482 if (isEmpty() || !Character.isLetterOrDigit((char) getChar())) {
483 read(); // for showing correct position
484 throw new IllegalArgumentException(
485 "letter or digit expected");
486 }
487 StringBuffer buffer = new StringBuffer();
488 while (!isEmpty() && Character.isLetterOrDigit((char) getChar())) {
489 buffer.append((char) read());
490 }
491 return buffer.toString();
492 }
493
494 /**
495 * Reads the next string until whitespace occurs,
496 * leading whitespace is skipped.
497 * Changes (probably) reading position.
498 *
499 * @return read string
500 */
501 public final String readStringTilWhitespace() {
502 skipWhiteSpace();
503 StringBuffer buffer = new StringBuffer();
504 while (!isEmpty() && !Character.isWhitespace((char) getChar())) {
505 buffer.append((char) read());
506 }
507 return buffer.toString();
508 }
509
510 /**
511 * Reads the next integer, leading whitespace is skipped. Signs like - or + are not
512 * accepted. Resulting integer
513 * Changes reading position.
514 *
515 * @return Read integer.
516 * @throws IllegalArgumentException if no digits where found or
517 * the number was to big for an <code>int</code>
518 */
519 public final int readNonNegativeInt() {
520 skipWhiteSpace();
521 if (isEmpty() || !Character.isDigit((char) getChar())) {
522 read(); // for showing correct position
523 throw new IllegalArgumentException(
524 "digit expected");
525 }
526 StringBuffer buffer = new StringBuffer();
527 while (!isEmpty() && Character.isDigit((char) getChar())) {
528 buffer.append((char) read());
529 }
530 final BigInteger big = new BigInteger(buffer.toString());
531 if (1 == big.compareTo(maxIntValue)) {
532 throw new IllegalArgumentException("this integer is to large! Maximum possible value is "
533 + maxIntValue);
534 }
535 return big.intValue();
536 }
537
538 /**
539 * Reads the next (big) integer, leading whitespace is skipped.
540 * The first character might be a minus sign, the rest must be
541 * digits. Leading zero digits are not allowed, also "-0" is not
542 * accepted. <p>
543 * Changes reading position.
544 *
545 * @return read integer
546 * @throws IllegalArgumentException if no digits where found.
547 */
548 public final String readCounter() {
549 skipWhiteSpace();
550 if (isEmpty()) {
551 throw new IllegalArgumentException("integer expected");
552 }
553 StringBuffer buffer = new StringBuffer();
554 if (getChar() == '-') {
555 buffer.append(read());
556 }
557 final int begin = getPosition();
558 if (!Character.isDigit((char) getChar())) {
559 throw new IllegalArgumentException("digit expected");
560 }
561 while (!isEmpty() && Character.isDigit((char) getChar())) {
562 buffer.append((char) read());
563 }
564 if (buffer.length() >= 2 && ('0' == buffer.charAt(0)
565 || '-' == buffer.charAt(0) && '0' == buffer.charAt(1))) {
566 setPosition(begin); // for showing correct position
567 throw new IllegalArgumentException("no leading zeros allowed");
568 }
569 return buffer.toString();
570 }
571
572 /**
573 * Reads the next quoted string, leading whitespace is skipped.
574 * A correctly quoted string could be created by adding a leading and
575 * a trailing quote character and doubling each other quote character.
576 * The resulting string is dequoted.
577 * Changes reading position.
578 *
579 * @return Dequoted string read.
580 * @throws IllegalArgumentException No correctly quoted string was found.
581 */
582 public final String readQuoted() {
583 skipWhiteSpace();
584 if (isEmpty() || read() != '\"') {
585 throw new IllegalArgumentException(
586 "\" expected");
587 }
588 StringBuffer unquoted = new StringBuffer();
589 char c;
590 do {
591 if (isEmpty()) {
592 throw new IllegalArgumentException(
593 "ending \" expected");
594 }
595 c = (char) read();
596 if (c != '\"') {
597 unquoted.append(c);
598 } else { // c == '\"'
599 if (isEmpty() || getChar() != '\"') {
600 break; // success
601 }
602 unquoted.append((char) read());
603 }
604 } while (true);
605 return unquoted.toString();
606 }
607
608 /**
609 * Returns the current line number.
610 *
611 * @return Current line number (starting with line 1).
612 */
613 public final int getRow() {
614 return lineNumber + 1;
615 }
616
617 /**
618 * Returns the current column number.
619 *
620 * @return Current column number (starting with line 1).
621 */
622 public final int getColumn() {
623 return column + 1;
624 }
625
626 /**
627 * Returns the current line.
628 *
629 * @return Current line.
630 */
631 public final String getLine() {
632 int min = position - 1;
633 while (min >= 0 && source.charAt(min) != CR) {
634 min--;
635 }
636 int max = position;
637 while (max < source.length()
638 && source.charAt(max) != CR) {
639 max++;
640 }
641 if (min + 1 >= max) {
642 return "";
643 }
644 return source.substring(min + 1, max);
645 }
646
647 /**
648 * Returns the current position. Starting with 0. This is the number of characters
649 * from the beginning.
650 *
651 * @return Current position.
652 */
653 public final int getPosition() {
654 return position;
655 }
656
657 /**
658 * Returns the current position.
659 *
660 * @return Current position.
661 */
662 public final SourcePosition getSourcePosition() {
663 return new SourcePosition(getRow(), getColumn());
664 }
665
666 /**
667 * Returns the highest position number possible. This is equal
668 * to the length of the source.
669 *
670 * @return Maximum position.
671 */
672 public final int getMaximumPosition() {
673 return source.length();
674 }
675
676 /**
677 * Sets the current position (and indirectly the row and column number).
678 *
679 * @param position Set current position to this value.
680 */
681 public final void setPosition(final int position) {
682 if (position >= source.length()) {
683 this.position = source.length();
684 } else if (this.position != position) {
685 if (position < this.position) {
686 this.position = 0;
687 this.lineNumber = 0;
688 this.column = 0;
689 for (int i = 0; i < position; i++) { // Q & D
690 read();
691 }
692 } else {
693 for (int i = this.position; i < position; i++) {
694 read();
695 }
696 }
697 }
698 }
699
700 /**
701 * Sets the current position (and indirectly the row and column number).
702 *
703 * @param position Set current position to this value.
704 */
705 public final void setPosition(final SourcePosition position) {
706 setRow(position.getRow());
707 setColumn(position.getColumn());
708 }
709
710 /**
711 * Adds a given position to the current one and changes the row and column number accordingly.
712 * A delta position with one row and one column doesn't change the current position.
713 *
714 * @param delta Add this position to current one.
715 */
716 public final void addPosition(final SourcePosition delta) {
717 addRow(delta.getRow() - 1);
718 addColumn(delta.getColumn() - 1);
719 }
720
721 /**
722 * Sets the current line number (and indirectly the position).
723 *
724 * @param row Move to this line number.
725 */
726 public final void setRow(final int row) {
727 int r = row;
728 // check if row is under lower bound
729 if (r <= 0) {
730 r = 1;
731 }
732 // check if already at wanted position
733 if (getRow() == r) {
734 return;
735 }
736 // check if already at end of file
737 if (getPosition() >= source.length() && getRow() >= r) {
738 return;
739 }
740 if (getRow() > r) {
741 // reset to begin of file
742 this.position = 0;
743 this.lineNumber = 0;
744 this.column = 0;
745 }
746 for (int i = 0; getRow() < r; i++) {
747 if (EOF == read()) {
748 return;
749 }
750 }
751 }
752
753 /**
754 * Get given byte position as {@link SourcePosition}.
755 *
756 * @param find Get row and column information for this byte position.
757 * @return Row and column information.
758 */
759 public final SourcePosition getPosition(final int find) {
760 int r = 0;
761 int c = 0;
762 int i = 0;
763 while (i < source.length() && i < find) {
764 if (CR == source.charAt(i)) {
765 r++;
766 c = 0;
767 } else {
768 c++;
769 }
770 i++;
771 }
772 return new SourcePosition(r + 1, c + 1);
773 }
774
775 /**
776 * Get given byte position as {@link SourcePosition}.
777 *
778 * @param position Get row and column information for this byte position.
779 * @return Row and column information.
780 */
781 public final int getPosition(final SourcePosition position) {
782 int find = 0;
783 int r = 0;
784 while (++r < position.getRow() && -1 < (find = source.indexOf("" + CR, find))) {
785 // nothing to do
786 }
787 if (find < 0) {
788 find = source.length();
789 }
790 find += position.getColumn();
791 if (find > source.length()) {
792 find = source.length();
793 }
794 return find;
795 }
796
797 /**
798 * Get source area as string.
799 *
800 * @param area Get this area as string.
801 * @return Area itself.
802 */
803 public final String getSourceArea(final SourceArea area) {
804 return source.substring(getPosition(area.getStartPosition()),
805 getPosition(area.getEndPosition()));
806 }
807
808 /**
809 * Add the following rows and reset column (if <code>number == 0</code>).
810 *
811 * @param number Add this number of rows.
812 */
813 public final void addRow(final int number) {
814 setRow(getRow() + number);
815 }
816
817 /**
818 * Sets the current column position (and indirectly the position).
819 * If <code>column</code> is out of range the minimum value (1) or the maximum possible column
820 * value is taken.
821 *
822 * @param column Move to this column. First column has the number one.
823 */
824 public final void setColumn(final int column) {
825 int c = column;
826 // check if column is out of lower bound
827 if (c <= 0) {
828 c = 1;
829 }
830 // check if already at wanted position
831 if (getColumn() == c) {
832 return;
833 }
834 if (getColumn() > c) {
835 do {
836 this.position--;
837 this.column--;
838 } while (getColumn() > c);
839 return;
840 }
841 while (getChar() != CR && getChar() != EOF && getColumn() < c) {
842 read();
843 }
844 }
845
846 /**
847 * Add the following columns.
848 *
849 * @param number Add this number of columns.
850 */
851 public final void addColumn(final int number) {
852 setColumn(getColumn() + number);
853 }
854
855 /**
856 * Show reading position.
857 *
858 * @return current line with mark at current reading position
859 */
860 public final String showLinePosition() {
861 final String line = getLine();
862 final StringBuffer buffer = new StringBuffer();
863 final int col = getColumn() - 1;
864 if (col > 0) {
865 if (col < line.length()) {
866 buffer.append(line.substring(0, col));
867 } else {
868 buffer.append(line);
869 }
870 }
871 buffer.append(MARKER);
872 if (col < line.length()) {
873 buffer.append(line.substring(col));
874 }
875 return buffer.toString();
876 }
877
878 // LATER mime 20050608: remove if no use
879 /*
880 public final int findCaretPosition(final int line, final int column, final String source) {
881 if (line == 1) {
882 return 0;
883 }
884 int k = 1;
885 for (int j = 0; j < source.length(); j++) {
886 if (source.charAt(j) == '\n') {
887 k++;
888 }
889 if (k == line) {
890 j += column - 1;
891 if (j > source.length()) {
892 j = source.length();
893 }
894 return j;
895 }
896 }
897 return 0;
898 }
899 */
900
901 }
|