001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org
002 *
003 * Copyright 2000-2013, Michael Meyling <mime@qedeq.org>.
004 *
005 * "Hilbert II" is free software; you can redistribute
006 * it and/or modify it under the terms of the GNU General Public
007 * License as published by the Free Software Foundation; either
008 * version 2 of the License, or (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 * GNU General Public License for more details.
014 */
015
016 package org.qedeq.base.io;
017
018 import java.io.File;
019 import java.io.IOException;
020 import java.io.InputStream;
021 import java.io.Reader;
022 import java.math.BigInteger;
023
024 import org.qedeq.base.utility.StringUtility;
025
026
027 /**
028 * This class provides convenient methods for parsing input.
029 *
030 * @author Michael Meyling
031 */
032 public class TextInput extends InputStream {
033
034 /** Char marking end of data. */
035 public static final int EOF = -1;
036
037 /** Char marking end of input line. */
038 // public final static char CR = '\n'; // LATER mime 20050613: delete if running on all platforms
039 public static final char CR = '\012';
040
041 /** String for marking current reading position. */
042 private static final String MARKER = "#####";
043
044 /** Holds the data. */
045 private final StringBuffer source;
046
047 /** Current line number (starting with 0). */
048 private int lineNumber = 0;
049
050 /** Current column (starting with 0). */
051 private int column = 0;
052
053 /** Current reading position (starting with 0). */
054 private int position = 0;
055
056 /** Maximum int value as BigInteger. */
057 private BigInteger maxIntValue = BigInteger.valueOf(Integer.MAX_VALUE);
058
059 /**
060 * Constructor using <code>Reader</code> source.
061 *
062 * @param reader Data source. This reader will be closed (even if reading fails).
063 * @throws IOException Reading failed.
064 * @throws NullPointerException Argument was a null pointer.
065 */
066 public TextInput(final Reader reader) throws IOException {
067 try {
068 if (reader == null) {
069 throw new NullPointerException(
070 "no null pointer as argument accepted");
071 }
072 this.source = new StringBuffer();
073 // TODO mime 20080307: optimize reading
074 int c;
075 while (-1 != (c = reader.read())) {
076 this.source.append((char) c);
077 }
078 } finally {
079 IoUtility.close(reader);
080 }
081 }
082
083 /**
084 * Constructor using <code>StringBuffer</code> source.
085 *
086 * @param source data source
087 * @throws NullPointerException Argument was a null pointer.
088 */
089 public TextInput(final StringBuffer source) {
090 if (source == null) {
091 throw new NullPointerException(
092 "no null pointer as argument accepted");
093 }
094 this.source = source;
095 }
096
097 /**
098 * Constructor using <code>String</code> source.
099 *
100 * @param source data source
101 * @throws NullPointerException Argument was a null pointer.
102 */
103 public TextInput(final String source) {
104 if (source == null) {
105 throw new NullPointerException(
106 "no null pointer as argument accepted");
107 }
108 this.source = new StringBuffer(source);
109 }
110
111
112 /**
113 * Constructor using <code>FILE</code> source.
114 *
115 * @param file Data source.
116 * @param encoding Take this encoding for file.
117 * @throws IOException File reading failed.
118 * @throws NullPointerException One argument was a null pointer.
119 */
120 public TextInput(final File file, final String encoding) throws IOException {
121 if (file == null) {
122 throw new NullPointerException(
123 "no null pointer as argument accepted");
124 }
125 this.source = new StringBuffer();
126 IoUtility.loadFile(file, source, encoding);
127 }
128
129 /**
130 * Reads a single character and increments the reading position
131 * by one. If no characters are left, <code>-1</code> is returned.
132 * Otherwise a cast to <code>char</code> gives the character read.
133 *
134 * @return Character read, if there are no more chars
135 * <code>-1</code> is returned.
136 */
137 public final int read() {
138 if (position >= source.length()) {
139 return EOF;
140 }
141 if (getChar() == CR) {
142 lineNumber++;
143 column = 0;
144 } else {
145 column++;
146 }
147 return source.charAt(position++);
148 }
149
150 /**
151 * Decrements the reading position by one and reads a single character.
152 * If no characters are left, <code>-1</code> is returned.
153 * Otherwise a cast to <code>char</code> gives the character read.
154 *
155 * @return Character read, if there are no more chars
156 * <code>-1</code> is returned.
157 */
158 public final int readInverse() {
159 if (position <= 0) {
160 return -1;
161 }
162 final char c = source.charAt(--position);
163 if (c == CR) {
164 lineNumber--;
165 int pos = source.lastIndexOf("" + CR, position - 1);
166 if (pos < 0) {
167 column = position;
168 } else {
169 column = position - 1 - pos;
170 }
171 } else {
172 column--;
173 if (column < 0) {
174 throw new IllegalStateException("column less then 0");
175 }
176 }
177 return c;
178 }
179
180 /**
181 * Reads a given amount of characters and increments the reading position
182 * accordingly.
183 *
184 * @param number amount of characters to read
185 * @return string read
186 */
187 public final String readString(final int number) {
188 final StringBuffer result = new StringBuffer(number);
189 for (int i = 0; i < number; i++) {
190 final int c = read();
191 if (c != -1) {
192 result.append((char) c);
193 } else {
194 break;
195 }
196 }
197 return result.toString();
198 }
199
200 /**
201 * Skips a given amount of characters and increments the reading position
202 * accordingly.
203 *
204 * @param number Amount of characters to read. Must be positive - if not nothing happens.
205 */
206 public final void forward(final int number) {
207 for (int i = 0; i < number; i++) {
208 final int c = read();
209 if (c == -1) {
210 break;
211 }
212 }
213 }
214
215 /**
216 * Skips until a given keyword is reached. The position afterwards is at the start
217 * of the keyword or at the end of the text (if the keyword is not found).
218 *
219 * @param search Look for this keyword.
220 * @return Was the keyword found?
221 */
222 public final boolean forward(final String search) {
223 final int pos = source.indexOf(search, position);
224 if (pos < 0) {
225 setPosition(getMaximumPosition());
226 return false;
227 }
228 setPosition(pos);
229 return true;
230 }
231
232 /**
233 * Reads a single character and does not change the reading
234 * position. If no characters are left, <code>-1</code> is returned.
235 * Otherwise a cast to <code>char</code> gives the character read.
236 *
237 * @return Character read at current position, if there are no more chars
238 * <code>-1</code> is returned
239 */
240 public final int getChar() {
241 if (position >= source.length()) {
242 return -1;
243 }
244 return source.charAt(position);
245 }
246
247 /**
248 * Reads a single character and does not change the reading
249 * position. If offset addition leads out of the source,
250 * <code>-1</code> is returned. Otherwise a cast to <code>char</code>
251 * gives the character read.
252 *
253 * @param skip Offset from current reading position. Maybe negative.
254 * @return Character read, if position is out of scope
255 * <code>-1</code> is returned.
256 */
257 public final int getChar(final int skip) {
258 if (position + skip < 0 || position + skip >= source.length()) {
259 return -1;
260 }
261 return source.charAt(position + skip);
262 }
263
264
265 /**
266 * Reads a substring. Doesn't change reading position. Throws never an Exception.
267 *
268 * @param from Absolute reading position.
269 * @param to Read to this position.
270 * @return Resulting string.
271 */
272 public final String getSubstring(final int from, final int to) {
273 if (from >= to) {
274 return "";
275 }
276 final int l = source.length();
277 final int f = (from < 0 ? 0 : (from > l ? l : from));
278 final int t = (to < 0 ? 0 : (to > l ? l : to));
279 return source.substring(f, t);
280 }
281
282 /**
283 * Get complete input source. Doesn't change reading position.
284 *
285 * @return Complete input string.
286 */
287 public final String asString() {
288 return source.toString();
289 }
290
291 /**
292 * Replace given interval with given string.
293 * If the current reading position is in the interval it is set
294 * to the end of the interval.
295 *
296 * @param from Absolute reading position.
297 * @param to Read to this position.
298 * @param replacement Replacement for interval.
299 */
300 public final void replace(final int from, final int to, final String replacement) {
301 source.replace(from, to, replacement);
302 if (position > from && position < to) {
303 setPosition(from + replacement.length());
304 } else if (position > from) { // correct row (and column) information
305 setPosition(position - to + from + replacement.length());
306 }
307 }
308
309 /**
310 * Skips white space, beginning from reading position.
311 * Changes reading position to next non white space
312 * character.
313 */
314 public final void skipWhiteSpace() {
315 while (!isEmpty() && Character.isWhitespace((char) getChar())) {
316 read();
317 }
318 }
319
320 /**
321 * Skips white space, beginning from reading position.
322 * Changes reading position to next non white space
323 * character.
324 */
325 public final void skipWhiteSpaceInverse() {
326 while (getPosition() > 0 && Character.isWhitespace((char) getChar(-1))) {
327 readInverse();
328 }
329 }
330
331 /**
332 * Skip current position back to beginning of an XML tag.
333 * This is mainly something like <code><tagName</code>.
334 *
335 * @throws IllegalArgumentException No begin of XML tag found.
336 */
337 public final void skipBackToBeginOfXmlTag() {
338 if ('<' == getChar()) {
339 return;
340 }
341 boolean quoted = false;
342 do {
343 if (-1 == readInverse()) {
344 throw new IllegalArgumentException("begin of xml tag not found");
345 }
346 if ('\"' == getChar()) {
347 quoted = !quoted;
348 }
349 } while (quoted || '<' != getChar());
350 }
351
352 /**
353 * Skip forward to end of line.
354 */
355 public final void skipToEndOfLine() {
356 int c = 0;
357 do {
358 c = read();
359 } while (!isEmpty() && c != CR);
360 }
361
362 /**
363 * Skip current position forward to end of an XML tag. It is assumed the current position is
364 * within the the XML tag. Now we search for <code>></code>. Quoted data is skipped.
365 *
366 * @throws IllegalArgumentException No end of XML tag found.
367 */
368 public final void skipForwardToEndOfXmlTag() {
369 if ('>' == getChar()) {
370 return;
371 }
372 boolean quoted = false;
373 while (!isEmpty() && (quoted || '>' != getChar())) {
374 int c = read();
375 if ('\"' == c) {
376 quoted = !quoted;
377 }
378 }
379 if (isEmpty()) {
380 throw new IllegalArgumentException("end of xml tag not found");
381 }
382 read(); // skip '>'
383 }
384
385 /**
386 * Reads tag or attribute name out of XML stream. Whitespace is skipped and
387 * characters are read till "=" or ">" or "< or whitespace is
388 * found. We must be within the tag, so we can not start with something like <.
389 *
390 * @return Name of tag or attribute.
391 * @throws IllegalArgumentException Next non white space character is "="
392 * or ">".
393 */
394 public final String readNextXmlName() {
395 skipWhiteSpace();
396 if (isEmpty() || '=' == getChar() || '>' == getChar() || '<' == getChar()) {
397 throw new IllegalArgumentException(
398 "begin of attribute or tag expected");
399 }
400 StringBuffer buffer = new StringBuffer();
401 while (!isEmpty() && '=' != getChar() && '>' != getChar() && '<' != getChar()
402 && !Character.isWhitespace((char) getChar())) {
403 buffer.append((char) read());
404 }
405 return buffer.toString();
406 }
407
408 /**
409 * Reads attribute value out of XML stream. Whitespace is skipped and an "="
410 * is expected to follow. Again whitespace is skipped. If no quotation mark follows
411 * characters are read till whitespace or ">" occurs. Otherwise data is
412 * read till an ending quotation mark comes.
413 *
414 * @return Value read.
415 * @throws IllegalArgumentException Following had not one of the following forms:
416 * <pre>
417 * = "value"
418 * </pre>
419 * <pre>
420 * = value
421 * </pre>
422 */
423 public final String readNextAttributeValue() {
424 skipWhiteSpace();
425 if (isEmpty() || '=' != getChar()) {
426 throw new IllegalArgumentException(
427 "\"=\" expected");
428 }
429 read(); // read =
430 skipWhiteSpace();
431 if (isEmpty() || '>' == getChar()) {
432 throw new IllegalArgumentException(
433 "attribute value expected");
434 }
435 StringBuffer buffer = new StringBuffer();
436 if ('\"' == getChar()) {
437 read(); // read "
438 while (!isEmpty() && '\"' != getChar()) {
439 buffer.append((char) read());
440 }
441 if ('\"' != getChar()) {
442 throw new IllegalArgumentException("\" expected");
443 }
444 read(); // read "
445 } else {
446 while (!isEmpty() && '>' != getChar()
447 && !Character.isWhitespace((char) getChar())) {
448 buffer.append((char) read());
449 }
450 }
451 return StringUtility.unescapeXml(buffer.toString());
452 }
453
454 /**
455 * Is there no data left for reading?
456 *
457 * @return is all data read?
458 */
459 public final boolean isEmpty() {
460 return position >= source.length();
461 }
462
463 /**
464 * Is there no data left for reading after skipping?
465 *
466 * @param skip Add this number to current position.
467 * @return Is data empty at that new position?
468 */
469 public final boolean isEmpty(final int skip) {
470 return position + skip >= source.length();
471 }
472
473 /**
474 * Reads the next string containing only letters or digits,
475 * leading whitespace is skipped.
476 * Changes reading position.
477 *
478 * @return read string
479 * @throws IllegalArgumentException if no such characters could
480 * be found
481 */
482 public final String readLetterDigitString() {
483 skipWhiteSpace();
484 if (isEmpty() || !Character.isLetterOrDigit((char) getChar())) {
485 read(); // for showing correct position
486 throw new IllegalArgumentException(
487 "letter or digit expected");
488 }
489 StringBuffer buffer = new StringBuffer();
490 while (!isEmpty() && Character.isLetterOrDigit((char) getChar())) {
491 buffer.append((char) read());
492 }
493 return buffer.toString();
494 }
495
496 /**
497 * Reads the next string until whitespace occurs,
498 * leading whitespace is skipped.
499 * Changes (probably) reading position.
500 *
501 * @return read string
502 */
503 public final String readStringTilWhitespace() {
504 skipWhiteSpace();
505 StringBuffer buffer = new StringBuffer();
506 while (!isEmpty() && !Character.isWhitespace((char) getChar())) {
507 buffer.append((char) read());
508 }
509 return buffer.toString();
510 }
511
512 /**
513 * Reads the next integer, leading whitespace is skipped. Signs like - or + are not
514 * accepted. Resulting integer
515 * Changes reading position.
516 *
517 * @return Read integer.
518 * @throws IllegalArgumentException if no digits where found or
519 * the number was to big for an <code>int</code>
520 */
521 public final int readNonNegativeInt() {
522 skipWhiteSpace();
523 if (isEmpty() || !Character.isDigit((char) getChar())) {
524 read(); // for showing correct position
525 throw new IllegalArgumentException(
526 "digit expected");
527 }
528 StringBuffer buffer = new StringBuffer();
529 while (!isEmpty() && Character.isDigit((char) getChar())) {
530 buffer.append((char) read());
531 }
532 final BigInteger big = new BigInteger(buffer.toString());
533 if (1 == big.compareTo(maxIntValue)) {
534 throw new IllegalArgumentException("this integer is to large! Maximum possible value is "
535 + maxIntValue);
536 }
537 return big.intValue();
538 }
539
540 /**
541 * Reads the next (big) integer, leading whitespace is skipped.
542 * The first character might be a minus sign, the rest must be
543 * digits. Leading zero digits are not allowed, also "-0" is not
544 * accepted. <p>
545 * Changes reading position.
546 *
547 * @return read integer
548 * @throws IllegalArgumentException if no digits where found.
549 */
550 public final String readCounter() {
551 skipWhiteSpace();
552 if (isEmpty()) {
553 throw new IllegalArgumentException("integer expected");
554 }
555 StringBuffer buffer = new StringBuffer();
556 if (getChar() == '-') {
557 buffer.append(read());
558 }
559 final int begin = getPosition();
560 if (!Character.isDigit((char) getChar())) {
561 throw new IllegalArgumentException("digit expected");
562 }
563 while (!isEmpty() && Character.isDigit((char) getChar())) {
564 buffer.append((char) read());
565 }
566 if (buffer.length() >= 2 && ('0' == buffer.charAt(0)
567 || '-' == buffer.charAt(0) && '0' == buffer.charAt(1))) {
568 setPosition(begin); // for showing correct position
569 throw new IllegalArgumentException("no leading zeros allowed");
570 }
571 return buffer.toString();
572 }
573
574 /**
575 * Reads the next quoted string, leading whitespace is skipped.
576 * A correctly quoted string could be created by adding a leading and
577 * a trailing quote character and doubling each other quote character.
578 * The resulting string is dequoted.
579 * Changes reading position.
580 *
581 * @return Dequoted string read.
582 * @throws IllegalArgumentException No correctly quoted string was found.
583 */
584 public final String readQuoted() {
585 skipWhiteSpace();
586 if (isEmpty() || read() != '\"') {
587 throw new IllegalArgumentException(
588 "\" expected");
589 }
590 StringBuffer unquoted = new StringBuffer();
591 char c;
592 do {
593 if (isEmpty()) {
594 throw new IllegalArgumentException(
595 "ending \" expected");
596 }
597 c = (char) read();
598 if (c != '\"') {
599 unquoted.append(c);
600 } else { // c == '\"'
601 if (isEmpty() || getChar() != '\"') {
602 break; // success
603 }
604 unquoted.append((char) read());
605 }
606 } while (true);
607 return unquoted.toString();
608 }
609
610 /**
611 * Returns the current line number.
612 *
613 * @return Current line number (starting with line 1).
614 */
615 public final int getRow() {
616 return lineNumber + 1;
617 }
618
619 /**
620 * Returns the current column number.
621 *
622 * @return Current column number (starting with line 1).
623 */
624 public final int getColumn() {
625 return column + 1;
626 }
627
628 /**
629 * Returns the current line.
630 *
631 * @return Current line.
632 */
633 public final String getLine() {
634 int min = position - 1;
635 while (min >= 0 && source.charAt(min) != CR) {
636 min--;
637 }
638 int max = position;
639 while (max < source.length()
640 && source.charAt(max) != CR) {
641 max++;
642 }
643 if (min + 1 >= max) {
644 return "";
645 }
646 return source.substring(min + 1, max);
647 }
648
649 /**
650 * Returns the current position. Starting with 0. This is the number of characters
651 * from the beginning.
652 *
653 * @return Current position.
654 */
655 public final int getPosition() {
656 return position;
657 }
658
659 /**
660 * Returns the current position.
661 *
662 * @return Current position.
663 */
664 public final SourcePosition getSourcePosition() {
665 return new SourcePosition(getRow(), getColumn());
666 }
667
668 /**
669 * Returns the highest position number possible. This is equal
670 * to the length of the source.
671 *
672 * @return Maximum position.
673 */
674 public final int getMaximumPosition() {
675 return source.length();
676 }
677
678 /**
679 * Sets the current position (and indirectly the row and column number).
680 *
681 * @param position Set current position to this value.
682 */
683 public final void setPosition(final int position) {
684 if (position >= source.length()) {
685 this.position = source.length();
686 } else if (this.position != position) {
687 if (position < this.position) {
688 this.position = 0;
689 this.lineNumber = 0;
690 this.column = 0;
691 for (int i = 0; i < position; i++) { // Q & D
692 read();
693 }
694 } else {
695 for (int i = this.position; i < position; i++) {
696 read();
697 }
698 }
699 }
700 }
701
702 /**
703 * Sets the current position (and indirectly the row and column number).
704 *
705 * @param position Set current position to this value.
706 */
707 public final void setPosition(final SourcePosition position) {
708 setRow(position.getRow());
709 setColumn(position.getColumn());
710 }
711
712 /**
713 * Adds a given position to the current one and changes the row and column number accordingly.
714 * A delta position with one row and one column doesn't change the current position.
715 *
716 * @param delta Add this position to current one.
717 */
718 public final void addPosition(final SourcePosition delta) {
719 addRow(delta.getRow() - 1);
720 addColumn(delta.getColumn() - 1);
721 }
722
723 /**
724 * Sets the current line number (and indirectly the position).
725 *
726 * @param row Move to this line number.
727 */
728 public final void setRow(final int row) {
729 int r = row;
730 // check if row is under lower bound
731 if (r <= 0) {
732 r = 1;
733 }
734 // check if already at wanted position
735 if (getRow() == r) {
736 return;
737 }
738 // check if already at end of file
739 if (getPosition() >= source.length() && getRow() >= r) {
740 return;
741 }
742 if (getRow() > r) {
743 // reset to begin of file
744 this.position = 0;
745 this.lineNumber = 0;
746 this.column = 0;
747 }
748 for (int i = 0; getRow() < r; i++) {
749 if (EOF == read()) {
750 return;
751 }
752 }
753 }
754
755 /**
756 * Get given byte position as {@link SourcePosition}.
757 *
758 * @param find Get row and column information for this byte position.
759 * @return Row and column information.
760 */
761 public final SourcePosition getPosition(final int find) {
762 int r = 0;
763 int c = 0;
764 int i = 0;
765 while (i < source.length() && i < find) {
766 if (CR == source.charAt(i)) {
767 r++;
768 c = 0;
769 } else {
770 c++;
771 }
772 i++;
773 }
774 return new SourcePosition(r + 1, c + 1);
775 }
776
777 /**
778 * Get given byte position as {@link SourcePosition}.
779 *
780 * @param position Get row and column information for this byte position.
781 * @return Row and column information.
782 */
783 public final int getPosition(final SourcePosition position) {
784 int find = 0;
785 int r = 0;
786 while (++r < position.getRow() && -1 < (find = source.indexOf("" + CR, find))) {
787 // nothing to do
788 }
789 if (find < 0) {
790 find = source.length();
791 }
792 find += position.getColumn();
793 if (find > source.length()) {
794 find = source.length();
795 }
796 return find;
797 }
798
799 /**
800 * Get source area as string.
801 *
802 * @param area Get this area as string.
803 * @return Area itself.
804 */
805 public final String getSourceArea(final SourceArea area) {
806 return source.substring(getPosition(area.getStartPosition()),
807 getPosition(area.getEndPosition()));
808 }
809
810 /**
811 * Add the following rows and reset column (if <code>number == 0</code>).
812 *
813 * @param number Add this number of rows.
814 */
815 public final void addRow(final int number) {
816 setRow(getRow() + number);
817 }
818
819 /**
820 * Sets the current column position (and indirectly the position).
821 * If <code>column</code> is out of range the minimum value (1) or the maximum possible column
822 * value is taken.
823 *
824 * @param column Move to this column. First column has the number one.
825 */
826 public final void setColumn(final int column) {
827 int c = column;
828 // check if column is out of lower bound
829 if (c <= 0) {
830 c = 1;
831 }
832 // check if already at wanted position
833 if (getColumn() == c) {
834 return;
835 }
836 if (getColumn() > c) {
837 do {
838 this.position--;
839 this.column--;
840 } while (getColumn() > c);
841 return;
842 }
843 while (getChar() != CR && getChar() != EOF && getColumn() < c) {
844 read();
845 }
846 }
847
848 /**
849 * Add the following columns.
850 *
851 * @param number Add this number of columns.
852 */
853 public final void addColumn(final int number) {
854 setColumn(getColumn() + number);
855 }
856
857 /**
858 * Show reading position.
859 *
860 * @return current line with mark at current reading position
861 */
862 public final String showLinePosition() {
863 final String line = getLine();
864 final StringBuffer buffer = new StringBuffer();
865 final int col = getColumn() - 1;
866 if (col > 0) {
867 if (col < line.length()) {
868 buffer.append(line.substring(0, col));
869 } else {
870 buffer.append(line);
871 }
872 }
873 buffer.append(MARKER);
874 if (col < line.length()) {
875 buffer.append(line.substring(col));
876 }
877 return buffer.toString();
878 }
879
880 // LATER mime 20050608: remove if no use
881 /*
882 public final int findCaretPosition(final int line, final int column, final String source) {
883 if (line == 1) {
884 return 0;
885 }
886 int k = 1;
887 for (int j = 0; j < source.length(); j++) {
888 if (source.charAt(j) == '\n') {
889 k++;
890 }
891 if (k == line) {
892 j += column - 1;
893 if (j > source.length()) {
894 j = source.length();
895 }
896 return j;
897 }
898 }
899 return 0;
900 }
901 */
902
903 }
|