001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org
002 *
003 * Copyright 2000-2011, Michael Meyling <mime@qedeq.org>.
004 *
005 * "Hilbert II" is free software; you can redistribute
006 * it and/or modify it under the terms of the GNU General Public
007 * License as published by the Free Software Foundation; either
008 * version 2 of the License, or (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 * GNU General Public License for more details.
014 */
015
016 package org.qedeq.base.io;
017
018 import java.io.File;
019 import java.io.IOException;
020 import java.io.InputStream;
021 import java.io.Reader;
022
023 import org.qedeq.base.utility.StringUtility;
024
025
026 /**
027 * This class provides convenient methods for parsing input.
028 *
029 * @author Michael Meyling
030 */
031 public class TextInput extends InputStream {
032
033 /** Char marking end of data. */
034 public static final int EOF = -1;
035
036 /** Char marking end of input line. */
037 // public final static char CR = '\n'; // LATER mime 20050613: delete if running on all platforms
038 public static final char CR = '\012';
039
040 /** String for marking current reading position. */
041 private static final String MARKER = "#####";
042
043 /** Holds the data. */
044 private final StringBuffer source;
045
046 /** Current line number (starting with 0). */
047 private int lineNumber = 0;
048
049 /** Current column (starting with 0). */
050 private int column = 0;
051
052 /** Current reading position (starting with 0). */
053 private int position = 0;
054
055 /**
056 * Constructor using <code>Reader</code> source.
057 *
058 * @param reader Data source. This reader will be closed (even if reading fails).
059 * @throws IOException Reading failed.
060 * @throws NullPointerException Argument was a null pointer.
061 */
062 public TextInput(final Reader reader) throws IOException {
063 try {
064 if (reader == null) {
065 throw new NullPointerException(
066 "no null pointer as argument accepted");
067 }
068 this.source = new StringBuffer();
069 // TODO mime 20080307: optimize reading
070 int c;
071 while (-1 != (c = reader.read())) {
072 this.source.append((char) c);
073 }
074 } finally {
075 IoUtility.close(reader);
076 }
077 }
078
079 /**
080 * Constructor using <code>StringBuffer</code> source.
081 *
082 * @param source data source
083 * @throws NullPointerException Argument was a null pointer.
084 */
085 public TextInput(final StringBuffer source) {
086 if (source == null) {
087 throw new NullPointerException(
088 "no null pointer as argument accepted");
089 }
090 this.source = source;
091 }
092
093 /**
094 * Constructor using <code>String</code> source.
095 *
096 * @param source data source
097 * @throws NullPointerException Argument was a null pointer.
098 */
099 public TextInput(final String source) {
100 if (source == null) {
101 throw new NullPointerException(
102 "no null pointer as argument accepted");
103 }
104 this.source = new StringBuffer(source);
105 }
106
107
108 /**
109 * Constructor using <code>FILE</code> source.
110 *
111 * @param file Data source.
112 * @param encoding Take this encoding for file.
113 * @throws IOException File reading failed.
114 * @throws NullPointerException One argument was a null pointer.
115 */
116 public TextInput(final File file, final String encoding) throws IOException {
117 if (file == null) {
118 throw new NullPointerException(
119 "no null pointer as argument accepted");
120 }
121 this.source = new StringBuffer();
122 IoUtility.loadFile(file, source, encoding);
123 }
124
125 /**
126 * Reads a single character and increments the reading position
127 * by one. If no characters are left, <code>-1</code> is returned.
128 * Otherwise a cast to <code>char</code> gives the character read.
129 *
130 * @return Character read, if there are no more chars
131 * <code>-1</code> is returned.
132 */
133 public final int read() {
134 if (position >= source.length()) {
135 return EOF;
136 }
137 if (getChar() == CR) {
138 lineNumber++;
139 column = 0;
140 } else {
141 column++;
142 }
143 return source.charAt(position++);
144 }
145
146 /**
147 * Decrements the reading position by one and reads a single character.
148 * If no characters are left, <code>-1</code> is returned.
149 * Otherwise a cast to <code>char</code> gives the character read.
150 *
151 * @return Character read, if there are no more chars
152 * <code>-1</code> is returned.
153 */
154 public final int readInverse() {
155 if (position <= 0) {
156 return -1;
157 }
158 final char c = source.charAt(--position);
159 if (c == CR) {
160 lineNumber--;
161 int pos = source.lastIndexOf("" + CR, position - 1);
162 if (pos < 0) {
163 column = position;
164 } else {
165 column = position - 1 - pos;
166 }
167 } else {
168 column--;
169 if (column < 0) {
170 throw new IllegalStateException("column less then 0");
171 }
172 }
173 return c;
174 }
175
176 /**
177 * Reads a given amount of characters and increments the reading position
178 * accordingly.
179 *
180 * @param number amount of characters to read
181 * @return string read
182 */
183 public final String readString(final int number) {
184 final StringBuffer result = new StringBuffer(number);
185 for (int i = 0; i < number; i++) {
186 final int c = read();
187 if (c != -1) {
188 result.append((char) c);
189 } else {
190 break;
191 }
192 }
193 return result.toString();
194 }
195
196 /**
197 * Skips a given amount of characters and increments the reading position
198 * accordingly.
199 *
200 * @param number Amount of characters to read
201 */
202 public final void forward(final int number) {
203 for (int i = 0; i < number; i++) {
204 final int c = read();
205 if (c == -1) {
206 break;
207 }
208 }
209 }
210
211 /**
212 * Skips until a given keyword is reached. The position afterwards is at the start
213 * of the keyword or at the end of the text (if the keyword is not found).
214 *
215 * @param search Look for this keyword.
216 * @return Was the keyword found?
217 */
218 public final boolean forward(final String search) {
219 final int pos = source.indexOf(search, position);
220 if (pos < 0) {
221 setPosition(getMaximumPosition());
222 return false;
223 }
224 setPosition(pos);
225 return true;
226 }
227
228 /**
229 * Reads a single character and does not change the reading
230 * position. If no characters are left, <code>-1</code> is returned.
231 * Otherwise a cast to <code>char</code> gives the character read.
232 *
233 * @return Character read at current position, if there are no more chars
234 * <code>-1</code> is returned
235 */
236 public final int getChar() {
237 if (position >= source.length()) {
238 return -1;
239 }
240 return source.charAt(position);
241 }
242
243 /**
244 * Reads a single character and does not change the reading
245 * position. If offset addition leads out of the source,
246 * <code>-1</code> is returned. Otherwise a cast to <code>char</code>
247 * gives the character read.
248 *
249 * @param skip Offset from current reading position. Maybe negative.
250 * @return Character read, if position is out of scope
251 * <code>-1</code> is returned.
252 */
253 public final int getChar(final int skip) {
254 if (position + skip < 0 || position + skip >= source.length()) {
255 return -1;
256 }
257 return source.charAt(position + skip);
258 }
259
260
261 /**
262 * Reads a substring. Doesn't change reading position.
263 *
264 * @param from Absolute reading position.
265 * @param to Read to this position.
266 * @return Resulting string.
267 * @throws StringIndexOutOfBoundsException If from > to.
268 */
269 public final String getSubstring(final int from, final int to) {
270 final int l = source.length();
271 final int f = (from < 0 ? 0 : (from > l ? l : from));
272 final int t = (to < 0 ? 0 : (to > l ? l : to));
273 return source.substring(f, t);
274 }
275
276 /**
277 * Get complete input source. Doesn't change reading position.
278 *
279 * @return Complete input string.
280 */
281 public final String asString() {
282 return source.toString();
283 }
284
285 /**
286 * Replace given interval with given string.
287 * If the current reading position is in the interval it is set
288 * to the end of the interval.
289 *
290 * @param from Absolute reading position.
291 * @param to Read to this position.
292 * @param replacement Replacement for interval.
293 */
294 public final void replace(final int from, final int to, final String replacement) {
295 source.replace(from, to, replacement);
296 if (position > from && position < to) {
297 setPosition(from + replacement.length());
298 } else if (position > from) { // correct row (and column) information
299 setPosition(position - to + from + replacement.length());
300 }
301 }
302
303 /**
304 * Skips white space, beginning from reading position.
305 * Changes reading position to next non white space
306 * character.
307 */
308 public final void skipWhiteSpace() {
309 while (!isEmpty() && Character.isWhitespace((char) getChar())) {
310 read();
311 }
312 }
313
314 /**
315 * Skips white space, beginning from reading position.
316 * Changes reading position to next non white space
317 * character.
318 */
319 public final void skipWhiteSpaceInverse() {
320 while (getPosition() > 0 && Character.isWhitespace((char) getChar(-1))) {
321 readInverse();
322 }
323 }
324
325 /**
326 * Skip current position back to beginning of an XML tag.
327 * This is mainly something like <code><tagName</code>.
328 *
329 * @throws IllegalArgumentException No begin of XML tag found.
330 */
331 public final void skipBackToBeginOfXmlTag() {
332 if ('<' == getChar()) {
333 return;
334 }
335 boolean quoted = false;
336 do {
337 if (-1 == readInverse()) {
338 throw new IllegalArgumentException("begin of xml tag not found");
339 }
340 if ('\"' == getChar()) {
341 quoted = !quoted;
342 }
343 } while (quoted || '<' != getChar());
344 }
345
346 /**
347 * Skip current position forward to end of an XML tag.
348 * This is mainly something like <code>></code>. Quoted data is skipped.
349 *
350 * @throws IllegalArgumentException No end of XML tag found.
351 */
352 public final void skipForwardToEndOfXmlTag() {
353 if ('>' == getChar()) {
354 return;
355 }
356 boolean quoted = false;
357 while ('>' != getChar()) {
358 if ('\"' == getChar()) {
359 quoted = !quoted;
360 }
361 if (!quoted) {
362 if (-1 == read()) {
363 throw new IllegalArgumentException("end of xml tag not found");
364 }
365 }
366 }
367 read(); // skip '>'
368 }
369
370 /**
371 * Reads tag or attribute name out of XML stream. Whitespace is skipped and
372 * characters are read till "=" or ">" or whitespace is found.
373 *
374 * @return Name of tag or attribute.
375 * @throws IllegalArgumentException Next non white space character is "="
376 * or ">".
377 */
378 public final String readNextXmlName() {
379 skipWhiteSpace();
380 if (isEmpty() || '=' == getChar() || '>' == getChar()) {
381 throw new IllegalArgumentException(
382 "begin of attribute expected");
383 }
384 StringBuffer buffer = new StringBuffer();
385 while (!isEmpty() && '=' != getChar() && '>' != getChar()
386 && !Character.isWhitespace((char) getChar())) {
387 buffer.append((char) read());
388 }
389 return buffer.toString();
390 }
391
392 /**
393 * Reads attribute value out of XML stream. Whitespace is skipped and an "="
394 * is expected to follow. Again whitespace is skipped. If no quotation mark follows
395 * characters are read till whitespace or ">" occurs. Otherwise data is
396 * read till an ending quotation mark comes.
397 *
398 * @return Value read.
399 * @throws IllegalArgumentException Following had not one of the following forms:
400 * <pre>
401 * = "value"
402 * </pre>
403 * <pre>
404 * = value
405 * </pre>
406 */
407 public final String readNextAttributeValue() {
408 skipWhiteSpace();
409 if (isEmpty() || '=' != getChar()) {
410 throw new IllegalArgumentException(
411 "\"=\" expected");
412 }
413 read(); // read =
414 skipWhiteSpace();
415 if (isEmpty() || '>' == getChar()) {
416 throw new IllegalArgumentException(
417 "attribute value expected");
418 }
419 StringBuffer buffer = new StringBuffer();
420 if ('\"' == getChar()) {
421 read(); // read "
422 while (!isEmpty() && '\"' != getChar()) {
423 buffer.append((char) read());
424 }
425 if ('\"' != getChar()) {
426 throw new IllegalArgumentException("\" expected");
427 }
428 read(); // read "
429 } else {
430 while (!isEmpty() && '>' != getChar()
431 && !Character.isWhitespace((char) getChar())) {
432 buffer.append((char) read());
433 }
434 }
435 return StringUtility.unescapeXml(buffer.toString());
436 }
437
438 /**
439 * Is there no data left for reading?
440 *
441 * @return is all data read?
442 */
443 public final boolean isEmpty() {
444 return position >= source.length();
445 }
446
447 /**
448 * Is there no data left for reading after skipping?
449 *
450 * @param skip Add this number to current position.
451 * @return Is data empty at that new position?
452 */
453 public final boolean isEmpty(final int skip) {
454 return position + skip >= source.length();
455 }
456
457 /**
458 * Reads the next string containing only letters or digits,
459 * leading whitespace is skipped.
460 * Changes reading position.
461 *
462 * @return read string
463 * @throws IllegalArgumentException if no such characters could
464 * be found
465 */
466 public final String readLetterDigitString() {
467 skipWhiteSpace();
468 if (isEmpty() || !Character.isLetterOrDigit((char) getChar())) {
469 read(); // for showing correct position
470 throw new IllegalArgumentException(
471 "letter or digit expected");
472 }
473 StringBuffer buffer = new StringBuffer();
474 while (!isEmpty() && Character.isLetterOrDigit((char) getChar())) {
475 buffer.append((char) read());
476 }
477 return buffer.toString();
478 }
479
480 /**
481 * Reads the next (big) integer, leading whitespace is skipped.
482 * The first character might be a minus sign, the rest must be
483 * digits. Leading zero digits are not allowed, also "-0" is not
484 * accepted. <p>
485 * Changes reading position.
486 *
487 * @return read integer
488 * @throws IllegalArgumentException if no digits where found or
489 * the number was to big for an <code>int</code>
490 */
491 public final String readCounter() {
492 skipWhiteSpace();
493 if (isEmpty()) {
494 throw new IllegalArgumentException("integer expected");
495 }
496 StringBuffer buffer = new StringBuffer();
497 if (getChar() == '-') {
498 buffer.append(read());
499 }
500 final int begin = getPosition();
501 if (!Character.isDigit((char) getChar())) {
502 throw new IllegalArgumentException("digit expected");
503 }
504 while (!isEmpty() && Character.isDigit((char) getChar())) {
505 buffer.append((char) read());
506 }
507 if (buffer.length() >= 2 && ('0' == buffer.charAt(0)
508 || '-' == buffer.charAt(0) && '0' == buffer.charAt(1))) {
509 setPosition(begin); // for showing correct position
510 throw new IllegalArgumentException("no leading zeros allowed");
511 }
512 return buffer.toString();
513 }
514
515 /**
516 * Reads the next quoted string, leading whitespace is skipped.
517 * A correctly quoted string could be created by adding a leading and
518 * a trailing quote character and doubling each other quote character.
519 * The resulting string is dequoted.
520 * Changes reading position.
521 *
522 * @return Dequoted string read.
523 * @throws IllegalArgumentException No correctly quoted string was found.
524 */
525 public final String readQuoted() {
526 skipWhiteSpace();
527 if (isEmpty() || read() != '\"') {
528 throw new IllegalArgumentException(
529 "\" expected");
530 }
531 StringBuffer unquoted = new StringBuffer();
532 char c;
533 do {
534 if (isEmpty()) {
535 throw new IllegalArgumentException(
536 "ending \" expected");
537 }
538 c = (char) read();
539 if (c != '\"') {
540 unquoted.append(c);
541 } else { // c == '\"'
542 if (isEmpty() || getChar() != '\"') {
543 break; // success
544 }
545 unquoted.append((char) read());
546 }
547 } while (true);
548 return unquoted.toString();
549 }
550
551 /**
552 * Returns the current line number.
553 *
554 * @return Current line number (starting with line 1).
555 */
556 public final int getRow() {
557 return lineNumber + 1;
558 }
559
560 /**
561 * Returns the current column number.
562 *
563 * @return Current column number (starting with line 1).
564 */
565 public final int getColumn() {
566 return column + 1;
567 }
568
569 /**
570 * Returns the current line.
571 *
572 * @return Current line.
573 */
574 public final String getLine() {
575 int min = position - 1;
576 while (min >= 0 && source.charAt(min) != CR) {
577 min--;
578 }
579 int max = position;
580 while (max < source.length()
581 && source.charAt(max) != CR) {
582 max++;
583 }
584 if (min + 1 >= max) {
585 return "";
586 }
587 return source.substring(min + 1, max);
588 }
589
590 /**
591 * Returns the current position. Starting with 0. This is the number of characters
592 * from the beginning.
593 *
594 * @return Current position.
595 */
596 public final int getPosition() {
597 return position;
598 }
599
600 /**
601 * Returns the current position.
602 *
603 * @return Current position.
604 */
605 public final SourcePosition getSourcePosition() {
606 return new SourcePosition(getRow(), getColumn());
607 }
608
609 /**
610 * Returns the highest position number possible. This is equal
611 * to the length of the source.
612 *
613 * @return Maximum position.
614 */
615 public final int getMaximumPosition() {
616 return source.length();
617 }
618
619 /**
620 * Sets the current position (and indirectly the row and column number).
621 *
622 * @param position Set current position to this value.
623 */
624 public final void setPosition(final int position) {
625 if (position >= source.length()) {
626 this.position = source.length();
627 } else if (this.position != position) {
628 if (position < this.position) {
629 this.position = 0;
630 this.lineNumber = 0;
631 this.column = 0;
632 for (int i = 0; i < position; i++) { // Q & D
633 read();
634 }
635 } else {
636 for (int i = this.position; i < position; i++) {
637 read();
638 }
639 }
640 }
641 }
642
643 /**
644 * Sets the current position (and indirectly the row and column number).
645 *
646 * @param position Set current position to this value.
647 */
648 public final void setPosition(final SourcePosition position) {
649 setRow(position.getRow());
650 setColumn(position.getColumn());
651 }
652
653 /**
654 * Adds a given position to the current one and changes the row and column number accordingly.
655 * A delta position with one row and one column doesn't change the current position.
656 *
657 * @param delta Add this position to current one.
658 */
659 public final void addPosition(final SourcePosition delta) {
660 addRow(delta.getRow() - 1);
661 addColumn(delta.getColumn() - 1);
662 }
663
664 /**
665 * Sets the current line number (and indirectly the position).
666 *
667 * @param row Move to this line number.
668 */
669 public final void setRow(final int row) {
670 int r = row;
671 // check if row is under lower bound
672 if (r <= 0) {
673 r = 1;
674 }
675 // check if already at wanted position
676 if (getRow() == r) {
677 return;
678 }
679 // check if already at end of file
680 if (getPosition() >= source.length() && getRow() >= r) {
681 return;
682 }
683 if (getRow() > r) {
684 // reset to begin of file
685 this.position = 0;
686 this.lineNumber = 0;
687 this.column = 0;
688 }
689 for (int i = 0; getRow() < r; i++) {
690 if (EOF == read()) {
691 return;
692 }
693 }
694 }
695
696 /**
697 * Get given byte position as {@link SourcePosition}.
698 *
699 * @param find Get row and column information for this byte position.
700 * @return Row and column information.
701 */
702 public final SourcePosition getPosition(final int find) {
703 int r = 0;
704 int c = 0;
705 int i = 0;
706 while (i < source.length() && i < find) {
707 if (CR == source.charAt(i)) {
708 r++;
709 c = 0;
710 } else {
711 c++;
712 }
713 i++;
714 }
715 return new SourcePosition(r + 1, c + 1);
716 }
717
718 /**
719 * Get given byte position as {@link SourcePosition}.
720 *
721 * @param position Get row and column information for this byte position.
722 * @return Row and column information.
723 */
724 public final int getPosition(final SourcePosition position) {
725 int find = 0;
726 int r = 0;
727 while (++r < position.getRow() && -1 < (find = source.indexOf("" + CR, find))) {
728 // nothing to do
729 }
730 if (find < 0) {
731 find = source.length();
732 }
733 find += position.getColumn();
734 if (find > source.length()) {
735 find = source.length();
736 }
737 return find;
738 }
739
740 /**
741 * Get source area as string.
742 *
743 * @param area Get this area as string.
744 * @return Area itself.
745 */
746 public final String getSourceArea(final SourceArea area) {
747 return source.substring(getPosition(area.getStartPosition()),
748 getPosition(area.getEndPosition()));
749 }
750
751 /**
752 * Add the following rows and reset column (if <code>number == 0</code>).
753 *
754 * @param number Add this number of rows.
755 */
756 public final void addRow(final int number) {
757 setRow(getRow() + number);
758 }
759
760 /**
761 * Sets the current column position (and indirectly the position).
762 * If <code>column</code> is out of range the minimum value (1) or the maximum possible column
763 * value is taken.
764 *
765 * @param column Move to this column. First column has the number one.
766 */
767 public final void setColumn(final int column) {
768 int c = column;
769 // check if column is out of lower bound
770 if (c <= 0) {
771 c = 1;
772 }
773 // check if already at wanted position
774 if (getColumn() == c) {
775 return;
776 }
777 if (getColumn() > c) {
778 do {
779 this.position--;
780 this.column--;
781 } while (getColumn() > c);
782 return;
783 }
784 while (getChar() != CR && getChar() != EOF && getColumn() < c) {
785 read();
786 }
787 }
788
789 /**
790 * Add the following columns.
791 *
792 * @param number Add this number of columns.
793 */
794 public final void addColumn(final int number) {
795 setColumn(getColumn() + number);
796 }
797
798 /**
799 * Show reading position.
800 *
801 * @return current line with mark at current reading position
802 */
803 public final String showLinePosition() {
804 final String line = getLine();
805 final StringBuffer buffer = new StringBuffer();
806 final int col = getColumn() - 1;
807 if (col > 0) {
808 if (col < line.length()) {
809 buffer.append(line.substring(0, col));
810 } else {
811 buffer.append(line);
812 }
813 }
814 buffer.append(MARKER);
815 if (col < line.length()) {
816 buffer.append(line.substring(col));
817 }
818 return buffer.toString();
819 }
820
821 // LATER mime 20050608: remove if no use
822 /*
823 public final int findCaretPosition(final int line, final int column, final String source) {
824 if (line == 1) {
825 return 0;
826 }
827 int k = 1;
828 for (int j = 0; j < source.length(); j++) {
829 if (source.charAt(j) == '\n') {
830 k++;
831 }
832 if (k == line) {
833 j += column - 1;
834 if (j > source.length()) {
835 j = source.length();
836 }
837 return j;
838 }
839 }
840 return 0;
841 }
842 */
843
844 }
|