001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org
002 *
003 * Copyright 2000-2013, Michael Meyling <mime@qedeq.org>.
004 *
005 * "Hilbert II" is free software; you can redistribute
006 * it and/or modify it under the terms of the GNU General Public
007 * License as published by the Free Software Foundation; either
008 * version 2 of the License, or (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 * GNU General Public License for more details.
014 */
015
016 package org.qedeq.kernel.bo.parser;
017
018 import java.util.ArrayList;
019 import java.util.List;
020
021 import org.qedeq.base.io.TextInput;
022 import org.qedeq.base.trace.Trace;
023
024 /**
025 * Parse term or formula data into {@link org.qedeq.kernel.bo.parser.Term}s.
026 *
027 * @author Michael Meyling
028 */
029 public abstract class MathParser {
030
031 /** This class. */
032 private static final Class CLASS = MathParser.class;
033
034 /** Input source to parse. */
035 private MementoTextInput input;
036
037 /** List of operators. */
038 private List operators;
039
040 /**
041 * Constructor.
042 */
043 public MathParser() {
044 // nothing to do
045 }
046
047 /**
048 * Set input source and defined parameters.
049 *
050 * @param input Input source to parse.
051 * @param operators Operator list.
052 */
053 public void setParameters(final MementoTextInput input, final List operators) {
054 this.input = input;
055 this.operators = operators;
056 }
057
058 /**
059 * Set input source and defined parameters.
060 *
061 * @param input Input source to parse.
062 * @param operators Operator list.
063 */
064 public void setParameters(final TextInput input, final List operators) {
065 setParameters(new MementoTextInput(input), operators);
066 }
067
068 /**
069 * Set input source and defined parameters.
070 *
071 * @param input Input source to parse.
072 * @param operators Operator list.
073 */
074 public void setParameters(final StringBuffer input, final List operators) {
075 setParameters(new TextInput(input), operators);
076 }
077
078 /**
079 * Set input source and defined parameters.
080 *
081 * @param input Input source to parse.
082 * @param operators Operator list.
083 */
084 public void setParameters(final String input, final List operators) {
085 setParameters(new TextInput(input), operators);
086 }
087
088 protected final List getOperators() {
089 return operators;
090 }
091
092 /**
093 * Reads (maximal possible) Term from input.
094 *
095 * @return Read term.
096 * @throws ParserException Parsing failed.
097 */
098 public final Term readTerm() throws ParserException {
099 final String method = "Term readTerm()";
100 Trace.begin(CLASS, this, method);
101 try {
102 final Term term = readMaximalTerm(0);
103 if (eot(getToken())) {
104 readToken();
105 }
106 return term;
107 } finally {
108 Trace.end(CLASS, this, method);
109 }
110 }
111
112 /**
113 * Reads next "maximal" term from input. The following input doesn't make the the term more
114 * complete. Respects the priority of coming operators by comparing it
115 * to the given value.
116 *
117 * @param priority next formula will be the last, if
118 * connecting operator has higher priority
119 * @return Read term.
120 * @throws ParserException Parsing failed.
121 */
122 private final Term readMaximalTerm(final int priority) throws ParserException {
123 final String method = "readMaximalTerm(int)";
124 Trace.begin(CLASS, this, method);
125 Term term = null;
126 try {
127 if (eot(getToken())) {
128 Trace.param(CLASS, this, method, "return term", "null");
129 return null;
130 }
131 term = readPrefixTerm();
132 term = addNextInfixTerms(priority, term);
133 Trace.param(CLASS, this, method,
134 "return term", (term != null ? term.getQedeq() : "null"));
135 return term;
136 } finally {
137 Trace.end(CLASS, this, method);
138 }
139 }
140
141 private Term addNextInfixTerms(final int priority, final Term initialTerm)
142 throws ParserException {
143 final String method = "Term addNextInfixTerms(int, Term)";
144 Trace.begin(CLASS, this, method);
145 Term term = initialTerm;
146 try {
147 Operator newOperator = null;
148 Operator oldOperator = null;
149
150 do {
151 markPosition();
152 newOperator = readOperator(); // we expect an unique infix operator
153 Trace.param(CLASS, this, method,
154 "newOperator", (newOperator != null ? newOperator.getQedeq() : "null"));
155 if (newOperator == null || newOperator.getPriority() <= priority) {
156 Trace.trace(CLASS, this, method, "newOperator is null or of less priority");
157 rewindPosition();
158 Trace.param(CLASS, this, method,
159 "read term", (term != null ? term.getQedeq() : "null"));
160 return term;
161 }
162 if (newOperator.isPrefix()) {
163 Trace.trace(CLASS, this, method, "newOperator is prefix");
164 // TODO mime 20060313: try to read further arguments
165 rewindPosition();
166 Trace.param(CLASS, this, method,
167 "read term", (term != null ? term.getQedeq() : "null"));
168 return term;
169 }
170 if (newOperator.isPostfix()) {
171 rewindPosition();
172 throw new IllegalArgumentException("Postfix Operators not yet supported");
173 }
174 clearMark();
175 if (oldOperator == null || oldOperator.getPriority() >= newOperator.getPriority()) {
176 Trace.trace(CLASS, this, method,
177 "oldOperator is null or has more priority than new");
178 Term term2 = readMaximalTerm(newOperator.getPriority());
179 if (term2 == null) {
180 // TODO mime 20060313: 2 could be false if newOperator == oldOperator
181 throw new TooFewArgumentsException(getPosition(), 2);
182 }
183 if (oldOperator == newOperator) {
184 if (oldOperator.getMax() != -1 && term.size() + 1 >= oldOperator.getMax()) {
185 throw new TooMuchArgumentsException(getPosition(), oldOperator,
186 oldOperator.getMax());
187 }
188 Trace.trace(CLASS, this, method, "new term is added to old term");
189 term.addArgument(term2);
190 } else {
191 // old term is first argument of new operator
192 Trace.trace(CLASS, this, method,
193 "old term is first argument of new operator");
194 term = new Term(newOperator, term);
195 term.addArgument(term2);
196 }
197 } else {
198 Trace.trace(CLASS, this,
199 method, "oldOperator is not null or has less priority than new");
200 Term term2 = readMaximalTerm(newOperator.getPriority());
201 if (term2 == null) {
202 // TODO mime 20060313: 2 could be false if newOperator == oldOperator
203 throw new TooFewArgumentsException(getPosition(), 2);
204 }
205 term = new Term(newOperator, term);
206 term.addArgument(term2);
207 }
208 oldOperator = newOperator;
209 } while (true);
210 } finally {
211 Trace.end(CLASS, this, method);
212 }
213 }
214
215 /**
216 * Read next following term. This is a complete term but some infix operator
217 * or some terms for an infix operator might follow.
218 *
219 * @return Read term.
220 * @throws ParserException Parsing failed.
221 */
222 private final Term readPrefixTerm() throws ParserException {
223 final String method = "readPrefixTerm()";
224 Trace.begin(CLASS, this, method);
225 Term term = null;
226 try {
227 final List readOperators = readOperators(); // there might be several prefix operators
228 if (readOperators != null && readOperators.size() > 0) {
229 Trace.trace(CLASS, this, method, "operators found");
230 term = readPrefixOperator(readOperators);
231 } else { // no operator found
232 Trace.trace(CLASS, this, method, "no operators found");
233 final String token;
234 token = getToken();
235 if (token == null) {
236 Trace.param(CLASS, this, method, "read term", "null");
237 return null;
238 }
239 if ("(".equals(token)) {
240 readToken();
241 Trace.trace(CLASS, this, method, "start bracket found: " + token);
242 term = readMaximalTerm(0);
243 final String lastToken = readToken();
244 if (!")".equals(lastToken)) {
245 throw new ClosingBracketMissingException(getPosition(), "(", lastToken);
246 }
247
248 } else if ("[".equals(token)) {
249 readToken();
250 Trace.trace(CLASS, this, method, "start bracket found: " + token);
251 term = readMaximalTerm(0);
252 final String lastToken = readToken();
253 if (!"]".equals(lastToken)) {
254 throw new ClosingBracketMissingException(getPosition(), "[", lastToken);
255 }
256 } else {
257 readToken();
258 Trace.param(CLASS, this, method, "atom", token);
259 term = new Term(new TermAtom(token));
260 }
261 }
262 Trace.param(CLASS, this, method,
263 "read term", (term != null ? term.getQedeq() : "null"));
264 return term;
265 } finally {
266 Trace.end(CLASS, this, method);
267 }
268 }
269
270 /**
271 * Try to parse an prefix operator and its operands from the input. Tries first operator,
272 * second operator and so on. If the last one fails an appropriate exception is thrown.
273 *
274 * @param operators Prefix operator list.
275 * @return Resulting term.
276 * @throws ParserException Parsing failed.
277 */
278 private Term readPrefixOperator(final List operators) throws ParserException {
279 Term term = null;
280 markPosition();
281 for (int number = 0; number < operators.size(); number++) {
282 rewindPosition();
283 markPosition();
284 Operator operator = (Operator) operators.get(number);
285
286 if (!operator.isPrefix()) {
287 clearMark();
288 throw new UnexpectedOperatorException(getPosition(), operator);
289 }
290
291 term = new Term(operator);
292
293 if (operator.isFunction()) {
294 // constants should have no argument list
295 if (operator.getMax() == 0) {
296 break;
297 }
298 List list = readTupel();
299 if (list == null) { // here we don't distinguish between "a()" and "a"
300 list = new ArrayList();
301 }
302 // doesn't have enough arguments
303 if (list.size() < operator.getMin()) {
304 if (number + 1 < operators.size()) {
305 continue; // try again
306 }
307 clearMark();
308 throw new TooFewArgumentsException(getPosition(),
309 operator.getMin());
310 }
311 // has to much arguments
312 if (operator.getMax() != -1 && list.size() > operator.getMax()) {
313 if (number + 1 < operators.size()) {
314 continue; // try again
315 }
316 clearMark();
317 throw new TooMuchArgumentsException(getPosition(), operator,
318 operator.getMax());
319 }
320 for (int i = 0; i < list.size(); i++) {
321 term.addArgument((Term) list.get(i));
322 }
323 break;
324 }
325
326 int i = 0;
327 while (i < operator.getMin()) {
328 if (i > 0 && operator.getSeparatorSymbol() != null) {
329 final String separator = getToken();
330 if (!operator.getSeparatorSymbol().equals(separator)) {
331 if (number + 1 < operators.size()) {
332 continue;
333 }
334 clearMark();
335 throw new SeparatorNotFoundException(getPosition(),
336 operator.getSeparatorSymbol());
337 }
338 readToken();
339 }
340 final Term add = readMaximalTerm(operator.getPriority());
341 if (add == null) {
342 if (number + 1 < operators.size()) {
343 continue;
344 }
345 clearMark();
346 throw new TooFewArgumentsException(getPosition(), operator.getMin());
347 }
348 term.addArgument(add);
349 i++;
350 }
351 while (operator.getMax() == -1 || i < operator.getMax()) {
352 if (i > 0 && operator.getSeparatorSymbol() != null) {
353 final String separator = getToken();
354 if (!operator.getSeparatorSymbol().equals(separator)) {
355 break;
356 }
357 readToken();
358 }
359 if (operator.getEndSymbol() != null) {
360 final String end = getToken();
361 if (operator.getEndSymbol().equals(end)) {
362 break;
363 }
364 }
365 Term add = null;
366 markPosition();
367 try {
368 add = readMaximalTerm(operator.getPriority());
369 clearMark();
370 } catch (Exception e) {
371 rewindPosition();
372 }
373 if (add == null) {
374 break;
375 }
376 term.addArgument(add);
377 i++;
378 }
379 if (operator.getEndSymbol() != null) {
380 final String end = getToken();
381 if (!operator.getEndSymbol().equals(end)) {
382 if (number + 1 < operators.size()) {
383 continue;
384 }
385 clearMark();
386 throw new EndSymbolNotFoundException(getPosition(),
387 operator.getEndSymbol());
388 }
389 readToken();
390 }
391 break;
392 }
393 clearMark();
394 return term;
395 }
396
397 /**
398 * Read n-tupel. This is a list of terms encapsulated by "(" and ")" and the terms are separated
399 * by ",".
400 *
401 * @return List of terms. <code>null</code> if no bracket followed.
402 * @throws ParserException Parsing failed.
403 */
404 private final List readTupel() throws ParserException {
405 final String method = "List readTupel()";
406 Trace.begin(CLASS, this, method);
407 try {
408 final String firstToken;
409 firstToken = getToken();
410 if (!"(".equals(firstToken)) {
411 Trace.trace(CLASS, this, method, "no start bracket found");
412 return null;
413 }
414 readToken(); // read "("
415 List list = new ArrayList();
416 Term term = null;
417 while (null != (term = readMaximalTerm(0))) {
418 list.add(term);
419 final String separatorToken = getToken();
420 if (!",".equals(separatorToken)) {
421 break;
422 }
423 readToken(); // read ","
424 }
425 final String lastToken = readToken();
426 if (!")".equals(lastToken)) {
427 throw new ClosingBracketMissingException(getPosition(), ")", lastToken);
428 }
429 return list;
430 } finally {
431 Trace.end(CLASS, this, method);
432 }
433 }
434
435 /**
436 * Read next operator from input.
437 *
438 * @return Found operator, maybe <code>null</code>.
439 */
440 private final Operator readOperator() {
441 final String method = "Operator readOperator()";
442 Trace.begin(CLASS, this, method);
443
444 try {
445 markPosition();
446 final String token;
447 token = readToken();
448 if (token == null) {
449 rewindPosition();
450 Trace.trace(CLASS, this, method, "no operator found");
451 return null;
452 }
453 final Operator operator = getOperator(token);
454 if (operator == null) {
455 rewindPosition();
456 Trace.trace(CLASS, this, method, "no operator found");
457 return null;
458 }
459 clearMark();
460 Trace.param(CLASS, this, method, "operator", operator.getQedeq());
461 return operator;
462 } finally {
463 Trace.end(CLASS, this, method);
464 }
465 }
466
467 /**
468 * Read next operators from input. Because the token that identifies an operator might be not
469 * unique we could get multiple operators that start with that token. So this method reads
470 * the operator token (if any) and returns a list of operators that start with that token.
471 *
472 * @return Found operators, maybe <code>null</code> if no matching found..
473 */
474 private final List readOperators() {
475 final String method = "List readOperators()";
476 Trace.begin(CLASS, this, method);
477
478 try {
479 markPosition();
480 final String token;
481 token = readToken();
482 if (token == null) {
483 rewindPosition();
484 Trace.trace(CLASS, this, method, "no operators found");
485 return null;
486 }
487 final List ops = getOperators(token);
488 if (ops == null || ops.size() == 0) {
489 rewindPosition();
490 Trace.trace(CLASS, this, method, "no operators found");
491 return null;
492 }
493 clearMark();
494 for (int i = 0; i < ops.size(); i++) {
495 Trace.param(CLASS, this, method, "operator[" + i + "]", ops.get(i));
496 }
497 return ops;
498 } finally {
499 Trace.end(CLASS, this, method);
500 }
501 }
502
503 /**
504 * Get an operator for that token. If there are more than one possibilities the first matching
505 * is returned.
506 *
507 * @param token Get an operator for this token.
508 * @return Operator. Might be <code>null</code>.
509 */
510 protected abstract Operator getOperator(String token);
511
512 /**
513 * Get operators for that token. If there are more than one possibilities all matching are
514 * returned.
515 *
516 * @param token Get operators for this token.
517 * @return Operators. Might be <code>null</code>.
518 */
519 protected abstract List getOperators(String token);
520
521 /**
522 * Read next token from input and move reading position.
523 * A token is a recognized character sequence. A token is no
524 * elementary whitespace. Also a dosn't start or end with
525 * elementary whitespace.
526 *
527 * @return Token read, is <code>null</code> if end of data reached.
528 */
529 protected abstract String readToken();
530
531 /**
532 * Read next token from input but don't move reading position.
533 * A token is a recognised character sequence. A token is no
534 * elementary whitespace. Also a dosn't start or end with
535 * elementary whitespace.
536 *
537 * @return Token read, is <code>null</code> if end of data reached.
538 */
539 public final String getToken() {
540 markPosition();
541 final String result = readToken();
542 rewindPosition();
543 return result;
544 }
545
546 /**
547 * Remember current position.
548 */
549 protected final void markPosition() {
550 input.markPosition();
551 }
552
553 /**
554 * Rewind to previous marked position. Also clears the mark.
555 *
556 * @return Current position before pop.
557 */
558 protected final long rewindPosition() {
559 return input.rewindPosition();
560 }
561
562 /**
563 * Forget last remembered position.
564 */
565 protected final void clearMark() {
566 input.clearMark();
567 }
568
569 /**
570 * Get byte position.
571 *
572 * @return Position.
573 */
574 private long getPosition() {
575 return input.getPosition();
576 }
577
578 /**
579 * Reads a single character and does not change the reading
580 * position.
581 *
582 * @return character read, if there are no more chars
583 * <code>Character.MAX_VALUE</code> is returned
584 */
585 protected final int getChar() {
586 return input.getChar();
587 }
588
589 /**
590 * Reads a single character and increments the reading position
591 * by one.
592 *
593 * @return character read, if there are no more chars
594 * <code>Character.MAX_VALUE</code> is returned
595 */
596 protected final int readChar() {
597 return input.read();
598 }
599
600 /**
601 * Is this an end of term token?
602 *
603 * @param token Check this token.
604 * @return Is this an end of term token.
605 */
606 protected abstract boolean eot(String token);
607
608 /**
609 * Are there still any characters to read?
610 *
611 * @return Anything left for reading further?
612 */
613 public final boolean eof() {
614 return input.isEmpty();
615 }
616
617 /**
618 * Get rewind stack size.
619 *
620 * @return Rewind stack size.
621 */
622 public final int getRewindStackSize() {
623 return input.getRewindStackSize();
624 }
625
626 }
|