001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org
002 *
003 * Copyright 2000-2013, Michael Meyling <mime@qedeq.org>.
004 *
005 * "Hilbert II" is free software; you can redistribute
006 * it and/or modify it under the terms of the GNU General Public
007 * License as published by the Free Software Foundation; either
008 * version 2 of the License, or (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 * GNU General Public License for more details.
014 */
015
016 package org.qedeq.kernel.bo.parser;
017
018 import java.util.ArrayList;
019 import java.util.List;
020
021 import org.qedeq.base.io.TextInput;
022 import org.qedeq.base.trace.Trace;
023
024 /**
025 * Parse term or formula data into {@link org.qedeq.kernel.bo.parser.Term}s.
026 *
027 * @author Michael Meyling
028 */
029 public abstract class MathParser {
030
031 /** This class. */
032 private static final Class CLASS = MathParser.class;
033
034 /** Input source to parse. */
035 private MementoTextInput input;
036
037 /** List of operators. */
038 private List operators;
039
040 /**
041 * Constructor.
042 */
043 public MathParser() {
044 }
045
046 /**
047 * Set input source and defined parameters.
048 *
049 * @param input Input source to parse.
050 * @param operators Operator list.
051 */
052 public void setParameters(final MementoTextInput input, final List operators) {
053 this.input = input;
054 this.operators = operators;
055 }
056
057 /**
058 * Set input source and defined parameters.
059 *
060 * @param input Input source to parse.
061 * @param operators Operator list.
062 */
063 public void setParameters(final TextInput input, final List operators) {
064 setParameters(new MementoTextInput(input), operators);
065 }
066
067 /**
068 * Set input source and defined parameters.
069 *
070 * @param input Input source to parse.
071 * @param operators Operator list.
072 */
073 public void setParameters(final StringBuffer input, final List operators) {
074 setParameters(new TextInput(input), operators);
075 }
076
077 /**
078 * Set input source and defined parameters.
079 *
080 * @param input Input source to parse.
081 * @param operators Operator list.
082 */
083 public void setParameters(final String input, final List operators) {
084 setParameters(new TextInput(input), operators);
085 }
086
087 protected final List getOperators() {
088 return operators;
089 }
090
091 /**
092 * Reads (maximal possible) Term from input.
093 *
094 * @return Read term.
095 * @throws ParserException Parsing failed.
096 */
097 public final Term readTerm() throws ParserException {
098 final String method = "Term readTerm()";
099 Trace.begin(CLASS, this, method);
100 try {
101 final Term term = readMaximalTerm(0);
102 if (eot(getToken())) {
103 readToken();
104 }
105 return term;
106 } finally {
107 Trace.end(CLASS, this, method);
108 }
109 }
110
111 /**
112 * Reads next "maximal" term from input. The following input doesn't make the the term more
113 * complete. Respects the priority of coming operators by comparing it
114 * to the given value.
115 *
116 * @param priority next formula will be the last, if
117 * connecting operator has higher priority
118 * @return Read term.
119 * @throws ParserException Parsing failed.
120 */
121 private final Term readMaximalTerm(final int priority) throws ParserException {
122 final String method = "readMaximalTerm(int)";
123 Trace.begin(CLASS, this, method);
124 Term term = null;
125 try {
126 if (eot(getToken())) {
127 Trace.param(CLASS, this, method, "return term", "null");
128 return null;
129 }
130 term = readPrefixTerm();
131 term = addNextInfixTerms(priority, term);
132 Trace.param(CLASS, this, method,
133 "return term", (term != null ? term.getQedeq() : "null"));
134 return term;
135 } finally {
136 Trace.end(CLASS, this, method);
137 }
138 }
139
140 private Term addNextInfixTerms(final int priority, final Term initialTerm)
141 throws ParserException {
142 final String method = "Term addNextInfixTerms(int, Term)";
143 Trace.begin(CLASS, this, method);
144 Term term = initialTerm;
145 try {
146 Operator newOperator = null;
147 Operator oldOperator = null;
148
149 do {
150 markPosition();
151 newOperator = readOperator(); // we expect an unique infix operator
152 Trace.param(CLASS, this, method,
153 "newOperator", (newOperator != null ? newOperator.getQedeq() : "null"));
154 if (newOperator == null || newOperator.getPriority() <= priority) {
155 Trace.trace(CLASS, this, method, "newOperator is null or of less priority");
156 rewindPosition();
157 Trace.param(CLASS, this, method,
158 "read term", (term != null ? term.getQedeq() : "null"));
159 return term;
160 }
161 if (newOperator.isPrefix()) {
162 Trace.trace(CLASS, this, method, "newOperator is prefix");
163 // TODO mime 20060313: try to read further arguments
164 rewindPosition();
165 Trace.param(CLASS, this, method,
166 "read term", (term != null ? term.getQedeq() : "null"));
167 return term;
168 }
169 if (newOperator.isPostfix()) {
170 rewindPosition();
171 throw new IllegalArgumentException("Postfix Operators not yet supported");
172 }
173 clearMark();
174 if (oldOperator == null || oldOperator.getPriority() >= newOperator.getPriority()) {
175 Trace.trace(CLASS, this, method,
176 "oldOperator is null or has more priority than new");
177 Term term2 = readMaximalTerm(newOperator.getPriority());
178 if (term2 == null) {
179 // TODO mime 20060313: 2 could be false if newOperator == oldOperator
180 throw new TooFewArgumentsException(getPosition(), 2);
181 }
182 if (oldOperator == newOperator) {
183 if (oldOperator.getMax() != -1 && term.size() + 1 >= oldOperator.getMax()) {
184 throw new TooMuchArgumentsException(getPosition(), oldOperator,
185 oldOperator.getMax());
186 }
187 Trace.trace(CLASS, this, method, "new term is added to old term");
188 term.addArgument(term2);
189 } else {
190 // old term is first argument of new operator
191 Trace.trace(CLASS, this, method,
192 "old term is first argument of new operator");
193 term = new Term(newOperator, term);
194 term.addArgument(term2);
195 }
196 } else {
197 Trace.trace(CLASS, this,
198 method, "oldOperator is not null or has less priority than new");
199 Term term2 = readMaximalTerm(newOperator.getPriority());
200 if (term2 == null) {
201 // TODO mime 20060313: 2 could be false if newOperator == oldOperator
202 throw new TooFewArgumentsException(getPosition(), 2);
203 }
204 term = new Term(newOperator, term);
205 term.addArgument(term2);
206 }
207 oldOperator = newOperator;
208 } while (true);
209 } finally {
210 Trace.end(CLASS, this, method);
211 }
212 }
213
214 /**
215 * Read next following term. This is a complete term but some infix operator
216 * or some terms for an infix operator might follow.
217 *
218 * @return Read term.
219 * @throws ParserException Parsing failed.
220 */
221 private final Term readPrefixTerm() throws ParserException {
222 final String method = "readPrefixTerm()";
223 Trace.begin(CLASS, this, method);
224 Term term = null;
225 try {
226 final List readOperators = readOperators(); // there might be several prefix operators
227 if (readOperators != null && readOperators.size() > 0) {
228 Trace.trace(CLASS, this, method, "operators found");
229 term = readPrefixOperator(readOperators);
230 } else { // no operator found
231 Trace.trace(CLASS, this, method, "no operators found");
232 final String token;
233 token = getToken();
234 if (token == null) {
235 Trace.param(CLASS, this, method, "read term", "null");
236 return null;
237 }
238 if ("(".equals(token)) {
239 readToken();
240 Trace.trace(CLASS, this, method, "start bracket found: " + token);
241 term = readMaximalTerm(0);
242 final String lastToken = readToken();
243 if (!")".equals(lastToken)) {
244 throw new ClosingBracketMissingException(getPosition(), "(", lastToken);
245 }
246
247 } else if ("[".equals(token)) {
248 readToken();
249 Trace.trace(CLASS, this, method, "start bracket found: " + token);
250 term = readMaximalTerm(0);
251 final String lastToken = readToken();
252 if (!"]".equals(lastToken)) {
253 throw new ClosingBracketMissingException(getPosition(), "[", lastToken);
254 }
255 } else {
256 readToken();
257 Trace.param(CLASS, this, method, "atom", token);
258 term = new Term(new TermAtom(token));
259 }
260 }
261 Trace.param(CLASS, this, method,
262 "read term", (term != null ? term.getQedeq() : "null"));
263 return term;
264 } finally {
265 Trace.end(CLASS, this, method);
266 }
267 }
268
269 /**
270 * Try to parse an prefix operator and its operands from the input. Tries first operator,
271 * second operator and so on. If the last one fails an appropriate exception is thrown.
272 *
273 * @param operators Prefix operator list.
274 * @return Resulting term.
275 * @throws ParserException Parsing failed.
276 */
277 private Term readPrefixOperator(final List operators) throws ParserException {
278 Term term = null;
279 markPosition();
280 for (int number = 0; number < operators.size(); number++) {
281 rewindPosition();
282 markPosition();
283 Operator operator = (Operator) operators.get(number);
284
285 if (!operator.isPrefix()) {
286 clearMark();
287 throw new UnexpectedOperatorException(getPosition(), operator);
288 }
289
290 term = new Term(operator);
291
292 if (operator.isFunction()) {
293 // constants should have no argument list
294 if (operator.getMax() == 0) {
295 break;
296 }
297 List list = readTupel();
298 if (list == null) { // here we don't distinguish between "a()" and "a"
299 list = new ArrayList();
300 }
301 // doesn't have enough arguments
302 if (list.size() < operator.getMin()) {
303 if (number + 1 < operators.size()) {
304 continue; // try again
305 }
306 clearMark();
307 throw new TooFewArgumentsException(getPosition(),
308 operator.getMin());
309 }
310 // has to much arguments
311 if (operator.getMax() != -1 && list.size() > operator.getMax()) {
312 if (number + 1 < operators.size()) {
313 continue; // try again
314 }
315 clearMark();
316 throw new TooMuchArgumentsException(getPosition(), operator,
317 operator.getMax());
318 }
319 for (int i = 0; i < list.size(); i++) {
320 term.addArgument((Term) list.get(i));
321 }
322 break;
323 }
324
325 int i = 0;
326 while (i < operator.getMin()) {
327 if (i > 0 && operator.getSeparatorSymbol() != null) {
328 final String separator = getToken();
329 if (!operator.getSeparatorSymbol().equals(separator)) {
330 if (number + 1 < operators.size()) {
331 continue;
332 }
333 clearMark();
334 throw new SeparatorNotFoundException(getPosition(),
335 operator.getSeparatorSymbol());
336 }
337 readToken();
338 }
339 final Term add = readMaximalTerm(operator.getPriority());
340 if (add == null) {
341 if (number + 1 < operators.size()) {
342 continue;
343 }
344 clearMark();
345 throw new TooFewArgumentsException(getPosition(), operator.getMin());
346 }
347 term.addArgument(add);
348 i++;
349 }
350 while (operator.getMax() == -1 || i < operator.getMax()) {
351 if (i > 0 && operator.getSeparatorSymbol() != null) {
352 final String separator = getToken();
353 if (!operator.getSeparatorSymbol().equals(separator)) {
354 break;
355 }
356 readToken();
357 }
358 if (operator.getEndSymbol() != null) {
359 final String end = getToken();
360 if (operator.getEndSymbol().equals(end)) {
361 break;
362 }
363 }
364 Term add = null;
365 markPosition();
366 try {
367 add = readMaximalTerm(operator.getPriority());
368 clearMark();
369 } catch (Exception e) {
370 rewindPosition();
371 }
372 if (add == null) {
373 break;
374 }
375 term.addArgument(add);
376 i++;
377 }
378 if (operator.getEndSymbol() != null) {
379 final String end = getToken();
380 if (!operator.getEndSymbol().equals(end)) {
381 if (number + 1 < operators.size()) {
382 continue;
383 }
384 clearMark();
385 throw new EndSymbolNotFoundException(getPosition(),
386 operator.getEndSymbol());
387 }
388 readToken();
389 }
390 break;
391 }
392 clearMark();
393 return term;
394 }
395
396 /**
397 * Read n-tupel. This is a list of terms encapsulated by "(" and ")" and the terms are separated
398 * by ",".
399 *
400 * @return List of terms. <code>null</code> if no bracket followed.
401 * @throws ParserException Parsing failed.
402 */
403 private final List readTupel() throws ParserException {
404 final String method = "List readTupel()";
405 Trace.begin(CLASS, this, method);
406 try {
407 final String firstToken;
408 firstToken = getToken();
409 if (!"(".equals(firstToken)) {
410 Trace.trace(CLASS, this, method, "no start bracket found");
411 return null;
412 }
413 readToken(); // read "("
414 List list = new ArrayList();
415 Term term = null;
416 while (null != (term = readMaximalTerm(0))) {
417 list.add(term);
418 final String separatorToken = getToken();
419 if (!",".equals(separatorToken)) {
420 break;
421 }
422 readToken(); // read ","
423 }
424 final String lastToken = readToken();
425 if (!")".equals(lastToken)) {
426 throw new ClosingBracketMissingException(getPosition(), ")", lastToken);
427 }
428 return list;
429 } finally {
430 Trace.end(CLASS, this, method);
431 }
432 }
433
434 /**
435 * Read next operator from input.
436 *
437 * @return Found operator, maybe <code>null</code>.
438 */
439 private final Operator readOperator() {
440 final String method = "Operator readOperator()";
441 Trace.begin(CLASS, this, method);
442
443 try {
444 markPosition();
445 final String token;
446 token = readToken();
447 if (token == null) {
448 rewindPosition();
449 Trace.trace(CLASS, this, method, "no operator found");
450 return null;
451 }
452 final Operator operator = getOperator(token);
453 if (operator == null) {
454 rewindPosition();
455 Trace.trace(CLASS, this, method, "no operator found");
456 return null;
457 }
458 clearMark();
459 Trace.param(CLASS, this, method, "operator", operator.getQedeq());
460 return operator;
461 } finally {
462 Trace.end(CLASS, this, method);
463 }
464 }
465
466 /**
467 * Read next operators from input. Because the token that identifies an operator might be not
468 * unique we could get multiple operators that start with that token. So this method reads
469 * the operator token (if any) and returns a list of operators that start with that token.
470 *
471 * @return Found operators, maybe <code>null</code> if no matching found..
472 */
473 private final List readOperators() {
474 final String method = "List readOperators()";
475 Trace.begin(CLASS, this, method);
476
477 try {
478 markPosition();
479 final String token;
480 token = readToken();
481 if (token == null) {
482 rewindPosition();
483 Trace.trace(CLASS, this, method, "no operators found");
484 return null;
485 }
486 final List ops = getOperators(token);
487 if (ops == null || ops.size() == 0) {
488 rewindPosition();
489 Trace.trace(CLASS, this, method, "no operators found");
490 return null;
491 }
492 clearMark();
493 for (int i = 0; i < ops.size(); i++) {
494 Trace.param(CLASS, this, method, "operator[" + i + "]", ops.get(i));
495 }
496 return ops;
497 } finally {
498 Trace.end(CLASS, this, method);
499 }
500 }
501
502 /**
503 * Get an operator for that token. If there are more than one possibilities the first matching
504 * is returned.
505 *
506 * @param token Get an operator for this token.
507 * @return Operator. Might be <code>null</code>.
508 */
509 protected abstract Operator getOperator(String token);
510
511 /**
512 * Get operators for that token. If there are more than one possibilities all matching are
513 * returned.
514 *
515 * @param token Get operators for this token.
516 * @return Operators. Might be <code>null</code>.
517 */
518 protected abstract List getOperators(String token);
519
520 /**
521 * Read next token from input and move reading position.
522 * A token is a recognized character sequence. A token is no
523 * elementary whitespace. Also a dosn't start or end with
524 * elementary whitespace.
525 *
526 * @return Token read, is <code>null</code> if end of data reached.
527 */
528 protected abstract String readToken();
529
530 /**
531 * Read next token from input but don't move reading position.
532 * A token is a recognised character sequence. A token is no
533 * elementary whitespace. Also a dosn't start or end with
534 * elementary whitespace.
535 *
536 * @return Token read, is <code>null</code> if end of data reached.
537 */
538 public final String getToken() {
539 markPosition();
540 final String result = readToken();
541 rewindPosition();
542 return result;
543 }
544
545 /**
546 * Remember current position.
547 */
548 protected final void markPosition() {
549 input.markPosition();
550 }
551
552 /**
553 * Rewind to previous marked position. Also clears the mark.
554 *
555 * @return Current position before pop.
556 */
557 protected final long rewindPosition() {
558 return input.rewindPosition();
559 }
560
561 /**
562 * Forget last remembered position.
563 */
564 protected final void clearMark() {
565 input.clearMark();
566 }
567
568 /**
569 * Get byte position.
570 *
571 * @return Position.
572 */
573 private long getPosition() {
574 return input.getPosition();
575 }
576
577 /**
578 * Reads a single character and does not change the reading
579 * position.
580 *
581 * @return character read, if there are no more chars
582 * <code>Character.MAX_VALUE</code> is returned
583 */
584 protected final int getChar() {
585 return input.getChar();
586 }
587
588 /**
589 * Reads a single character and increments the reading position
590 * by one.
591 *
592 * @return character read, if there are no more chars
593 * <code>Character.MAX_VALUE</code> is returned
594 */
595 protected final int readChar() {
596 return input.read();
597 }
598
599 /**
600 * Is this an end of term token?
601 *
602 * @param token Check this token.
603 * @return Is this an end of term token.
604 */
605 protected abstract boolean eot(String token);
606
607 /**
608 * Are there still any characters to read?
609 *
610 * @return Anything left for reading further?
611 */
612 public final boolean eof() {
613 return input.isEmpty();
614 }
615
616 /**
617 * Get rewind stack size.
618 *
619 * @return Rewind stack size.
620 */
621 public final int getRewindStackSize() {
622 return input.getRewindStackSize();
623 }
624
625 }
|