1 | /* This file is part of the project "Hilbert II" - http://www.qedeq.org |
2 | * |
3 | * Copyright 2000-2014, Michael Meyling <mime@qedeq.org>. |
4 | * |
5 | * "Hilbert II" is free software; you can redistribute |
6 | * it and/or modify it under the terms of the GNU General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2 of the License, or (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | */ |
15 | |
16 | package org.qedeq.kernel.bo.parser; |
17 | |
18 | import java.util.ArrayList; |
19 | import java.util.List; |
20 | |
21 | import org.qedeq.base.io.TextInput; |
22 | import org.qedeq.base.trace.Trace; |
23 | |
24 | /** |
25 | * Parse term or formula data into {@link org.qedeq.kernel.bo.parser.Term}s. |
26 | * |
27 | * @author Michael Meyling |
28 | */ |
29 | public abstract class MathParser { |
30 | |
31 | /** This class. */ |
32 | private static final Class CLASS = MathParser.class; |
33 | |
34 | /** Input source to parse. */ |
35 | private MementoTextInput input; |
36 | |
37 | /** List of operators. */ |
38 | private List operators; |
39 | |
40 | /** |
41 | * Constructor. |
42 | */ |
43 | public MathParser() { |
44 | // nothing to do |
45 | } |
46 | |
47 | /** |
48 | * Set input source and defined parameters. |
49 | * |
50 | * @param input Input source to parse. |
51 | * @param operators Operator list. |
52 | */ |
53 | public void setParameters(final MementoTextInput input, final List operators) { |
54 | this.input = input; |
55 | this.operators = operators; |
56 | } |
57 | |
58 | /** |
59 | * Set input source and defined parameters. |
60 | * |
61 | * @param input Input source to parse. |
62 | * @param operators Operator list. |
63 | */ |
64 | public void setParameters(final TextInput input, final List operators) { |
65 | setParameters(new MementoTextInput(input), operators); |
66 | } |
67 | |
68 | /** |
69 | * Set input source and defined parameters. |
70 | * |
71 | * @param input Input source to parse. |
72 | * @param operators Operator list. |
73 | */ |
74 | public void setParameters(final StringBuffer input, final List operators) { |
75 | setParameters(new TextInput(input), operators); |
76 | } |
77 | |
78 | /** |
79 | * Set input source and defined parameters. |
80 | * |
81 | * @param input Input source to parse. |
82 | * @param operators Operator list. |
83 | */ |
84 | public void setParameters(final String input, final List operators) { |
85 | setParameters(new TextInput(input), operators); |
86 | } |
87 | |
88 | protected final List getOperators() { |
89 | return operators; |
90 | } |
91 | |
92 | /** |
93 | * Reads (maximal possible) Term from input. |
94 | * |
95 | * @return Read term. |
96 | * @throws ParserException Parsing failed. |
97 | */ |
98 | public final Term readTerm() throws ParserException { |
99 | final String method = "Term readTerm()"; |
100 | Trace.begin(CLASS, this, method); |
101 | try { |
102 | final Term term = readMaximalTerm(0); |
103 | if (eot(getToken())) { |
104 | readToken(); |
105 | } |
106 | return term; |
107 | } finally { |
108 | Trace.end(CLASS, this, method); |
109 | } |
110 | } |
111 | |
112 | /** |
113 | * Reads next "maximal" term from input. The following input doesn't make the the term more |
114 | * complete. Respects the priority of coming operators by comparing it |
115 | * to the given value. |
116 | * |
117 | * @param priority next formula will be the last, if |
118 | * connecting operator has higher priority |
119 | * @return Read term. |
120 | * @throws ParserException Parsing failed. |
121 | */ |
122 | private final Term readMaximalTerm(final int priority) throws ParserException { |
123 | final String method = "readMaximalTerm(int)"; |
124 | Trace.begin(CLASS, this, method); |
125 | Term term = null; |
126 | try { |
127 | if (eot(getToken())) { |
128 | Trace.param(CLASS, this, method, "return term", "null"); |
129 | return null; |
130 | } |
131 | term = readPrefixTerm(); |
132 | term = addNextInfixTerms(priority, term); |
133 | Trace.param(CLASS, this, method, |
134 | "return term", (term != null ? term.getQedeq() : "null")); |
135 | return term; |
136 | } finally { |
137 | Trace.end(CLASS, this, method); |
138 | } |
139 | } |
140 | |
141 | private Term addNextInfixTerms(final int priority, final Term initialTerm) |
142 | throws ParserException { |
143 | final String method = "Term addNextInfixTerms(int, Term)"; |
144 | Trace.begin(CLASS, this, method); |
145 | Term term = initialTerm; |
146 | try { |
147 | Operator newOperator = null; |
148 | Operator oldOperator = null; |
149 | |
150 | do { |
151 | markPosition(); |
152 | newOperator = readOperator(); // we expect an unique infix operator |
153 | Trace.param(CLASS, this, method, |
154 | "newOperator", (newOperator != null ? newOperator.getQedeq() : "null")); |
155 | if (newOperator == null || newOperator.getPriority() <= priority) { |
156 | Trace.trace(CLASS, this, method, "newOperator is null or of less priority"); |
157 | rewindPosition(); |
158 | Trace.param(CLASS, this, method, |
159 | "read term", (term != null ? term.getQedeq() : "null")); |
160 | return term; |
161 | } |
162 | if (newOperator.isPrefix()) { |
163 | Trace.trace(CLASS, this, method, "newOperator is prefix"); |
164 | // TODO mime 20060313: try to read further arguments |
165 | rewindPosition(); |
166 | Trace.param(CLASS, this, method, |
167 | "read term", (term != null ? term.getQedeq() : "null")); |
168 | return term; |
169 | } |
170 | if (newOperator.isPostfix()) { |
171 | rewindPosition(); |
172 | throw new IllegalArgumentException("Postfix Operators not yet supported"); |
173 | } |
174 | clearMark(); |
175 | if (oldOperator == null || oldOperator.getPriority() >= newOperator.getPriority()) { |
176 | Trace.trace(CLASS, this, method, |
177 | "oldOperator is null or has more priority than new"); |
178 | Term term2 = readMaximalTerm(newOperator.getPriority()); |
179 | if (term2 == null) { |
180 | // TODO mime 20060313: 2 could be false if newOperator == oldOperator |
181 | throw new TooFewArgumentsException(getPosition(), 2); |
182 | } |
183 | if (oldOperator == newOperator) { |
184 | if (oldOperator.getMax() != -1 && term.size() + 1 >= oldOperator.getMax()) { |
185 | throw new TooMuchArgumentsException(getPosition(), oldOperator, |
186 | oldOperator.getMax()); |
187 | } |
188 | Trace.trace(CLASS, this, method, "new term is added to old term"); |
189 | term.addArgument(term2); |
190 | } else { |
191 | // old term is first argument of new operator |
192 | Trace.trace(CLASS, this, method, |
193 | "old term is first argument of new operator"); |
194 | term = new Term(newOperator, term); |
195 | term.addArgument(term2); |
196 | } |
197 | } else { |
198 | Trace.trace(CLASS, this, |
199 | method, "oldOperator is not null or has less priority than new"); |
200 | Term term2 = readMaximalTerm(newOperator.getPriority()); |
201 | if (term2 == null) { |
202 | // TODO mime 20060313: 2 could be false if newOperator == oldOperator |
203 | throw new TooFewArgumentsException(getPosition(), 2); |
204 | } |
205 | term = new Term(newOperator, term); |
206 | term.addArgument(term2); |
207 | } |
208 | oldOperator = newOperator; |
209 | } while (true); |
210 | } finally { |
211 | Trace.end(CLASS, this, method); |
212 | } |
213 | } |
214 | |
215 | /** |
216 | * Read next following term. This is a complete term but some infix operator |
217 | * or some terms for an infix operator might follow. |
218 | * |
219 | * @return Read term. |
220 | * @throws ParserException Parsing failed. |
221 | */ |
222 | private final Term readPrefixTerm() throws ParserException { |
223 | final String method = "readPrefixTerm()"; |
224 | Trace.begin(CLASS, this, method); |
225 | Term term = null; |
226 | try { |
227 | final List readOperators = readOperators(); // there might be several prefix operators |
228 | if (readOperators != null && readOperators.size() > 0) { |
229 | Trace.trace(CLASS, this, method, "operators found"); |
230 | term = readPrefixOperator(readOperators); |
231 | } else { // no operator found |
232 | Trace.trace(CLASS, this, method, "no operators found"); |
233 | final String token; |
234 | token = getToken(); |
235 | if (token == null) { |
236 | Trace.param(CLASS, this, method, "read term", "null"); |
237 | return null; |
238 | } |
239 | if ("(".equals(token)) { |
240 | readToken(); |
241 | Trace.trace(CLASS, this, method, "start bracket found: " + token); |
242 | term = readMaximalTerm(0); |
243 | final String lastToken = readToken(); |
244 | if (!")".equals(lastToken)) { |
245 | throw new ClosingBracketMissingException(getPosition(), "(", lastToken); |
246 | } |
247 | |
248 | } else if ("[".equals(token)) { |
249 | readToken(); |
250 | Trace.trace(CLASS, this, method, "start bracket found: " + token); |
251 | term = readMaximalTerm(0); |
252 | final String lastToken = readToken(); |
253 | if (!"]".equals(lastToken)) { |
254 | throw new ClosingBracketMissingException(getPosition(), "[", lastToken); |
255 | } |
256 | } else { |
257 | readToken(); |
258 | Trace.param(CLASS, this, method, "atom", token); |
259 | term = new Term(new TermAtom(token)); |
260 | } |
261 | } |
262 | Trace.param(CLASS, this, method, |
263 | "read term", (term != null ? term.getQedeq() : "null")); |
264 | return term; |
265 | } finally { |
266 | Trace.end(CLASS, this, method); |
267 | } |
268 | } |
269 | |
270 | /** |
271 | * Try to parse an prefix operator and its operands from the input. Tries first operator, |
272 | * second operator and so on. If the last one fails an appropriate exception is thrown. |
273 | * |
274 | * @param operators Prefix operator list. |
275 | * @return Resulting term. |
276 | * @throws ParserException Parsing failed. |
277 | */ |
278 | private Term readPrefixOperator(final List operators) throws ParserException { |
279 | Term term = null; |
280 | markPosition(); |
281 | for (int number = 0; number < operators.size(); number++) { |
282 | rewindPosition(); |
283 | markPosition(); |
284 | Operator operator = (Operator) operators.get(number); |
285 | |
286 | if (!operator.isPrefix()) { |
287 | clearMark(); |
288 | throw new UnexpectedOperatorException(getPosition(), operator); |
289 | } |
290 | |
291 | term = new Term(operator); |
292 | |
293 | if (operator.isFunction()) { |
294 | // constants should have no argument list |
295 | if (operator.getMax() == 0) { |
296 | break; |
297 | } |
298 | List list = readTupel(); |
299 | if (list == null) { // here we don't distinguish between "a()" and "a" |
300 | list = new ArrayList(); |
301 | } |
302 | // doesn't have enough arguments |
303 | if (list.size() < operator.getMin()) { |
304 | if (number + 1 < operators.size()) { |
305 | continue; // try again |
306 | } |
307 | clearMark(); |
308 | throw new TooFewArgumentsException(getPosition(), |
309 | operator.getMin()); |
310 | } |
311 | // has to much arguments |
312 | if (operator.getMax() != -1 && list.size() > operator.getMax()) { |
313 | if (number + 1 < operators.size()) { |
314 | continue; // try again |
315 | } |
316 | clearMark(); |
317 | throw new TooMuchArgumentsException(getPosition(), operator, |
318 | operator.getMax()); |
319 | } |
320 | for (int i = 0; i < list.size(); i++) { |
321 | term.addArgument((Term) list.get(i)); |
322 | } |
323 | break; |
324 | } |
325 | |
326 | int i = 0; |
327 | while (i < operator.getMin()) { |
328 | if (i > 0 && operator.getSeparatorSymbol() != null) { |
329 | final String separator = getToken(); |
330 | if (!operator.getSeparatorSymbol().equals(separator)) { |
331 | if (number + 1 < operators.size()) { |
332 | continue; |
333 | } |
334 | clearMark(); |
335 | throw new SeparatorNotFoundException(getPosition(), |
336 | operator.getSeparatorSymbol()); |
337 | } |
338 | readToken(); |
339 | } |
340 | final Term add = readMaximalTerm(operator.getPriority()); |
341 | if (add == null) { |
342 | if (number + 1 < operators.size()) { |
343 | continue; |
344 | } |
345 | clearMark(); |
346 | throw new TooFewArgumentsException(getPosition(), operator.getMin()); |
347 | } |
348 | term.addArgument(add); |
349 | i++; |
350 | } |
351 | while (operator.getMax() == -1 || i < operator.getMax()) { |
352 | if (i > 0 && operator.getSeparatorSymbol() != null) { |
353 | final String separator = getToken(); |
354 | if (!operator.getSeparatorSymbol().equals(separator)) { |
355 | break; |
356 | } |
357 | readToken(); |
358 | } |
359 | if (operator.getEndSymbol() != null) { |
360 | final String end = getToken(); |
361 | if (operator.getEndSymbol().equals(end)) { |
362 | break; |
363 | } |
364 | } |
365 | Term add = null; |
366 | markPosition(); |
367 | try { |
368 | add = readMaximalTerm(operator.getPriority()); |
369 | clearMark(); |
370 | } catch (Exception e) { |
371 | rewindPosition(); |
372 | } |
373 | if (add == null) { |
374 | break; |
375 | } |
376 | term.addArgument(add); |
377 | i++; |
378 | } |
379 | if (operator.getEndSymbol() != null) { |
380 | final String end = getToken(); |
381 | if (!operator.getEndSymbol().equals(end)) { |
382 | if (number + 1 < operators.size()) { |
383 | continue; |
384 | } |
385 | clearMark(); |
386 | throw new EndSymbolNotFoundException(getPosition(), |
387 | operator.getEndSymbol()); |
388 | } |
389 | readToken(); |
390 | } |
391 | break; |
392 | } |
393 | clearMark(); |
394 | return term; |
395 | } |
396 | |
397 | /** |
398 | * Read n-tupel. This is a list of terms encapsulated by "(" and ")" and the terms are separated |
399 | * by ",". |
400 | * |
401 | * @return List of terms. <code>null</code> if no bracket followed. |
402 | * @throws ParserException Parsing failed. |
403 | */ |
404 | private final List readTupel() throws ParserException { |
405 | final String method = "List readTupel()"; |
406 | Trace.begin(CLASS, this, method); |
407 | try { |
408 | final String firstToken; |
409 | firstToken = getToken(); |
410 | if (!"(".equals(firstToken)) { |
411 | Trace.trace(CLASS, this, method, "no start bracket found"); |
412 | return null; |
413 | } |
414 | readToken(); // read "(" |
415 | List list = new ArrayList(); |
416 | Term term = null; |
417 | while (null != (term = readMaximalTerm(0))) { |
418 | list.add(term); |
419 | final String separatorToken = getToken(); |
420 | if (!",".equals(separatorToken)) { |
421 | break; |
422 | } |
423 | readToken(); // read "," |
424 | } |
425 | final String lastToken = readToken(); |
426 | if (!")".equals(lastToken)) { |
427 | throw new ClosingBracketMissingException(getPosition(), ")", lastToken); |
428 | } |
429 | return list; |
430 | } finally { |
431 | Trace.end(CLASS, this, method); |
432 | } |
433 | } |
434 | |
435 | /** |
436 | * Read next operator from input. |
437 | * |
438 | * @return Found operator, maybe <code>null</code>. |
439 | */ |
440 | private final Operator readOperator() { |
441 | final String method = "Operator readOperator()"; |
442 | Trace.begin(CLASS, this, method); |
443 | |
444 | try { |
445 | markPosition(); |
446 | final String token; |
447 | token = readToken(); |
448 | if (token == null) { |
449 | rewindPosition(); |
450 | Trace.trace(CLASS, this, method, "no operator found"); |
451 | return null; |
452 | } |
453 | final Operator operator = getOperator(token); |
454 | if (operator == null) { |
455 | rewindPosition(); |
456 | Trace.trace(CLASS, this, method, "no operator found"); |
457 | return null; |
458 | } |
459 | clearMark(); |
460 | Trace.param(CLASS, this, method, "operator", operator.getQedeq()); |
461 | return operator; |
462 | } finally { |
463 | Trace.end(CLASS, this, method); |
464 | } |
465 | } |
466 | |
467 | /** |
468 | * Read next operators from input. Because the token that identifies an operator might be not |
469 | * unique we could get multiple operators that start with that token. So this method reads |
470 | * the operator token (if any) and returns a list of operators that start with that token. |
471 | * |
472 | * @return Found operators, maybe <code>null</code> if no matching found.. |
473 | */ |
474 | private final List readOperators() { |
475 | final String method = "List readOperators()"; |
476 | Trace.begin(CLASS, this, method); |
477 | |
478 | try { |
479 | markPosition(); |
480 | final String token; |
481 | token = readToken(); |
482 | if (token == null) { |
483 | rewindPosition(); |
484 | Trace.trace(CLASS, this, method, "no operators found"); |
485 | return null; |
486 | } |
487 | final List ops = getOperators(token); |
488 | if (ops == null || ops.size() == 0) { |
489 | rewindPosition(); |
490 | Trace.trace(CLASS, this, method, "no operators found"); |
491 | return null; |
492 | } |
493 | clearMark(); |
494 | for (int i = 0; i < ops.size(); i++) { |
495 | Trace.param(CLASS, this, method, "operator[" + i + "]", ops.get(i)); |
496 | } |
497 | return ops; |
498 | } finally { |
499 | Trace.end(CLASS, this, method); |
500 | } |
501 | } |
502 | |
503 | /** |
504 | * Get an operator for that token. If there are more than one possibilities the first matching |
505 | * is returned. |
506 | * |
507 | * @param token Get an operator for this token. |
508 | * @return Operator. Might be <code>null</code>. |
509 | */ |
510 | protected abstract Operator getOperator(String token); |
511 | |
512 | /** |
513 | * Get operators for that token. If there are more than one possibilities all matching are |
514 | * returned. |
515 | * |
516 | * @param token Get operators for this token. |
517 | * @return Operators. Might be <code>null</code>. |
518 | */ |
519 | protected abstract List getOperators(String token); |
520 | |
521 | /** |
522 | * Read next token from input and move reading position. |
523 | * A token is a recognized character sequence. A token is no |
524 | * elementary whitespace. Also a dosn't start or end with |
525 | * elementary whitespace. |
526 | * |
527 | * @return Token read, is <code>null</code> if end of data reached. |
528 | */ |
529 | protected abstract String readToken(); |
530 | |
531 | /** |
532 | * Read next token from input but don't move reading position. |
533 | * A token is a recognised character sequence. A token is no |
534 | * elementary whitespace. Also a dosn't start or end with |
535 | * elementary whitespace. |
536 | * |
537 | * @return Token read, is <code>null</code> if end of data reached. |
538 | */ |
539 | public final String getToken() { |
540 | markPosition(); |
541 | final String result = readToken(); |
542 | rewindPosition(); |
543 | return result; |
544 | } |
545 | |
546 | /** |
547 | * Remember current position. |
548 | */ |
549 | protected final void markPosition() { |
550 | input.markPosition(); |
551 | } |
552 | |
553 | /** |
554 | * Rewind to previous marked position. Also clears the mark. |
555 | * |
556 | * @return Current position before pop. |
557 | */ |
558 | protected final long rewindPosition() { |
559 | return input.rewindPosition(); |
560 | } |
561 | |
562 | /** |
563 | * Forget last remembered position. |
564 | */ |
565 | protected final void clearMark() { |
566 | input.clearMark(); |
567 | } |
568 | |
569 | /** |
570 | * Get byte position. |
571 | * |
572 | * @return Position. |
573 | */ |
574 | private long getPosition() { |
575 | return input.getPosition(); |
576 | } |
577 | |
578 | /** |
579 | * Reads a single character and does not change the reading |
580 | * position. |
581 | * |
582 | * @return character read, if there are no more chars |
583 | * <code>Character.MAX_VALUE</code> is returned |
584 | */ |
585 | protected final int getChar() { |
586 | return input.getChar(); |
587 | } |
588 | |
589 | /** |
590 | * Reads a single character and increments the reading position |
591 | * by one. |
592 | * |
593 | * @return character read, if there are no more chars |
594 | * <code>Character.MAX_VALUE</code> is returned |
595 | */ |
596 | protected final int readChar() { |
597 | return input.read(); |
598 | } |
599 | |
600 | /** |
601 | * Is this an end of term token? |
602 | * |
603 | * @param token Check this token. |
604 | * @return Is this an end of term token. |
605 | */ |
606 | protected abstract boolean eot(String token); |
607 | |
608 | /** |
609 | * Are there still any characters to read? |
610 | * |
611 | * @return Anything left for reading further? |
612 | */ |
613 | public final boolean eof() { |
614 | return input.isEmpty(); |
615 | } |
616 | |
617 | /** |
618 | * Get rewind stack size. |
619 | * |
620 | * @return Rewind stack size. |
621 | */ |
622 | public final int getRewindStackSize() { |
623 | return input.getRewindStackSize(); |
624 | } |
625 | |
626 | } |