| 1 | /* This file is part of the project "Hilbert II" - http://www.qedeq.org |
| 2 | * |
| 3 | * Copyright 2000-2014, Michael Meyling <mime@qedeq.org>. |
| 4 | * |
| 5 | * "Hilbert II" is free software; you can redistribute |
| 6 | * it and/or modify it under the terms of the GNU General Public |
| 7 | * License as published by the Free Software Foundation; either |
| 8 | * version 2 of the License, or (at your option) any later version. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * GNU General Public License for more details. |
| 14 | */ |
| 15 | |
| 16 | package org.qedeq.kernel.bo.parser; |
| 17 | |
| 18 | import java.util.ArrayList; |
| 19 | import java.util.List; |
| 20 | |
| 21 | import org.qedeq.base.io.TextInput; |
| 22 | import org.qedeq.base.trace.Trace; |
| 23 | |
| 24 | /** |
| 25 | * Parse term or formula data into {@link org.qedeq.kernel.bo.parser.Term}s. |
| 26 | * |
| 27 | * @author Michael Meyling |
| 28 | */ |
| 29 | public abstract class MathParser { |
| 30 | |
| 31 | /** This class. */ |
| 32 | private static final Class CLASS = MathParser.class; |
| 33 | |
| 34 | /** Input source to parse. */ |
| 35 | private MementoTextInput input; |
| 36 | |
| 37 | /** List of operators. */ |
| 38 | private List operators; |
| 39 | |
| 40 | /** |
| 41 | * Constructor. |
| 42 | */ |
| 43 | public MathParser() { |
| 44 | // nothing to do |
| 45 | } |
| 46 | |
| 47 | /** |
| 48 | * Set input source and defined parameters. |
| 49 | * |
| 50 | * @param input Input source to parse. |
| 51 | * @param operators Operator list. |
| 52 | */ |
| 53 | public void setParameters(final MementoTextInput input, final List operators) { |
| 54 | this.input = input; |
| 55 | this.operators = operators; |
| 56 | } |
| 57 | |
| 58 | /** |
| 59 | * Set input source and defined parameters. |
| 60 | * |
| 61 | * @param input Input source to parse. |
| 62 | * @param operators Operator list. |
| 63 | */ |
| 64 | public void setParameters(final TextInput input, final List operators) { |
| 65 | setParameters(new MementoTextInput(input), operators); |
| 66 | } |
| 67 | |
| 68 | /** |
| 69 | * Set input source and defined parameters. |
| 70 | * |
| 71 | * @param input Input source to parse. |
| 72 | * @param operators Operator list. |
| 73 | */ |
| 74 | public void setParameters(final StringBuffer input, final List operators) { |
| 75 | setParameters(new TextInput(input), operators); |
| 76 | } |
| 77 | |
| 78 | /** |
| 79 | * Set input source and defined parameters. |
| 80 | * |
| 81 | * @param input Input source to parse. |
| 82 | * @param operators Operator list. |
| 83 | */ |
| 84 | public void setParameters(final String input, final List operators) { |
| 85 | setParameters(new TextInput(input), operators); |
| 86 | } |
| 87 | |
| 88 | protected final List getOperators() { |
| 89 | return operators; |
| 90 | } |
| 91 | |
| 92 | /** |
| 93 | * Reads (maximal possible) Term from input. |
| 94 | * |
| 95 | * @return Read term. |
| 96 | * @throws ParserException Parsing failed. |
| 97 | */ |
| 98 | public final Term readTerm() throws ParserException { |
| 99 | final String method = "Term readTerm()"; |
| 100 | Trace.begin(CLASS, this, method); |
| 101 | try { |
| 102 | final Term term = readMaximalTerm(0); |
| 103 | if (eot(getToken())) { |
| 104 | readToken(); |
| 105 | } |
| 106 | return term; |
| 107 | } finally { |
| 108 | Trace.end(CLASS, this, method); |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | /** |
| 113 | * Reads next "maximal" term from input. The following input doesn't make the the term more |
| 114 | * complete. Respects the priority of coming operators by comparing it |
| 115 | * to the given value. |
| 116 | * |
| 117 | * @param priority next formula will be the last, if |
| 118 | * connecting operator has higher priority |
| 119 | * @return Read term. |
| 120 | * @throws ParserException Parsing failed. |
| 121 | */ |
| 122 | private final Term readMaximalTerm(final int priority) throws ParserException { |
| 123 | final String method = "readMaximalTerm(int)"; |
| 124 | Trace.begin(CLASS, this, method); |
| 125 | Term term = null; |
| 126 | try { |
| 127 | if (eot(getToken())) { |
| 128 | Trace.param(CLASS, this, method, "return term", "null"); |
| 129 | return null; |
| 130 | } |
| 131 | term = readPrefixTerm(); |
| 132 | term = addNextInfixTerms(priority, term); |
| 133 | Trace.param(CLASS, this, method, |
| 134 | "return term", (term != null ? term.getQedeq() : "null")); |
| 135 | return term; |
| 136 | } finally { |
| 137 | Trace.end(CLASS, this, method); |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | private Term addNextInfixTerms(final int priority, final Term initialTerm) |
| 142 | throws ParserException { |
| 143 | final String method = "Term addNextInfixTerms(int, Term)"; |
| 144 | Trace.begin(CLASS, this, method); |
| 145 | Term term = initialTerm; |
| 146 | try { |
| 147 | Operator newOperator = null; |
| 148 | Operator oldOperator = null; |
| 149 | |
| 150 | do { |
| 151 | markPosition(); |
| 152 | newOperator = readOperator(); // we expect an unique infix operator |
| 153 | Trace.param(CLASS, this, method, |
| 154 | "newOperator", (newOperator != null ? newOperator.getQedeq() : "null")); |
| 155 | if (newOperator == null || newOperator.getPriority() <= priority) { |
| 156 | Trace.trace(CLASS, this, method, "newOperator is null or of less priority"); |
| 157 | rewindPosition(); |
| 158 | Trace.param(CLASS, this, method, |
| 159 | "read term", (term != null ? term.getQedeq() : "null")); |
| 160 | return term; |
| 161 | } |
| 162 | if (newOperator.isPrefix()) { |
| 163 | Trace.trace(CLASS, this, method, "newOperator is prefix"); |
| 164 | // TODO mime 20060313: try to read further arguments |
| 165 | rewindPosition(); |
| 166 | Trace.param(CLASS, this, method, |
| 167 | "read term", (term != null ? term.getQedeq() : "null")); |
| 168 | return term; |
| 169 | } |
| 170 | if (newOperator.isPostfix()) { |
| 171 | rewindPosition(); |
| 172 | throw new IllegalArgumentException("Postfix Operators not yet supported"); |
| 173 | } |
| 174 | clearMark(); |
| 175 | if (oldOperator == null || oldOperator.getPriority() >= newOperator.getPriority()) { |
| 176 | Trace.trace(CLASS, this, method, |
| 177 | "oldOperator is null or has more priority than new"); |
| 178 | Term term2 = readMaximalTerm(newOperator.getPriority()); |
| 179 | if (term2 == null) { |
| 180 | // TODO mime 20060313: 2 could be false if newOperator == oldOperator |
| 181 | throw new TooFewArgumentsException(getPosition(), 2); |
| 182 | } |
| 183 | if (oldOperator == newOperator) { |
| 184 | if (oldOperator.getMax() != -1 && term.size() + 1 >= oldOperator.getMax()) { |
| 185 | throw new TooMuchArgumentsException(getPosition(), oldOperator, |
| 186 | oldOperator.getMax()); |
| 187 | } |
| 188 | Trace.trace(CLASS, this, method, "new term is added to old term"); |
| 189 | term.addArgument(term2); |
| 190 | } else { |
| 191 | // old term is first argument of new operator |
| 192 | Trace.trace(CLASS, this, method, |
| 193 | "old term is first argument of new operator"); |
| 194 | term = new Term(newOperator, term); |
| 195 | term.addArgument(term2); |
| 196 | } |
| 197 | } else { |
| 198 | Trace.trace(CLASS, this, |
| 199 | method, "oldOperator is not null or has less priority than new"); |
| 200 | Term term2 = readMaximalTerm(newOperator.getPriority()); |
| 201 | if (term2 == null) { |
| 202 | // TODO mime 20060313: 2 could be false if newOperator == oldOperator |
| 203 | throw new TooFewArgumentsException(getPosition(), 2); |
| 204 | } |
| 205 | term = new Term(newOperator, term); |
| 206 | term.addArgument(term2); |
| 207 | } |
| 208 | oldOperator = newOperator; |
| 209 | } while (true); |
| 210 | } finally { |
| 211 | Trace.end(CLASS, this, method); |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | /** |
| 216 | * Read next following term. This is a complete term but some infix operator |
| 217 | * or some terms for an infix operator might follow. |
| 218 | * |
| 219 | * @return Read term. |
| 220 | * @throws ParserException Parsing failed. |
| 221 | */ |
| 222 | private final Term readPrefixTerm() throws ParserException { |
| 223 | final String method = "readPrefixTerm()"; |
| 224 | Trace.begin(CLASS, this, method); |
| 225 | Term term = null; |
| 226 | try { |
| 227 | final List readOperators = readOperators(); // there might be several prefix operators |
| 228 | if (readOperators != null && readOperators.size() > 0) { |
| 229 | Trace.trace(CLASS, this, method, "operators found"); |
| 230 | term = readPrefixOperator(readOperators); |
| 231 | } else { // no operator found |
| 232 | Trace.trace(CLASS, this, method, "no operators found"); |
| 233 | final String token; |
| 234 | token = getToken(); |
| 235 | if (token == null) { |
| 236 | Trace.param(CLASS, this, method, "read term", "null"); |
| 237 | return null; |
| 238 | } |
| 239 | if ("(".equals(token)) { |
| 240 | readToken(); |
| 241 | Trace.trace(CLASS, this, method, "start bracket found: " + token); |
| 242 | term = readMaximalTerm(0); |
| 243 | final String lastToken = readToken(); |
| 244 | if (!")".equals(lastToken)) { |
| 245 | throw new ClosingBracketMissingException(getPosition(), "(", lastToken); |
| 246 | } |
| 247 | |
| 248 | } else if ("[".equals(token)) { |
| 249 | readToken(); |
| 250 | Trace.trace(CLASS, this, method, "start bracket found: " + token); |
| 251 | term = readMaximalTerm(0); |
| 252 | final String lastToken = readToken(); |
| 253 | if (!"]".equals(lastToken)) { |
| 254 | throw new ClosingBracketMissingException(getPosition(), "[", lastToken); |
| 255 | } |
| 256 | } else { |
| 257 | readToken(); |
| 258 | Trace.param(CLASS, this, method, "atom", token); |
| 259 | term = new Term(new TermAtom(token)); |
| 260 | } |
| 261 | } |
| 262 | Trace.param(CLASS, this, method, |
| 263 | "read term", (term != null ? term.getQedeq() : "null")); |
| 264 | return term; |
| 265 | } finally { |
| 266 | Trace.end(CLASS, this, method); |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | /** |
| 271 | * Try to parse an prefix operator and its operands from the input. Tries first operator, |
| 272 | * second operator and so on. If the last one fails an appropriate exception is thrown. |
| 273 | * |
| 274 | * @param operators Prefix operator list. |
| 275 | * @return Resulting term. |
| 276 | * @throws ParserException Parsing failed. |
| 277 | */ |
| 278 | private Term readPrefixOperator(final List operators) throws ParserException { |
| 279 | Term term = null; |
| 280 | markPosition(); |
| 281 | for (int number = 0; number < operators.size(); number++) { |
| 282 | rewindPosition(); |
| 283 | markPosition(); |
| 284 | Operator operator = (Operator) operators.get(number); |
| 285 | |
| 286 | if (!operator.isPrefix()) { |
| 287 | clearMark(); |
| 288 | throw new UnexpectedOperatorException(getPosition(), operator); |
| 289 | } |
| 290 | |
| 291 | term = new Term(operator); |
| 292 | |
| 293 | if (operator.isFunction()) { |
| 294 | // constants should have no argument list |
| 295 | if (operator.getMax() == 0) { |
| 296 | break; |
| 297 | } |
| 298 | List list = readTupel(); |
| 299 | if (list == null) { // here we don't distinguish between "a()" and "a" |
| 300 | list = new ArrayList(); |
| 301 | } |
| 302 | // doesn't have enough arguments |
| 303 | if (list.size() < operator.getMin()) { |
| 304 | if (number + 1 < operators.size()) { |
| 305 | continue; // try again |
| 306 | } |
| 307 | clearMark(); |
| 308 | throw new TooFewArgumentsException(getPosition(), |
| 309 | operator.getMin()); |
| 310 | } |
| 311 | // has to much arguments |
| 312 | if (operator.getMax() != -1 && list.size() > operator.getMax()) { |
| 313 | if (number + 1 < operators.size()) { |
| 314 | continue; // try again |
| 315 | } |
| 316 | clearMark(); |
| 317 | throw new TooMuchArgumentsException(getPosition(), operator, |
| 318 | operator.getMax()); |
| 319 | } |
| 320 | for (int i = 0; i < list.size(); i++) { |
| 321 | term.addArgument((Term) list.get(i)); |
| 322 | } |
| 323 | break; |
| 324 | } |
| 325 | |
| 326 | int i = 0; |
| 327 | while (i < operator.getMin()) { |
| 328 | if (i > 0 && operator.getSeparatorSymbol() != null) { |
| 329 | final String separator = getToken(); |
| 330 | if (!operator.getSeparatorSymbol().equals(separator)) { |
| 331 | if (number + 1 < operators.size()) { |
| 332 | continue; |
| 333 | } |
| 334 | clearMark(); |
| 335 | throw new SeparatorNotFoundException(getPosition(), |
| 336 | operator.getSeparatorSymbol()); |
| 337 | } |
| 338 | readToken(); |
| 339 | } |
| 340 | final Term add = readMaximalTerm(operator.getPriority()); |
| 341 | if (add == null) { |
| 342 | if (number + 1 < operators.size()) { |
| 343 | continue; |
| 344 | } |
| 345 | clearMark(); |
| 346 | throw new TooFewArgumentsException(getPosition(), operator.getMin()); |
| 347 | } |
| 348 | term.addArgument(add); |
| 349 | i++; |
| 350 | } |
| 351 | while (operator.getMax() == -1 || i < operator.getMax()) { |
| 352 | if (i > 0 && operator.getSeparatorSymbol() != null) { |
| 353 | final String separator = getToken(); |
| 354 | if (!operator.getSeparatorSymbol().equals(separator)) { |
| 355 | break; |
| 356 | } |
| 357 | readToken(); |
| 358 | } |
| 359 | if (operator.getEndSymbol() != null) { |
| 360 | final String end = getToken(); |
| 361 | if (operator.getEndSymbol().equals(end)) { |
| 362 | break; |
| 363 | } |
| 364 | } |
| 365 | Term add = null; |
| 366 | markPosition(); |
| 367 | try { |
| 368 | add = readMaximalTerm(operator.getPriority()); |
| 369 | clearMark(); |
| 370 | } catch (Exception e) { |
| 371 | rewindPosition(); |
| 372 | } |
| 373 | if (add == null) { |
| 374 | break; |
| 375 | } |
| 376 | term.addArgument(add); |
| 377 | i++; |
| 378 | } |
| 379 | if (operator.getEndSymbol() != null) { |
| 380 | final String end = getToken(); |
| 381 | if (!operator.getEndSymbol().equals(end)) { |
| 382 | if (number + 1 < operators.size()) { |
| 383 | continue; |
| 384 | } |
| 385 | clearMark(); |
| 386 | throw new EndSymbolNotFoundException(getPosition(), |
| 387 | operator.getEndSymbol()); |
| 388 | } |
| 389 | readToken(); |
| 390 | } |
| 391 | break; |
| 392 | } |
| 393 | clearMark(); |
| 394 | return term; |
| 395 | } |
| 396 | |
| 397 | /** |
| 398 | * Read n-tupel. This is a list of terms encapsulated by "(" and ")" and the terms are separated |
| 399 | * by ",". |
| 400 | * |
| 401 | * @return List of terms. <code>null</code> if no bracket followed. |
| 402 | * @throws ParserException Parsing failed. |
| 403 | */ |
| 404 | private final List readTupel() throws ParserException { |
| 405 | final String method = "List readTupel()"; |
| 406 | Trace.begin(CLASS, this, method); |
| 407 | try { |
| 408 | final String firstToken; |
| 409 | firstToken = getToken(); |
| 410 | if (!"(".equals(firstToken)) { |
| 411 | Trace.trace(CLASS, this, method, "no start bracket found"); |
| 412 | return null; |
| 413 | } |
| 414 | readToken(); // read "(" |
| 415 | List list = new ArrayList(); |
| 416 | Term term = null; |
| 417 | while (null != (term = readMaximalTerm(0))) { |
| 418 | list.add(term); |
| 419 | final String separatorToken = getToken(); |
| 420 | if (!",".equals(separatorToken)) { |
| 421 | break; |
| 422 | } |
| 423 | readToken(); // read "," |
| 424 | } |
| 425 | final String lastToken = readToken(); |
| 426 | if (!")".equals(lastToken)) { |
| 427 | throw new ClosingBracketMissingException(getPosition(), ")", lastToken); |
| 428 | } |
| 429 | return list; |
| 430 | } finally { |
| 431 | Trace.end(CLASS, this, method); |
| 432 | } |
| 433 | } |
| 434 | |
| 435 | /** |
| 436 | * Read next operator from input. |
| 437 | * |
| 438 | * @return Found operator, maybe <code>null</code>. |
| 439 | */ |
| 440 | private final Operator readOperator() { |
| 441 | final String method = "Operator readOperator()"; |
| 442 | Trace.begin(CLASS, this, method); |
| 443 | |
| 444 | try { |
| 445 | markPosition(); |
| 446 | final String token; |
| 447 | token = readToken(); |
| 448 | if (token == null) { |
| 449 | rewindPosition(); |
| 450 | Trace.trace(CLASS, this, method, "no operator found"); |
| 451 | return null; |
| 452 | } |
| 453 | final Operator operator = getOperator(token); |
| 454 | if (operator == null) { |
| 455 | rewindPosition(); |
| 456 | Trace.trace(CLASS, this, method, "no operator found"); |
| 457 | return null; |
| 458 | } |
| 459 | clearMark(); |
| 460 | Trace.param(CLASS, this, method, "operator", operator.getQedeq()); |
| 461 | return operator; |
| 462 | } finally { |
| 463 | Trace.end(CLASS, this, method); |
| 464 | } |
| 465 | } |
| 466 | |
| 467 | /** |
| 468 | * Read next operators from input. Because the token that identifies an operator might be not |
| 469 | * unique we could get multiple operators that start with that token. So this method reads |
| 470 | * the operator token (if any) and returns a list of operators that start with that token. |
| 471 | * |
| 472 | * @return Found operators, maybe <code>null</code> if no matching found.. |
| 473 | */ |
| 474 | private final List readOperators() { |
| 475 | final String method = "List readOperators()"; |
| 476 | Trace.begin(CLASS, this, method); |
| 477 | |
| 478 | try { |
| 479 | markPosition(); |
| 480 | final String token; |
| 481 | token = readToken(); |
| 482 | if (token == null) { |
| 483 | rewindPosition(); |
| 484 | Trace.trace(CLASS, this, method, "no operators found"); |
| 485 | return null; |
| 486 | } |
| 487 | final List ops = getOperators(token); |
| 488 | if (ops == null || ops.size() == 0) { |
| 489 | rewindPosition(); |
| 490 | Trace.trace(CLASS, this, method, "no operators found"); |
| 491 | return null; |
| 492 | } |
| 493 | clearMark(); |
| 494 | for (int i = 0; i < ops.size(); i++) { |
| 495 | Trace.param(CLASS, this, method, "operator[" + i + "]", ops.get(i)); |
| 496 | } |
| 497 | return ops; |
| 498 | } finally { |
| 499 | Trace.end(CLASS, this, method); |
| 500 | } |
| 501 | } |
| 502 | |
| 503 | /** |
| 504 | * Get an operator for that token. If there are more than one possibilities the first matching |
| 505 | * is returned. |
| 506 | * |
| 507 | * @param token Get an operator for this token. |
| 508 | * @return Operator. Might be <code>null</code>. |
| 509 | */ |
| 510 | protected abstract Operator getOperator(String token); |
| 511 | |
| 512 | /** |
| 513 | * Get operators for that token. If there are more than one possibilities all matching are |
| 514 | * returned. |
| 515 | * |
| 516 | * @param token Get operators for this token. |
| 517 | * @return Operators. Might be <code>null</code>. |
| 518 | */ |
| 519 | protected abstract List getOperators(String token); |
| 520 | |
| 521 | /** |
| 522 | * Read next token from input and move reading position. |
| 523 | * A token is a recognized character sequence. A token is no |
| 524 | * elementary whitespace. Also a dosn't start or end with |
| 525 | * elementary whitespace. |
| 526 | * |
| 527 | * @return Token read, is <code>null</code> if end of data reached. |
| 528 | */ |
| 529 | protected abstract String readToken(); |
| 530 | |
| 531 | /** |
| 532 | * Read next token from input but don't move reading position. |
| 533 | * A token is a recognised character sequence. A token is no |
| 534 | * elementary whitespace. Also a dosn't start or end with |
| 535 | * elementary whitespace. |
| 536 | * |
| 537 | * @return Token read, is <code>null</code> if end of data reached. |
| 538 | */ |
| 539 | public final String getToken() { |
| 540 | markPosition(); |
| 541 | final String result = readToken(); |
| 542 | rewindPosition(); |
| 543 | return result; |
| 544 | } |
| 545 | |
| 546 | /** |
| 547 | * Remember current position. |
| 548 | */ |
| 549 | protected final void markPosition() { |
| 550 | input.markPosition(); |
| 551 | } |
| 552 | |
| 553 | /** |
| 554 | * Rewind to previous marked position. Also clears the mark. |
| 555 | * |
| 556 | * @return Current position before pop. |
| 557 | */ |
| 558 | protected final long rewindPosition() { |
| 559 | return input.rewindPosition(); |
| 560 | } |
| 561 | |
| 562 | /** |
| 563 | * Forget last remembered position. |
| 564 | */ |
| 565 | protected final void clearMark() { |
| 566 | input.clearMark(); |
| 567 | } |
| 568 | |
| 569 | /** |
| 570 | * Get byte position. |
| 571 | * |
| 572 | * @return Position. |
| 573 | */ |
| 574 | private long getPosition() { |
| 575 | return input.getPosition(); |
| 576 | } |
| 577 | |
| 578 | /** |
| 579 | * Reads a single character and does not change the reading |
| 580 | * position. |
| 581 | * |
| 582 | * @return character read, if there are no more chars |
| 583 | * <code>Character.MAX_VALUE</code> is returned |
| 584 | */ |
| 585 | protected final int getChar() { |
| 586 | return input.getChar(); |
| 587 | } |
| 588 | |
| 589 | /** |
| 590 | * Reads a single character and increments the reading position |
| 591 | * by one. |
| 592 | * |
| 593 | * @return character read, if there are no more chars |
| 594 | * <code>Character.MAX_VALUE</code> is returned |
| 595 | */ |
| 596 | protected final int readChar() { |
| 597 | return input.read(); |
| 598 | } |
| 599 | |
| 600 | /** |
| 601 | * Is this an end of term token? |
| 602 | * |
| 603 | * @param token Check this token. |
| 604 | * @return Is this an end of term token. |
| 605 | */ |
| 606 | protected abstract boolean eot(String token); |
| 607 | |
| 608 | /** |
| 609 | * Are there still any characters to read? |
| 610 | * |
| 611 | * @return Anything left for reading further? |
| 612 | */ |
| 613 | public final boolean eof() { |
| 614 | return input.isEmpty(); |
| 615 | } |
| 616 | |
| 617 | /** |
| 618 | * Get rewind stack size. |
| 619 | * |
| 620 | * @return Rewind stack size. |
| 621 | */ |
| 622 | public final int getRewindStackSize() { |
| 623 | return input.getRewindStackSize(); |
| 624 | } |
| 625 | |
| 626 | } |