0001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org
0002 *
0003 * Copyright 2000-2013, Michael Meyling <mime@qedeq.org>.
0004 *
0005 * "Hilbert II" is free software; you can redistribute
0006 * it and/or modify it under the terms of the GNU General Public
0007 * License as published by the Free Software Foundation; either
0008 * version 2 of the License, or (at your option) any later version.
0009 *
0010 * This program is distributed in the hope that it will be useful,
0011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
0012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0013 * GNU General Public License for more details.
0014 */
0015
0016 package org.qedeq.kernel.bo.service.unicode;
0017
0018 import java.util.Stack;
0019
0020 import org.qedeq.base.io.AbstractOutput;
0021 import org.qedeq.base.io.SourcePosition;
0022 import org.qedeq.base.io.StringOutput;
0023 import org.qedeq.base.io.SubTextInput;
0024 import org.qedeq.base.io.TextInput;
0025 import org.qedeq.base.trace.Trace;
0026 import org.qedeq.kernel.bo.service.latex.LatexErrorCodes;
0027
0028 /**
0029 * Transform LaTeX into Unicode format.
0030 *
0031 * @author Michael Meyling
0032 */
0033 public final class Latex2UnicodeParser {
0034
0035 /** This class; handed to the trace calls as their source class. */
0036 private static final Class CLASS = Latex2UnicodeParser.class;
0037
0038 /** These characters get a special treatment in LaTeX; the tokenizer handles them separately. */
0039 private static final String SPECIALCHARACTERS = "(),{}\\~%$&\'`^_-";
0040
0041 /** Herein goes our output. */
0042 private final AbstractOutput output;
0043
0044 /** Resolver for references; also receives the warnings. */
0045 private final ReferenceFinder finder;
0046
0047 /** This is our current input stream. */
0048 private SubTextInput input;
0049
0050 /** Math mode on? */
0051 private boolean mathMode = false;
0052
0053 /** Mathfrak mode on? */
0054 private boolean mathfrak = false;
0055
0056 /** Emphasize mode on? */
0057 private boolean emph = false;
0058
0059 /** Bold mode on? */
0060 private boolean bold = false;
0061
0062 /** Mathbb mode on? */
0063 private boolean mathbb = false;
0064
0065 /** Stack of the inputs that were current when a nested parse started. */
0066 private Stack inputStack = new Stack();
0067
0068 /** Stack for saved math mode values. */
0069 private Stack mathModeStack = new Stack();
0070
0071 /** Stack for saved mathfrak mode values. */
0072 private Stack mathfrakStack = new Stack();
0073
0074 /** Stack for saved emphasize mode values. */
0075 private Stack emphStack = new Stack();
0076
0077 /** Stack for saved bold mode values. */
0078 private Stack boldStack = new Stack();
0079
0080 /** Stack for saved mathbb mode values. */
0081 private Stack mathbbStack = new Stack();
0082
0083 /** Stack for saved skipWhitespace mode values. */
0084 private Stack skipWhitespaceStack = new Stack();
0085
0086 /** Should I skip whitespace before printing the next token? */
0087 private boolean skipWhitespace;
0088
0089 /** Here the last read token begins. This is an absolute position. */
0090 private int tokenBegin;
0091
0092 /** Here the last read token ends. This is an absolute position. */
0093 private int tokenEnd;
0094
0095 /** Number of the item printed last within the current enumeration. */
0096 private int itemNumber;
0097
0098 /**
0099 * Parse LaTeX text into QEDEQ module string.
0100 *
0101 * @param finder Finder for references.
0102 * @param input Parse this input.
0103 * @param columns Maximum column number. Break (if possible) before.
0104 * @return QEDEQ module string.
0105 */
0106 public static final String transform(final ReferenceFinder finder, final String input,
0107 final int columns) {
0108 final Latex2UnicodeParser parser = new Latex2UnicodeParser(finder);
0109 parser.output.setColumns(columns);
0110 return parser.getUtf8(input);
0111 }
0112
/**
 * Constructor. All output is collected in a new {@link StringOutput}.
 *
 * @param   finder  Finder for references. If this is <code>null</code> a dummy finder
 *                  is installed that prints a reference as <code>[reference]</code> and
 *                  drops all warnings.
 */
private Latex2UnicodeParser(final ReferenceFinder finder) {
    this.output = new StringOutput();
    if (finder != null) {
        this.finder = finder;
    } else {
        // no finder given: fall back to a dummy implementation
        this.finder = new ReferenceFinder() {
            public String getReferenceLink(final String reference,
                    final SourcePosition startDelta, final SourcePosition endDelta) {
                return "[" + reference + "]";
            }

            public void addWarning(final int code, final String msg,
                    final SourcePosition startDelta, final SourcePosition endDelta) {
            }
        };
    }
}
0136
0137 /**
0138 * Get UTF-8 String out of LaTeX text.
0139 *
0140 * @param text LaTeX.
0141 * @return UTF-8.
0142 */
0143 private String getUtf8(final String text) {
0144 skipWhitespace = true;
0145 this.input = new SubTextInput(text);
0146 parseAndPrint(this.input);
0147 return output.toString();
0148 }
0149
0150 /**
0151 * Do parsing and print result.
0152 *
0153 * @param input Parse this LaTeX text and print UTF-8 into output.
0154 */
0155 private void parseAndPrint(final SubTextInput input) {
0156 // remember old:
0157 inputStack.push(this.input);
0158 mathModeStack.push(Boolean.valueOf(mathMode));
0159 mathfrakStack.push(Boolean.valueOf(mathfrak));
0160 emphStack.push(Boolean.valueOf(emph));
0161 boldStack.push(Boolean.valueOf(bold));
0162 mathbbStack.push(Boolean.valueOf(mathbb));
0163 skipWhitespaceStack.push(Boolean.valueOf(skipWhitespace));
0164 try {
0165 this.input = input;
0166 boolean whitespace = false;
0167 while (!eof()) {
0168 String token = readToken();
0169 if (!token.startsWith("\\")) {
0170 token = token.trim();
0171 }
0172 if (token.length() == 0) {
0173 whitespace = true;
0174 continue;
0175 }
0176 if (whitespace && !"\\par".equals(token)) {
0177 print(" ");
0178 whitespace = false;
0179 }
0180 if ("\\begin".equals(token)) {
0181 parseBegin();
0182 } else if ("\\footnote".equals(token)) {
0183 parseFootnote();
0184 } else if ("\\qref".equals(token)) {
0185 parseQref();
0186 } else if ("$$".equals(token)) {
0187 mathMode = true;
0188 final SubTextInput content = readTilToken(token);
0189 println();
0190 parseAndPrint(content);
0191 println();
0192 mathMode = false;
0193 } else if ("$".equals(token)) {
0194 mathMode = true;
0195 final SubTextInput content = readTilToken(token);
0196 parseAndPrint(content);
0197 mathMode = false;
0198 } else if ("\\mathfrak".equals(token)) {
0199 if ('{' == getChar()) {
0200 mathfrak = true;
0201 final SubTextInput content = readCurlyBraceContents();
0202 parseAndPrint(content);
0203 mathfrak = false;
0204 } else {
0205 mathfrak = true;
0206 }
0207 } else if ("\\mathbb".equals(token)) {
0208 if ('{' == getChar()) {
0209 mathbb = true;
0210 final SubTextInput content = readCurlyBraceContents();
0211 parseAndPrint(content);
0212 mathbb = false;
0213 } else {
0214 mathbb = true;
0215 }
0216 } else if ("\\emph".equals(token)) {
0217 if ('{' == getChar()) {
0218 emph = true;
0219 final SubTextInput content = readCurlyBraceContents();
0220 parseAndPrint(content);
0221 // output.addWs("\u2006");
0222 output.addWs(" ");
0223 emph = false;
0224 } else {
0225 emph = true;
0226 }
0227 } else if ("\\textbf".equals(token)) {
0228 if ('{' == getChar()) {
0229 bold = true;
0230 final SubTextInput content = readCurlyBraceContents();
0231 parseAndPrint(content);
0232 bold = false;
0233 } else {
0234 bold = true;
0235 }
0236 } else if ("\\cite".equals(token)) {
0237 if ('{' == getChar()) {
0238 final SubTextInput content = readCurlyBraceContents();
0239 output.addToken("[" + content.asString() + "]");
0240 }
0241 } else if ("\\tag".equals(token)) {
0242 if ('{' == getChar()) {
0243 final SubTextInput content = readCurlyBraceContents();
0244 output.addToken("(" + content.asString() + ")");
0245 }
0246 } else if ("\\mbox".equals(token)) {
0247 if ('{' == getChar()) {
0248 final SubTextInput content = readCurlyBraceContents();
0249 parseAndPrint(content);
0250 }
0251 } else if ("\\cline".equals(token)) {
0252 if ('{' == getChar()) {
0253 readCurlyBraceContents();
0254 // ignore
0255 }
0256 output.addToken("_______________________________________");
0257 println();
0258 } else if ("\\item".equals(token)) {
0259 output.popLevel(3);
0260 itemNumber++;
0261 output.println();
0262 output.addToken(itemNumber + ".");
0263 output.addWs("");
0264 output.pushLevel(" ");
0265 output.setTabLevel();
0266 } else if ("{".equals(token)) {
0267 input.readInverse();
0268 final SubTextInput content = readCurlyBraceContents();
0269 parseAndPrint(content);
0270 } else if ("\\url".equals(token)) {
0271 final SubTextInput content = readCurlyBraceContents();
0272 output.addToken(" " + content.asString() + " ");
0273 } else if ('{' == getChar() && ("\\index".equals(token) || "\\label".equals(token)
0274 || token.equals("\\vspace") || token.equals("\\hspace")
0275 || token.equals("\\vspace*") || token.equals("\\hspace*"))) {
0276 // ignore content
0277 readCurlyBraceContents();
0278 } else if ("_".equals(token) || "^".equals(token)) {
0279 if (mathMode) {
0280 String content;
0281 if ('{' == getChar()) {
0282 content = readCurlyBraceContents().asString();
0283 } else {
0284 content = readToken();
0285 }
0286 if ("_".equals(token)) {
0287 printSubscript(content);
0288 } else {
0289 printSuperscript(content);
0290 }
0291 } else {
0292 print(token);
0293 }
0294 } else {
0295 print(token);
0296 }
0297 }
0298 } finally {
0299 this.input = (SubTextInput) inputStack.pop();
0300 mathMode = ((Boolean) mathModeStack.pop()).booleanValue();
0301 mathfrak = ((Boolean) mathfrakStack.pop()).booleanValue();
0302 emph = ((Boolean) emphStack.pop()).booleanValue();
0303 bold = ((Boolean) boldStack.pop()).booleanValue();
0304 skipWhitespace = ((Boolean) skipWhitespaceStack.pop()).booleanValue();
0305 output.flush();
0306 }
0307 }
0308
0309 /**
0310 * Parse after \footnote.
0311 */
0312 private void parseFootnote() {
0313 if ('{' == getChar()) {
0314 final SubTextInput content = readCurlyBraceContents();
0315 println();
0316 output.printWithoutSplit(" \u250C");
0317 output.pushLevel();
0318 output.pushLevel();
0319 output.pushLevel();
0320 output.pushLevel();
0321 output.pushLevel();
0322 output.pushLevel("\u2502 ");
0323 println();
0324 parseAndPrint(content);
0325 output.popLevel();
0326 output.popLevel();
0327 output.popLevel();
0328 output.popLevel();
0329 output.popLevel();
0330 output.popLevel();
0331 println();
0332 output.printWithoutSplit(" \u2514");
0333 println();
0334 }
0335 }
0336
0337 /**
0338 * Transform <code>\qref{key}</code> entries into common LaTeX code.
0339 *
0340 * @param text Work on this text.
0341 * @return Result of transforming \qref into text.
0342 */
0343 /**
0344 * Parse after \footnote.
0345 */
0346 private void parseQref() {
0347 final String method = "parseQref()";
0348 final int localStart1 = input.getAbsolutePosition();
0349 if ('{' == getChar()) {
0350 final SubTextInput content = readCurlyBraceContents();
0351 String ref = content.asString().trim();
0352 Trace.param(CLASS, this, method, "ref", ref);
0353 if (ref.length() == 0) {
0354 addWarning(LatexErrorCodes.QREF_EMPTY_CODE, LatexErrorCodes.QREF_EMPTY_TEXT,
0355 localStart1, input.getAbsolutePosition());
0356 return;
0357 }
0358 if (ref.length() > 1024) {
0359 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,
0360 LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,
0361 localStart1, input.getAbsolutePosition());
0362 return;
0363 }
0364 if (ref.indexOf("{") >= 0) {
0365 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,
0366 LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,
0367 localStart1, input.getAbsolutePosition());
0368 input.setAbsolutePosition(localStart1);
0369 return;
0370 }
0371
0372 String display = finder.getReferenceLink(ref, getAbsoluteSourcePosition(localStart1),
0373 getAbsoluteSourcePosition(input.getAbsolutePosition()));
0374 output.addToken(display);
0375 }
0376 }
0377
0378
0379 /**
0380 * Parse after \begin.
0381 */
0382 private void parseBegin() {
0383 final String kind = readCurlyBraceContents().asString(); // ignore
0384 final SubTextInput content = readSection(kind);
0385 if ("eqnarray".equals(kind)
0386 || "eqnarray*".equals(kind)
0387 || "equation*".equals(kind)) {
0388 mathMode = true;
0389 skipWhitespace = false;
0390 parseAndPrint(content);
0391 println();
0392 mathMode = false;
0393 } else if ("quote".equals(kind)) {
0394 output.pushLevel();
0395 output.pushLevel();
0396 output.pushLevel();
0397 println();
0398 parseAndPrint(content);
0399 println();
0400 output.popLevel();
0401 output.popLevel();
0402 output.popLevel();
0403 } else if ("tabularx".equals(kind)) {
0404 skipWhitespace = false;
0405 parseAndPrint(content);
0406 } else if ("enumerate".equals(kind)) {
0407 itemNumber = 0;
0408 output.pushLevel(" ");
0409 parseAndPrint(content);
0410 output.popLevel(3);
0411 } else if ("verbatim".equals(kind)) {
0412 final String level = output.getLevel();
0413 output.setLevel("");
0414 print(content.asString());
0415 output.setLevel(level);
0416 } else {
0417 parseAndPrint(content);
0418 }
0419 }
0420
0421 private void printSubscript(final String content) {
0422 output.addToken(Latex2UnicodeSpecials.transform2Subscript(content));
0423 }
0424
0425 private void printSuperscript(final String content) {
0426 output.addToken(Latex2UnicodeSpecials.transform2Superscript(content));
0427 }
0428
0429 /**
0430 * Read until section ends with \{kind}.
0431 *
0432 * @param kind Look for the end of this.
0433 * @return Read text.
0434 */
0435 private SubTextInput readSection(final String kind) {
0436 if ('{' == getChar()) { // skip content
0437 readCurlyBraceContents();
0438 }
0439 if ('{' == getChar()) { // skip content
0440 readCurlyBraceContents();
0441 }
0442 final int localStart = input.getAbsolutePosition();
0443 int current = localStart;
0444 do {
0445 current = input.getAbsolutePosition();
0446 final String item = readToken();
0447 if (item == null) {
0448 Trace.fatal(CLASS, this, "readSection", "not found: " + "\\end{" + kind + "}",
0449 new IllegalArgumentException("from " + localStart + " to " + input.getAbsolutePosition()
0450 + input.getPosition()));
0451 break;
0452 }
0453 if ("\\end".equals(item)) {
0454 final String curly2 = readCurlyBraceContents().asString();
0455 if (kind.equals(curly2)) {
0456 break;
0457 }
0458 }
0459 } while (true);
0460 return input.getSubTextInput(localStart, current);
0461 }
0462
0463 /**
0464 * Get text till <code>token</code> occurs.
0465 *
0466 * @param token Terminator token.
0467 * @return Read text before token.
0468 */
0469 private SubTextInput readTilToken(final String token) {
0470 final int localStart = input.getAbsolutePosition();
0471 final StringBuffer buffer = new StringBuffer();
0472 int current = localStart;
0473 do {
0474 current = input.getAbsolutePosition();
0475 final String item = readToken();
0476 if (item == null) {
0477 Trace.fatal(CLASS, this, "readSection", "not found: " + token,
0478 new IllegalArgumentException("from " + localStart + " to " + current
0479 + input.getAbsolutePosition()));
0480 break;
0481 }
0482 if (token.equals(item)) {
0483 break;
0484 } else {
0485 buffer.append(item);
0486 }
0487 } while (true);
0488 return input.getSubTextInput(localStart, current);
0489 }
0490
0491 /**
0492 * Read next token from input stream.
0493 *
0494 * @return Read token.
0495 */
0496 protected final String readToken() {
0497 final String method = "readToken()";
0498 Trace.begin(CLASS, this, method);
0499 tokenBegin = input.getAbsolutePosition();
0500 StringBuffer token = new StringBuffer();
0501 try {
0502 do {
0503 if (eof()) {
0504 if (token.length() <= 0) {
0505 token = null;
0506 }
0507 break;
0508 }
0509 final char c = (char) getChar();
0510 if (Character.isDigit(c)) {
0511 token.append((char) read());
0512 if (Character.isDigit((char) getChar())) {
0513 continue;
0514 }
0515 break;
0516 }
0517 if (Character.isLetter((char) c)) {
0518 token.append((char) read());
0519 if (Character.isLetter((char) getChar())) {
0520 continue;
0521 }
0522 break;
0523 }
0524 if (SPECIALCHARACTERS.indexOf(c) >= 0) {
0525 switch (c) {
0526 case '&':
0527 case '{':
0528 case '}':
0529 case '~':
0530 case '_':
0531 case '^':
0532 token.append((char) read());
0533 break;
0534 case '$':
0535 case '\'':
0536 case '`':
0537 case '-':
0538 token.append((char) read());
0539 if (c == getChar()) {
0540 continue;
0541 }
0542 break;
0543 case '%':
0544 token.append((char) read());
0545 if (c == getChar()) {
0546 // we must skip till end of line
0547 token.append(readln());
0548 // System.out.println("skipping comment:");
0549 // System.out.println(token);
0550 token.setLength(0);
0551 continue;
0552 }
0553 break;
0554 case '\\':
0555 if (' ' == getChar()) {
0556 token.append("\\");
0557 token.append((char) read());
0558 break;
0559 }
0560 final String t = readBackslashToken();
0561 token.append(t);
0562 break;
0563 default:
0564 read();
0565 token.append((char) c);
0566 }
0567 break;
0568 }
0569 token.append((char) read());
0570 if ('_' == getChar() || '^' == getChar()) {
0571 token.append((char) read());
0572 continue;
0573 }
0574 break;
0575 } while (!eof());
0576 Trace.param(CLASS, this, method, "Read token", token);
0577 // System.out.println("< " + token);
0578 tokenEnd = input.getAbsolutePosition();
0579 return (token != null ? token.toString() : null);
0580 } finally {
0581 Trace.end(CLASS, this, method);
0582 }
0583 }
0584
0585 /**
0586 * Get token that starts with a backlash.
0587 *
0588 * @return Token with backslash.
0589 */
0590 private String readBackslashToken() {
0591 final String method = "readBackslashToken()";
0592 Trace.begin(CLASS, this, method);
0593 if (getChar() != '\\') {
0594 throw new IllegalArgumentException("\\ expected");
0595 }
0596 read(); // read \
0597 if (eof()) {
0598 Trace.param(CLASS, this, method, "return", null);
0599 Trace.end(CLASS, this, method);
0600 return null;
0601 }
0602 if (!Character.isLetter((char) getChar())) {
0603 Trace.param(CLASS, this, method, "return", (char) getChar());
0604 Trace.end(CLASS, this, method);
0605 return "\\" + ((char) read());
0606 }
0607 final StringBuffer buffer = new StringBuffer("\\");
0608 do {
0609 buffer.append((char) read());
0610 } while (!eof() && (Character.isLetter((char) getChar()) || '*' == (char) getChar()));
0611 Trace.param(CLASS, this, method, "return", buffer.toString());
0612 Trace.end(CLASS, this, method);
0613 return buffer.toString();
0614 }
0615
0616 /**
0617 * Read contents that is within { .. }.
0618 *
0619 * @return Contents.
0620 */
0621 private SubTextInput readCurlyBraceContents() {
0622 final int localStart = input.getAbsolutePosition();
0623 final String first = readToken();
0624 if (!"{".equals(first)) {
0625 addWarning(LatexErrorCodes.BRACKET_START_NOT_FOUND_CODE,
0626 LatexErrorCodes.BRACKET_START_NOT_FOUND_TEXT,
0627 localStart, input.getAbsolutePosition());
0628 throw new IllegalArgumentException("\"{\" expected, but was: \"" + first + "\"");
0629 }
0630 final int curlyStart = input.getAbsolutePosition();
0631 int curlyEnd = curlyStart;
0632 final StringBuffer buffer = new StringBuffer();
0633 String next = "";
0634 int level = 1;
0635 while (level > 0 && getChar() != TextInput.EOF) {
0636 next = readToken();
0637 if ("{".equals(next)) {
0638 level++;
0639 } else if ("}".equals(next)) {
0640 level--;
0641 }
0642 if (level <= 0) {
0643 break;
0644 }
0645 buffer.append(next);
0646 curlyEnd = input.getAbsolutePosition();
0647 }
0648 if (!"}".equals(next)) {
0649 addWarning(LatexErrorCodes.BRACKET_END_NOT_FOUND_CODE,
0650 LatexErrorCodes.BRACKET_END_NOT_FOUND_TEXT,
0651 localStart, input.getAbsolutePosition());
0652 buffer.setLength(0);
0653 input.setAbsolutePosition(curlyStart);
0654 curlyEnd = curlyStart;
0655 }
0656 return input.getSubTextInput(curlyStart, curlyEnd);
0657 }
0658
0659 /**
0660 * Print <code>token</code> to output stream.
0661 *
0662 * @param token Print this for UTF-8.
0663 */
0664 private final void print(final String token) {
0665 // System.out.println("> " + token);
0666 if (token.trim().length() == 0) {
0667 if (skipWhitespace) {
0668 return;
0669 }
0670 }
0671 skipWhitespace = false;
0672 if (token.equals("\\par")) {
0673 println();
0674 println();
0675 skipWhitespace = true;
0676 } else if (token.equals("\\\\")) {
0677 println();
0678 } else if (token.equals("&")) {
0679 output.addWs(" ");
0680 } else if (token.equals("\\-")) {
0681 // ignore
0682 } else if (token.equals("--")) {
0683 output.addToken("\u2012");
0684 } else if (token.equals("`")) {
0685 output.addWs("\u2018");
0686 } else if (token.equals("'")) {
0687 output.addToken("\u2019");
0688 } else if (token.equals("\\neq")) {
0689 output.addToken("\u2260");
0690 } else if (token.equals("\\in")) {
0691 output.addToken("\u2208");
0692 } else if (token.equals("\\forall")) {
0693 output.addToken("\u2200");
0694 } else if (token.equals("\\exists")) {
0695 output.addToken("\u2203");
0696 } else if (token.equals("\\emptyset")) {
0697 output.addToken("\u2205");
0698 } else if (token.equals("\\rightarrow")) {
0699 output.addToken("\u2192");
0700 } else if (token.equals("\\Rightarrow")) {
0701 output.addToken("\u21D2");
0702 } else if (token.equals("\\leftrightarrow")) {
0703 output.addToken("\u2194");
0704 } else if (token.equals("\\Leftarrow")) {
0705 output.addToken("\u21D0");
0706 } else if (token.equals("\\Leftrightarrow")) {
0707 output.addToken("\u21D4");
0708 } else if (token.equals("\\langle")) {
0709 output.addToken("\u2329");
0710 } else if (token.equals("\\rangle")) {
0711 output.addToken("\u232A");
0712 } else if (token.equals("\\land") || token.equals("\\vee")) {
0713 output.addToken("\u2227");
0714 } else if (token.equals("\\lor") || token.equals("\\wedge")) {
0715 output.addToken("\u2228");
0716 } else if (token.equals("\\bar")) {
0717 output.addToken("\u203E");
0718 } else if (token.equals("\\bigcap")) {
0719 output.addToken("\u22C2");
0720 } else if (token.equals("\\cap")) {
0721 output.addToken("\u2229");
0722 } else if (token.equals("\\bigcup")) {
0723 output.addToken("\u22C3");
0724 } else if (token.equals("\\cup")) {
0725 output.addToken("\u222A");
0726 } else if (token.equals("\\in")) {
0727 output.addToken("\u2208");
0728 } else if (token.equals("\\notin")) {
0729 output.addToken("\u2209");
0730 } else if (token.equals("\\Alpha")) {
0731 output.addToken("\u0391");
0732 } else if (token.equals("\\alpha")) {
0733 output.addToken("\u03B1");
0734 } else if (token.equals("\\Beta")) {
0735 output.addToken("\u0392");
0736 } else if (token.equals("\\beta")) {
0737 output.addToken("\u03B2");
0738 } else if (token.equals("\\Gamma")) {
0739 output.addToken("\u0393");
0740 } else if (token.equals("\\gamma")) {
0741 output.addToken("\u03B3");
0742 } else if (token.equals("\\Delta")) {
0743 output.addToken("\u0394");
0744 } else if (token.equals("\\delta")) {
0745 output.addToken("\u03B4");
0746 } else if (token.equals("\\Epslilon")) {
0747 output.addToken("\u0395");
0748 } else if (token.equals("\\epsilon")) {
0749 output.addToken("\u03B5");
0750 } else if (token.equals("\\Zeta")) {
0751 output.addToken("\u0396");
0752 } else if (token.equals("\\zeta")) {
0753 output.addToken("\u03B6");
0754 } else if (token.equals("\\Eta")) {
0755 output.addToken("\u0397");
0756 } else if (token.equals("\\eta")) {
0757 output.addToken("\u03B7");
0758 } else if (token.equals("\\Theta")) {
0759 output.addToken("\u0398");
0760 } else if (token.equals("\\theta")) {
0761 output.addToken("\u03B8");
0762 } else if (token.equals("\\Iota")) {
0763 output.addToken("\u0399");
0764 } else if (token.equals("\\iota")) {
0765 output.addToken("\u03B9");
0766 } else if (token.equals("\\Kappa")) {
0767 output.addToken("\u039A");
0768 } else if (token.equals("\\kappa")) {
0769 output.addToken("\u03BA");
0770 } else if (token.equals("\\Lamda")) {
0771 output.addToken("\u039B");
0772 } else if (token.equals("\\lamda")) {
0773 output.addToken("\u03BB");
0774 } else if (token.equals("\\Mu")) {
0775 output.addToken("\u039C");
0776 } else if (token.equals("\\mu")) {
0777 output.addToken("\u03BC");
0778 } else if (token.equals("\\Nu")) {
0779 output.addToken("\u039D");
0780 } else if (token.equals("\\nu")) {
0781 output.addToken("\u03BD");
0782 } else if (token.equals("\\Xi")) {
0783 output.addToken("\u039E");
0784 } else if (token.equals("\\xi")) {
0785 output.addToken("\u03BE");
0786 } else if (token.equals("\\Omikron")) {
0787 output.addToken("\u039F");
0788 } else if (token.equals("\\omikron")) {
0789 output.addToken("\u03BF");
0790 } else if (token.equals("\\Pi")) {
0791 output.addToken("\u03A0");
0792 } else if (token.equals("\\pi")) {
0793 output.addToken("\u03C0");
0794 } else if (token.equals("\\Rho")) {
0795 output.addToken("\u03A1");
0796 } else if (token.equals("\\rho")) {
0797 output.addToken("\u03C1");
0798 } else if (token.equals("\\Sigma")) {
0799 output.addToken("\u03A3");
0800 } else if (token.equals("\\sigma")) {
0801 output.addToken("\u03C3");
0802 } else if (token.equals("\\Tau")) {
0803 output.addToken("\u03A4");
0804 } else if (token.equals("\\tau")) {
0805 output.addToken("\u03C4");
0806 } else if (token.equals("\\Upsilon")) {
0807 output.addToken("\u03A5");
0808 } else if (token.equals("\\upsilon")) {
0809 output.addToken("\u03C5");
0810 } else if (token.equals("\\Phi")) {
0811 output.addToken("\u03A6");
0812 } else if (token.equals("\\phi")) {
0813 output.addToken("\u03C6");
0814 } else if (token.equals("\\Chi")) {
0815 output.addToken("\u03A6");
0816 } else if (token.equals("\\chi")) {
0817 output.addToken("\u03C7");
0818 } else if (token.equals("\\Psi")) {
0819 output.addToken("\u03A8");
0820 } else if (token.equals("\\psi")) {
0821 output.addToken("\u03C8");
0822 } else if (token.equals("\\Omega")) {
0823 output.addToken("\u03A9");
0824 } else if (token.equals("\\omega")) {
0825 output.addToken("\u03C9");
0826 } else if (token.equals("\\subset")) {
0827 output.addToken("\u2282");
0828 } else if (token.equals("\\supset")) {
0829 output.addToken("\u2283");
0830 } else if (token.equals("\\subseteq")) {
0831 output.addToken("\u2286");
0832 } else if (token.equals("\\supseteq")) {
0833 output.addToken("\u2287");
0834 } else if (token.equals("\\{")) {
0835 output.addToken("{");
0836 } else if (token.equals("\\}")) {
0837 output.addToken("}");
0838 } else if (token.equals("\\&")) {
0839 output.addToken("&");
0840 } else if (token.equals("\\ ")) {
0841 output.addWs(" ");
0842 } else if (token.equals("\\S")) {
0843 output.addToken("\u00A7");
0844 } else if (token.equals("\\tt")) {
0845 // ignore
0846 } else if (token.equals("\\tiny")) {
0847 // ignore
0848 } else if (token.equals("\\nonumber")) {
0849 // ignore
0850 } else if (token.equals("\\LaTeX")) {
0851 output.addToken("LaTeX");
0852 } else if (token.equals("\\vdash")) {
0853 output.addToken("\u22A2");
0854 } else if (token.equals("\\dashv")) {
0855 output.addToken("\u22A3");
0856 } else if (token.equals("\\times")) {
0857 output.addToken("\u00D7");
0858 } else if (token.equals("~")) {
0859 output.addToken("\u00A0");
0860 } else if (token.equals("\\quad")) {
0861 // output.addWs("\u2000");
0862 output.addWs(" ");
0863 } else if (token.equals("\\qquad")) {
0864 // output.addWs("\u2000\u2000");
0865 output.addWs(" ");
0866 } else if (token.equals("\\,")) {
0867 // output.addWs("\u2009");
0868 output.addWs(" ");
0869 } else if (token.equals("\\neg") || token.equals("\\not")) {
0870 output.addToken("\u00AC");
0871 } else if (token.equals("\\bot")) {
0872 output.addToken("\u22A5");
0873 } else if (token.equals("\\top")) {
0874 output.addToken("\u22A4");
0875 } else if (token.equals("''") || token.equals("\\grqq")) {
0876 output.addToken("\u201D");
0877 } else if (token.equals("``") || token.equals("\\glqq")) {
0878 skipWhitespace = true;
0879 output.addToken("\u201E");
0880 } else if (token.equals("\\ldots")) {
0881 output.addToken("...");
0882 } else if (token.equals("\\overline")) { // TODO 20101018 m31: we assume set complement
0883 output.addToken("\u2201");
0884 } else if (token.startsWith("\\")) {
0885 addWarning(LatexErrorCodes.COMMAND_NOT_SUPPORTED_CODE,
0886 LatexErrorCodes.COMMAND_NOT_SUPPORTED_TEXT + token, tokenBegin, tokenEnd);
0887 } else {
0888 if (mathfrak) {
0889 mathfrak(token);
0890 } else if (mathbb) {
0891 mathbb(token);
0892 } else if (emph) {
0893 emph(token);
0894 } else if (bold) {
0895 bold(token);
0896 } else {
0897 if (isWs(token)) {
0898 output.addWs(token);
0899 } else {
0900 output.addToken(token);
0901 }
0902 }
0903 }
0904 }
0905
0906 /**
0907 * Write token chars in mathbb mode.
0908 *
0909 * @param token Chars to write.
0910 */
0911 private void emph(final String token) {
0912 if (isWs(token)) {
0913 output.addWs(Latex2UnicodeSpecials.transform2Emph(token));
0914 } else {
0915 output.addToken(Latex2UnicodeSpecials.transform2Emph(token));
0916 }
0917 }
0918
0919 /**
0920 * Write token chars in mathbb mode.
0921 *
0922 * @param token Chars to write.
0923 */
0924 private void mathbb(final String token) {
0925 for (int i = 0; i < token.length(); i++) {
0926 final char c = token.charAt(i);
0927 switch (c) {
0928 case 'C': output.addToken("\u2102");
0929 break;
0930 case 'H': output.addToken("\u210D");
0931 break;
0932 case 'N': output.addToken("\u2115");
0933 break;
0934 case 'P': output.addToken("\u2119");
0935 break;
0936 case 'Q': output.addToken("\u211A");
0937 break;
0938 case 'R': output.addToken("\u211D");
0939 break;
0940 case 'Z': output.addToken("\u2124");
0941 break;
0942 default:
0943 if (Character.isWhitespace(c)) {
0944 output.addWs("" + c);
0945 } else {
0946 output.addToken("" + c);
0947 }
0948 }
0949 }
0950 }
0951
0952 private boolean isWs(final String token) {
0953 return token == null || token.trim().length() == 0;
0954 }
0955
0956 /**
0957 * Write token chars in mathfrak mode.
0958 *
0959 * @param token Chars to write.
0960 */
0961 private void mathfrak(final String token) {
0962 if (isWs(token)) {
0963 output.addWs(Latex2UnicodeSpecials.transform2Mathfrak(token));
0964 } else {
0965 output.addToken(Latex2UnicodeSpecials.transform2Mathfrak(token));
0966 }
0967 }
0968
0969 /**
0970 * Write token in bold mode.
0971 *
0972 * @param token Chars to write.
0973 */
0974 private void bold(final String token) {
0975 if (isWs(token)) {
0976 output.addWs(Latex2UnicodeSpecials.transform2Bold(token));
0977 } else {
0978 output.addToken(Latex2UnicodeSpecials.transform2Bold(token));
0979 }
0980 }
0981
0982 /**
0983 * Print end of line.
0984 */
0985 private final void println() {
0986 output.println();
0987 }
0988
0989 /**
0990 * Reads a single character and does not change the reading
0991 * position.
0992 *
0993 * @return character read, if there are no more chars
0994 * <code>-1</code> is returned
0995 */
0996 protected final int getChar() {
0997 return input.getChar();
0998 }
0999
1000 /**
1001 * Reads a single character and increments the reading position
1002 * by one.
1003 *
1004 * @return character read, if there are no more chars
1005 * <code>-1</code> is returned
1006 */
1007 protected final int read() {
1008 return input.read();
1009 }
1010
1011 /**
1012 * Read until end of line.
1013 *
1014 * @return Characters read.
1015 */
1016 protected final String readln() {
1017 StringBuffer result = new StringBuffer();
1018 int c;
1019 while (TextInput.EOF != (c = read())) {
1020 if (c == '\n') {
1021 break;
1022 }
1023 result.append((char) c);
1024 }
1025 return result.toString();
1026 }
1027
1028 /**
1029 * Are there still any characters to read?
1030 *
1031 * @return Anything left for reading further?
1032 */
1033 public final boolean eof() {
1034 return input.isEmpty();
1035 }
1036
1037 /**
1038 * Convert character position into row and column information.
1039 *
1040 * @param absolutePosition Find this character position.
1041 * @return Row and column information.
1042 */
1043 public SourcePosition getAbsoluteSourcePosition(final int absolutePosition) {
1044 return ((SubTextInput) inputStack.get(0)).getPosition(absolutePosition);
1045 }
1046
1047 /**
1048 * Add warning message.
1049 *
1050 * @param code Message code.
1051 * @param message Message.
1052 * @param from Absolute character position of problem start.
1053 * @param to Absolute character position of problem end.
1054 */
1055 private void addWarning(final int code, final String message, final int from, final int to) {
1056 finder.addWarning(code, message, getAbsoluteSourcePosition(from),
1057 getAbsoluteSourcePosition(to));
1058 }
1059
1060
1061
1062 }
|