Latex2UnicodeParser.java


0001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org

0002  *

0003  * Copyright 2000-2011,  Michael Meyling <mime@qedeq.org>.

0004  *

0005  * "Hilbert II" is free software; you can redistribute

0006  * it and/or modify it under the terms of the GNU General Public

0007  * License as published by the Free Software Foundation; either

0008  * version 2 of the License, or (at your option) any later version.

0009  *

0010  * This program is distributed in the hope that it will be useful,

0011  * but WITHOUT ANY WARRANTY; without even the implied warranty of

0012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

0013  * GNU General Public License for more details.

0014  */

0015 

0016 package org.qedeq.kernel.bo.service.unicode;

0017 

0018 import java.util.Stack;

0019 

0020 import org.qedeq.base.io.AbstractOutput;

0021 import org.qedeq.base.io.SourcePosition;

0022 import org.qedeq.base.io.StringOutput;

0023 import org.qedeq.base.io.SubTextInput;

0024 import org.qedeq.base.io.TextInput;

0025 import org.qedeq.base.trace.Trace;

0026 import org.qedeq.kernel.bo.service.latex.LatexErrorCodes;

0027 

0028 /**

0029  * Transform LaTeX into Unicode format.

0030  *

0031  * @author  Michael Meyling

0032  */

0033 public final class Latex2UnicodeParser {

0034 

0035     /** This class. */

0036     private static final Class CLASS = Latex2UnicodeParser.class;

0037 

0038     /** These characters get a special treatment in LaTeX. */

0039     private static final String SPECIALCHARACTERS = "(),{}\\~%$&\'`^_-";

0040 

0041     /** Herein goes our output. */

0042     private final AbstractOutput output;

0043 

0044     /** Resolver for references. */

0045     private final ReferenceFinder finder;

0046 

0047     /** This is our current input stream .*/

0048     private SubTextInput input;

0049 

0050     /** Math mode on? */

0051     private boolean mathMode = false;

0052 

0053     /** Mathfrak mode on? */

0054     private boolean mathfrak = false;

0055 

0056     /** Emphasize on? */

0057     private boolean emph = false;

0058 

0059     /** Bold on? */

0060     private boolean bold = false;

0061 

0062     /** Mathbb on? */

0063     private boolean mathbb = false;

0064 

0065     /** Stack for input parser. */

0066     private Stack inputStack = new Stack();

0067 

0068     /** Stack for math mode. */

0069     private Stack mathModeStack = new Stack();

0070 

0071     /** Stack for mathfrak mode. */

0072     private Stack mathfrakStack = new Stack();

0073 

0074     /** Stack for emphasize mode. */

0075     private Stack emphStack = new Stack();

0076 

0077     /** Stack for bold mode. */

0078     private Stack boldStack = new Stack();

0079 

0080     /** Stack for mathbb mode. */

0081     private Stack mathbbStack = new Stack();

0082 

0083     /** Should I skip whitespace before printing the next token. */

0084     private boolean skipWhitespace;

0085 

0086     /** Here the last read token begins. This is an absolute position. */

0087     private int tokenBegin;

0088 

0089     /** Here the last read token ends. This is an absolute position. */

0090     private int tokenEnd;

0091 

0092     /** Current item number. */

0093     private int itemNumber;

0094 

0095     /**

0096      * Parse LaTeX text into QEDEQ module string.

0097      *

0098      * @param   finder  Finder for references.

0099      * @param   input   Parse this input.

0100      * @param   columns Maximum column number. Break (if possible) before.

0101      * @return  QEDEQ module string.

0102      */

0103     public static final String transform(final ReferenceFinder finder, final String input,

0104             final int columns) {

0105         final Latex2UnicodeParser parser = new Latex2UnicodeParser(finder);

0106         parser.output.setColumns(columns);

0107         return parser.getUtf8(input);

0108     }

0109 

0110     /**

0111      * Constructor.

0112      *

0113      * @param   finder  Finder for references.

0114      */

0115     private Latex2UnicodeParser(final ReferenceFinder finder) {

0116         // use dummy implementation if finder is null

0117         if (finder == null) {

0118             this.finder = new ReferenceFinder() {

0119                 public String getReferenceLink(final String reference,

0120                         final SourcePosition startDelta, final SourcePosition endDelta) {

0121                     return "[" + reference + "]";

0122                 }

0123 

0124                 public void addWarning(final int code, final String msg,

0125                         final SourcePosition startDelta, final SourcePosition endDelta) {

0126                 }

0127             };

0128         } else {

0129             this.finder = finder;

0130         }

0131         this.output = new StringOutput();

0132     }

0133 

0134     /**

0135      * Get UTF-8 String out of LaTeX text.

0136      *

0137      * @param   text    LaTeX.

0138      * @return  UTF-8.

0139      */

0140     private String getUtf8(final String text) {

0141         skipWhitespace = true;

0142         this.input = new SubTextInput(text);

0143         parseAndPrint(this.input);

0144         output.flush();

0145         return output.toString();

0146     }

0147 

0148     /**

0149      * Do parsing and print result.

0150      *

0151      * @param   input   Parse this LaTeX text and print UTF-8 into output.

0152      */

0153     private void parseAndPrint(final SubTextInput input) {

0154         // remember old:

0155         inputStack.push(this.input);

0156         mathModeStack.push(Boolean.valueOf(mathMode));

0157         mathfrakStack.push(Boolean.valueOf(mathfrak));

0158         emphStack.push(Boolean.valueOf(emph));

0159         boldStack.push(Boolean.valueOf(bold));

0160         mathbbStack.push(Boolean.valueOf(mathbb));

0161         try {

0162             this.input = input;

0163             boolean whitespace = false;

0164             while (!eof()) {

0165                 String token = readToken();

0166                 if (!token.startsWith("\\")) {

0167                     token = token.trim();

0168                 }

0169                 if (token.length() == 0) {

0170                     whitespace = true;

0171                     continue;

0172                 }

0173                 if (whitespace) {

0174                     print(" ");

0175                     whitespace = false;

0176                 }

0177                 if ("\\begin".equals(token)) {

0178                     parseBegin();

0179                 } else if ("\\footnote".equals(token)) {

0180                     parseFootnote();

0181                 } else if ("\\qref".equals(token)) {

0182                     parseQref();

0183                 } else if ("$$".equals(token)) {

0184                     mathMode = true;

0185                     final SubTextInput content = readTilToken(token);

0186                     println();

0187                     parseAndPrint(content);

0188                     println();

0189                     mathMode = false;

0190                 } else if ("$".equals(token)) {

0191                     mathMode = true;

0192                     final SubTextInput content = readTilToken(token);

0193                     parseAndPrint(content);

0194                     mathMode = false;

0195                 } else if ("\\mathfrak".equals(token)) {

0196                     if ('{' == getChar()) {

0197                         mathfrak = true;

0198                         final SubTextInput content = readCurlyBraceContents();

0199                         parseAndPrint(content);

0200                         mathfrak = false;

0201                     } else {

0202                         mathfrak = true;

0203                     }

0204                 } else if ("\\mathbb".equals(token)) {

0205                     if ('{' == getChar()) {

0206                         mathbb = true;

0207                         final SubTextInput content = readCurlyBraceContents();

0208                         parseAndPrint(content);

0209                         mathbb = false;

0210                     } else {

0211                         mathbb = true;

0212                     }

0213                 } else if ("\\emph".equals(token)) {

0214                     if ('{' == getChar()) {

0215                         emph = true;

0216                         final SubTextInput content = readCurlyBraceContents();

0217                         parseAndPrint(content);

0218 //                        output.addWs("\u2006");

0219                         output.addWs(" ");

0220                         emph = false;

0221                     } else {

0222                         emph = true;

0223                     }

0224                 } else if ("\\textbf".equals(token)) {

0225                     if ('{' == getChar()) {

0226                         bold = true;

0227                         final SubTextInput content = readCurlyBraceContents();

0228                         parseAndPrint(content);

0229                         bold = false;

0230                     } else {

0231                         bold = true;

0232                     }

0233                 } else if ("\\cite".equals(token)) {

0234                     if ('{' == getChar()) {

0235                         final SubTextInput content = readCurlyBraceContents();

0236                         output.addToken("[" + content.asString() + "]");

0237                     }

0238                 } else if ("\\tag".equals(token)) {

0239                     if ('{' == getChar()) {

0240                         final SubTextInput content = readCurlyBraceContents();

0241                         output.addToken("(" + content.asString() + ")");

0242                     }

0243                 } else if ("\\mbox".equals(token)) {

0244                     if ('{' == getChar()) {

0245                         final SubTextInput content = readCurlyBraceContents();

0246                         parseAndPrint(content);

0247                     }

0248                 } else if ("\\cline".equals(token)) {

0249                     if ('{' == getChar()) {

0250                         readCurlyBraceContents();

0251                         // ignore

0252                     }

0253                     output.println("_______________________________________");

0254                     println();

0255                 } else if ("\\item".equals(token)) {

0256                     output.popLevel(3);

0257                     itemNumber++;

0258                     output.println();

0259                     output.addToken(itemNumber + ".");

0260                     output.addWs("");

0261                     output.pushLevel("   ");

0262                     output.setTabLevel();

0263                 } else if ("{".equals(token)) {

0264                     input.readInverse();

0265                     final SubTextInput content = readCurlyBraceContents();

0266                     parseAndPrint(content);

0267                 } else if ("\\url".equals(token)) {

0268                     final SubTextInput content = readCurlyBraceContents();

0269                     output.addToken(" " + content.asString() + " ");

0270                 } else if ('{' == getChar() && ("\\index".equals(token) || "\\label".equals(token)

0271                         || token.equals("\\vspace") || token.equals("\\hspace")

0272                         || token.equals("\\vspace*") || token.equals("\\hspace*"))) {

0273                     // ignore content

0274                     readCurlyBraceContents();

0275                 } else if ("_".equals(token) || "^".equals(token)) {

0276                     if (mathMode) {

0277                         String content;

0278                         if ('{' == getChar()) {

0279                             content = readCurlyBraceContents().asString();

0280                         } else {

0281                             content = readToken();

0282                         }

0283                         if ("_".equals(token)) {

0284                             printSubscript(content);

0285                         } else {

0286                             printSuperscript(content);

0287                         }

0288                     } else {

0289                         print(token);

0290                     }

0291                 } else {

0292                     print(token);

0293                 }

0294             }

0295         } finally {

0296             this.input = (SubTextInput) inputStack.pop();

0297             mathMode = ((Boolean) mathModeStack.pop()).booleanValue();

0298             mathfrak = ((Boolean) mathfrakStack.pop()).booleanValue();

0299             emph = ((Boolean) emphStack.pop()).booleanValue();

0300             bold = ((Boolean) boldStack.pop()).booleanValue();

0301         }

0302     }

0303 

0304     /**

0305      * Parse after \footnote.

0306      */

0307     private void parseFootnote() {

0308         if ('{' == getChar()) {

0309             final SubTextInput content = readCurlyBraceContents();

0310             println();

0311             output.printWithoutSplit("          \u250C");

0312             output.pushLevel();

0313             output.pushLevel();

0314             output.pushLevel();

0315             output.pushLevel();

0316             output.pushLevel();

0317             output.pushLevel("\u2502 ");

0318             println();

0319             parseAndPrint(content);

0320             output.popLevel();

0321             output.popLevel();

0322             output.popLevel();

0323             output.popLevel();

0324             output.popLevel();

0325             output.popLevel();

0326             println();

0327             output.printWithoutSplit("          \u2514");

0328             println();

0329         }

0330     }

0331 

0332     /**

0333      * Transform <code>\qref{key}</code> entries into common LaTeX code.

0334      *

0335      * @param   text    Work on this text.

0336      * @return  Result of transforming \qref into text.

0337      */

0338     /**

0339      * Parse after \footnote.

0340      */

0341     private void parseQref() {

0342         final String method = "parseQref()";

0343         final int localStart1 = input.getAbsolutePosition();

0344         if ('{' == getChar()) {

0345             final SubTextInput content = readCurlyBraceContents();

0346             String ref = content.asString().trim();

0347             Trace.param(CLASS, this, method, "ref", ref);

0348             if (ref.length() == 0) {

0349                 addWarning(LatexErrorCodes.QREF_EMPTY_CODE, LatexErrorCodes.QREF_EMPTY_TEXT,

0350                     localStart1, input.getAbsolutePosition());

0351                 return;

0352             }

0353             if (ref.length() > 1024) {

0354                 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,

0355                     LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,

0356                     localStart1, input.getAbsolutePosition());

0357                 return;

0358             }

0359             if (ref.indexOf("{") >= 0) {

0360                 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,

0361                     LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,

0362                     localStart1, input.getAbsolutePosition());

0363                 input.setAbsolutePosition(localStart1);

0364                 return;

0365             }

0366 

0367             String display = finder.getReferenceLink(ref, getAbsoluteSourcePosition(localStart1),

0368                 getAbsoluteSourcePosition(input.getAbsolutePosition()));

0369             output.addToken(display);

0370         }

0371     }

0372 

0373 

0374     /**

0375      * Parse after \begin.

0376      */

0377     private void parseBegin() {

0378         final String kind = readCurlyBraceContents().asString();   // ignore

0379         final SubTextInput content = readSection(kind);

0380         if ("eqnarray".equals(kind)

0381             || "eqnarray*".equals(kind)

0382             || "equation*".equals(kind)) {

0383             mathMode = true;

0384             parseAndPrint(content);

0385             println();

0386             mathMode = false;

0387         } else if ("quote".equals(kind)) {

0388             output.pushLevel();

0389             output.pushLevel();

0390             output.pushLevel();

0391             println();

0392             parseAndPrint(content);

0393             println();

0394             output.popLevel();

0395             output.popLevel();

0396             output.popLevel();

0397         } else if ("tabularx".equals(kind)) {

0398             skipWhitespace = false;

0399             parseAndPrint(content);

0400         } else if ("enumerate".equals(kind)) {

0401             itemNumber = 0;

0402             output.pushLevel("   ");

0403             parseAndPrint(content);

0404             output.popLevel(3);

0405         } else if ("verbatim".equals(kind)) {

0406             final String level = output.getLevel();

0407             output.setLevel("");

0408             print(content.asString());

0409             output.setLevel(level);

0410         } else {

0411             parseAndPrint(content);

0412         }

0413     }

0414 

0415     private void printSubscript(final String content) {

0416         output.addToken(Latex2UnicodeSpecials.transform2Subscript(content));

0417     }

0418 

0419     private void printSuperscript(final String content) {

0420         output.addToken(Latex2UnicodeSpecials.transform2Superscript(content));

0421     }

0422 

0423     /**

0424      * Read until section ends with \{kind}.

0425      *

0426      * @param   kind    Look for the end of this.

0427      * @return  Read text.

0428      */

0429     private SubTextInput readSection(final String kind) {

0430         if ('{' == getChar()) { // skip content

0431             readCurlyBraceContents();

0432         }

0433         if ('{' == getChar()) { // skip content

0434             readCurlyBraceContents();

0435         }

0436         final int localStart = input.getAbsolutePosition();

0437         int current = localStart;

0438         do {

0439             current = input.getAbsolutePosition();

0440             final String item = readToken();

0441             if (item == null) {

0442                 Trace.fatal(CLASS, this, "readSection", "not found: " + "\\end{" + kind + "}",

0443                     new IllegalArgumentException("from " + localStart + " to " + input.getAbsolutePosition()

0444                     + input.getPosition()));

0445                 break;

0446             }

0447             if ("\\end".equals(item)) {

0448                 final String curly2 = readCurlyBraceContents().asString();

0449                 if (kind.equals(curly2)) {

0450                     break;

0451                 }

0452             }

0453         } while (true);

0454         return input.getSubTextInput(localStart, current);

0455     }

0456 

0457     /**

0458      * Get text till <code>token</code> occurs.

0459      *

0460      * @param   token   Terminator token.

0461      * @return  Read text before token.

0462      */

0463     private SubTextInput readTilToken(final String token) {

0464         final int localStart = input.getAbsolutePosition();

0465         final StringBuffer buffer = new StringBuffer();

0466         int current = localStart;

0467         do {

0468             current = input.getAbsolutePosition();

0469             final String item = readToken();

0470             if (item == null) {

0471                 Trace.fatal(CLASS, this, "readSection", "not found: " + token,

0472                     new IllegalArgumentException("from " + localStart + " to " + current

0473                     + input.getAbsolutePosition()));

0474                 break;

0475             }

0476             if (token.equals(item)) {

0477                 break;

0478             } else {

0479                 buffer.append(item);

0480             }

0481         } while (true);

0482         return input.getSubTextInput(localStart, current);

0483     }

0484 

0485     /**

0486      * Read next token from input stream.

0487      *

0488      * @return  Read token.

0489      */

0490     protected final String readToken() {

0491         final String method = "readToken()";

0492         Trace.begin(CLASS, this, method);

0493         tokenBegin = input.getAbsolutePosition();

0494         StringBuffer token = new StringBuffer();

0495         try {

0496             do {

0497                 if (eof()) {

0498                     if (token.length() <= 0) {

0499                         token = null;

0500                     }

0501                     break;

0502                 }

0503                 final char c = (char) getChar();

0504                 if (Character.isDigit(c)) {

0505                     token.append((char) read());

0506                     if (Character.isDigit((char) getChar())) {

0507                         continue;

0508                     }

0509                     break;

0510                 }

0511                 if (Character.isLetter((char) c)) {

0512                     token.append((char) read());

0513                     if (Character.isLetter((char) getChar())) {

0514                         continue;

0515                     }

0516                     break;

0517                 }

0518                 if (SPECIALCHARACTERS.indexOf(c) >= 0) {

0519                     switch (c) {

0520                     case '&':

0521                     case '{':

0522                     case '}':

0523                     case '~':

0524                     case '_':

0525                     case '^':

0526                         token.append((char) read());

0527                         break;

0528                     case '$':

0529                     case '\'':

0530                     case '`':

0531                     case '-':

0532                         token.append((char) read());

0533                         if (c == getChar()) {

0534                             continue;

0535                         }

0536                         break;

0537                     case '%':

0538                         token.append((char) read());

0539                         if (c == getChar()) {

0540                             // we must skip till end of line

0541                             token.append(readln());

0542 //                            System.out.println("skipping comment:");

0543 //                            System.out.println(token);

0544                             token.setLength(0);

0545                             continue;

0546                         }

0547                         break;

0548                     case '\\':

0549                         if (' ' == getChar()) {

0550                             token.append("\\");

0551                             token.append((char) read());

0552                             break;

0553                         }

0554                         final String t = readBackslashToken();

0555                         token.append(t);

0556                         break;

0557                     default:

0558                         read();

0559                         token.append((char) c);

0560                     }

0561                     break;

0562                 }

0563                 token.append((char) read());

0564                 if ('_' == getChar() || '^' == getChar()) {

0565                     token.append((char) read());

0566                     continue;

0567                 }

0568                 break;

0569             } while (!eof());

0570             Trace.param(CLASS, this, method, "Read token", token);

0571 //            System.out.println("< " + token);

0572             tokenEnd = input.getAbsolutePosition();

0573             return (token != null ? token.toString() : null);

0574         } finally {

0575             Trace.end(CLASS, this, method);

0576         }

0577     }

0578 

0579     /**

0580      * Get token that starts with a backlash.

0581      *

0582      * @return  Token with backslash.

0583      */

0584     private String readBackslashToken() {

0585         final String method = "readBackslashToken()";

0586         Trace.begin(CLASS, this, method);

0587         if (getChar() != '\\') {

0588             throw new IllegalArgumentException("\\ expected");

0589         }

0590         read(); // read \

0591         if (eof()) {

0592             Trace.param(CLASS, this, method, "return", null);

0593             Trace.end(CLASS, this, method);

0594             return null;

0595         }

0596         if (!Character.isLetter((char) getChar())) {

0597             Trace.param(CLASS, this, method, "return", (char) getChar());

0598             Trace.end(CLASS, this, method);

0599             return "\\" + ((char) read());

0600         }

0601         final StringBuffer buffer = new StringBuffer("\\");

0602         do {

0603             buffer.append((char) read());

0604         } while (!eof() && (Character.isLetter((char) getChar()) || '*' == (char) getChar()));

0605         Trace.param(CLASS, this, method, "return", buffer.toString());

0606         Trace.end(CLASS, this, method);

0607         return buffer.toString();

0608     }

0609 

0610     /**

0611      * Read contents that is within { .. }.

0612      *

0613      * @return  Contents.

0614      */

0615     private SubTextInput readCurlyBraceContents() {

0616         final int localStart = input.getAbsolutePosition();

0617         final String first = readToken();

0618         if (!"{".equals(first)) {

0619             addWarning(LatexErrorCodes.BRACKET_START_NOT_FOUND_CODE,

0620                     LatexErrorCodes.BRACKET_START_NOT_FOUND_TEXT,

0621                     localStart, input.getAbsolutePosition());

0622             throw new IllegalArgumentException("\"{\" expected, but was: \"" + first + "\"");

0623         }

0624         final int curlyStart = input.getAbsolutePosition();

0625         int curlyEnd = curlyStart;

0626         final StringBuffer buffer = new StringBuffer();

0627         String next = "";

0628         int level = 1;

0629         while (level > 0 && getChar() != TextInput.EOF) {

0630             next = readToken();

0631             if ("{".equals(next)) {

0632                 level++;

0633             } else if ("}".equals(next)) {

0634                 level--;

0635             }

0636             if (level <= 0) {

0637                 break;

0638             }

0639             buffer.append(next);

0640             curlyEnd = input.getAbsolutePosition();

0641         }

0642         if (!"}".equals(next)) {

0643             addWarning(LatexErrorCodes.BRACKET_END_NOT_FOUND_CODE,

0644                 LatexErrorCodes.BRACKET_END_NOT_FOUND_TEXT,

0645                 localStart, input.getAbsolutePosition());

0646             buffer.setLength(0);

0647             input.setAbsolutePosition(curlyStart);

0648             curlyEnd = curlyStart;

0649         }

0650         return input.getSubTextInput(curlyStart, curlyEnd);

0651     }

0652 

0653     /**

0654      * Print <code>token</code> to output stream.

0655      *

0656      * @param   token    Print this for UTF-8.

0657      */

0658     private final void print(final String token) {

0659 //        System.out.println("> " + token);

0660         if (token.trim().length() == 0) {

0661             if (skipWhitespace) {

0662                 return;

0663             }

0664         }

0665         skipWhitespace = false;

0666         if (token.equals("\\par")) {

0667             println();

0668             println();

0669             skipWhitespace = true;

0670         } else if (token.equals("\\\\")) {

0671             println();

0672         } else if (token.equals("&")) {

0673             output.addWs(" ");

0674         } else if (token.equals("\\-")) {

0675             // ignore

0676         } else if (token.equals("--")) {

0677             output.addToken("\u2012");

0678         } else if (token.equals("`")) {

0679             output.addWs("\u2018");

0680         } else if (token.equals("'")) {

0681             output.addToken("\u2019");

0682         } else if (token.equals("\\neq")) {

0683             output.addToken("\u2260");

0684         } else if (token.equals("\\in")) {

0685             output.addToken("\u2208");

0686         } else if (token.equals("\\forall")) {

0687             output.addToken("\u2200");

0688         } else if (token.equals("\\exists")) {

0689             output.addToken("\u2203");

0690         } else if (token.equals("\\emptyset")) {

0691             output.addToken("\u2205");

0692         } else if (token.equals("\\rightarrow")) {

0693             output.addToken("\u2192");

0694         } else if (token.equals("\\Rightarrow")) {

0695             output.addToken("\u21D2");

0696         } else if (token.equals("\\leftrightarrow")) {

0697             output.addToken("\u2194");

0698         } else if (token.equals("\\Leftarrow")) {

0699             output.addToken("\u21D0");

0700         } else if (token.equals("\\Leftrightarrow")) {

0701             output.addToken("\u21D4");

0702         } else if (token.equals("\\langle")) {

0703             output.addToken("\u2329");

0704         } else if (token.equals("\\rangle")) {

0705             output.addToken("\u232A");

0706         } else if (token.equals("\\land") || token.equals("\\vee")) {

0707             output.addToken("\u2227");

0708         } else if (token.equals("\\lor") || token.equals("\\wedge")) {

0709             output.addToken("\u2228");

0710         } else if (token.equals("\\bar")) {

0711             output.addToken("\u203E");

0712         } else if (token.equals("\\bigcap")) {

0713             output.addToken("\u22C2");

0714         } else if (token.equals("\\cap")) {

0715             output.addToken("\u2229");

0716         } else if (token.equals("\\bigcup")) {

0717             output.addToken("\u22C3");

0718         } else if (token.equals("\\cup")) {

0719             output.addToken("\u222A");

0720         } else if (token.equals("\\in")) {

0721             output.addToken("\u2208");

0722         } else if (token.equals("\\notin")) {

0723             output.addToken("\u2209");

0724         } else if (token.equals("\\Alpha")) {

0725             output.addToken("\u0391");

0726         } else if (token.equals("\\alpha")) {

0727             output.addToken("\u03B1");

0728         } else if (token.equals("\\Beta")) {

0729             output.addToken("\u0392");

0730         } else if (token.equals("\\beta")) {

0731             output.addToken("\u03B2");

0732         } else if (token.equals("\\Gamma")) {

0733             output.addToken("\u0393");

0734         } else if (token.equals("\\gamma")) {

0735             output.addToken("\u03B3");

0736         } else if (token.equals("\\Delta")) {

0737             output.addToken("\u0394");

0738         } else if (token.equals("\\delta")) {

0739             output.addToken("\u03B4");

0740         } else if (token.equals("\\Epslilon")) {

0741             output.addToken("\u0395");

0742         } else if (token.equals("\\epsilon")) {

0743             output.addToken("\u03B5");

0744         } else if (token.equals("\\Zeta")) {

0745             output.addToken("\u0396");

0746         } else if (token.equals("\\zeta")) {

0747             output.addToken("\u03B6");

0748         } else if (token.equals("\\Eta")) {

0749             output.addToken("\u0397");

0750         } else if (token.equals("\\eta")) {

0751             output.addToken("\u03B7");

0752         } else if (token.equals("\\Theta")) {

0753             output.addToken("\u0398");

0754         } else if (token.equals("\\theta")) {

0755             output.addToken("\u03B8");

0756         } else if (token.equals("\\Iota")) {

0757             output.addToken("\u0399");

0758         } else if (token.equals("\\iota")) {

0759             output.addToken("\u03B9");

0760         } else if (token.equals("\\Kappa")) {

0761             output.addToken("\u039A");

0762         } else if (token.equals("\\kappa")) {

0763             output.addToken("\u03BA");

0764         } else if (token.equals("\\Lamda")) {

0765             output.addToken("\u039B");

0766         } else if (token.equals("\\lamda")) {

0767             output.addToken("\u03BB");

0768         } else if (token.equals("\\Mu")) {

0769             output.addToken("\u039C");

0770         } else if (token.equals("\\mu")) {

0771             output.addToken("\u03BC");

0772         } else if (token.equals("\\Nu")) {

0773             output.addToken("\u039D");

0774         } else if (token.equals("\\nu")) {

0775             output.addToken("\u03BD");

0776         } else if (token.equals("\\Xi")) {

0777             output.addToken("\u039E");

0778         } else if (token.equals("\\xi")) {

0779             output.addToken("\u03BE");

0780         } else if (token.equals("\\Omikron")) {

0781             output.addToken("\u039F");

0782         } else if (token.equals("\\omikron")) {

0783             output.addToken("\u03BF");

0784         } else if (token.equals("\\Pi")) {

0785             output.addToken("\u03A0");

0786         } else if (token.equals("\\pi")) {

0787             output.addToken("\u03C0");

0788         } else if (token.equals("\\Rho")) {

0789             output.addToken("\u03A1");

0790         } else if (token.equals("\\rho")) {

0791             output.addToken("\u03C1");

0792         } else if (token.equals("\\Sigma")) {

0793             output.addToken("\u03A3");

0794         } else if (token.equals("\\sigma")) {

0795             output.addToken("\u03C3");

0796         } else if (token.equals("\\Tau")) {

0797             output.addToken("\u03A4");

0798         } else if (token.equals("\\tau")) {

0799             output.addToken("\u03C4");

0800         } else if (token.equals("\\Upsilon")) {

0801             output.addToken("\u03A5");

0802         } else if (token.equals("\\upsilon")) {

0803             output.addToken("\u03C5");

0804         } else if (token.equals("\\Phi")) {

0805             output.addToken("\u03A6");

0806         } else if (token.equals("\\phi")) {

0807             output.addToken("\u03C6");

0808         } else if (token.equals("\\Chi")) {

0809             output.addToken("\u03A6");

0810         } else if (token.equals("\\chi")) {

0811             output.addToken("\u03C7");

0812         } else if (token.equals("\\Psi")) {

0813             output.addToken("\u03A8");

0814         } else if (token.equals("\\psi")) {

0815             output.addToken("\u03C8");

0816         } else if (token.equals("\\Omega")) {

0817             output.addToken("\u03A9");

0818         } else if (token.equals("\\omega")) {

0819             output.addToken("\u03C9");

0820         } else if (token.equals("\\subset")) {

0821             output.addToken("\u2282");

0822         } else if (token.equals("\\supset")) {

0823             output.addToken("\u2283");

0824         } else if (token.equals("\\subseteq")) {

0825             output.addToken("\u2286");

0826         } else if (token.equals("\\supseteq")) {

0827             output.addToken("\u2287");

0828         } else if (token.equals("\\{")) {

0829             output.addToken("{");

0830         } else if (token.equals("\\}")) {

0831             output.addToken("}");

0832         } else if (token.equals("\\&")) {

0833             output.addToken("&");

0834         } else if (token.equals("\\ ")) {

0835             output.addWs(" ");

0836         } else if (token.equals("\\S")) {

0837             output.addToken("\u00A7");

0838         } else if (token.equals("\\tt")) {

0839             // ignore

0840         } else if (token.equals("\\tiny")) {

0841             // ignore

0842         } else if (token.equals("\\nonumber")) {

0843             // ignore

0844         } else if (token.equals("\\LaTeX")) {

0845             output.addToken("LaTeX");

0846         } else if (token.equals("\\vdash")) {

0847             output.addToken("\u22A2");

0848         } else if (token.equals("\\dashv")) {

0849             output.addToken("\u22A3");

0850         } else if (token.equals("\\times")) {

0851             output.addToken("\u00D7");

0852         } else if (token.equals("~")) {

0853             output.addToken("\u00A0");

0854         } else if (token.equals("\\quad")) {

0855 //            output.addWs("\u2000");

0856             output.addWs(" ");

0857         } else if (token.equals("\\qquad")) {

0858 //            output.addWs("\u2000\u2000");

0859             output.addWs("  ");

0860         } else if (token.equals("\\,")) {

0861 //            output.addWs("\u2009");

0862             output.addWs(" ");

0863         } else if (token.equals("\\neg") || token.equals("\\not")) {

0864             output.addToken("\u00AC");

0865         } else if (token.equals("\\bot")) {

0866             output.addToken("\u22A5");

0867         } else if (token.equals("\\top")) {

0868             output.addToken("\u22A4");

0869         } else if (token.equals("''") || token.equals("\\grqq")) {

0870             output.addToken("\u201D");

0871         } else if (token.equals("``") || token.equals("\\glqq")) {

0872             skipWhitespace = true;

0873             output.addToken("\u201E");

0874         } else if (token.equals("\\ldots")) {

0875             output.addToken("...");

0876         } else if (token.equals("\\overline")) {    // TODO 20101018 m31: we assume set complement

0877             output.addToken("\u2201");

0878         } else if (token.startsWith("\\")) {

0879             addWarning(LatexErrorCodes.COMMAND_NOT_SUPPORTED_CODE,

0880                 LatexErrorCodes.COMMAND_NOT_SUPPORTED_TEXT + token, tokenBegin, tokenEnd);

0881         } else {

0882             if (mathfrak) {

0883                 mathfrak(token);

0884             } else if (mathbb) {

0885                 mathbb(token);

0886             } else if (emph) {

0887                 emph(token);

0888             } else if (bold) {

0889                 bold(token);

0890             } else {

0891                 if (isWs(token)) {

0892                     output.addWs(token);

0893                 } else {

0894                     output.addToken(token);

0895                 }

0896             }

0897         }

0898     }

0899 

0900     /**

0901      * Write token chars in mathbb mode.

0902      *

0903      * @param   token   Chars to write.

0904      */

0905     private void emph(final String token) {

0906         if (isWs(token)) {

0907             output.addWs(Latex2UnicodeSpecials.transform2Emph(token));

0908         } else {

0909             output.addToken(Latex2UnicodeSpecials.transform2Emph(token));

0910         }

0911     }

0912 

0913     /**

0914      * Write token chars in mathbb mode.

0915      *

0916      * @param   token   Chars to write.

0917      */

0918     private void mathbb(final String token) {

0919         for (int i = 0; i < token.length(); i++) {

0920             final char c = token.charAt(i);

0921             switch (c) {

0922             case 'C': output.addToken("\u2102");

0923                 break;

0924             case 'H': output.addToken("\u210D");

0925                 break;

0926             case 'N': output.addToken("\u2115");

0927                 break;

0928             case 'P': output.addToken("\u2119");

0929                 break;

0930             case 'Q': output.addToken("\u211A");

0931                 break;

0932             case 'R': output.addToken("\u211D");

0933                 break;

0934             case 'Z': output.addToken("\u2124");

0935                 break;

0936             default:

0937                 if (Character.isWhitespace(c)) {

0938                     output.addWs("" + c);

0939                 } else {

0940                     output.addToken("" + c);

0941                 }

0942             }

0943         }

0944     }

0945 

0946     private boolean isWs(final String token) {

0947         return token == null || token.trim().length() == 0;

0948     }

0949 

0950     /**

0951      * Write token chars in mathfrak mode.

0952      *

0953      * @param   token   Chars to write.

0954      */

0955     private void mathfrak(final String token) {

0956         if (isWs(token)) {

0957             output.addWs(Latex2UnicodeSpecials.transform2Mathfrak(token));

0958         } else {

0959             output.addToken(Latex2UnicodeSpecials.transform2Mathfrak(token));

0960         }

0961     }

0962 

0963     /**

0964      * Write token in bold mode.

0965      *

0966      * @param   token   Chars to write.

0967      */

0968     private void bold(final String token) {

0969         if (isWs(token)) {

0970             output.addWs(Latex2UnicodeSpecials.transform2Bold(token));

0971         } else {

0972             output.addToken(Latex2UnicodeSpecials.transform2Bold(token));

0973         }

0974     }

0975 

0976     /**

0977      * Print end of line.

0978      */

0979     private final void println() {

0980         output.println();

0981     }

0982 

0983     /**

0984      * Reads a single character and does not change the reading

0985      * position.

0986      *

0987      * @return  character read, if there are no more chars

0988      *          <code>-1</code> is returned

0989      */

0990     protected final int getChar() {

0991         return input.getChar();

0992     }

0993 

0994     /**

0995      * Reads a single character and increments the reading position

0996      * by one.

0997      *

0998      * @return  character read, if there are no more chars

0999      *          <code>-1</code> is returned

1000      */

1001     protected final int read() {

1002         return input.read();

1003     }

1004 

1005     /**

1006      * Read until end of line.

1007      *

1008      * @return  Characters read.

1009      */

1010     protected final String readln() {

1011         StringBuffer result = new StringBuffer();

1012         int c;

1013         while (TextInput.EOF != (c = read())) {

1014             if (c == '\n') {

1015                 break;

1016             }

1017             result.append((char) c);

1018         }

1019         return result.toString();

1020     }

1021 

1022     /**

1023      * Are there still any characters to read?

1024      *

1025      * @return  Anything left for reading further?

1026      */

1027     public final boolean eof() {

1028         return input.isEmpty();

1029     }

1030 

1031     /**

1032      * Convert character position into row and column information.

1033      *

1034      * @param   absolutePosition    Find this character position.

1035      * @return  Row and column information.

1036      */

1037     public SourcePosition getAbsoluteSourcePosition(final int absolutePosition) {

1038         return ((SubTextInput) inputStack.get(0)).getPosition(absolutePosition);

1039     }

1040 

1041     /**

1042      * Add warning message.

1043      *

1044      * @param   code    Message code.

1045      * @param   message Message.

1046      * @param   from    Absolute character position of problem start.

1047      * @param   to      Absolute character position of problem end.

1048      */

1049     private void addWarning(final int code, final String message, final int from, final int to) {

1050         finder.addWarning(code, message, getAbsoluteSourcePosition(from),

1051             getAbsoluteSourcePosition(to));

1052     }

1053 

1054 

1055 

1056 }