Latex2UnicodeParser.java


0001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org

0002  *

0003  * Copyright 2000-2013,  Michael Meyling <mime@qedeq.org>.

0004  *

0005  * "Hilbert II" is free software; you can redistribute

0006  * it and/or modify it under the terms of the GNU General Public

0007  * License as published by the Free Software Foundation; either

0008  * version 2 of the License, or (at your option) any later version.

0009  *

0010  * This program is distributed in the hope that it will be useful,

0011  * but WITHOUT ANY WARRANTY; without even the implied warranty of

0012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

0013  * GNU General Public License for more details.

0014  */

0015 

0016 package org.qedeq.kernel.bo.service.unicode;

0017 

0018 import java.util.Stack;

0019 

0020 import org.qedeq.base.io.AbstractOutput;

0021 import org.qedeq.base.io.SourcePosition;

0022 import org.qedeq.base.io.StringOutput;

0023 import org.qedeq.base.io.SubTextInput;

0024 import org.qedeq.base.io.TextInput;

0025 import org.qedeq.base.trace.Trace;

0026 import org.qedeq.kernel.bo.service.latex.LatexErrorCodes;

0027 

0028 /**

0029  * Transform LaTeX into Unicode format.

0030  *

0031  * @author  Michael Meyling

0032  */

0033 public final class Latex2UnicodeParser {

0034 

0035     /** This class. */

0036     private static final Class CLASS = Latex2UnicodeParser.class;

0037 

0038     /** These characters get a special treatment in LaTeX. */

0039     private static final String SPECIALCHARACTERS = "(),{}\\~%$&\'`^_-";

0040 

0041     /** Herein goes our output. */

0042     private final AbstractOutput output;

0043 

0044     /** Resolver for references. */

0045     private final ReferenceFinder finder;

0046 

0047     /** This is our current input stream. */

0048     private SubTextInput input;

0049 

0050     /** Math mode on? */

0051     private boolean mathMode = false;

0052 

0053     /** Mathfrak mode on? */

0054     private boolean mathfrak = false;

0055 

0056     /** Emphasize on? */

0057     private boolean emph = false;

0058 

0059     /** Bold on? */

0060     private boolean bold = false;

0061 

0062     /** Mathbb on? */

0063     private boolean mathbb = false;

0064 

0065     /** Stack for input parser. */

0066     private Stack inputStack = new Stack();

0067 

0068     /** Stack for math mode. */

0069     private Stack mathModeStack = new Stack();

0070 

0071     /** Stack for mathfrak mode. */

0072     private Stack mathfrakStack = new Stack();

0073 

0074     /** Stack for emphasize mode. */

0075     private Stack emphStack = new Stack();

0076 

0077     /** Stack for bold mode. */

0078     private Stack boldStack = new Stack();

0079 

0080     /** Stack for mathbb mode. */

0081     private Stack mathbbStack = new Stack();

0082 

0083     /** Stack for skipWhitespace mode. */

0084     private Stack skipWhitespaceStack = new Stack();

0085 

0086     /** Should I skip whitespace before printing the next token. */

0087     private boolean skipWhitespace;

0088 

0089     /** Here the last read token begins. This is an absolute position. */

0090     private int tokenBegin;

0091 

0092     /** Here the last read token ends. This is an absolute position. */

0093     private int tokenEnd;

0094 

0095     /** Current item number. */

0096     private int itemNumber;

0097 

0098     /**

0099      * Parse LaTeX text into QEDEQ module string.

0100      *

0101      * @param   finder  Finder for references.

0102      * @param   input   Parse this input.

0103      * @param   columns Maximum column number. Break (if possible) before.

0104      * @return  QEDEQ module string.

0105      */

0106     public static final String transform(final ReferenceFinder finder, final String input,

0107             final int columns) {

0108         final Latex2UnicodeParser parser = new Latex2UnicodeParser(finder);

0109         parser.output.setColumns(columns);

0110         return parser.getUtf8(input);

0111     }

0112 

0113     /**

0114      * Constructor.

0115      *

0116      * @param   finder  Finder for references.

0117      */

0118     private Latex2UnicodeParser(final ReferenceFinder finder) {

0119         // use dummy implementation if finder is null

0120         if (finder == null) {

0121             this.finder = new ReferenceFinder() {

0122                 public String getReferenceLink(final String reference,

0123                         final SourcePosition startDelta, final SourcePosition endDelta) {

0124                     return "[" + reference + "]";

0125                 }

0126 

0127                 public void addWarning(final int code, final String msg,

0128                         final SourcePosition startDelta, final SourcePosition endDelta) {

0129                 }

0130             };

0131         } else {

0132             this.finder = finder;

0133         }

0134         this.output = new StringOutput();

0135     }

0136 

0137     /**

0138      * Get UTF-8 String out of LaTeX text.

0139      *

0140      * @param   text    LaTeX.

0141      * @return  UTF-8.

0142      */

0143     private String getUtf8(final String text) {

0144         skipWhitespace = true;

0145         this.input = new SubTextInput(text);

0146         parseAndPrint(this.input);

0147         return output.toString();

0148     }

0149 

0150     /**

0151      * Do parsing and print result.

0152      *

0153      * @param   input   Parse this LaTeX text and print UTF-8 into output.

0154      */

0155     private void parseAndPrint(final SubTextInput input) {

0156         // remember old:

0157         inputStack.push(this.input);

0158         mathModeStack.push(Boolean.valueOf(mathMode));

0159         mathfrakStack.push(Boolean.valueOf(mathfrak));

0160         emphStack.push(Boolean.valueOf(emph));

0161         boldStack.push(Boolean.valueOf(bold));

0162         mathbbStack.push(Boolean.valueOf(mathbb));

0163         skipWhitespaceStack.push(Boolean.valueOf(skipWhitespace));

0164         try {

0165             this.input = input;

0166             boolean whitespace = false;

0167             while (!eof()) {

0168                 String token = readToken();

0169                 if (!token.startsWith("\\")) {

0170                     token = token.trim();

0171                 }

0172                 if (token.length() == 0) {

0173                     whitespace = true;

0174                     continue;

0175                 }

0176                 if (whitespace && !"\\par".equals(token)) {

0177                     print(" ");

0178                     whitespace = false;

0179                 }

0180                 if ("\\begin".equals(token)) {

0181                     parseBegin();

0182                 } else if ("\\footnote".equals(token)) {

0183                     parseFootnote();

0184                 } else if ("\\qref".equals(token)) {

0185                     parseQref();

0186                 } else if ("$$".equals(token)) {

0187                     mathMode = true;

0188                     final SubTextInput content = readTilToken(token);

0189                     println();

0190                     parseAndPrint(content);

0191                     println();

0192                     mathMode = false;

0193                 } else if ("$".equals(token)) {

0194                     mathMode = true;

0195                     final SubTextInput content = readTilToken(token);

0196                     parseAndPrint(content);

0197                     mathMode = false;

0198                 } else if ("\\mathfrak".equals(token)) {

0199                     if ('{' == getChar()) {

0200                         mathfrak = true;

0201                         final SubTextInput content = readCurlyBraceContents();

0202                         parseAndPrint(content);

0203                         mathfrak = false;

0204                     } else {

0205                         mathfrak = true;

0206                     }

0207                 } else if ("\\mathbb".equals(token)) {

0208                     if ('{' == getChar()) {

0209                         mathbb = true;

0210                         final SubTextInput content = readCurlyBraceContents();

0211                         parseAndPrint(content);

0212                         mathbb = false;

0213                     } else {

0214                         mathbb = true;

0215                     }

0216                 } else if ("\\emph".equals(token)) {

0217                     if ('{' == getChar()) {

0218                         emph = true;

0219                         final SubTextInput content = readCurlyBraceContents();

0220                         parseAndPrint(content);

0221 //                        output.addWs("\u2006");

0222                         output.addWs(" ");

0223                         emph = false;

0224                     } else {

0225                         emph = true;

0226                     }

0227                 } else if ("\\textbf".equals(token)) {

0228                     if ('{' == getChar()) {

0229                         bold = true;

0230                         final SubTextInput content = readCurlyBraceContents();

0231                         parseAndPrint(content);

0232                         bold = false;

0233                     } else {

0234                         bold = true;

0235                     }

0236                 } else if ("\\cite".equals(token)) {

0237                     if ('{' == getChar()) {

0238                         final SubTextInput content = readCurlyBraceContents();

0239                         output.addToken("[" + content.asString() + "]");

0240                     }

0241                 } else if ("\\tag".equals(token)) {

0242                     if ('{' == getChar()) {

0243                         final SubTextInput content = readCurlyBraceContents();

0244                         output.addToken("(" + content.asString() + ")");

0245                     }

0246                 } else if ("\\mbox".equals(token)) {

0247                     if ('{' == getChar()) {

0248                         final SubTextInput content = readCurlyBraceContents();

0249                         parseAndPrint(content);

0250                     }

0251                 } else if ("\\cline".equals(token)) {

0252                     if ('{' == getChar()) {

0253                         readCurlyBraceContents();

0254                         // ignore

0255                     }

0256                     output.addToken("_______________________________________");

0257                     println();

0258                 } else if ("\\item".equals(token)) {

0259                     output.popLevel(3);

0260                     itemNumber++;

0261                     output.println();

0262                     output.addToken(itemNumber + ".");

0263                     output.addWs("");

0264                     output.pushLevel("   ");

0265                     output.setTabLevel();

0266                 } else if ("{".equals(token)) {

0267                     input.readInverse();

0268                     final SubTextInput content = readCurlyBraceContents();

0269                     parseAndPrint(content);

0270                 } else if ("\\url".equals(token)) {

0271                     final SubTextInput content = readCurlyBraceContents();

0272                     output.addToken(" " + content.asString() + " ");

0273                 } else if ('{' == getChar() && ("\\index".equals(token) || "\\label".equals(token)

0274                         || token.equals("\\vspace") || token.equals("\\hspace")

0275                         || token.equals("\\vspace*") || token.equals("\\hspace*"))) {

0276                     // ignore content

0277                     readCurlyBraceContents();

0278                 } else if ("_".equals(token) || "^".equals(token)) {

0279                     if (mathMode) {

0280                         String content;

0281                         if ('{' == getChar()) {

0282                             content = readCurlyBraceContents().asString();

0283                         } else {

0284                             content = readToken();

0285                         }

0286                         if ("_".equals(token)) {

0287                             printSubscript(content);

0288                         } else {

0289                             printSuperscript(content);

0290                         }

0291                     } else {

0292                         print(token);

0293                     }

0294                 } else {

0295                     print(token);

0296                 }

0297             }

0298         } finally {

0299             this.input = (SubTextInput) inputStack.pop();

0300             mathMode = ((Boolean) mathModeStack.pop()).booleanValue();

0301             mathfrak = ((Boolean) mathfrakStack.pop()).booleanValue();

0302             emph = ((Boolean) emphStack.pop()).booleanValue();

0303             bold = ((Boolean) boldStack.pop()).booleanValue();

0304             skipWhitespace = ((Boolean) skipWhitespaceStack.pop()).booleanValue();

0305             output.flush();

0306         }

0307     }

0308 

0309     /**

0310      * Parse after \footnote.

0311      */

0312     private void parseFootnote() {

0313         if ('{' == getChar()) {

0314             final SubTextInput content = readCurlyBraceContents();

0315             println();

0316             output.printWithoutSplit("          \u250C");

0317             output.pushLevel();

0318             output.pushLevel();

0319             output.pushLevel();

0320             output.pushLevel();

0321             output.pushLevel();

0322             output.pushLevel("\u2502 ");

0323             println();

0324             parseAndPrint(content);

0325             output.popLevel();

0326             output.popLevel();

0327             output.popLevel();

0328             output.popLevel();

0329             output.popLevel();

0330             output.popLevel();

0331             println();

0332             output.printWithoutSplit("          \u2514");

0333             println();

0334         }

0335     }

0336 

0337     /**

0338      * Transform <code>\qref{key}</code> entries into common LaTeX code.

0339      *

0340      * @param   text    Work on this text.

0341      * @return  Result of transforming \qref into text.

0342      */

0343     /**

0344      * Parse after \footnote.

0345      */

0346     private void parseQref() {

0347         final String method = "parseQref()";

0348         final int localStart1 = input.getAbsolutePosition();

0349         if ('{' == getChar()) {

0350             final SubTextInput content = readCurlyBraceContents();

0351             String ref = content.asString().trim();

0352             Trace.param(CLASS, this, method, "ref", ref);

0353             if (ref.length() == 0) {

0354                 addWarning(LatexErrorCodes.QREF_EMPTY_CODE, LatexErrorCodes.QREF_EMPTY_TEXT,

0355                     localStart1, input.getAbsolutePosition());

0356                 return;

0357             }

0358             if (ref.length() > 1024) {

0359                 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,

0360                     LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,

0361                     localStart1, input.getAbsolutePosition());

0362                 return;

0363             }

0364             if (ref.indexOf("{") >= 0) {

0365                 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,

0366                     LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,

0367                     localStart1, input.getAbsolutePosition());

0368                 input.setAbsolutePosition(localStart1);

0369                 return;

0370             }

0371 

0372             String display = finder.getReferenceLink(ref, getAbsoluteSourcePosition(localStart1),

0373                 getAbsoluteSourcePosition(input.getAbsolutePosition()));

0374             output.addToken(display);

0375         }

0376     }

0377 

0378 

0379     /**

0380      * Parse after \begin.

0381      */

0382     private void parseBegin() {

0383         final String kind = readCurlyBraceContents().asString();   // ignore

0384         final SubTextInput content = readSection(kind);

0385         if ("eqnarray".equals(kind)

0386             || "eqnarray*".equals(kind)

0387             || "equation*".equals(kind)) {

0388             mathMode = true;

0389             skipWhitespace = false;

0390             parseAndPrint(content);

0391             println();

0392             mathMode = false;

0393         } else if ("quote".equals(kind)) {

0394             output.pushLevel();

0395             output.pushLevel();

0396             output.pushLevel();

0397             println();

0398             parseAndPrint(content);

0399             println();

0400             output.popLevel();

0401             output.popLevel();

0402             output.popLevel();

0403         } else if ("tabularx".equals(kind)) {

0404             skipWhitespace = false;

0405             parseAndPrint(content);

0406         } else if ("enumerate".equals(kind)) {

0407             itemNumber = 0;

0408             output.pushLevel("   ");

0409             parseAndPrint(content);

0410             output.popLevel(3);

0411         } else if ("verbatim".equals(kind)) {

0412             final String level = output.getLevel();

0413             output.setLevel("");

0414             print(content.asString());

0415             output.setLevel(level);

0416         } else {

0417             parseAndPrint(content);

0418         }

0419     }

0420 

0421     private void printSubscript(final String content) {

0422         output.addToken(Latex2UnicodeSpecials.transform2Subscript(content));

0423     }

0424 

0425     private void printSuperscript(final String content) {

0426         output.addToken(Latex2UnicodeSpecials.transform2Superscript(content));

0427     }

0428 

0429     /**

0430      * Read until section ends with \{kind}.

0431      *

0432      * @param   kind    Look for the end of this.

0433      * @return  Read text.

0434      */

0435     private SubTextInput readSection(final String kind) {

0436         if ('{' == getChar()) { // skip content

0437             readCurlyBraceContents();

0438         }

0439         if ('{' == getChar()) { // skip content

0440             readCurlyBraceContents();

0441         }

0442         final int localStart = input.getAbsolutePosition();

0443         int current = localStart;

0444         do {

0445             current = input.getAbsolutePosition();

0446             final String item = readToken();

0447             if (item == null) {

0448                 Trace.fatal(CLASS, this, "readSection", "not found: " + "\\end{" + kind + "}",

0449                     new IllegalArgumentException("from " + localStart + " to " + input.getAbsolutePosition()

0450                     + input.getPosition()));

0451                 break;

0452             }

0453             if ("\\end".equals(item)) {

0454                 final String curly2 = readCurlyBraceContents().asString();

0455                 if (kind.equals(curly2)) {

0456                     break;

0457                 }

0458             }

0459         } while (true);

0460         return input.getSubTextInput(localStart, current);

0461     }

0462 

0463     /**

0464      * Get text till <code>token</code> occurs.

0465      *

0466      * @param   token   Terminator token.

0467      * @return  Read text before token.

0468      */

0469     private SubTextInput readTilToken(final String token) {

0470         final int localStart = input.getAbsolutePosition();

0471         final StringBuffer buffer = new StringBuffer();

0472         int current = localStart;

0473         do {

0474             current = input.getAbsolutePosition();

0475             final String item = readToken();

0476             if (item == null) {

0477                 Trace.fatal(CLASS, this, "readSection", "not found: " + token,

0478                     new IllegalArgumentException("from " + localStart + " to " + current

0479                     + input.getAbsolutePosition()));

0480                 break;

0481             }

0482             if (token.equals(item)) {

0483                 break;

0484             } else {

0485                 buffer.append(item);

0486             }

0487         } while (true);

0488         return input.getSubTextInput(localStart, current);

0489     }

0490 

0491     /**

0492      * Read next token from input stream.

0493      *

0494      * @return  Read token.

0495      */

0496     protected final String readToken() {

0497         final String method = "readToken()";

0498         Trace.begin(CLASS, this, method);

0499         tokenBegin = input.getAbsolutePosition();

0500         StringBuffer token = new StringBuffer();

0501         try {

0502             do {

0503                 if (eof()) {

0504                     if (token.length() <= 0) {

0505                         token = null;

0506                     }

0507                     break;

0508                 }

0509                 final char c = (char) getChar();

0510                 if (Character.isDigit(c)) {

0511                     token.append((char) read());

0512                     if (Character.isDigit((char) getChar())) {

0513                         continue;

0514                     }

0515                     break;

0516                 }

0517                 if (Character.isLetter((char) c)) {

0518                     token.append((char) read());

0519                     if (Character.isLetter((char) getChar())) {

0520                         continue;

0521                     }

0522                     break;

0523                 }

0524                 if (SPECIALCHARACTERS.indexOf(c) >= 0) {

0525                     switch (c) {

0526                     case '&':

0527                     case '{':

0528                     case '}':

0529                     case '~':

0530                     case '_':

0531                     case '^':

0532                         token.append((char) read());

0533                         break;

0534                     case '$':

0535                     case '\'':

0536                     case '`':

0537                     case '-':

0538                         token.append((char) read());

0539                         if (c == getChar()) {

0540                             continue;

0541                         }

0542                         break;

0543                     case '%':

0544                         token.append((char) read());

0545                         if (c == getChar()) {

0546                             // we must skip till end of line

0547                             token.append(readln());

0548 //                            System.out.println("skipping comment:");

0549 //                            System.out.println(token);

0550                             token.setLength(0);

0551                             continue;

0552                         }

0553                         break;

0554                     case '\\':

0555                         if (' ' == getChar()) {

0556                             token.append("\\");

0557                             token.append((char) read());

0558                             break;

0559                         }

0560                         final String t = readBackslashToken();

0561                         token.append(t);

0562                         break;

0563                     default:

0564                         read();

0565                         token.append((char) c);

0566                     }

0567                     break;

0568                 }

0569                 token.append((char) read());

0570                 if ('_' == getChar() || '^' == getChar()) {

0571                     token.append((char) read());

0572                     continue;

0573                 }

0574                 break;

0575             } while (!eof());

0576             Trace.param(CLASS, this, method, "Read token", token);

0577 //            System.out.println("< " + token);

0578             tokenEnd = input.getAbsolutePosition();

0579             return (token != null ? token.toString() : null);

0580         } finally {

0581             Trace.end(CLASS, this, method);

0582         }

0583     }

0584 

0585     /**

0586      * Get token that starts with a backlash.

0587      *

0588      * @return  Token with backslash.

0589      */

0590     private String readBackslashToken() {

0591         final String method = "readBackslashToken()";

0592         Trace.begin(CLASS, this, method);

0593         if (getChar() != '\\') {

0594             throw new IllegalArgumentException("\\ expected");

0595         }

0596         read(); // read \

0597         if (eof()) {

0598             Trace.param(CLASS, this, method, "return", null);

0599             Trace.end(CLASS, this, method);

0600             return null;

0601         }

0602         if (!Character.isLetter((char) getChar())) {

0603             Trace.param(CLASS, this, method, "return", (char) getChar());

0604             Trace.end(CLASS, this, method);

0605             return "\\" + ((char) read());

0606         }

0607         final StringBuffer buffer = new StringBuffer("\\");

0608         do {

0609             buffer.append((char) read());

0610         } while (!eof() && (Character.isLetter((char) getChar()) || '*' == (char) getChar()));

0611         Trace.param(CLASS, this, method, "return", buffer.toString());

0612         Trace.end(CLASS, this, method);

0613         return buffer.toString();

0614     }

0615 

0616     /**

0617      * Read contents that is within { .. }.

0618      *

0619      * @return  Contents.

0620      */

0621     private SubTextInput readCurlyBraceContents() {

0622         final int localStart = input.getAbsolutePosition();

0623         final String first = readToken();

0624         if (!"{".equals(first)) {

0625             addWarning(LatexErrorCodes.BRACKET_START_NOT_FOUND_CODE,

0626                     LatexErrorCodes.BRACKET_START_NOT_FOUND_TEXT,

0627                     localStart, input.getAbsolutePosition());

0628             throw new IllegalArgumentException("\"{\" expected, but was: \"" + first + "\"");

0629         }

0630         final int curlyStart = input.getAbsolutePosition();

0631         int curlyEnd = curlyStart;

0632         final StringBuffer buffer = new StringBuffer();

0633         String next = "";

0634         int level = 1;

0635         while (level > 0 && getChar() != TextInput.EOF) {

0636             next = readToken();

0637             if ("{".equals(next)) {

0638                 level++;

0639             } else if ("}".equals(next)) {

0640                 level--;

0641             }

0642             if (level <= 0) {

0643                 break;

0644             }

0645             buffer.append(next);

0646             curlyEnd = input.getAbsolutePosition();

0647         }

0648         if (!"}".equals(next)) {

0649             addWarning(LatexErrorCodes.BRACKET_END_NOT_FOUND_CODE,

0650                 LatexErrorCodes.BRACKET_END_NOT_FOUND_TEXT,

0651                 localStart, input.getAbsolutePosition());

0652             buffer.setLength(0);

0653             input.setAbsolutePosition(curlyStart);

0654             curlyEnd = curlyStart;

0655         }

0656         return input.getSubTextInput(curlyStart, curlyEnd);

0657     }

0658 

0659     /**

0660      * Print <code>token</code> to output stream.

0661      *

0662      * @param   token    Print this for UTF-8.

0663      */

0664     private final void print(final String token) {

0665 //        System.out.println("> " + token);

0666         if (token.trim().length() == 0) {

0667             if (skipWhitespace) {

0668                 return;

0669             }

0670         }

0671         skipWhitespace = false;

0672         if (token.equals("\\par")) {

0673             println();

0674             println();

0675             skipWhitespace = true;

0676         } else if (token.equals("\\\\")) {

0677             println();

0678         } else if (token.equals("&")) {

0679             output.addWs(" ");

0680         } else if (token.equals("\\-")) {

0681             // ignore

0682         } else if (token.equals("--")) {

0683             output.addToken("\u2012");

0684         } else if (token.equals("`")) {

0685             output.addWs("\u2018");

0686         } else if (token.equals("'")) {

0687             output.addToken("\u2019");

0688         } else if (token.equals("\\neq")) {

0689             output.addToken("\u2260");

0690         } else if (token.equals("\\in")) {

0691             output.addToken("\u2208");

0692         } else if (token.equals("\\forall")) {

0693             output.addToken("\u2200");

0694         } else if (token.equals("\\exists")) {

0695             output.addToken("\u2203");

0696         } else if (token.equals("\\emptyset")) {

0697             output.addToken("\u2205");

0698         } else if (token.equals("\\rightarrow")) {

0699             output.addToken("\u2192");

0700         } else if (token.equals("\\Rightarrow")) {

0701             output.addToken("\u21D2");

0702         } else if (token.equals("\\leftrightarrow")) {

0703             output.addToken("\u2194");

0704         } else if (token.equals("\\Leftarrow")) {

0705             output.addToken("\u21D0");

0706         } else if (token.equals("\\Leftrightarrow")) {

0707             output.addToken("\u21D4");

0708         } else if (token.equals("\\langle")) {

0709             output.addToken("\u2329");

0710         } else if (token.equals("\\rangle")) {

0711             output.addToken("\u232A");

0712         } else if (token.equals("\\land") || token.equals("\\vee")) {

0713             output.addToken("\u2227");

0714         } else if (token.equals("\\lor") || token.equals("\\wedge")) {

0715             output.addToken("\u2228");

0716         } else if (token.equals("\\bar")) {

0717             output.addToken("\u203E");

0718         } else if (token.equals("\\bigcap")) {

0719             output.addToken("\u22C2");

0720         } else if (token.equals("\\cap")) {

0721             output.addToken("\u2229");

0722         } else if (token.equals("\\bigcup")) {

0723             output.addToken("\u22C3");

0724         } else if (token.equals("\\cup")) {

0725             output.addToken("\u222A");

0726         } else if (token.equals("\\in")) {

0727             output.addToken("\u2208");

0728         } else if (token.equals("\\notin")) {

0729             output.addToken("\u2209");

0730         } else if (token.equals("\\Alpha")) {

0731             output.addToken("\u0391");

0732         } else if (token.equals("\\alpha")) {

0733             output.addToken("\u03B1");

0734         } else if (token.equals("\\Beta")) {

0735             output.addToken("\u0392");

0736         } else if (token.equals("\\beta")) {

0737             output.addToken("\u03B2");

0738         } else if (token.equals("\\Gamma")) {

0739             output.addToken("\u0393");

0740         } else if (token.equals("\\gamma")) {

0741             output.addToken("\u03B3");

0742         } else if (token.equals("\\Delta")) {

0743             output.addToken("\u0394");

0744         } else if (token.equals("\\delta")) {

0745             output.addToken("\u03B4");

0746         } else if (token.equals("\\Epslilon")) {

0747             output.addToken("\u0395");

0748         } else if (token.equals("\\epsilon")) {

0749             output.addToken("\u03B5");

0750         } else if (token.equals("\\Zeta")) {

0751             output.addToken("\u0396");

0752         } else if (token.equals("\\zeta")) {

0753             output.addToken("\u03B6");

0754         } else if (token.equals("\\Eta")) {

0755             output.addToken("\u0397");

0756         } else if (token.equals("\\eta")) {

0757             output.addToken("\u03B7");

0758         } else if (token.equals("\\Theta")) {

0759             output.addToken("\u0398");

0760         } else if (token.equals("\\theta")) {

0761             output.addToken("\u03B8");

0762         } else if (token.equals("\\Iota")) {

0763             output.addToken("\u0399");

0764         } else if (token.equals("\\iota")) {

0765             output.addToken("\u03B9");

0766         } else if (token.equals("\\Kappa")) {

0767             output.addToken("\u039A");

0768         } else if (token.equals("\\kappa")) {

0769             output.addToken("\u03BA");

0770         } else if (token.equals("\\Lamda")) {

0771             output.addToken("\u039B");

0772         } else if (token.equals("\\lamda")) {

0773             output.addToken("\u03BB");

0774         } else if (token.equals("\\Mu")) {

0775             output.addToken("\u039C");

0776         } else if (token.equals("\\mu")) {

0777             output.addToken("\u03BC");

0778         } else if (token.equals("\\Nu")) {

0779             output.addToken("\u039D");

0780         } else if (token.equals("\\nu")) {

0781             output.addToken("\u03BD");

0782         } else if (token.equals("\\Xi")) {

0783             output.addToken("\u039E");

0784         } else if (token.equals("\\xi")) {

0785             output.addToken("\u03BE");

0786         } else if (token.equals("\\Omikron")) {

0787             output.addToken("\u039F");

0788         } else if (token.equals("\\omikron")) {

0789             output.addToken("\u03BF");

0790         } else if (token.equals("\\Pi")) {

0791             output.addToken("\u03A0");

0792         } else if (token.equals("\\pi")) {

0793             output.addToken("\u03C0");

0794         } else if (token.equals("\\Rho")) {

0795             output.addToken("\u03A1");

0796         } else if (token.equals("\\rho")) {

0797             output.addToken("\u03C1");

0798         } else if (token.equals("\\Sigma")) {

0799             output.addToken("\u03A3");

0800         } else if (token.equals("\\sigma")) {

0801             output.addToken("\u03C3");

0802         } else if (token.equals("\\Tau")) {

0803             output.addToken("\u03A4");

0804         } else if (token.equals("\\tau")) {

0805             output.addToken("\u03C4");

0806         } else if (token.equals("\\Upsilon")) {

0807             output.addToken("\u03A5");

0808         } else if (token.equals("\\upsilon")) {

0809             output.addToken("\u03C5");

0810         } else if (token.equals("\\Phi")) {

0811             output.addToken("\u03A6");

0812         } else if (token.equals("\\phi")) {

0813             output.addToken("\u03C6");

0814         } else if (token.equals("\\Chi")) {

0815             output.addToken("\u03A6");

0816         } else if (token.equals("\\chi")) {

0817             output.addToken("\u03C7");

0818         } else if (token.equals("\\Psi")) {

0819             output.addToken("\u03A8");

0820         } else if (token.equals("\\psi")) {

0821             output.addToken("\u03C8");

0822         } else if (token.equals("\\Omega")) {

0823             output.addToken("\u03A9");

0824         } else if (token.equals("\\omega")) {

0825             output.addToken("\u03C9");

0826         } else if (token.equals("\\subset")) {

0827             output.addToken("\u2282");

0828         } else if (token.equals("\\supset")) {

0829             output.addToken("\u2283");

0830         } else if (token.equals("\\subseteq")) {

0831             output.addToken("\u2286");

0832         } else if (token.equals("\\supseteq")) {

0833             output.addToken("\u2287");

0834         } else if (token.equals("\\{")) {

0835             output.addToken("{");

0836         } else if (token.equals("\\}")) {

0837             output.addToken("}");

0838         } else if (token.equals("\\&")) {

0839             output.addToken("&");

0840         } else if (token.equals("\\ ")) {

0841             output.addWs(" ");

0842         } else if (token.equals("\\S")) {

0843             output.addToken("\u00A7");

0844         } else if (token.equals("\\tt")) {

0845             // ignore

0846         } else if (token.equals("\\tiny")) {

0847             // ignore

0848         } else if (token.equals("\\nonumber")) {

0849             // ignore

0850         } else if (token.equals("\\LaTeX")) {

0851             output.addToken("LaTeX");

0852         } else if (token.equals("\\vdash")) {

0853             output.addToken("\u22A2");

0854         } else if (token.equals("\\dashv")) {

0855             output.addToken("\u22A3");

0856         } else if (token.equals("\\times")) {

0857             output.addToken("\u00D7");

0858         } else if (token.equals("~")) {

0859             output.addToken("\u00A0");

0860         } else if (token.equals("\\quad")) {

0861 //            output.addWs("\u2000");

0862             output.addWs(" ");

0863         } else if (token.equals("\\qquad")) {

0864 //            output.addWs("\u2000\u2000");

0865             output.addWs("  ");

0866         } else if (token.equals("\\,")) {

0867 //            output.addWs("\u2009");

0868             output.addWs(" ");

0869         } else if (token.equals("\\neg") || token.equals("\\not")) {

0870             output.addToken("\u00AC");

0871         } else if (token.equals("\\bot")) {

0872             output.addToken("\u22A5");

0873         } else if (token.equals("\\top")) {

0874             output.addToken("\u22A4");

0875         } else if (token.equals("''") || token.equals("\\grqq")) {

0876             output.addToken("\u201D");

0877         } else if (token.equals("``") || token.equals("\\glqq")) {

0878             skipWhitespace = true;

0879             output.addToken("\u201E");

0880         } else if (token.equals("\\ldots")) {

0881             output.addToken("...");

0882         } else if (token.equals("\\overline")) {    // TODO 20101018 m31: we assume set complement

0883             output.addToken("\u2201");

0884         } else if (token.startsWith("\\")) {

0885             addWarning(LatexErrorCodes.COMMAND_NOT_SUPPORTED_CODE,

0886                 LatexErrorCodes.COMMAND_NOT_SUPPORTED_TEXT + token, tokenBegin, tokenEnd);

0887         } else {

0888             if (mathfrak) {

0889                 mathfrak(token);

0890             } else if (mathbb) {

0891                 mathbb(token);

0892             } else if (emph) {

0893                 emph(token);

0894             } else if (bold) {

0895                 bold(token);

0896             } else {

0897                 if (isWs(token)) {

0898                     output.addWs(token);

0899                 } else {

0900                     output.addToken(token);

0901                 }

0902             }

0903         }

0904     }

0905 

0906     /**

0907      * Write token chars in mathbb mode.

0908      *

0909      * @param   token   Chars to write.

0910      */

0911     private void emph(final String token) {

0912         if (isWs(token)) {

0913             output.addWs(Latex2UnicodeSpecials.transform2Emph(token));

0914         } else {

0915             output.addToken(Latex2UnicodeSpecials.transform2Emph(token));

0916         }

0917     }

0918 

0919     /**

0920      * Write token chars in mathbb mode.

0921      *

0922      * @param   token   Chars to write.

0923      */

0924     private void mathbb(final String token) {

0925         for (int i = 0; i < token.length(); i++) {

0926             final char c = token.charAt(i);

0927             switch (c) {

0928             case 'C': output.addToken("\u2102");

0929                 break;

0930             case 'H': output.addToken("\u210D");

0931                 break;

0932             case 'N': output.addToken("\u2115");

0933                 break;

0934             case 'P': output.addToken("\u2119");

0935                 break;

0936             case 'Q': output.addToken("\u211A");

0937                 break;

0938             case 'R': output.addToken("\u211D");

0939                 break;

0940             case 'Z': output.addToken("\u2124");

0941                 break;

0942             default:

0943                 if (Character.isWhitespace(c)) {

0944                     output.addWs("" + c);

0945                 } else {

0946                     output.addToken("" + c);

0947                 }

0948             }

0949         }

0950     }

0951 

0952     private boolean isWs(final String token) {

0953         return token == null || token.trim().length() == 0;

0954     }

0955 

0956     /**

0957      * Write token chars in mathfrak mode.

0958      *

0959      * @param   token   Chars to write.

0960      */

0961     private void mathfrak(final String token) {

0962         if (isWs(token)) {

0963             output.addWs(Latex2UnicodeSpecials.transform2Mathfrak(token));

0964         } else {

0965             output.addToken(Latex2UnicodeSpecials.transform2Mathfrak(token));

0966         }

0967     }

0968 

0969     /**

0970      * Write token in bold mode.

0971      *

0972      * @param   token   Chars to write.

0973      */

0974     private void bold(final String token) {

0975         if (isWs(token)) {

0976             output.addWs(Latex2UnicodeSpecials.transform2Bold(token));

0977         } else {

0978             output.addToken(Latex2UnicodeSpecials.transform2Bold(token));

0979         }

0980     }

0981 

0982     /**

0983      * Print end of line.

0984      */

0985     private final void println() {

0986         output.println();

0987     }

0988 

0989     /**

0990      * Reads a single character and does not change the reading

0991      * position.

0992      *

0993      * @return  character read, if there are no more chars

0994      *          <code>-1</code> is returned

0995      */

0996     protected final int getChar() {

0997         return input.getChar();

0998     }

0999 

1000     /**

1001      * Reads a single character and increments the reading position

1002      * by one.

1003      *

1004      * @return  character read, if there are no more chars

1005      *          <code>-1</code> is returned

1006      */

1007     protected final int read() {

1008         return input.read();

1009     }

1010 

1011     /**

1012      * Read until end of line.

1013      *

1014      * @return  Characters read.

1015      */

1016     protected final String readln() {

1017         StringBuffer result = new StringBuffer();

1018         int c;

1019         while (TextInput.EOF != (c = read())) {

1020             if (c == '\n') {

1021                 break;

1022             }

1023             result.append((char) c);

1024         }

1025         return result.toString();

1026     }

1027 

1028     /**

1029      * Are there still any characters to read?

1030      *

1031      * @return  Anything left for reading further?

1032      */

1033     public final boolean eof() {

1034         return input.isEmpty();

1035     }

1036 

1037     /**

1038      * Convert character position into row and column information.

1039      *

1040      * @param   absolutePosition    Find this character position.

1041      * @return  Row and column information.

1042      */

1043     public SourcePosition getAbsoluteSourcePosition(final int absolutePosition) {

1044         return ((SubTextInput) inputStack.get(0)).getPosition(absolutePosition);

1045     }

1046 

1047     /**

1048      * Add warning message.

1049      *

1050      * @param   code    Message code.

1051      * @param   message Message.

1052      * @param   from    Absolute character position of problem start.

1053      * @param   to      Absolute character position of problem end.

1054      */

1055     private void addWarning(final int code, final String message, final int from, final int to) {

1056         finder.addWarning(code, message, getAbsoluteSourcePosition(from),

1057             getAbsoluteSourcePosition(to));

1058     }

1059 

1060 

1061 

1062 }