Latex2UnicodeParser xref

View Javadoc

1   /* This file is part of the project "Hilbert II" - http://www.qedeq.org
2    *
3    * Copyright 2000-2014,  Michael Meyling <mime@qedeq.org>.
4    *
5    * "Hilbert II" is free software; you can redistribute
6    * it and/or modify it under the terms of the GNU General Public
7    * License as published by the Free Software Foundation; either
8    * version 2 of the License, or (at your option) any later version.
9    *
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13   * GNU General Public License for more details.
14   */
15  
16  package org.qedeq.kernel.bo.service.unicode;
17  
18  import java.util.Stack;
19  
20  import org.qedeq.base.io.AbstractOutput;
21  import org.qedeq.base.io.SourcePosition;
22  import org.qedeq.base.io.StringOutput;
23  import org.qedeq.base.io.SubTextInput;
24  import org.qedeq.base.io.TextInput;
25  import org.qedeq.base.trace.Trace;
26  import org.qedeq.kernel.bo.service.latex.LatexErrorCodes;
27  
28  /**
29   * Transform LaTeX into Unicode format.
30   *
31   * @author  Michael Meyling
32   */
33  public final class Latex2UnicodeParser {
34  
35      /** This class. */
36      private static final Class CLASS = Latex2UnicodeParser.class;
37  
38      /** These characters get a special treatment in LaTeX. */
39      private static final String SPECIALCHARACTERS = "(),{}\\~%$&\'`^_-";
40  
41      /** Herein goes our output. */
42      private final AbstractOutput output;
43  
44      /** Resolver for references. */
45      private final ReferenceFinder finder;
46  
47      /** This is our current input stream. */
48      private SubTextInput input;
49  
50      /** Math mode on? */
51      private boolean mathMode = false;
52  
53      /** Mathfrak mode on? */
54      private boolean mathfrak = false;
55  
56      /** Emphasize on? */
57      private boolean emph = false;
58  
59      /** Bold on? */
60      private boolean bold = false;
61  
62      /** Mathbb on? */
63      private boolean mathbb = false;
64  
65      /** Stack for input parser. */
66      private Stack inputStack = new Stack();
67  
68      /** Stack for math mode. */
69      private Stack mathModeStack = new Stack();
70  
71      /** Stack for mathfrak mode. */
72      private Stack mathfrakStack = new Stack();
73  
74      /** Stack for emphasize mode. */
75      private Stack emphStack = new Stack();
76  
77      /** Stack for bold mode. */
78      private Stack boldStack = new Stack();
79  
80      /** Stack for mathbb mode. */
81      private Stack mathbbStack = new Stack();
82  
83      /** Stack for skipWhitespace mode. */
84      private Stack skipWhitespaceStack = new Stack();
85  
86      /** Should I skip whitespace before printing the next token. */
87      private boolean skipWhitespace;
88  
89      /** Here the last read token begins. This is an absolute position. */
90      private int tokenBegin;
91  
92      /** Here the last read token ends. This is an absolute position. */
93      private int tokenEnd;
94  
95      /** Current item number. */
96      private int itemNumber;
97  
98      /**
99       * Parse LaTeX text into QEDEQ module string.
100      *
101      * @param   finder  Finder for references.
102      * @param   input   Parse this input.
103      * @param   columns Maximum column number. Break (if possible) before.
104      * @return  QEDEQ module string.
105      */
106     public static final String transform(final ReferenceFinder finder, final String input,
107             final int columns) {
108         final Latex2UnicodeParser parser = new Latex2UnicodeParser(finder);
109         parser.output.setColumns(columns);
110         return parser.getUtf8(input);
111     }
112 
113     /**
114      * Constructor.
115      *
116      * @param   finder  Finder for references.
117      */
118     private Latex2UnicodeParser(final ReferenceFinder finder) {
119         // use dummy implementation if finder is null
120         if (finder == null) {
121             this.finder = new ReferenceFinder() {
122                 public String getReferenceLink(final String reference,
123                         final SourcePosition startDelta, final SourcePosition endDelta) {
124                     return "[" + reference + "]";
125                 }
126 
127                 public void addWarning(final int code, final String msg,
128                         final SourcePosition startDelta, final SourcePosition endDelta) {
129                     // nothing to do
130                 }
131             };
132         } else {
133             this.finder = finder;
134         }
135         this.output = new StringOutput();
136     }
137 
138     /**
139      * Get UTF-8 String out of LaTeX text.
140      *
141      * @param   text    LaTeX.
142      * @return  UTF-8.
143      */
144     private String getUtf8(final String text) {
145         skipWhitespace = true;
146         this.input = new SubTextInput(text);
147         parseAndPrint(this.input);
148         return output.toString();
149     }
150 
151     /**
152      * Do parsing and print result.
153      *
154      * @param   input   Parse this LaTeX text and print UTF-8 into output.
155      */
156     private void parseAndPrint(final SubTextInput input) {
157         // remember old:
158         inputStack.push(this.input);
159         mathModeStack.push(Boolean.valueOf(mathMode));
160         mathfrakStack.push(Boolean.valueOf(mathfrak));
161         emphStack.push(Boolean.valueOf(emph));
162         boldStack.push(Boolean.valueOf(bold));
163         mathbbStack.push(Boolean.valueOf(mathbb));
164         skipWhitespaceStack.push(Boolean.valueOf(skipWhitespace));
165         try {
166             this.input = input;
167             boolean whitespace = false;
168             while (!eof()) {
169                 String token = readToken();
170                 if (!token.startsWith("\\")) {
171                     token = token.trim();
172                 }
173                 if (token.length() == 0) {
174                     whitespace = true;
175                     continue;
176                 }
177                 if (whitespace && !"\\par".equals(token)) {
178                     print(" ");
179                     whitespace = false;
180                 }
181                 if ("\\begin".equals(token)) {
182                     parseBegin();
183                 } else if ("\\footnote".equals(token)) {
184                     parseFootnote();
185                 } else if ("\\qref".equals(token)) {
186                     parseQref();
187                 } else if ("$$".equals(token)) {
188                     mathMode = true;
189                     final SubTextInput content = readTilToken(token);
190                     println();
191                     parseAndPrint(content);
192                     println();
193                     mathMode = false;
194                 } else if ("$".equals(token)) {
195                     mathMode = true;
196                     final SubTextInput content = readTilToken(token);
197                     parseAndPrint(content);
198                     mathMode = false;
199                 } else if ("\\mathfrak".equals(token)) {
200                     if ('{' == getChar()) {
201                         mathfrak = true;
202                         final SubTextInput content = readCurlyBraceContents();
203                         parseAndPrint(content);
204                         mathfrak = false;
205                     } else {
206                         mathfrak = true;
207                     }
208                 } else if ("\\mathbb".equals(token)) {
209                     if ('{' == getChar()) {
210                         mathbb = true;
211                         final SubTextInput content = readCurlyBraceContents();
212                         parseAndPrint(content);
213                         mathbb = false;
214                     } else {
215                         mathbb = true;
216                     }
217                 } else if ("\\emph".equals(token)) {
218                     if ('{' == getChar()) {
219                         emph = true;
220                         final SubTextInput content = readCurlyBraceContents();
221                         parseAndPrint(content);
222 //                        output.addWs("\u2006");
223                         output.addWs(" ");
224                         emph = false;
225                     } else {
226                         emph = true;
227                     }
228                 } else if ("\\textbf".equals(token)) {
229                     if ('{' == getChar()) {
230                         bold = true;
231                         final SubTextInput content = readCurlyBraceContents();
232                         parseAndPrint(content);
233                         bold = false;
234                     } else {
235                         bold = true;
236                     }
237                 } else if ("\\cite".equals(token)) {
238                     if ('{' == getChar()) {
239                         final SubTextInput content = readCurlyBraceContents();
240                         output.addToken("[" + content.asString() + "]");
241                     }
242                 } else if ("\\tag".equals(token)) {
243                     if ('{' == getChar()) {
244                         final SubTextInput content = readCurlyBraceContents();
245                         output.addToken("(" + content.asString() + ")");
246                     }
247                 } else if ("\\mbox".equals(token)) {
248                     if ('{' == getChar()) {
249                         final SubTextInput content = readCurlyBraceContents();
250                         parseAndPrint(content);
251                     }
252                 } else if ("\\cline".equals(token)) {
253                     if ('{' == getChar()) {
254                         readCurlyBraceContents();
255                         // ignore
256                     }
257                     output.addToken("_______________________________________");
258                     println();
259                 } else if ("\\item".equals(token)) {
260                     output.popLevel(3);
261                     itemNumber++;
262                     output.println();
263                     output.addToken(itemNumber + ".");
264                     output.addWs("");
265                     output.pushLevel("   ");
266                     output.setTabLevel();
267                 } else if ("{".equals(token)) {
268                     input.readInverse();
269                     final SubTextInput content = readCurlyBraceContents();
270                     parseAndPrint(content);
271                 } else if ("\\url".equals(token)) {
272                     final SubTextInput content = readCurlyBraceContents();
273                     output.addToken(" " + content.asString() + " ");
274                 } else if ('{' == getChar() && ("\\index".equals(token) || "\\label".equals(token)
275                         || token.equals("\\vspace") || token.equals("\\hspace")
276                         || token.equals("\\vspace*") || token.equals("\\hspace*"))) {
277                     // ignore content
278                     readCurlyBraceContents();
279                 } else if ("_".equals(token) || "^".equals(token)) {
280                     if (mathMode) {
281                         String content;
282                         if ('{' == getChar()) {
283                             content = readCurlyBraceContents().asString();
284                         } else {
285                             content = readToken();
286                         }
287                         if ("_".equals(token)) {
288                             printSubscript(content);
289                         } else {
290                             printSuperscript(content);
291                         }
292                     } else {
293                         print(token);
294                     }
295                 } else {
296                     print(token);
297                 }
298             }
299         } finally {
300             this.input = (SubTextInput) inputStack.pop();
301             mathMode = ((Boolean) mathModeStack.pop()).booleanValue();
302             mathfrak = ((Boolean) mathfrakStack.pop()).booleanValue();
303             emph = ((Boolean) emphStack.pop()).booleanValue();
304             bold = ((Boolean) boldStack.pop()).booleanValue();
305             skipWhitespace = ((Boolean) skipWhitespaceStack.pop()).booleanValue();
306             output.flush();
307         }
308     }
309 
310     /**
311      * Parse after \footnote.
312      */
313     private void parseFootnote() {
314         if ('{' == getChar()) {
315             final SubTextInput content = readCurlyBraceContents();
316             println();
317             output.printWithoutSplit("          \u250C");
318             output.pushLevel();
319             output.pushLevel();
320             output.pushLevel();
321             output.pushLevel();
322             output.pushLevel();
323             output.pushLevel("\u2502 ");
324             println();
325             parseAndPrint(content);
326             output.popLevel();
327             output.popLevel();
328             output.popLevel();
329             output.popLevel();
330             output.popLevel();
331             output.popLevel();
332             println();
333             output.printWithoutSplit("          \u2514");
334             println();
335         }
336     }
337 
338     /**
339      * Transform <code>\qref{key}</code> entries into common LaTeX code.
340      *
341      * @param   text    Work on this text.
342      * @return  Result of transforming \qref into text.
343      */
344     /**
345      * Parse after \footnote.
346      */
347     private void parseQref() {
348         final String method = "parseQref()";
349         final int localStart1 = input.getAbsolutePosition();
350         if ('{' == getChar()) {
351             final SubTextInput content = readCurlyBraceContents();
352             String ref = content.asString().trim();
353             Trace.param(CLASS, this, method, "ref", ref);
354             if (ref.length() == 0) {
355                 addWarning(LatexErrorCodes.QREF_EMPTY_CODE, LatexErrorCodes.QREF_EMPTY_TEXT,
356                     localStart1, input.getAbsolutePosition());
357                 return;
358             }
359             if (ref.length() > 1024) {
360                 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,
361                     LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,
362                     localStart1, input.getAbsolutePosition());
363                 return;
364             }
365             if (ref.indexOf("{") >= 0) {
366                 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,
367                     LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,
368                     localStart1, input.getAbsolutePosition());
369                 input.setAbsolutePosition(localStart1);
370                 return;
371             }
372 
373             String display = finder.getReferenceLink(ref, getAbsoluteSourcePosition(localStart1),
374                 getAbsoluteSourcePosition(input.getAbsolutePosition()));
375             output.addToken(display);
376         }
377     }
378 
379 
380     /**
381      * Parse after \begin.
382      */
383     private void parseBegin() {
384         final String kind = readCurlyBraceContents().asString();   // ignore
385         final SubTextInput content = readSection(kind);
386         if ("eqnarray".equals(kind)
387             || "eqnarray*".equals(kind)
388             || "equation*".equals(kind)) {
389             mathMode = true;
390             skipWhitespace = false;
391             parseAndPrint(content);
392             println();
393             mathMode = false;
394         } else if ("quote".equals(kind)) {
395             output.pushLevel();
396             output.pushLevel();
397             output.pushLevel();
398             println();
399             parseAndPrint(content);
400             println();
401             output.popLevel();
402             output.popLevel();
403             output.popLevel();
404         } else if ("tabularx".equals(kind)) {
405             skipWhitespace = false;
406             parseAndPrint(content);
407         } else if ("enumerate".equals(kind)) {
408             itemNumber = 0;
409             output.pushLevel("   ");
410             parseAndPrint(content);
411             output.popLevel(3);
412         } else if ("verbatim".equals(kind)) {
413             final String level = output.getLevel();
414             output.setLevel("");
415             print(content.asString());
416             output.setLevel(level);
417         } else {
418             parseAndPrint(content);
419         }
420     }
421 
422     private void printSubscript(final String content) {
423         output.addToken(Latex2UnicodeSpecials.transform2Subscript(content));
424     }
425 
426     private void printSuperscript(final String content) {
427         output.addToken(Latex2UnicodeSpecials.transform2Superscript(content));
428     }
429 
430     /**
431      * Read until section ends with \{kind}.
432      *
433      * @param   kind    Look for the end of this.
434      * @return  Read text.
435      */
436     private SubTextInput readSection(final String kind) {
437         if ('{' == getChar()) { // skip content
438             readCurlyBraceContents();
439         }
440         if ('{' == getChar()) { // skip content
441             readCurlyBraceContents();
442         }
443         final int localStart = input.getAbsolutePosition();
444         int current = localStart;
445         do {
446             current = input.getAbsolutePosition();
447             final String item = readToken();
448             if (item == null) {
449                 Trace.fatal(CLASS, this, "readSection", "not found: " + "\\end{" + kind + "}",
450                     new IllegalArgumentException("from " + localStart + " to " + input.getAbsolutePosition()
451                     + input.getPosition()));
452                 break;
453             }
454             if ("\\end".equals(item)) {
455                 final String curly2 = readCurlyBraceContents().asString();
456                 if (kind.equals(curly2)) {
457                     break;
458                 }
459             }
460         } while (true);
461         return input.getSubTextInput(localStart, current);
462     }
463 
464     /**
465      * Get text till <code>token</code> occurs.
466      *
467      * @param   token   Terminator token.
468      * @return  Read text before token.
469      */
470     private SubTextInput readTilToken(final String token) {
471         final int localStart = input.getAbsolutePosition();
472         final StringBuffer buffer = new StringBuffer();
473         int current = localStart;
474         do {
475             current = input.getAbsolutePosition();
476             final String item = readToken();
477             if (item == null) {
478                 Trace.fatal(CLASS, this, "readSection", "not found: " + token,
479                     new IllegalArgumentException("from " + localStart + " to " + current
480                     + input.getAbsolutePosition()));
481                 break;
482             }
483             if (token.equals(item)) {
484                 break;
485             }
486             buffer.append(item);
487         } while (true);
488         return input.getSubTextInput(localStart, current);
489     }
490 
491     /**
492      * Read next token from input stream.
493      *
494      * @return  Read token.
495      */
496     protected final String readToken() {
497         final String method = "readToken()";
498         Trace.begin(CLASS, this, method);
499         tokenBegin = input.getAbsolutePosition();
500         StringBuffer token = new StringBuffer();
501         try {
502             do {
503                 if (eof()) {
504                     if (token.length() <= 0) {
505                         token = null;
506                     }
507                     break;
508                 }
509                 final char c = (char) getChar();
510                 if (Character.isDigit(c)) {
511                     token.append((char) read());
512                     if (Character.isDigit((char) getChar())) {
513                         continue;
514                     }
515                     break;
516                 }
517                 if (Character.isLetter(c)) {
518                     token.append((char) read());
519                     if (Character.isLetter((char) getChar())) {
520                         continue;
521                     }
522                     break;
523                 }
524                 if (SPECIALCHARACTERS.indexOf(c) >= 0) {
525                     switch (c) {
526                     case '&':
527                     case '{':
528                     case '}':
529                     case '~':
530                     case '_':
531                     case '^':
532                         token.append((char) read());
533                         break;
534                     case '$':
535                     case '\'':
536                     case '`':
537                     case '-':
538                         token.append((char) read());
539                         if (c == getChar()) {
540                             continue;
541                         }
542                         break;
543                     case '%':
544                         token.append((char) read());
545                         if (c == getChar()) {
546                             // we must skip till end of line
547                             token.append(readln());
548 //                            System.out.println("skipping comment:");
549 //                            System.out.println(token);
550                             token.setLength(0);
551                             continue;
552                         }
553                         break;
554                     case '\\':
555                         if (' ' == getChar()) {
556                             token.append("\\");
557                             token.append((char) read());
558                             break;
559                         }
560                         final String t = readBackslashToken();
561                         token.append(t);
562                         break;
563                     default:
564                         read();
565                         token.append(c);
566                     }
567                     break;
568                 }
569                 token.append((char) read());
570                 if ('_' == getChar() || '^' == getChar()) {
571                     token.append((char) read());
572                     continue;
573                 }
574                 break;
575             } while (!eof());
576             Trace.param(CLASS, this, method, "Read token", token);
577 //            System.out.println("< " + token);
578             tokenEnd = input.getAbsolutePosition();
579             return (token != null ? token.toString() : null);
580         } finally {
581             Trace.end(CLASS, this, method);
582         }
583     }
584 
585     /**
586      * Get token that starts with a backlash.
587      *
588      * @return  Token with backslash.
589      */
590     private String readBackslashToken() {
591         final String method = "readBackslashToken()";
592         Trace.begin(CLASS, this, method);
593         if (getChar() != '\\') {
594             throw new IllegalArgumentException("\\ expected");
595         }
596         read(); // read \
597         if (eof()) {
598             Trace.param(CLASS, this, method, "return", null);
599             Trace.end(CLASS, this, method);
600             return null;
601         }
602         if (!Character.isLetter((char) getChar())) {
603             Trace.param(CLASS, this, method, "return", (char) getChar());
604             Trace.end(CLASS, this, method);
605             return "\\" + ((char) read());
606         }
607         final StringBuffer buffer = new StringBuffer("\\");
608         do {
609             buffer.append((char) read());
610         } while (!eof() && (Character.isLetter((char) getChar()) || '*' == (char) getChar()));
611         Trace.param(CLASS, this, method, "return", buffer.toString());
612         Trace.end(CLASS, this, method);
613         return buffer.toString();
614     }
615 
616     /**
617      * Read contents that is within { .. }.
618      *
619      * @return  Contents.
620      */
621     private SubTextInput readCurlyBraceContents() {
622         final int localStart = input.getAbsolutePosition();
623         final String first = readToken();
624         if (!"{".equals(first)) {
625             addWarning(LatexErrorCodes.BRACKET_START_NOT_FOUND_CODE,
626                     LatexErrorCodes.BRACKET_START_NOT_FOUND_TEXT,
627                     localStart, input.getAbsolutePosition());
628             throw new IllegalArgumentException("\"{\" expected, but was: \"" + first + "\"");
629         }
630         final int curlyStart = input.getAbsolutePosition();
631         int curlyEnd = curlyStart;
632         final StringBuffer buffer = new StringBuffer();
633         String next = "";
634         int level = 1;
635         while (level > 0 && getChar() != TextInput.EOF) {
636             next = readToken();
637             if ("{".equals(next)) {
638                 level++;
639             } else if ("}".equals(next)) {
640                 level--;
641             }
642             if (level <= 0) {
643                 break;
644             }
645             buffer.append(next);
646             curlyEnd = input.getAbsolutePosition();
647         }
648         if (!"}".equals(next)) {
649             addWarning(LatexErrorCodes.BRACKET_END_NOT_FOUND_CODE,
650                 LatexErrorCodes.BRACKET_END_NOT_FOUND_TEXT,
651                 localStart, input.getAbsolutePosition());
652             buffer.setLength(0);
653             input.setAbsolutePosition(curlyStart);
654             curlyEnd = curlyStart;
655         }
656         return input.getSubTextInput(curlyStart, curlyEnd);
657     }
658 
659     /**
660      * Print <code>token</code> to output stream.
661      *
662      * @param   token    Print this for UTF-8.
663      */
664     private final void print(final String token) {
665 //        System.out.println("> " + token);
666         if (token.trim().length() == 0) {
667             if (skipWhitespace) {
668                 return;
669             }
670         }
671         skipWhitespace = false;
672         if (token.equals("\\par")) {
673             println();
674             println();
675             skipWhitespace = true;
676         } else if (token.equals("\\\\")) {
677             println();
678         } else if (token.equals("&")) {
679             output.addWs(" ");
680         } else if (token.equals("\\-")) {
681             // ignore
682         } else if (token.equals("--")) {
683             output.addToken("\u2012");
684         } else if (token.equals("`")) {
685             output.addWs("\u2018");
686         } else if (token.equals("'")) {
687             output.addToken("\u2019");
688         } else if (token.equals("\\neq")) {
689             output.addToken("\u2260");
690         } else if (token.equals("\\in")) {
691             output.addToken("\u2208");
692         } else if (token.equals("\\forall")) {
693             output.addToken("\u2200");
694         } else if (token.equals("\\exists")) {
695             output.addToken("\u2203");
696         } else if (token.equals("\\emptyset")) {
697             output.addToken("\u2205");
698         } else if (token.equals("\\rightarrow")) {
699             output.addToken("\u2192");
700         } else if (token.equals("\\Rightarrow")) {
701             output.addToken("\u21D2");
702         } else if (token.equals("\\leftrightarrow")) {
703             output.addToken("\u2194");
704         } else if (token.equals("\\Leftarrow")) {
705             output.addToken("\u21D0");
706         } else if (token.equals("\\Leftrightarrow")) {
707             output.addToken("\u21D4");
708         } else if (token.equals("\\langle")) {
709             output.addToken("\u2329");
710         } else if (token.equals("\\rangle")) {
711             output.addToken("\u232A");
712         } else if (token.equals("\\land") || token.equals("\\vee")) {
713             output.addToken("\u2227");
714         } else if (token.equals("\\lor") || token.equals("\\wedge")) {
715             output.addToken("\u2228");
716         } else if (token.equals("\\bar")) {
717             output.addToken("\u203E");
718         } else if (token.equals("\\bigcap")) {
719             output.addToken("\u22C2");
720         } else if (token.equals("\\cap")) {
721             output.addToken("\u2229");
722         } else if (token.equals("\\bigcup")) {
723             output.addToken("\u22C3");
724         } else if (token.equals("\\cup")) {
725             output.addToken("\u222A");
726         } else if (token.equals("\\in")) {
727             output.addToken("\u2208");
728         } else if (token.equals("\\notin")) {
729             output.addToken("\u2209");
730         } else if (token.equals("\\Alpha")) {
731             output.addToken("\u0391");
732         } else if (token.equals("\\alpha")) {
733             output.addToken("\u03B1");
734         } else if (token.equals("\\Beta")) {
735             output.addToken("\u0392");
736         } else if (token.equals("\\beta")) {
737             output.addToken("\u03B2");
738         } else if (token.equals("\\Gamma")) {
739             output.addToken("\u0393");
740         } else if (token.equals("\\gamma")) {
741             output.addToken("\u03B3");
742         } else if (token.equals("\\Delta")) {
743             output.addToken("\u0394");
744         } else if (token.equals("\\delta")) {
745             output.addToken("\u03B4");
746         } else if (token.equals("\\Epslilon")) {
747             output.addToken("\u0395");
748         } else if (token.equals("\\epsilon")) {
749             output.addToken("\u03B5");
750         } else if (token.equals("\\Zeta")) {
751             output.addToken("\u0396");
752         } else if (token.equals("\\zeta")) {
753             output.addToken("\u03B6");
754         } else if (token.equals("\\Eta")) {
755             output.addToken("\u0397");
756         } else if (token.equals("\\eta")) {
757             output.addToken("\u03B7");
758         } else if (token.equals("\\Theta")) {
759             output.addToken("\u0398");
760         } else if (token.equals("\\theta")) {
761             output.addToken("\u03B8");
762         } else if (token.equals("\\Iota")) {
763             output.addToken("\u0399");
764         } else if (token.equals("\\iota")) {
765             output.addToken("\u03B9");
766         } else if (token.equals("\\Kappa")) {
767             output.addToken("\u039A");
768         } else if (token.equals("\\kappa")) {
769             output.addToken("\u03BA");
770         } else if (token.equals("\\Lamda")) {
771             output.addToken("\u039B");
772         } else if (token.equals("\\lamda")) {
773             output.addToken("\u03BB");
774         } else if (token.equals("\\Mu")) {
775             output.addToken("\u039C");
776         } else if (token.equals("\\mu")) {
777             output.addToken("\u03BC");
778         } else if (token.equals("\\Nu")) {
779             output.addToken("\u039D");
780         } else if (token.equals("\\nu")) {
781             output.addToken("\u03BD");
782         } else if (token.equals("\\Xi")) {
783             output.addToken("\u039E");
784         } else if (token.equals("\\xi")) {
785             output.addToken("\u03BE");
786         } else if (token.equals("\\Omikron")) {
787             output.addToken("\u039F");
788         } else if (token.equals("\\omikron")) {
789             output.addToken("\u03BF");
790         } else if (token.equals("\\Pi")) {
791             output.addToken("\u03A0");
792         } else if (token.equals("\\pi")) {
793             output.addToken("\u03C0");
794         } else if (token.equals("\\Rho")) {
795             output.addToken("\u03A1");
796         } else if (token.equals("\\rho")) {
797             output.addToken("\u03C1");
798         } else if (token.equals("\\Sigma")) {
799             output.addToken("\u03A3");
800         } else if (token.equals("\\sigma")) {
801             output.addToken("\u03C3");
802         } else if (token.equals("\\Tau")) {
803             output.addToken("\u03A4");
804         } else if (token.equals("\\tau")) {
805             output.addToken("\u03C4");
806         } else if (token.equals("\\Upsilon")) {
807             output.addToken("\u03A5");
808         } else if (token.equals("\\upsilon")) {
809             output.addToken("\u03C5");
810         } else if (token.equals("\\Phi")) {
811             output.addToken("\u03A6");
812         } else if (token.equals("\\phi")) {
813             output.addToken("\u03C6");
814         } else if (token.equals("\\Chi")) {
815             output.addToken("\u03A6");
816         } else if (token.equals("\\chi")) {
817             output.addToken("\u03C7");
818         } else if (token.equals("\\Psi")) {
819             output.addToken("\u03A8");
820         } else if (token.equals("\\psi")) {
821             output.addToken("\u03C8");
822         } else if (token.equals("\\Omega")) {
823             output.addToken("\u03A9");
824         } else if (token.equals("\\omega")) {
825             output.addToken("\u03C9");
826         } else if (token.equals("\\subset")) {
827             output.addToken("\u2282");
828         } else if (token.equals("\\supset")) {
829             output.addToken("\u2283");
830         } else if (token.equals("\\subseteq")) {
831             output.addToken("\u2286");
832         } else if (token.equals("\\supseteq")) {
833             output.addToken("\u2287");
834         } else if (token.equals("\\{")) {
835             output.addToken("{");
836         } else if (token.equals("\\}")) {
837             output.addToken("}");
838         } else if (token.equals("\\&")) {
839             output.addToken("&");
840         } else if (token.equals("\\ ")) {
841             output.addWs(" ");
842         } else if (token.equals("\\S")) {
843             output.addToken("\u00A7");
844         } else if (token.equals("\\tt")) {
845             // ignore
846         } else if (token.equals("\\tiny")) {
847             // ignore
848         } else if (token.equals("\\nonumber")) {
849             // ignore
850         } else if (token.equals("\\LaTeX")) {
851             output.addToken("LaTeX");
852         } else if (token.equals("\\vdash")) {
853             output.addToken("\u22A2");
854         } else if (token.equals("\\dashv")) {
855             output.addToken("\u22A3");
856         } else if (token.equals("\\times")) {
857             output.addToken("\u00D7");
858         } else if (token.equals("~")) {
859             output.addToken("\u00A0");
860         } else if (token.equals("\\quad")) {
861 //            output.addWs("\u2000");
862             output.addWs(" ");
863         } else if (token.equals("\\qquad")) {
864 //            output.addWs("\u2000\u2000");
865             output.addWs("  ");
866         } else if (token.equals("\\,")) {
867 //            output.addWs("\u2009");
868             output.addWs(" ");
869         } else if (token.equals("\\neg") || token.equals("\\not")) {
870             output.addToken("\u00AC");
871         } else if (token.equals("\\bot")) {
872             output.addToken("\u22A5");
873         } else if (token.equals("\\top")) {
874             output.addToken("\u22A4");
875         } else if (token.equals("''") || token.equals("\\grqq")) {
876             output.addToken("\u201D");
877         } else if (token.equals("``") || token.equals("\\glqq")) {
878             skipWhitespace = true;
879             output.addToken("\u201E");
880         } else if (token.equals("\\ldots")) {
881             output.addToken("...");
882         } else if (token.equals("\\cdots")) {
883             output.addToken("\u00B7\u00B7\u00B7");
884         } else if (token.equals("\\hdots")) {
885             output.addToken("\u00B7\u00B7\u00B7");
886         } else if (token.equals("\\vdots")) {
887             output.addToken("\u2807");
888         } else if (token.equals("\\overline")) {    // TODO 20101018 m31: we assume set complement
889             output.addToken("\u2201");
890         } else if (token.startsWith("\\")) {
891             addWarning(LatexErrorCodes.COMMAND_NOT_SUPPORTED_CODE,
892                 LatexErrorCodes.COMMAND_NOT_SUPPORTED_TEXT + token, tokenBegin, tokenEnd);
893         } else {
894             if (mathfrak) {
895                 mathfrak(token);
896             } else if (mathbb) {
897                 mathbb(token);
898             } else if (emph) {
899                 emph(token);
900             } else if (bold) {
901                 bold(token);
902             } else {
903                 if (isWs(token)) {
904                     output.addWs(token);
905                 } else {
906                     output.addToken(token);
907                 }
908             }
909         }
910     }
911 
912     /**
913      * Write token chars in mathbb mode.
914      *
915      * @param   token   Chars to write.
916      */
917     private void emph(final String token) {
918         if (isWs(token)) {
919             output.addWs(Latex2UnicodeSpecials.transform2Emph(token));
920         } else {
921             output.addToken(Latex2UnicodeSpecials.transform2Emph(token));
922         }
923     }
924 
925     /**
926      * Write token chars in mathbb mode.
927      *
928      * @param   token   Chars to write.
929      */
930     private void mathbb(final String token) {
931         for (int i = 0; i < token.length(); i++) {
932             final char c = token.charAt(i);
933             switch (c) {
934             case 'C': output.addToken("\u2102");
935                 break;
936             case 'H': output.addToken("\u210D");
937                 break;
938             case 'N': output.addToken("\u2115");
939                 break;
940             case 'P': output.addToken("\u2119");
941                 break;
942             case 'Q': output.addToken("\u211A");
943                 break;
944             case 'R': output.addToken("\u211D");
945                 break;
946             case 'Z': output.addToken("\u2124");
947                 break;
948             default:
949                 if (Character.isWhitespace(c)) {
950                     output.addWs("" + c);
951                 } else {
952                     output.addToken("" + c);
953                 }
954             }
955         }
956     }
957 
958     private boolean isWs(final String token) {
959         return token == null || token.trim().length() == 0;
960     }
961 
962     /**
963      * Write token chars in mathfrak mode.
964      *
965      * @param   token   Chars to write.
966      */
967     private void mathfrak(final String token) {
968         if (isWs(token)) {
969             output.addWs(Latex2UnicodeSpecials.transform2Mathfrak(token));
970         } else {
971             output.addToken(Latex2UnicodeSpecials.transform2Mathfrak(token));
972         }
973     }
974 
975     /**
976      * Write token in bold mode.
977      *
978      * @param   token   Chars to write.
979      */
980     private void bold(final String token) {
981         if (isWs(token)) {
982             output.addWs(Latex2UnicodeSpecials.transform2Bold(token));
983         } else {
984             output.addToken(Latex2UnicodeSpecials.transform2Bold(token));
985         }
986     }
987 
988     /**
989      * Print end of line.
990      */
991     private final void println() {
992         output.println();
993     }
994 
995     /**
996      * Reads a single character and does not change the reading
997      * position.
998      *
999      * @return  character read, if there are no more chars
1000      *          <code>-1</code> is returned
1001      */
1002     protected final int getChar() {
1003         return input.getChar();
1004     }
1005 
1006     /**
1007      * Reads a single character and increments the reading position
1008      * by one.
1009      *
1010      * @return  character read, if there are no more chars
1011      *          <code>-1</code> is returned
1012      */
1013     protected final int read() {
1014         return input.read();
1015     }
1016 
1017     /**
1018      * Read until end of line.
1019      *
1020      * @return  Characters read.
1021      */
1022     protected final String readln() {
1023         StringBuffer result = new StringBuffer();
1024         int c;
1025         while (TextInput.EOF != (c = read())) {
1026             if (c == '\n') {
1027                 break;
1028             }
1029             result.append((char) c);
1030         }
1031         return result.toString();
1032     }
1033 
1034     /**
1035      * Are there still any characters to read?
1036      *
1037      * @return  Anything left for reading further?
1038      */
1039     public final boolean eof() {
1040         return input.isEmpty();
1041     }
1042 
1043     /**
1044      * Convert character position into row and column information.
1045      *
1046      * @param   absolutePosition    Find this character position.
1047      * @return  Row and column information.
1048      */
1049     public SourcePosition getAbsoluteSourcePosition(final int absolutePosition) {
1050         return ((SubTextInput) inputStack.get(0)).getPosition(absolutePosition);
1051     }
1052 
1053     /**
1054      * Add warning message.
1055      *
1056      * @param   code    Message code.
1057      * @param   message Message.
1058      * @param   from    Absolute character position of problem start.
1059      * @param   to      Absolute character position of problem end.
1060      */
1061     private void addWarning(final int code, final String message, final int from, final int to) {
1062         finder.addWarning(code, message, getAbsoluteSourcePosition(from),
1063             getAbsoluteSourcePosition(to));
1064     }
1065 
1066 
1067 
1068 }