Clover Coverage Report
Coverage timestamp: Fri Feb 14 2014 01:47:57 UTC
../../../../../../img/srcFileCovDistChart9.png 31% of files have more coverage
625   1,068   262   21.55
390   820   0.42   29
29     9.03  
1    
 
  Latex2UnicodeParser       Line # 33 625 262 83.9% 0.83908045
 
  (37)
 
1    /* This file is part of the project "Hilbert II" - http://www.qedeq.org
2    *
3    * Copyright 2000-2014, Michael Meyling <mime@qedeq.org>.
4    *
5    * "Hilbert II" is free software; you can redistribute
6    * it and/or modify it under the terms of the GNU General Public
7    * License as published by the Free Software Foundation; either
8    * version 2 of the License, or (at your option) any later version.
9    *
10    * This program is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13    * GNU General Public License for more details.
14    */
15   
16    package org.qedeq.kernel.bo.service.unicode;
17   
18    import java.util.Stack;
19   
20    import org.qedeq.base.io.AbstractOutput;
21    import org.qedeq.base.io.SourcePosition;
22    import org.qedeq.base.io.StringOutput;
23    import org.qedeq.base.io.SubTextInput;
24    import org.qedeq.base.io.TextInput;
25    import org.qedeq.base.trace.Trace;
26    import org.qedeq.kernel.bo.service.latex.LatexErrorCodes;
27   
28    /**
29    * Transform LaTeX into Unicode format.
30    *
31    * @author Michael Meyling
32    */
 
33    public final class Latex2UnicodeParser {
34   
35    /** This class. */
36    private static final Class CLASS = Latex2UnicodeParser.class;
37   
38    /** These characters get a special treatment in LaTeX. */
39    private static final String SPECIALCHARACTERS = "(),{}\\~%$&\'`^_-";
40   
41    /** Herein goes our output. */
42    private final AbstractOutput output;
43   
44    /** Resolver for references. */
45    private final ReferenceFinder finder;
46   
47    /** This is our current input stream .*/
48    private SubTextInput input;
49   
50    /** Math mode on? */
51    private boolean mathMode = false;
52   
53    /** Mathfrak mode on? */
54    private boolean mathfrak = false;
55   
56    /** Emphasize on? */
57    private boolean emph = false;
58   
59    /** Bold on? */
60    private boolean bold = false;
61   
62    /** Mathbb on? */
63    private boolean mathbb = false;
64   
65    /** Stack for input parser. */
66    private Stack inputStack = new Stack();
67   
68    /** Stack for math mode. */
69    private Stack mathModeStack = new Stack();
70   
71    /** Stack for mathfrak mode. */
72    private Stack mathfrakStack = new Stack();
73   
74    /** Stack for emphasize mode. */
75    private Stack emphStack = new Stack();
76   
77    /** Stack for bold mode. */
78    private Stack boldStack = new Stack();
79   
80    /** Stack for mathbb mode. */
81    private Stack mathbbStack = new Stack();
82   
83    /** Stack for skipWhitespace mode. */
84    private Stack skipWhitespaceStack = new Stack();
85   
86    /** Should I skip whitespace before printing the next token. */
87    private boolean skipWhitespace;
88   
89    /** Here the last read token begins. This is an absolute position. */
90    private int tokenBegin;
91   
92    /** Here the last read token ends. This is an absolute position. */
93    private int tokenEnd;
94   
95    /** Current item number. */
96    private int itemNumber;
97   
98    /**
99    * Parse LaTeX text into QEDEQ module string.
100    *
101    * @param finder Finder for references.
102    * @param input Parse this input.
103    * @param columns Maximum column number. Break (if possible) before.
104    * @return QEDEQ module string.
105    */
 
106  11147 toggle public static final String transform(final ReferenceFinder finder, final String input,
107    final int columns) {
108  11147 final Latex2UnicodeParser parser = new Latex2UnicodeParser(finder);
109  11147 parser.output.setColumns(columns);
110  11147 return parser.getUtf8(input);
111    }
112   
/**
 * Constructor. Creates a fresh string output to collect the result.
 *
 * @param finder Finder for references. If <code>null</code> a dummy implementation is
 *               used that renders a reference as <code>[reference]</code> and drops
 *               all warnings.
 */
private Latex2UnicodeParser(final ReferenceFinder finder) {
    if (finder != null) {
        this.finder = finder;
    } else {
        // no resolver given, fall back to a minimal one
        this.finder = new ReferenceFinder() {

            public String getReferenceLink(final String reference,
                    final SourcePosition startDelta, final SourcePosition endDelta) {
                final StringBuffer link = new StringBuffer("[");
                link.append(reference);
                link.append("]");
                return link.toString();
            }

            public void addWarning(final int code, final String msg,
                    final SourcePosition startDelta, final SourcePosition endDelta) {
                // warnings are dropped intentionally
            }
        };
    }
    this.output = new StringOutput();
}
137   
138    /**
139    * Get UTF-8 String out of LaTeX text.
140    *
141    * @param text LaTeX.
142    * @return UTF-8.
143    */
 
144  11147 toggle private String getUtf8(final String text) {
145  11147 skipWhitespace = true;
146  11147 this.input = new SubTextInput(text);
147  11147 parseAndPrint(this.input);
148  11147 return output.toString();
149    }
150   
151    /**
152    * Do parsing and print result.
153    *
154    * @param input Parse this LaTeX text and print UTF-8 into output.
155    */
 
156  22616 toggle private void parseAndPrint(final SubTextInput input) {
157    // remember old:
158  22616 inputStack.push(this.input);
159  22616 mathModeStack.push(Boolean.valueOf(mathMode));
160  22616 mathfrakStack.push(Boolean.valueOf(mathfrak));
161  22616 emphStack.push(Boolean.valueOf(emph));
162  22616 boldStack.push(Boolean.valueOf(bold));
163  22616 mathbbStack.push(Boolean.valueOf(mathbb));
164  22616 skipWhitespaceStack.push(Boolean.valueOf(skipWhitespace));
165  22616 try {
166  22616 this.input = input;
167  22616 boolean whitespace = false;
168  699180 while (!eof()) {
169  676564 String token = readToken();
170  676564 if (!token.startsWith("\\")) {
171  601925 token = token.trim();
172    }
173  676564 if (token.length() == 0) {
174  407029 whitespace = true;
175  407029 continue;
176    }
177  269535 if (whitespace && !"\\par".equals(token)) {
178  128101 print(" ");
179  128101 whitespace = false;
180    }
181  269535 if ("\\begin".equals(token)) {
182  467 parseBegin();
183  269068 } else if ("\\footnote".equals(token)) {
184  295 parseFootnote();
185  268773 } else if ("\\qref".equals(token)) {
186  698 parseQref();
187  268075 } else if ("$$".equals(token)) {
188  41 mathMode = true;
189  41 final SubTextInput content = readTilToken(token);
190  41 println();
191  41 parseAndPrint(content);
192  41 println();
193  41 mathMode = false;
194  268034 } else if ("$".equals(token)) {
195  5905 mathMode = true;
196  5905 final SubTextInput content = readTilToken(token);
197  5905 parseAndPrint(content);
198  5905 mathMode = false;
199  262129 } else if ("\\mathfrak".equals(token)) {
200  1520 if ('{' == getChar()) {
201  1520 mathfrak = true;
202  1520 final SubTextInput content = readCurlyBraceContents();
203  1520 parseAndPrint(content);
204  1520 mathfrak = false;
205    } else {
206  0 mathfrak = true;
207    }
208  260609 } else if ("\\mathbb".equals(token)) {
209  6 if ('{' == getChar()) {
210  6 mathbb = true;
211  6 final SubTextInput content = readCurlyBraceContents();
212  6 parseAndPrint(content);
213  6 mathbb = false;
214    } else {
215  0 mathbb = true;
216    }
217  260603 } else if ("\\emph".equals(token)) {
218  1853 if ('{' == getChar()) {
219  1850 emph = true;
220  1850 final SubTextInput content = readCurlyBraceContents();
221  1850 parseAndPrint(content);
222    // output.addWs("\u2006");
223  1850 output.addWs(" ");
224  1850 emph = false;
225    } else {
226  3 emph = true;
227    }
228  258750 } else if ("\\textbf".equals(token)) {
229  172 if ('{' == getChar()) {
230  172 bold = true;
231  172 final SubTextInput content = readCurlyBraceContents();
232  172 parseAndPrint(content);
233  172 bold = false;
234    } else {
235  0 bold = true;
236    }
237  258578 } else if ("\\cite".equals(token)) {
238  66 if ('{' == getChar()) {
239  66 final SubTextInput content = readCurlyBraceContents();
240  66 output.addToken("[" + content.asString() + "]");
241    }
242  258512 } else if ("\\tag".equals(token)) {
243  30 if ('{' == getChar()) {
244  30 final SubTextInput content = readCurlyBraceContents();
245  30 output.addToken("(" + content.asString() + ")");
246    }
247  258482 } else if ("\\mbox".equals(token)) {
248  324 if ('{' == getChar()) {
249  324 final SubTextInput content = readCurlyBraceContents();
250  324 parseAndPrint(content);
251    }
252  258158 } else if ("\\cline".equals(token)) {
253  42 if ('{' == getChar()) {
254  42 readCurlyBraceContents();
255    // ignore
256    }
257  42 output.addToken("_______________________________________");
258  42 println();
259  258116 } else if ("\\item".equals(token)) {
260  300 output.popLevel(3);
261  300 itemNumber++;
262  300 output.println();
263  300 output.addToken(itemNumber + ".");
264  300 output.addWs("");
265  300 output.pushLevel(" ");
266  300 output.setTabLevel();
267  257816 } else if ("{".equals(token)) {
268  909 input.readInverse();
269  909 final SubTextInput content = readCurlyBraceContents();
270  909 parseAndPrint(content);
271  256907 } else if ("\\url".equals(token)) {
272  208 final SubTextInput content = readCurlyBraceContents();
273  208 output.addToken(" " + content.asString() + " ");
274  256699 } else if ('{' == getChar() && ("\\index".equals(token) || "\\label".equals(token)
275    || token.equals("\\vspace") || token.equals("\\hspace")
276    || token.equals("\\vspace*") || token.equals("\\hspace*"))) {
277    // ignore content
278  1415 readCurlyBraceContents();
279  255284 } else if ("_".equals(token) || "^".equals(token)) {
280  2507 if (mathMode) {
281  2497 String content;
282  2497 if ('{' == getChar()) {
283  24 content = readCurlyBraceContents().asString();
284    } else {
285  2473 content = readToken();
286    }
287  2497 if ("_".equals(token)) {
288  2257 printSubscript(content);
289    } else {
290  240 printSuperscript(content);
291    }
292    } else {
293  10 print(token);
294    }
295    } else {
296  252777 print(token);
297    }
298    }
299    } finally {
300  22616 this.input = (SubTextInput) inputStack.pop();
301  22616 mathMode = ((Boolean) mathModeStack.pop()).booleanValue();
302  22616 mathfrak = ((Boolean) mathfrakStack.pop()).booleanValue();
303  22616 emph = ((Boolean) emphStack.pop()).booleanValue();
304  22616 bold = ((Boolean) boldStack.pop()).booleanValue();
305  22616 skipWhitespace = ((Boolean) skipWhitespaceStack.pop()).booleanValue();
306  22616 output.flush();
307    }
308    }
309   
310    /**
311    * Parse after \footnote.
312    */
 
313  295 toggle private void parseFootnote() {
314  295 if ('{' == getChar()) {
315  295 final SubTextInput content = readCurlyBraceContents();
316  295 println();
317  295 output.printWithoutSplit(" \u250C");
318  295 output.pushLevel();
319  295 output.pushLevel();
320  295 output.pushLevel();
321  295 output.pushLevel();
322  295 output.pushLevel();
323  295 output.pushLevel("\u2502 ");
324  295 println();
325  295 parseAndPrint(content);
326  295 output.popLevel();
327  295 output.popLevel();
328  295 output.popLevel();
329  295 output.popLevel();
330  295 output.popLevel();
331  295 output.popLevel();
332  295 println();
333  295 output.printWithoutSplit(" \u2514");
334  295 println();
335    }
336    }
337   
338    /**
339    * Transform <code>\qref{key}</code> entries into common LaTeX code.
340    *
341    * @param text Work on this text.
342    * @return Result of transforming \qref into text.
343    */
344    /**
345    * Parse after \footnote.
346    */
 
347  698 toggle private void parseQref() {
348  698 final String method = "parseQref()";
349  698 final int localStart1 = input.getAbsolutePosition();
350  698 if ('{' == getChar()) {
351  698 final SubTextInput content = readCurlyBraceContents();
352  698 String ref = content.asString().trim();
353  698 Trace.param(CLASS, this, method, "ref", ref);
354  698 if (ref.length() == 0) {
355  1 addWarning(LatexErrorCodes.QREF_EMPTY_CODE, LatexErrorCodes.QREF_EMPTY_TEXT,
356    localStart1, input.getAbsolutePosition());
357  1 return;
358    }
359  697 if (ref.length() > 1024) {
360  0 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,
361    LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,
362    localStart1, input.getAbsolutePosition());
363  0 return;
364    }
365  697 if (ref.indexOf("{") >= 0) {
366  1 addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE,
367    LatexErrorCodes.QREF_END_NOT_FOUND_TEXT,
368    localStart1, input.getAbsolutePosition());
369  1 input.setAbsolutePosition(localStart1);
370  1 return;
371    }
372   
373  696 String display = finder.getReferenceLink(ref, getAbsoluteSourcePosition(localStart1),
374    getAbsoluteSourcePosition(input.getAbsolutePosition()));
375  696 output.addToken(display);
376    }
377    }
378   
379   
380    /**
381    * Parse after \begin.
382    */
 
383  467 toggle private void parseBegin() {
384  467 final String kind = readCurlyBraceContents().asString(); // ignore
385  467 final SubTextInput content = readSection(kind);
386  467 if ("eqnarray".equals(kind)
387    || "eqnarray*".equals(kind)
388    || "equation*".equals(kind)) {
389  42 mathMode = true;
390  42 skipWhitespace = false;
391  42 parseAndPrint(content);
392  42 println();
393  42 mathMode = false;
394  425 } else if ("quote".equals(kind)) {
395  25 output.pushLevel();
396  25 output.pushLevel();
397  25 output.pushLevel();
398  25 println();
399  25 parseAndPrint(content);
400  25 println();
401  25 output.popLevel();
402  25 output.popLevel();
403  25 output.popLevel();
404  400 } else if ("tabularx".equals(kind)) {
405  244 skipWhitespace = false;
406  244 parseAndPrint(content);
407  156 } else if ("enumerate".equals(kind)) {
408  24 itemNumber = 0;
409  24 output.pushLevel(" ");
410  24 parseAndPrint(content);
411  24 output.popLevel(3);
412  132 } else if ("verbatim".equals(kind)) {
413  20 final String level = output.getLevel();
414  20 output.setLevel("");
415  20 print(content.asString());
416  20 output.setLevel(level);
417    } else {
418  112 parseAndPrint(content);
419    }
420    }
421   
 
422  2257 toggle private void printSubscript(final String content) {
423  2257 output.addToken(Latex2UnicodeSpecials.transform2Subscript(content));
424    }
425   
 
426  240 toggle private void printSuperscript(final String content) {
427  240 output.addToken(Latex2UnicodeSpecials.transform2Superscript(content));
428    }
429   
430    /**
431    * Read until section ends with \{kind}.
432    *
433    * @param kind Look for the end of this.
434    * @return Read text.
435    */
 
436  467 toggle private SubTextInput readSection(final String kind) {
437  467 if ('{' == getChar()) { // skip content
438  268 readCurlyBraceContents();
439    }
440  467 if ('{' == getChar()) { // skip content
441  244 readCurlyBraceContents();
442    }
443  467 final int localStart = input.getAbsolutePosition();
444  467 int current = localStart;
445  467 do {
446  155330 current = input.getAbsolutePosition();
447  155330 final String item = readToken();
448  155330 if (item == null) {
449  0 Trace.fatal(CLASS, this, "readSection", "not found: " + "\\end{" + kind + "}",
450    new IllegalArgumentException("from " + localStart + " to " + input.getAbsolutePosition()
451    + input.getPosition()));
452  0 break;
453    }
454  155330 if ("\\end".equals(item)) {
455  480 final String curly2 = readCurlyBraceContents().asString();
456  480 if (kind.equals(curly2)) {
457  467 break;
458    }
459    }
460    } while (true);
461  467 return input.getSubTextInput(localStart, current);
462    }
463   
464    /**
465    * Get text till <code>token</code> occurs.
466    *
467    * @param token Terminator token.
468    * @return Read text before token.
469    */
 
470  5946 toggle private SubTextInput readTilToken(final String token) {
471  5946 final int localStart = input.getAbsolutePosition();
472  5946 final StringBuffer buffer = new StringBuffer();
473  5946 int current = localStart;
474  5946 do {
475  51486 current = input.getAbsolutePosition();
476  51486 final String item = readToken();
477  51486 if (item == null) {
478  0 Trace.fatal(CLASS, this, "readSection", "not found: " + token,
479    new IllegalArgumentException("from " + localStart + " to " + current
480    + input.getAbsolutePosition()));
481  0 break;
482    }
483  51486 if (token.equals(item)) {
484  5946 break;
485    }
486  45540 buffer.append(item);
487    } while (true);
488  5946 return input.getSubTextInput(localStart, current);
489    }
490   
491    /**
492    * Read next token from input stream.
493    *
494    * @return Read token.
495    */
 
496  951463 toggle protected final String readToken() {
497  951463 final String method = "readToken()";
498  951463 Trace.begin(CLASS, this, method);
499  951463 tokenBegin = input.getAbsolutePosition();
500  951463 StringBuffer token = new StringBuffer();
501  951463 try {
502  951463 do {
503  1549016 if (eof()) {
504  0 if (token.length() <= 0) {
505  0 token = null;
506    }
507  0 break;
508    }
509  1549016 final char c = (char) getChar();
510  1549016 if (Character.isDigit(c)) {
511  8428 token.append((char) read());
512  8428 if (Character.isDigit((char) getChar())) {
513  2753 continue;
514    }
515  5675 break;
516    }
517  1540588 if (Character.isLetter(c)) {
518  780779 token.append((char) read());
519  780779 if (Character.isLetter((char) getChar())) {
520  594419 continue;
521    }
522  186360 break;
523    }
524  759809 if (SPECIALCHARACTERS.indexOf(c) >= 0) {
525  205744 switch (c) {
526  5546 case '&':
527  13335 case '{':
528  12424 case '}':
529  2216 case '~':
530  5863 case '_':
531  594 case '^':
532  39978 token.append((char) read());
533  39978 break;
534  18278 case '$':
535  1203 case '\'':
536  750 case '`':
537  866 case '-':
538  21097 token.append((char) read());
539  21097 if (c == getChar()) {
540  355 continue;
541    }
542  20742 break;
543  26 case '%':
544  26 token.append((char) read());
545  26 if (c == getChar()) {
546    // we must skip till end of line
547  26 token.append(readln());
548    // System.out.println("skipping comment:");
549    // System.out.println(token);
550  26 token.setLength(0);
551  26 continue;
552    }
553  0 break;
554  96718 case '\\':
555  96718 if (' ' == getChar()) {
556  0 token.append("\\");
557  0 token.append((char) read());
558  0 break;
559    }
560  96718 final String t = readBackslashToken();
561  96718 token.append(t);
562  96718 break;
563  47925 default:
564  47925 read();
565  47925 token.append(c);
566    }
567  205363 break;
568    }
569  554065 token.append((char) read());
570  554065 if ('_' == getChar() || '^' == getChar()) {
571  0 token.append((char) read());
572  0 continue;
573    }
574  554065 break;
575  597553 } while (!eof());
576  951463 Trace.param(CLASS, this, method, "Read token", token);
577    // System.out.println("< " + token);
578  951463 tokenEnd = input.getAbsolutePosition();
579  951463 return (token != null ? token.toString() : null);
580    } finally {
581  951463 Trace.end(CLASS, this, method);
582    }
583    }
584   
585    /**
586    * Get token that starts with a backlash.
587    *
588    * @return Token with backslash.
589    */
 
590  96718 toggle private String readBackslashToken() {
591  96718 final String method = "readBackslashToken()";
592  96718 Trace.begin(CLASS, this, method);
593  96718 if (getChar() != '\\') {
594  0 throw new IllegalArgumentException("\\ expected");
595    }
596  96718 read(); // read \
597  96718 if (eof()) {
598  0 Trace.param(CLASS, this, method, "return", null);
599  0 Trace.end(CLASS, this, method);
600  0 return null;
601    }
602  96718 if (!Character.isLetter((char) getChar())) {
603  41954 Trace.param(CLASS, this, method, "return", (char) getChar());
604  41954 Trace.end(CLASS, this, method);
605  41954 return "\\" + ((char) read());
606    }
607  54764 final StringBuffer buffer = new StringBuffer("\\");
608  54764 do {
609  318328 buffer.append((char) read());
610  318328 } while (!eof() && (Character.isLetter((char) getChar()) || '*' == (char) getChar()));
611  54764 Trace.param(CLASS, this, method, "return", buffer.toString());
612  54764 Trace.end(CLASS, this, method);
613  54764 return buffer.toString();
614    }
615   
616    /**
617    * Read contents that is within { .. }.
618    *
619    * @return Contents.
620    */
 
621  9018 toggle private SubTextInput readCurlyBraceContents() {
622  9018 final int localStart = input.getAbsolutePosition();
623  9018 final String first = readToken();
624  9018 if (!"{".equals(first)) {
625  0 addWarning(LatexErrorCodes.BRACKET_START_NOT_FOUND_CODE,
626    LatexErrorCodes.BRACKET_START_NOT_FOUND_TEXT,
627    localStart, input.getAbsolutePosition());
628  0 throw new IllegalArgumentException("\"{\" expected, but was: \"" + first + "\"");
629    }
630  9018 final int curlyStart = input.getAbsolutePosition();
631  9018 int curlyEnd = curlyStart;
632  9018 final StringBuffer buffer = new StringBuffer();
633  9018 String next = "";
634  9018 int level = 1;
635  56594 while (level > 0 && getChar() != TextInput.EOF) {
636  56592 next = readToken();
637  56592 if ("{".equals(next)) {
638  503 level++;
639  56089 } else if ("}".equals(next)) {
640  9519 level--;
641    }
642  56592 if (level <= 0) {
643  9016 break;
644    }
645  47576 buffer.append(next);
646  47576 curlyEnd = input.getAbsolutePosition();
647    }
648  9018 if (!"}".equals(next)) {
649  2 addWarning(LatexErrorCodes.BRACKET_END_NOT_FOUND_CODE,
650    LatexErrorCodes.BRACKET_END_NOT_FOUND_TEXT,
651    localStart, input.getAbsolutePosition());
652  2 buffer.setLength(0);
653  2 input.setAbsolutePosition(curlyStart);
654  2 curlyEnd = curlyStart;
655    }
656  9018 return input.getSubTextInput(curlyStart, curlyEnd);
657    }
658   
659    /**
660    * Print <code>token</code> to output stream.
661    *
662    * @param token Print this for UTF-8.
663    */
 
664  380908 toggle private final void print(final String token) {
665    // System.out.println("> " + token);
666  380908 if (token.trim().length() == 0) {
667  128101 if (skipWhitespace) {
668  4496 return;
669    }
670    }
671  376412 skipWhitespace = false;
672  376412 if (token.equals("\\par")) {
673  1148 println();
674  1148 println();
675  1148 skipWhitespace = true;
676  375264 } else if (token.equals("\\\\")) {
677  1159 println();
678  374105 } else if (token.equals("&")) {
679  2713 output.addWs(" ");
680  371392 } else if (token.equals("\\-")) {
681    // ignore
682  371386 } else if (token.equals("--")) {
683  9 output.addToken("\u2012");
684  371377 } else if (token.equals("`")) {
685  336 output.addWs("\u2018");
686  371041 } else if (token.equals("'")) {
687  534 output.addToken("\u2019");
688  370507 } else if (token.equals("\\neq")) {
689  168 output.addToken("\u2260");
690  370339 } else if (token.equals("\\in")) {
691  1479 output.addToken("\u2208");
692  368860 } else if (token.equals("\\forall")) {
693  1424 output.addToken("\u2200");
694  367436 } else if (token.equals("\\exists")) {
695  748 output.addToken("\u2203");
696  366688 } else if (token.equals("\\emptyset")) {
697  263 output.addToken("\u2205");
698  366425 } else if (token.equals("\\rightarrow")) {
699  8620 output.addToken("\u2192");
700  357805 } else if (token.equals("\\Rightarrow")) {
701  18 output.addToken("\u21D2");
702  357787 } else if (token.equals("\\leftrightarrow")) {
703  1430 output.addToken("\u2194");
704  356357 } else if (token.equals("\\Leftarrow")) {
705  12 output.addToken("\u21D0");
706  356345 } else if (token.equals("\\Leftrightarrow")) {
707  0 output.addToken("\u21D4");
708  356345 } else if (token.equals("\\langle")) {
709  66 output.addToken("\u2329");
710  356279 } else if (token.equals("\\rangle")) {
711  66 output.addToken("\u232A");
712  356213 } else if (token.equals("\\land") || token.equals("\\vee")) {
713  2747 output.addToken("\u2227");
714  353466 } else if (token.equals("\\lor") || token.equals("\\wedge")) {
715  3414 output.addToken("\u2228");
716  350052 } else if (token.equals("\\bar")) {
717  19 output.addToken("\u203E");
718  350033 } else if (token.equals("\\bigcap")) {
719  90 output.addToken("\u22C2");
720  349943 } else if (token.equals("\\cap")) {
721  264 output.addToken("\u2229");
722  349679 } else if (token.equals("\\bigcup")) {
723  132 output.addToken("\u22C3");
724  349547 } else if (token.equals("\\cup")) {
725  259 output.addToken("\u222A");
726  349288 } else if (token.equals("\\in")) {
727  0 output.addToken("\u2208");
728  349288 } else if (token.equals("\\notin")) {
729  150 output.addToken("\u2209");
730  349138 } else if (token.equals("\\Alpha")) {
731  0 output.addToken("\u0391");
732  349138 } else if (token.equals("\\alpha")) {
733  1008 output.addToken("\u03B1");
734  348130 } else if (token.equals("\\Beta")) {
735  0 output.addToken("\u0392");
736  348130 } else if (token.equals("\\beta")) {
737  676 output.addToken("\u03B2");
738  347454 } else if (token.equals("\\Gamma")) {
739  18 output.addToken("\u0393");
740  347436 } else if (token.equals("\\gamma")) {
741  42 output.addToken("\u03B3");
742  347394 } else if (token.equals("\\Delta")) {
743  0 output.addToken("\u0394");
744  347394 } else if (token.equals("\\delta")) {
745  18 output.addToken("\u03B4");
746  347376 } else if (token.equals("\\Epslilon")) {
747  0 output.addToken("\u0395");
748  347376 } else if (token.equals("\\epsilon")) {
749  0 output.addToken("\u03B5");
750  347376 } else if (token.equals("\\Zeta")) {
751  0 output.addToken("\u0396");
752  347376 } else if (token.equals("\\zeta")) {
753  0 output.addToken("\u03B6");
754  347376 } else if (token.equals("\\Eta")) {
755  0 output.addToken("\u0397");
756  347376 } else if (token.equals("\\eta")) {
757  0 output.addToken("\u03B7");
758  347376 } else if (token.equals("\\Theta")) {
759  0 output.addToken("\u0398");
760  347376 } else if (token.equals("\\theta")) {
761  0 output.addToken("\u03B8");
762  347376 } else if (token.equals("\\Iota")) {
763  0 output.addToken("\u0399");
764  347376 } else if (token.equals("\\iota")) {
765  0 output.addToken("\u03B9");
766  347376 } else if (token.equals("\\Kappa")) {
767  0 output.addToken("\u039A");
768  347376 } else if (token.equals("\\kappa")) {
769  0 output.addToken("\u03BA");
770  347376 } else if (token.equals("\\Lamda")) {
771  0 output.addToken("\u039B");
772  347376 } else if (token.equals("\\lamda")) {
773  0 output.addToken("\u03BB");
774  347376 } else if (token.equals("\\Mu")) {
775  0 output.addToken("\u039C");
776  347376 } else if (token.equals("\\mu")) {
777  0 output.addToken("\u03BC");
778  347376 } else if (token.equals("\\Nu")) {
779  0 output.addToken("\u039D");
780  347376 } else if (token.equals("\\nu")) {
781  0 output.addToken("\u03BD");
782  347376 } else if (token.equals("\\Xi")) {
783  0 output.addToken("\u039E");
784  347376 } else if (token.equals("\\xi")) {
785  0 output.addToken("\u03BE");
786  347376 } else if (token.equals("\\Omikron")) {
787  0 output.addToken("\u039F");
788  347376 } else if (token.equals("\\omikron")) {
789  0 output.addToken("\u03BF");
790  347376 } else if (token.equals("\\Pi")) {
791  0 output.addToken("\u03A0");
792  347376 } else if (token.equals("\\pi")) {
793  0 output.addToken("\u03C0");
794  347376 } else if (token.equals("\\Rho")) {
795  0 output.addToken("\u03A1");
796  347376 } else if (token.equals("\\rho")) {
797  0 output.addToken("\u03C1");
798  347376 } else if (token.equals("\\Sigma")) {
799  0 output.addToken("\u03A3");
800  347376 } else if (token.equals("\\sigma")) {
801  79 output.addToken("\u03C3");
802  347297 } else if (token.equals("\\Tau")) {
803  0 output.addToken("\u03A4");
804  347297 } else if (token.equals("\\tau")) {
805  125 output.addToken("\u03C4");
806  347172 } else if (token.equals("\\Upsilon")) {
807  0 output.addToken("\u03A5");
808  347172 } else if (token.equals("\\upsilon")) {
809  0 output.addToken("\u03C5");
810  347172 } else if (token.equals("\\Phi")) {
811  0 output.addToken("\u03A6");
812  347172 } else if (token.equals("\\phi")) {
813  1661 output.addToken("\u03C6");
814  345511 } else if (token.equals("\\Chi")) {
815  0 output.addToken("\u03A6");
816  345511 } else if (token.equals("\\chi")) {
817  0 output.addToken("\u03C7");
818  345511 } else if (token.equals("\\Psi")) {
819  0 output.addToken("\u03A8");
820  345511 } else if (token.equals("\\psi")) {
821  258 output.addToken("\u03C8");
822  345253 } else if (token.equals("\\Omega")) {
823  0 output.addToken("\u03A9");
824  345253 } else if (token.equals("\\omega")) {
825  108 output.addToken("\u03C9");
826  345145 } else if (token.equals("\\subset")) {
827  0 output.addToken("\u2282");
828  345145 } else if (token.equals("\\supset")) {
829  12 output.addToken("\u2283");
830  345133 } else if (token.equals("\\subseteq")) {
831  335 output.addToken("\u2286");
832  344798 } else if (token.equals("\\supseteq")) {
833  0 output.addToken("\u2287");
834  344798 } else if (token.equals("\\{")) {
835  849 output.addToken("{");
836  343949 } else if (token.equals("\\}")) {
837  849 output.addToken("}");
838  343100 } else if (token.equals("\\&")) {
839  6 output.addToken("&");
840  343094 } else if (token.equals("\\ ")) {
841  34029 output.addWs(" ");
842  309065 } else if (token.equals("\\S")) {
843  18 output.addToken("\u00A7");
844  309047 } else if (token.equals("\\tt")) {
845    // ignore
846  308832 } else if (token.equals("\\tiny")) {
847    // ignore
848  308802 } else if (token.equals("\\nonumber")) {
849    // ignore
850  308796 } else if (token.equals("\\LaTeX")) {
851  131 output.addToken("LaTeX");
852  308665 } else if (token.equals("\\vdash")) {
853  18 output.addToken("\u22A2");
854  308647 } else if (token.equals("\\dashv")) {
855  0 output.addToken("\u22A3");
856  308647 } else if (token.equals("\\times")) {
857  12 output.addToken("\u00D7");
858  308635 } else if (token.equals("~")) {
859  1082 output.addToken("\u00A0");
860  307553 } else if (token.equals("\\quad")) {
861    // output.addWs("\u2000");
862  7 output.addWs(" ");
863  307546 } else if (token.equals("\\qquad")) {
864    // output.addWs("\u2000\u2000");
865  38 output.addWs(" ");
866  307508 } else if (token.equals("\\,")) {
867    // output.addWs("\u2009");
868  12 output.addWs(" ");
869  307496 } else if (token.equals("\\neg") || token.equals("\\not")) {
870  1469 output.addToken("\u00AC");
871  306027 } else if (token.equals("\\bot")) {
872  66 output.addToken("\u22A5");
873  305961 } else if (token.equals("\\top")) {
874  132 output.addToken("\u22A4");
875  305829 } else if (token.equals("''") || token.equals("\\grqq")) {
876  179 output.addToken("\u201D");
877  305650 } else if (token.equals("``") || token.equals("\\glqq")) {
878  185 skipWhitespace = true;
879  185 output.addToken("\u201E");
880  305465 } else if (token.equals("\\ldots")) {
881  780 output.addToken("...");
882  304685 } else if (token.equals("\\cdots")) {
883  0 output.addToken("\u00B7\u00B7\u00B7");
884  304685 } else if (token.equals("\\hdots")) {
885  0 output.addToken("\u00B7\u00B7\u00B7");
886  304685 } else if (token.equals("\\vdots")) {
887  6 output.addToken("\u2807");
888  304679 } else if (token.equals("\\overline")) { // TODO 20101018 m31: we assume set complement
889  228 output.addToken("\u2201");
890  304451 } else if (token.startsWith("\\")) {
891  106 addWarning(LatexErrorCodes.COMMAND_NOT_SUPPORTED_CODE,
892    LatexErrorCodes.COMMAND_NOT_SUPPORTED_TEXT + token, tokenBegin, tokenEnd);
893    } else {
894  304345 if (mathfrak) {
895  1520 mathfrak(token);
896  302825 } else if (mathbb) {
897  6 mathbb(token);
898  302819 } else if (emph) {
899  4350 emph(token);
900  298469 } else if (bold) {
901  362 bold(token);
902    } else {
903  298107 if (isWs(token)) {
904  122732 output.addWs(token);
905    } else {
906  175375 output.addToken(token);
907    }
908    }
909    }
910    }
911   
912    /**
913    * Write token chars in mathbb mode.
914    *
915    * @param token Chars to write.
916    */
 
917  4350 toggle private void emph(final String token) {
918  4350 if (isWs(token)) {
919  843 output.addWs(Latex2UnicodeSpecials.transform2Emph(token));
920    } else {
921  3507 output.addToken(Latex2UnicodeSpecials.transform2Emph(token));
922    }
923    }
924   
925    /**
926    * Write token chars in mathbb mode.
927    *
928    * @param token Chars to write.
929    */
 
930  6 toggle private void mathbb(final String token) {
931  12 for (int i = 0; i < token.length(); i++) {
932  6 final char c = token.charAt(i);
933  6 switch (c) {
934  0 case 'C': output.addToken("\u2102");
935  0 break;
936  0 case 'H': output.addToken("\u210D");
937  0 break;
938  6 case 'N': output.addToken("\u2115");
939  6 break;
940  0 case 'P': output.addToken("\u2119");
941  0 break;
942  0 case 'Q': output.addToken("\u211A");
943  0 break;
944  0 case 'R': output.addToken("\u211D");
945  0 break;
946  0 case 'Z': output.addToken("\u2124");
947  0 break;
948  0 default:
949  0 if (Character.isWhitespace(c)) {
950  0 output.addWs("" + c);
951    } else {
952  0 output.addToken("" + c);
953    }
954    }
955    }
956    }
957   
 
958  304339 toggle private boolean isWs(final String token) {
959  304339 return token == null || token.trim().length() == 0;
960    }
961   
962    /**
963    * Write token chars in mathfrak mode.
964    *
965    * @param token Chars to write.
966    */
 
967  1520 toggle private void mathfrak(final String token) {
968  1520 if (isWs(token)) {
969  0 output.addWs(Latex2UnicodeSpecials.transform2Mathfrak(token));
970    } else {
971  1520 output.addToken(Latex2UnicodeSpecials.transform2Mathfrak(token));
972    }
973    }
974   
975    /**
976    * Write token in bold mode.
977    *
978    * @param token Chars to write.
979    */
 
980  362 toggle private void bold(final String token) {
981  362 if (isWs(token)) {
982  30 output.addWs(Latex2UnicodeSpecials.transform2Bold(token));
983    } else {
984  332 output.addToken(Latex2UnicodeSpecials.transform2Bold(token));
985    }
986    }
987   
988    /**
989    * Print end of line.
990    */
 
991  4851 toggle private final void println() {
992  4851 output.println();
993    }
994   
995    /**
996    * Reads a single character and does not change the reading
997    * position.
998    *
999    * @return character read, if there are no more chars
1000    * <code>-1</code> is returned
1001    */
 
1002  4490314 toggle protected final int getChar() {
1003  4490314 return input.getChar();
1004    }
1005   
1006    /**
1007    * Reads a single character and increments the reading position
1008    * by one.
1009    *
1010    * @return character read, if there are no more chars
1011    * <code>-1</code> is returned
1012    */
 
1013  1910912 toggle protected final int read() {
1014  1910912 return input.read();
1015    }
1016   
1017    /**
1018    * Read until end of line.
1019    *
1020    * @return Characters read.
1021    */
 
1022  26 toggle protected final String readln() {
1023  26 StringBuffer result = new StringBuffer();
1024  26 int c;
1025  ? while (TextInput.EOF != (c = read())) {
1026  1614 if (c == '\n') {
1027  26 break;
1028    }
1029  1588 result.append((char) c);
1030    }
1031  26 return result.toString();
1032    }
1033   
1034    /**
1035    * Are there still any characters to read?
1036    *
1037    * @return Anything left for reading further?
1038    */
 
1039  3260795 toggle public final boolean eof() {
1040  3260795 return input.isEmpty();
1041    }
1042   
1043    /**
1044    * Convert character position into row and column information.
1045    *
1046    * @param absolutePosition Find this character position.
1047    * @return Row and column information.
1048    */
 
1049  1612 toggle public SourcePosition getAbsoluteSourcePosition(final int absolutePosition) {
1050  1612 return ((SubTextInput) inputStack.get(0)).getPosition(absolutePosition);
1051    }
1052   
1053    /**
1054    * Add warning message.
1055    *
1056    * @param code Message code.
1057    * @param message Message.
1058    * @param from Absolute character position of problem start.
1059    * @param to Absolute character position of problem end.
1060    */
 
1061  110 toggle private void addWarning(final int code, final String message, final int from, final int to) {
1062  110 finder.addWarning(code, message, getAbsoluteSourcePosition(from),
1063    getAbsoluteSourcePosition(to));
1064    }
1065   
1066   
1067   
1068    }