1 | /* This file is part of the project "Hilbert II" - http://www.qedeq.org |
2 | * |
3 | * Copyright 2000-2014, Michael Meyling <mime@qedeq.org>. |
4 | * |
5 | * "Hilbert II" is free software; you can redistribute |
6 | * it and/or modify it under the terms of the GNU General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2 of the License, or (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | */ |
15 | |
16 | package org.qedeq.kernel.bo.service.unicode; |
17 | |
18 | import java.util.Stack; |
19 | |
20 | import org.qedeq.base.io.AbstractOutput; |
21 | import org.qedeq.base.io.SourcePosition; |
22 | import org.qedeq.base.io.StringOutput; |
23 | import org.qedeq.base.io.SubTextInput; |
24 | import org.qedeq.base.io.TextInput; |
25 | import org.qedeq.base.trace.Trace; |
26 | import org.qedeq.kernel.bo.service.latex.LatexErrorCodes; |
27 | |
28 | /** |
29 | * Transform LaTeX into Unicode format. |
30 | * |
31 | * @author Michael Meyling |
32 | */ |
33 | public final class Latex2UnicodeParser { |
34 | |
35 | /** This class. */ |
36 | private static final Class CLASS = Latex2UnicodeParser.class; |
37 | |
38 | /** These characters get a special treatment in LaTeX. */ |
39 | private static final String SPECIALCHARACTERS = "(),{}\\~%$&\'`^_-"; |
40 | |
41 | /** Herein goes our output. */ |
42 | private final AbstractOutput output; |
43 | |
44 | /** Resolver for references. */ |
45 | private final ReferenceFinder finder; |
46 | |
47 | /** This is our current input stream .*/ |
48 | private SubTextInput input; |
49 | |
50 | /** Math mode on? */ |
51 | private boolean mathMode = false; |
52 | |
53 | /** Mathfrak mode on? */ |
54 | private boolean mathfrak = false; |
55 | |
56 | /** Emphasize on? */ |
57 | private boolean emph = false; |
58 | |
59 | /** Bold on? */ |
60 | private boolean bold = false; |
61 | |
62 | /** Mathbb on? */ |
63 | private boolean mathbb = false; |
64 | |
65 | /** Stack for input parser. */ |
66 | private Stack inputStack = new Stack(); |
67 | |
68 | /** Stack for math mode. */ |
69 | private Stack mathModeStack = new Stack(); |
70 | |
71 | /** Stack for mathfrak mode. */ |
72 | private Stack mathfrakStack = new Stack(); |
73 | |
74 | /** Stack for emphasize mode. */ |
75 | private Stack emphStack = new Stack(); |
76 | |
77 | /** Stack for bold mode. */ |
78 | private Stack boldStack = new Stack(); |
79 | |
80 | /** Stack for mathbb mode. */ |
81 | private Stack mathbbStack = new Stack(); |
82 | |
83 | /** Stack for skipWhitspace mode. */ |
84 | private Stack skipWhitespaceStack = new Stack(); |
85 | |
86 | /** Should I skip whitespace before printing the next token. */ |
87 | private boolean skipWhitespace; |
88 | |
89 | /** Here the last read token begins. This is an absolute position. */ |
90 | private int tokenBegin; |
91 | |
92 | /** Here the last read token ends. This is an absolute position. */ |
93 | private int tokenEnd; |
94 | |
95 | /** Current item number. */ |
96 | private int itemNumber; |
97 | |
98 | /** |
99 | * Parse LaTeX text into QEDEQ module string. |
100 | * |
101 | * @param finder Finder for references. |
102 | * @param input Parse this input. |
103 | * @param columns Maximum column number. Break (if possible) before. |
104 | * @return QEDEQ module string. |
105 | */ |
106 | public static final String transform(final ReferenceFinder finder, final String input, |
107 | final int columns) { |
108 | final Latex2UnicodeParser parser = new Latex2UnicodeParser(finder); |
109 | parser.output.setColumns(columns); |
110 | return parser.getUtf8(input); |
111 | } |
112 | |
/**
 * Create a parser that writes into a fresh string output.
 *
 * @param   finder  Resolves references and receives warnings. If <code>null</code> a
 *                  fallback is installed that prints references in square brackets and
 *                  drops all warnings.
 */
private Latex2UnicodeParser(final ReferenceFinder finder) {
    this.output = new StringOutput();
    this.finder = (finder != null ? finder : new ReferenceFinder() {

        public String getReferenceLink(final String reference,
                final SourcePosition startDelta, final SourcePosition endDelta) {
            return "[" + reference + "]";
        }

        public void addWarning(final int code, final String msg,
                final SourcePosition startDelta, final SourcePosition endDelta) {
            // warnings are dropped on purpose
        }
    });
}
137 | |
138 | /** |
139 | * Get UTF-8 String out of LaTeX text. |
140 | * |
141 | * @param text LaTeX. |
142 | * @return UTF-8. |
143 | */ |
144 | private String getUtf8(final String text) { |
145 | skipWhitespace = true; |
146 | this.input = new SubTextInput(text); |
147 | parseAndPrint(this.input); |
148 | return output.toString(); |
149 | } |
150 | |
151 | /** |
152 | * Do parsing and print result. |
153 | * |
154 | * @param input Parse this LaTeX text and print UTF-8 into output. |
155 | */ |
156 | private void parseAndPrint(final SubTextInput input) { |
157 | // remember old: |
158 | inputStack.push(this.input); |
159 | mathModeStack.push(Boolean.valueOf(mathMode)); |
160 | mathfrakStack.push(Boolean.valueOf(mathfrak)); |
161 | emphStack.push(Boolean.valueOf(emph)); |
162 | boldStack.push(Boolean.valueOf(bold)); |
163 | mathbbStack.push(Boolean.valueOf(mathbb)); |
164 | skipWhitespaceStack.push(Boolean.valueOf(skipWhitespace)); |
165 | try { |
166 | this.input = input; |
167 | boolean whitespace = false; |
168 | while (!eof()) { |
169 | String token = readToken(); |
170 | if (!token.startsWith("\\")) { |
171 | token = token.trim(); |
172 | } |
173 | if (token.length() == 0) { |
174 | whitespace = true; |
175 | continue; |
176 | } |
177 | if (whitespace && !"\\par".equals(token)) { |
178 | print(" "); |
179 | whitespace = false; |
180 | } |
181 | if ("\\begin".equals(token)) { |
182 | parseBegin(); |
183 | } else if ("\\footnote".equals(token)) { |
184 | parseFootnote(); |
185 | } else if ("\\qref".equals(token)) { |
186 | parseQref(); |
187 | } else if ("$$".equals(token)) { |
188 | mathMode = true; |
189 | final SubTextInput content = readTilToken(token); |
190 | println(); |
191 | parseAndPrint(content); |
192 | println(); |
193 | mathMode = false; |
194 | } else if ("$".equals(token)) { |
195 | mathMode = true; |
196 | final SubTextInput content = readTilToken(token); |
197 | parseAndPrint(content); |
198 | mathMode = false; |
199 | } else if ("\\mathfrak".equals(token)) { |
200 | if ('{' == getChar()) { |
201 | mathfrak = true; |
202 | final SubTextInput content = readCurlyBraceContents(); |
203 | parseAndPrint(content); |
204 | mathfrak = false; |
205 | } else { |
206 | mathfrak = true; |
207 | } |
208 | } else if ("\\mathbb".equals(token)) { |
209 | if ('{' == getChar()) { |
210 | mathbb = true; |
211 | final SubTextInput content = readCurlyBraceContents(); |
212 | parseAndPrint(content); |
213 | mathbb = false; |
214 | } else { |
215 | mathbb = true; |
216 | } |
217 | } else if ("\\emph".equals(token)) { |
218 | if ('{' == getChar()) { |
219 | emph = true; |
220 | final SubTextInput content = readCurlyBraceContents(); |
221 | parseAndPrint(content); |
222 | // output.addWs("\u2006"); |
223 | output.addWs(" "); |
224 | emph = false; |
225 | } else { |
226 | emph = true; |
227 | } |
228 | } else if ("\\textbf".equals(token)) { |
229 | if ('{' == getChar()) { |
230 | bold = true; |
231 | final SubTextInput content = readCurlyBraceContents(); |
232 | parseAndPrint(content); |
233 | bold = false; |
234 | } else { |
235 | bold = true; |
236 | } |
237 | } else if ("\\cite".equals(token)) { |
238 | if ('{' == getChar()) { |
239 | final SubTextInput content = readCurlyBraceContents(); |
240 | output.addToken("[" + content.asString() + "]"); |
241 | } |
242 | } else if ("\\tag".equals(token)) { |
243 | if ('{' == getChar()) { |
244 | final SubTextInput content = readCurlyBraceContents(); |
245 | output.addToken("(" + content.asString() + ")"); |
246 | } |
247 | } else if ("\\mbox".equals(token)) { |
248 | if ('{' == getChar()) { |
249 | final SubTextInput content = readCurlyBraceContents(); |
250 | parseAndPrint(content); |
251 | } |
252 | } else if ("\\cline".equals(token)) { |
253 | if ('{' == getChar()) { |
254 | readCurlyBraceContents(); |
255 | // ignore |
256 | } |
257 | output.addToken("_______________________________________"); |
258 | println(); |
259 | } else if ("\\item".equals(token)) { |
260 | output.popLevel(3); |
261 | itemNumber++; |
262 | output.println(); |
263 | output.addToken(itemNumber + "."); |
264 | output.addWs(""); |
265 | output.pushLevel(" "); |
266 | output.setTabLevel(); |
267 | } else if ("{".equals(token)) { |
268 | input.readInverse(); |
269 | final SubTextInput content = readCurlyBraceContents(); |
270 | parseAndPrint(content); |
271 | } else if ("\\url".equals(token)) { |
272 | final SubTextInput content = readCurlyBraceContents(); |
273 | output.addToken(" " + content.asString() + " "); |
274 | } else if ('{' == getChar() && ("\\index".equals(token) || "\\label".equals(token) |
275 | || token.equals("\\vspace") || token.equals("\\hspace") |
276 | || token.equals("\\vspace*") || token.equals("\\hspace*"))) { |
277 | // ignore content |
278 | readCurlyBraceContents(); |
279 | } else if ("_".equals(token) || "^".equals(token)) { |
280 | if (mathMode) { |
281 | String content; |
282 | if ('{' == getChar()) { |
283 | content = readCurlyBraceContents().asString(); |
284 | } else { |
285 | content = readToken(); |
286 | } |
287 | if ("_".equals(token)) { |
288 | printSubscript(content); |
289 | } else { |
290 | printSuperscript(content); |
291 | } |
292 | } else { |
293 | print(token); |
294 | } |
295 | } else { |
296 | print(token); |
297 | } |
298 | } |
299 | } finally { |
300 | this.input = (SubTextInput) inputStack.pop(); |
301 | mathMode = ((Boolean) mathModeStack.pop()).booleanValue(); |
302 | mathfrak = ((Boolean) mathfrakStack.pop()).booleanValue(); |
303 | emph = ((Boolean) emphStack.pop()).booleanValue(); |
304 | bold = ((Boolean) boldStack.pop()).booleanValue(); |
305 | skipWhitespace = ((Boolean) skipWhitespaceStack.pop()).booleanValue(); |
306 | output.flush(); |
307 | } |
308 | } |
309 | |
310 | /** |
311 | * Parse after \footnote. |
312 | */ |
313 | private void parseFootnote() { |
314 | if ('{' == getChar()) { |
315 | final SubTextInput content = readCurlyBraceContents(); |
316 | println(); |
317 | output.printWithoutSplit(" \u250C"); |
318 | output.pushLevel(); |
319 | output.pushLevel(); |
320 | output.pushLevel(); |
321 | output.pushLevel(); |
322 | output.pushLevel(); |
323 | output.pushLevel("\u2502 "); |
324 | println(); |
325 | parseAndPrint(content); |
326 | output.popLevel(); |
327 | output.popLevel(); |
328 | output.popLevel(); |
329 | output.popLevel(); |
330 | output.popLevel(); |
331 | output.popLevel(); |
332 | println(); |
333 | output.printWithoutSplit(" \u2514"); |
334 | println(); |
335 | } |
336 | } |
337 | |
338 | /** |
339 | * Transform <code>\qref{key}</code> entries into common LaTeX code. |
340 | * |
341 | * @param text Work on this text. |
342 | * @return Result of transforming \qref into text. |
343 | */ |
344 | /** |
345 | * Parse after \footnote. |
346 | */ |
347 | private void parseQref() { |
348 | final String method = "parseQref()"; |
349 | final int localStart1 = input.getAbsolutePosition(); |
350 | if ('{' == getChar()) { |
351 | final SubTextInput content = readCurlyBraceContents(); |
352 | String ref = content.asString().trim(); |
353 | Trace.param(CLASS, this, method, "ref", ref); |
354 | if (ref.length() == 0) { |
355 | addWarning(LatexErrorCodes.QREF_EMPTY_CODE, LatexErrorCodes.QREF_EMPTY_TEXT, |
356 | localStart1, input.getAbsolutePosition()); |
357 | return; |
358 | } |
359 | if (ref.length() > 1024) { |
360 | addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE, |
361 | LatexErrorCodes.QREF_END_NOT_FOUND_TEXT, |
362 | localStart1, input.getAbsolutePosition()); |
363 | return; |
364 | } |
365 | if (ref.indexOf("{") >= 0) { |
366 | addWarning(LatexErrorCodes.QREF_END_NOT_FOUND_CODE, |
367 | LatexErrorCodes.QREF_END_NOT_FOUND_TEXT, |
368 | localStart1, input.getAbsolutePosition()); |
369 | input.setAbsolutePosition(localStart1); |
370 | return; |
371 | } |
372 | |
373 | String display = finder.getReferenceLink(ref, getAbsoluteSourcePosition(localStart1), |
374 | getAbsoluteSourcePosition(input.getAbsolutePosition())); |
375 | output.addToken(display); |
376 | } |
377 | } |
378 | |
379 | |
380 | /** |
381 | * Parse after \begin. |
382 | */ |
383 | private void parseBegin() { |
384 | final String kind = readCurlyBraceContents().asString(); // ignore |
385 | final SubTextInput content = readSection(kind); |
386 | if ("eqnarray".equals(kind) |
387 | || "eqnarray*".equals(kind) |
388 | || "equation*".equals(kind)) { |
389 | mathMode = true; |
390 | skipWhitespace = false; |
391 | parseAndPrint(content); |
392 | println(); |
393 | mathMode = false; |
394 | } else if ("quote".equals(kind)) { |
395 | output.pushLevel(); |
396 | output.pushLevel(); |
397 | output.pushLevel(); |
398 | println(); |
399 | parseAndPrint(content); |
400 | println(); |
401 | output.popLevel(); |
402 | output.popLevel(); |
403 | output.popLevel(); |
404 | } else if ("tabularx".equals(kind)) { |
405 | skipWhitespace = false; |
406 | parseAndPrint(content); |
407 | } else if ("enumerate".equals(kind)) { |
408 | itemNumber = 0; |
409 | output.pushLevel(" "); |
410 | parseAndPrint(content); |
411 | output.popLevel(3); |
412 | } else if ("verbatim".equals(kind)) { |
413 | final String level = output.getLevel(); |
414 | output.setLevel(""); |
415 | print(content.asString()); |
416 | output.setLevel(level); |
417 | } else { |
418 | parseAndPrint(content); |
419 | } |
420 | } |
421 | |
422 | private void printSubscript(final String content) { |
423 | output.addToken(Latex2UnicodeSpecials.transform2Subscript(content)); |
424 | } |
425 | |
426 | private void printSuperscript(final String content) { |
427 | output.addToken(Latex2UnicodeSpecials.transform2Superscript(content)); |
428 | } |
429 | |
430 | /** |
431 | * Read until section ends with \{kind}. |
432 | * |
433 | * @param kind Look for the end of this. |
434 | * @return Read text. |
435 | */ |
436 | private SubTextInput readSection(final String kind) { |
437 | if ('{' == getChar()) { // skip content |
438 | readCurlyBraceContents(); |
439 | } |
440 | if ('{' == getChar()) { // skip content |
441 | readCurlyBraceContents(); |
442 | } |
443 | final int localStart = input.getAbsolutePosition(); |
444 | int current = localStart; |
445 | do { |
446 | current = input.getAbsolutePosition(); |
447 | final String item = readToken(); |
448 | if (item == null) { |
449 | Trace.fatal(CLASS, this, "readSection", "not found: " + "\\end{" + kind + "}", |
450 | new IllegalArgumentException("from " + localStart + " to " + input.getAbsolutePosition() |
451 | + input.getPosition())); |
452 | break; |
453 | } |
454 | if ("\\end".equals(item)) { |
455 | final String curly2 = readCurlyBraceContents().asString(); |
456 | if (kind.equals(curly2)) { |
457 | break; |
458 | } |
459 | } |
460 | } while (true); |
461 | return input.getSubTextInput(localStart, current); |
462 | } |
463 | |
464 | /** |
465 | * Get text till <code>token</code> occurs. |
466 | * |
467 | * @param token Terminator token. |
468 | * @return Read text before token. |
469 | */ |
470 | private SubTextInput readTilToken(final String token) { |
471 | final int localStart = input.getAbsolutePosition(); |
472 | final StringBuffer buffer = new StringBuffer(); |
473 | int current = localStart; |
474 | do { |
475 | current = input.getAbsolutePosition(); |
476 | final String item = readToken(); |
477 | if (item == null) { |
478 | Trace.fatal(CLASS, this, "readSection", "not found: " + token, |
479 | new IllegalArgumentException("from " + localStart + " to " + current |
480 | + input.getAbsolutePosition())); |
481 | break; |
482 | } |
483 | if (token.equals(item)) { |
484 | break; |
485 | } |
486 | buffer.append(item); |
487 | } while (true); |
488 | return input.getSubTextInput(localStart, current); |
489 | } |
490 | |
491 | /** |
492 | * Read next token from input stream. |
493 | * |
494 | * @return Read token. |
495 | */ |
496 | protected final String readToken() { |
497 | final String method = "readToken()"; |
498 | Trace.begin(CLASS, this, method); |
499 | tokenBegin = input.getAbsolutePosition(); |
500 | StringBuffer token = new StringBuffer(); |
501 | try { |
502 | do { |
503 | if (eof()) { |
504 | if (token.length() <= 0) { |
505 | token = null; |
506 | } |
507 | break; |
508 | } |
509 | final char c = (char) getChar(); |
510 | if (Character.isDigit(c)) { |
511 | token.append((char) read()); |
512 | if (Character.isDigit((char) getChar())) { |
513 | continue; |
514 | } |
515 | break; |
516 | } |
517 | if (Character.isLetter(c)) { |
518 | token.append((char) read()); |
519 | if (Character.isLetter((char) getChar())) { |
520 | continue; |
521 | } |
522 | break; |
523 | } |
524 | if (SPECIALCHARACTERS.indexOf(c) >= 0) { |
525 | switch (c) { |
526 | case '&': |
527 | case '{': |
528 | case '}': |
529 | case '~': |
530 | case '_': |
531 | case '^': |
532 | token.append((char) read()); |
533 | break; |
534 | case '$': |
535 | case '\'': |
536 | case '`': |
537 | case '-': |
538 | token.append((char) read()); |
539 | if (c == getChar()) { |
540 | continue; |
541 | } |
542 | break; |
543 | case '%': |
544 | token.append((char) read()); |
545 | if (c == getChar()) { |
546 | // we must skip till end of line |
547 | token.append(readln()); |
548 | // System.out.println("skipping comment:"); |
549 | // System.out.println(token); |
550 | token.setLength(0); |
551 | continue; |
552 | } |
553 | break; |
554 | case '\\': |
555 | if (' ' == getChar()) { |
556 | token.append("\\"); |
557 | token.append((char) read()); |
558 | break; |
559 | } |
560 | final String t = readBackslashToken(); |
561 | token.append(t); |
562 | break; |
563 | default: |
564 | read(); |
565 | token.append(c); |
566 | } |
567 | break; |
568 | } |
569 | token.append((char) read()); |
570 | if ('_' == getChar() || '^' == getChar()) { |
571 | token.append((char) read()); |
572 | continue; |
573 | } |
574 | break; |
575 | } while (!eof()); |
576 | Trace.param(CLASS, this, method, "Read token", token); |
577 | // System.out.println("< " + token); |
578 | tokenEnd = input.getAbsolutePosition(); |
579 | return (token != null ? token.toString() : null); |
580 | } finally { |
581 | Trace.end(CLASS, this, method); |
582 | } |
583 | } |
584 | |
585 | /** |
586 | * Get token that starts with a backlash. |
587 | * |
588 | * @return Token with backslash. |
589 | */ |
590 | private String readBackslashToken() { |
591 | final String method = "readBackslashToken()"; |
592 | Trace.begin(CLASS, this, method); |
593 | if (getChar() != '\\') { |
594 | throw new IllegalArgumentException("\\ expected"); |
595 | } |
596 | read(); // read \ |
597 | if (eof()) { |
598 | Trace.param(CLASS, this, method, "return", null); |
599 | Trace.end(CLASS, this, method); |
600 | return null; |
601 | } |
602 | if (!Character.isLetter((char) getChar())) { |
603 | Trace.param(CLASS, this, method, "return", (char) getChar()); |
604 | Trace.end(CLASS, this, method); |
605 | return "\\" + ((char) read()); |
606 | } |
607 | final StringBuffer buffer = new StringBuffer("\\"); |
608 | do { |
609 | buffer.append((char) read()); |
610 | } while (!eof() && (Character.isLetter((char) getChar()) || '*' == (char) getChar())); |
611 | Trace.param(CLASS, this, method, "return", buffer.toString()); |
612 | Trace.end(CLASS, this, method); |
613 | return buffer.toString(); |
614 | } |
615 | |
616 | /** |
617 | * Read contents that is within { .. }. |
618 | * |
619 | * @return Contents. |
620 | */ |
621 | private SubTextInput readCurlyBraceContents() { |
622 | final int localStart = input.getAbsolutePosition(); |
623 | final String first = readToken(); |
624 | if (!"{".equals(first)) { |
625 | addWarning(LatexErrorCodes.BRACKET_START_NOT_FOUND_CODE, |
626 | LatexErrorCodes.BRACKET_START_NOT_FOUND_TEXT, |
627 | localStart, input.getAbsolutePosition()); |
628 | throw new IllegalArgumentException("\"{\" expected, but was: \"" + first + "\""); |
629 | } |
630 | final int curlyStart = input.getAbsolutePosition(); |
631 | int curlyEnd = curlyStart; |
632 | final StringBuffer buffer = new StringBuffer(); |
633 | String next = ""; |
634 | int level = 1; |
635 | while (level > 0 && getChar() != TextInput.EOF) { |
636 | next = readToken(); |
637 | if ("{".equals(next)) { |
638 | level++; |
639 | } else if ("}".equals(next)) { |
640 | level--; |
641 | } |
642 | if (level <= 0) { |
643 | break; |
644 | } |
645 | buffer.append(next); |
646 | curlyEnd = input.getAbsolutePosition(); |
647 | } |
648 | if (!"}".equals(next)) { |
649 | addWarning(LatexErrorCodes.BRACKET_END_NOT_FOUND_CODE, |
650 | LatexErrorCodes.BRACKET_END_NOT_FOUND_TEXT, |
651 | localStart, input.getAbsolutePosition()); |
652 | buffer.setLength(0); |
653 | input.setAbsolutePosition(curlyStart); |
654 | curlyEnd = curlyStart; |
655 | } |
656 | return input.getSubTextInput(curlyStart, curlyEnd); |
657 | } |
658 | |
659 | /** |
660 | * Print <code>token</code> to output stream. |
661 | * |
662 | * @param token Print this for UTF-8. |
663 | */ |
664 | private final void print(final String token) { |
665 | // System.out.println("> " + token); |
666 | if (token.trim().length() == 0) { |
667 | if (skipWhitespace) { |
668 | return; |
669 | } |
670 | } |
671 | skipWhitespace = false; |
672 | if (token.equals("\\par")) { |
673 | println(); |
674 | println(); |
675 | skipWhitespace = true; |
676 | } else if (token.equals("\\\\")) { |
677 | println(); |
678 | } else if (token.equals("&")) { |
679 | output.addWs(" "); |
680 | } else if (token.equals("\\-")) { |
681 | // ignore |
682 | } else if (token.equals("--")) { |
683 | output.addToken("\u2012"); |
684 | } else if (token.equals("`")) { |
685 | output.addWs("\u2018"); |
686 | } else if (token.equals("'")) { |
687 | output.addToken("\u2019"); |
688 | } else if (token.equals("\\neq")) { |
689 | output.addToken("\u2260"); |
690 | } else if (token.equals("\\in")) { |
691 | output.addToken("\u2208"); |
692 | } else if (token.equals("\\forall")) { |
693 | output.addToken("\u2200"); |
694 | } else if (token.equals("\\exists")) { |
695 | output.addToken("\u2203"); |
696 | } else if (token.equals("\\emptyset")) { |
697 | output.addToken("\u2205"); |
698 | } else if (token.equals("\\rightarrow")) { |
699 | output.addToken("\u2192"); |
700 | } else if (token.equals("\\Rightarrow")) { |
701 | output.addToken("\u21D2"); |
702 | } else if (token.equals("\\leftrightarrow")) { |
703 | output.addToken("\u2194"); |
704 | } else if (token.equals("\\Leftarrow")) { |
705 | output.addToken("\u21D0"); |
706 | } else if (token.equals("\\Leftrightarrow")) { |
707 | output.addToken("\u21D4"); |
708 | } else if (token.equals("\\langle")) { |
709 | output.addToken("\u2329"); |
710 | } else if (token.equals("\\rangle")) { |
711 | output.addToken("\u232A"); |
712 | } else if (token.equals("\\land") || token.equals("\\vee")) { |
713 | output.addToken("\u2227"); |
714 | } else if (token.equals("\\lor") || token.equals("\\wedge")) { |
715 | output.addToken("\u2228"); |
716 | } else if (token.equals("\\bar")) { |
717 | output.addToken("\u203E"); |
718 | } else if (token.equals("\\bigcap")) { |
719 | output.addToken("\u22C2"); |
720 | } else if (token.equals("\\cap")) { |
721 | output.addToken("\u2229"); |
722 | } else if (token.equals("\\bigcup")) { |
723 | output.addToken("\u22C3"); |
724 | } else if (token.equals("\\cup")) { |
725 | output.addToken("\u222A"); |
726 | } else if (token.equals("\\in")) { |
727 | output.addToken("\u2208"); |
728 | } else if (token.equals("\\notin")) { |
729 | output.addToken("\u2209"); |
730 | } else if (token.equals("\\Alpha")) { |
731 | output.addToken("\u0391"); |
732 | } else if (token.equals("\\alpha")) { |
733 | output.addToken("\u03B1"); |
734 | } else if (token.equals("\\Beta")) { |
735 | output.addToken("\u0392"); |
736 | } else if (token.equals("\\beta")) { |
737 | output.addToken("\u03B2"); |
738 | } else if (token.equals("\\Gamma")) { |
739 | output.addToken("\u0393"); |
740 | } else if (token.equals("\\gamma")) { |
741 | output.addToken("\u03B3"); |
742 | } else if (token.equals("\\Delta")) { |
743 | output.addToken("\u0394"); |
744 | } else if (token.equals("\\delta")) { |
745 | output.addToken("\u03B4"); |
746 | } else if (token.equals("\\Epslilon")) { |
747 | output.addToken("\u0395"); |
748 | } else if (token.equals("\\epsilon")) { |
749 | output.addToken("\u03B5"); |
750 | } else if (token.equals("\\Zeta")) { |
751 | output.addToken("\u0396"); |
752 | } else if (token.equals("\\zeta")) { |
753 | output.addToken("\u03B6"); |
754 | } else if (token.equals("\\Eta")) { |
755 | output.addToken("\u0397"); |
756 | } else if (token.equals("\\eta")) { |
757 | output.addToken("\u03B7"); |
758 | } else if (token.equals("\\Theta")) { |
759 | output.addToken("\u0398"); |
760 | } else if (token.equals("\\theta")) { |
761 | output.addToken("\u03B8"); |
762 | } else if (token.equals("\\Iota")) { |
763 | output.addToken("\u0399"); |
764 | } else if (token.equals("\\iota")) { |
765 | output.addToken("\u03B9"); |
766 | } else if (token.equals("\\Kappa")) { |
767 | output.addToken("\u039A"); |
768 | } else if (token.equals("\\kappa")) { |
769 | output.addToken("\u03BA"); |
770 | } else if (token.equals("\\Lamda")) { |
771 | output.addToken("\u039B"); |
772 | } else if (token.equals("\\lamda")) { |
773 | output.addToken("\u03BB"); |
774 | } else if (token.equals("\\Mu")) { |
775 | output.addToken("\u039C"); |
776 | } else if (token.equals("\\mu")) { |
777 | output.addToken("\u03BC"); |
778 | } else if (token.equals("\\Nu")) { |
779 | output.addToken("\u039D"); |
780 | } else if (token.equals("\\nu")) { |
781 | output.addToken("\u03BD"); |
782 | } else if (token.equals("\\Xi")) { |
783 | output.addToken("\u039E"); |
784 | } else if (token.equals("\\xi")) { |
785 | output.addToken("\u03BE"); |
786 | } else if (token.equals("\\Omikron")) { |
787 | output.addToken("\u039F"); |
788 | } else if (token.equals("\\omikron")) { |
789 | output.addToken("\u03BF"); |
790 | } else if (token.equals("\\Pi")) { |
791 | output.addToken("\u03A0"); |
792 | } else if (token.equals("\\pi")) { |
793 | output.addToken("\u03C0"); |
794 | } else if (token.equals("\\Rho")) { |
795 | output.addToken("\u03A1"); |
796 | } else if (token.equals("\\rho")) { |
797 | output.addToken("\u03C1"); |
798 | } else if (token.equals("\\Sigma")) { |
799 | output.addToken("\u03A3"); |
800 | } else if (token.equals("\\sigma")) { |
801 | output.addToken("\u03C3"); |
802 | } else if (token.equals("\\Tau")) { |
803 | output.addToken("\u03A4"); |
804 | } else if (token.equals("\\tau")) { |
805 | output.addToken("\u03C4"); |
806 | } else if (token.equals("\\Upsilon")) { |
807 | output.addToken("\u03A5"); |
808 | } else if (token.equals("\\upsilon")) { |
809 | output.addToken("\u03C5"); |
810 | } else if (token.equals("\\Phi")) { |
811 | output.addToken("\u03A6"); |
812 | } else if (token.equals("\\phi")) { |
813 | output.addToken("\u03C6"); |
814 | } else if (token.equals("\\Chi")) { |
815 | output.addToken("\u03A6"); |
816 | } else if (token.equals("\\chi")) { |
817 | output.addToken("\u03C7"); |
818 | } else if (token.equals("\\Psi")) { |
819 | output.addToken("\u03A8"); |
820 | } else if (token.equals("\\psi")) { |
821 | output.addToken("\u03C8"); |
822 | } else if (token.equals("\\Omega")) { |
823 | output.addToken("\u03A9"); |
824 | } else if (token.equals("\\omega")) { |
825 | output.addToken("\u03C9"); |
826 | } else if (token.equals("\\subset")) { |
827 | output.addToken("\u2282"); |
828 | } else if (token.equals("\\supset")) { |
829 | output.addToken("\u2283"); |
830 | } else if (token.equals("\\subseteq")) { |
831 | output.addToken("\u2286"); |
832 | } else if (token.equals("\\supseteq")) { |
833 | output.addToken("\u2287"); |
834 | } else if (token.equals("\\{")) { |
835 | output.addToken("{"); |
836 | } else if (token.equals("\\}")) { |
837 | output.addToken("}"); |
838 | } else if (token.equals("\\&")) { |
839 | output.addToken("&"); |
840 | } else if (token.equals("\\ ")) { |
841 | output.addWs(" "); |
842 | } else if (token.equals("\\S")) { |
843 | output.addToken("\u00A7"); |
844 | } else if (token.equals("\\tt")) { |
845 | // ignore |
846 | } else if (token.equals("\\tiny")) { |
847 | // ignore |
848 | } else if (token.equals("\\nonumber")) { |
849 | // ignore |
850 | } else if (token.equals("\\LaTeX")) { |
851 | output.addToken("LaTeX"); |
852 | } else if (token.equals("\\vdash")) { |
853 | output.addToken("\u22A2"); |
854 | } else if (token.equals("\\dashv")) { |
855 | output.addToken("\u22A3"); |
856 | } else if (token.equals("\\times")) { |
857 | output.addToken("\u00D7"); |
858 | } else if (token.equals("~")) { |
859 | output.addToken("\u00A0"); |
860 | } else if (token.equals("\\quad")) { |
861 | // output.addWs("\u2000"); |
862 | output.addWs(" "); |
863 | } else if (token.equals("\\qquad")) { |
864 | // output.addWs("\u2000\u2000"); |
865 | output.addWs(" "); |
866 | } else if (token.equals("\\,")) { |
867 | // output.addWs("\u2009"); |
868 | output.addWs(" "); |
869 | } else if (token.equals("\\neg") || token.equals("\\not")) { |
870 | output.addToken("\u00AC"); |
871 | } else if (token.equals("\\bot")) { |
872 | output.addToken("\u22A5"); |
873 | } else if (token.equals("\\top")) { |
874 | output.addToken("\u22A4"); |
875 | } else if (token.equals("''") || token.equals("\\grqq")) { |
876 | output.addToken("\u201D"); |
877 | } else if (token.equals("``") || token.equals("\\glqq")) { |
878 | skipWhitespace = true; |
879 | output.addToken("\u201E"); |
880 | } else if (token.equals("\\ldots")) { |
881 | output.addToken("..."); |
882 | } else if (token.equals("\\cdots")) { |
883 | output.addToken("\u00B7\u00B7\u00B7"); |
884 | } else if (token.equals("\\hdots")) { |
885 | output.addToken("\u00B7\u00B7\u00B7"); |
886 | } else if (token.equals("\\vdots")) { |
887 | output.addToken("\u2807"); |
888 | } else if (token.equals("\\overline")) { // TODO 20101018 m31: we assume set complement |
889 | output.addToken("\u2201"); |
890 | } else if (token.startsWith("\\")) { |
891 | addWarning(LatexErrorCodes.COMMAND_NOT_SUPPORTED_CODE, |
892 | LatexErrorCodes.COMMAND_NOT_SUPPORTED_TEXT + token, tokenBegin, tokenEnd); |
893 | } else { |
894 | if (mathfrak) { |
895 | mathfrak(token); |
896 | } else if (mathbb) { |
897 | mathbb(token); |
898 | } else if (emph) { |
899 | emph(token); |
900 | } else if (bold) { |
901 | bold(token); |
902 | } else { |
903 | if (isWs(token)) { |
904 | output.addWs(token); |
905 | } else { |
906 | output.addToken(token); |
907 | } |
908 | } |
909 | } |
910 | } |
911 | |
912 | /** |
913 | * Write token chars in mathbb mode. |
914 | * |
915 | * @param token Chars to write. |
916 | */ |
917 | private void emph(final String token) { |
918 | if (isWs(token)) { |
919 | output.addWs(Latex2UnicodeSpecials.transform2Emph(token)); |
920 | } else { |
921 | output.addToken(Latex2UnicodeSpecials.transform2Emph(token)); |
922 | } |
923 | } |
924 | |
925 | /** |
926 | * Write token chars in mathbb mode. |
927 | * |
928 | * @param token Chars to write. |
929 | */ |
930 | private void mathbb(final String token) { |
931 | for (int i = 0; i < token.length(); i++) { |
932 | final char c = token.charAt(i); |
933 | switch (c) { |
934 | case 'C': output.addToken("\u2102"); |
935 | break; |
936 | case 'H': output.addToken("\u210D"); |
937 | break; |
938 | case 'N': output.addToken("\u2115"); |
939 | break; |
940 | case 'P': output.addToken("\u2119"); |
941 | break; |
942 | case 'Q': output.addToken("\u211A"); |
943 | break; |
944 | case 'R': output.addToken("\u211D"); |
945 | break; |
946 | case 'Z': output.addToken("\u2124"); |
947 | break; |
948 | default: |
949 | if (Character.isWhitespace(c)) { |
950 | output.addWs("" + c); |
951 | } else { |
952 | output.addToken("" + c); |
953 | } |
954 | } |
955 | } |
956 | } |
957 | |
958 | private boolean isWs(final String token) { |
959 | return token == null || token.trim().length() == 0; |
960 | } |
961 | |
962 | /** |
963 | * Write token chars in mathfrak mode. |
964 | * |
965 | * @param token Chars to write. |
966 | */ |
967 | private void mathfrak(final String token) { |
968 | if (isWs(token)) { |
969 | output.addWs(Latex2UnicodeSpecials.transform2Mathfrak(token)); |
970 | } else { |
971 | output.addToken(Latex2UnicodeSpecials.transform2Mathfrak(token)); |
972 | } |
973 | } |
974 | |
975 | /** |
976 | * Write token in bold mode. |
977 | * |
978 | * @param token Chars to write. |
979 | */ |
980 | private void bold(final String token) { |
981 | if (isWs(token)) { |
982 | output.addWs(Latex2UnicodeSpecials.transform2Bold(token)); |
983 | } else { |
984 | output.addToken(Latex2UnicodeSpecials.transform2Bold(token)); |
985 | } |
986 | } |
987 | |
988 | /** |
989 | * Print end of line. |
990 | */ |
991 | private final void println() { |
992 | output.println(); |
993 | } |
994 | |
995 | /** |
996 | * Reads a single character and does not change the reading |
997 | * position. |
998 | * |
999 | * @return character read, if there are no more chars |
1000 | * <code>-1</code> is returned |
1001 | */ |
1002 | protected final int getChar() { |
1003 | return input.getChar(); |
1004 | } |
1005 | |
1006 | /** |
1007 | * Reads a single character and increments the reading position |
1008 | * by one. |
1009 | * |
1010 | * @return character read, if there are no more chars |
1011 | * <code>-1</code> is returned |
1012 | */ |
1013 | protected final int read() { |
1014 | return input.read(); |
1015 | } |
1016 | |
1017 | /** |
1018 | * Read until end of line. |
1019 | * |
1020 | * @return Characters read. |
1021 | */ |
1022 | protected final String readln() { |
1023 | StringBuffer result = new StringBuffer(); |
1024 | int c; |
1025 | while (TextInput.EOF != (c = read())) { |
1026 | if (c == '\n') { |
1027 | break; |
1028 | } |
1029 | result.append((char) c); |
1030 | } |
1031 | return result.toString(); |
1032 | } |
1033 | |
1034 | /** |
1035 | * Are there still any characters to read? |
1036 | * |
1037 | * @return Anything left for reading further? |
1038 | */ |
1039 | public final boolean eof() { |
1040 | return input.isEmpty(); |
1041 | } |
1042 | |
1043 | /** |
1044 | * Convert character position into row and column information. |
1045 | * |
1046 | * @param absolutePosition Find this character position. |
1047 | * @return Row and column information. |
1048 | */ |
1049 | public SourcePosition getAbsoluteSourcePosition(final int absolutePosition) { |
1050 | return ((SubTextInput) inputStack.get(0)).getPosition(absolutePosition); |
1051 | } |
1052 | |
1053 | /** |
1054 | * Add warning message. |
1055 | * |
1056 | * @param code Message code. |
1057 | * @param message Message. |
1058 | * @param from Absolute character position of problem start. |
1059 | * @param to Absolute character position of problem end. |
1060 | */ |
1061 | private void addWarning(final int code, final String message, final int from, final int to) { |
1062 | finder.addWarning(code, message, getAbsoluteSourcePosition(from), |
1063 | getAbsoluteSourcePosition(to)); |
1064 | } |
1065 | |
1066 | |
1067 | |
1068 | } |