001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org
002 *
003 * Copyright 2000-2011, Michael Meyling <mime@qedeq.org>.
004 *
005 * "Hilbert II" is free software; you can redistribute
006 * it and/or modify it under the terms of the GNU General Public
007 * License as published by the Free Software Foundation; either
008 * version 2 of the License, or (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 * GNU General Public License for more details.
014 */
015
016 package org.qedeq.kernel.bo.service.unicode;
017
018 import org.qedeq.base.io.TextInput;
019 import org.qedeq.base.trace.Trace;
020 import org.qedeq.kernel.bo.parser.MementoTextInput;
021
022 /**
023 * Transform LaTeX into QEDEQ format.
024 *
025 * @author Michael Meyling
026 */
027 public final class LatexTextParser {
028
029 /** This class. */
030 private static final Class CLASS = LatexTextParser.class;
031
032 /** These characters get a special treatment in LaTeX. */
033 private static final String SPECIALCHARACTERS = "(),{}\\~%$&";
034
035 /** This is our input stream .*/
036 private MementoTextInput input;
037
038 /** Herein goes our output. */
039 private StringBuffer output;
040
041 /**
042 * Parse LaTeX text into QEDEQ module string.
043 *
044 * @param input Parse this input.
045 * @return QEDEQ module string.
046 */
047 public static final String transform(final String input) {
048 final LatexTextParser parser = new LatexTextParser(input);
049 return parser.parse();
050 }
051
052 /**
053 * Constructor.
054 *
055 * @param input Parse this input.
056 */
057 private LatexTextParser(final String input) {
058 this.input = new MementoTextInput(new TextInput(input));
059 this.output = new StringBuffer();
060 }
061
062 /**
063 * Do parsing.
064 *
065 * @return QEDEQ module string.
066 */
067 private String parse() {
068 while (!eof()) {
069 final String token = readToken();
070 if ("\\begin".equals(token)) {
071 final String curly = readCurlyBraceContents();
072 if ("eqnarray".equals(curly)) {
073 printMathTillEnd(curly);
074 } else if ("eqnarray*".equals(curly)) {
075 printMathTillEnd(curly);
076 } else if ("equation".equals(curly)) {
077 printMathTillEnd(curly);
078 } else if ("equation*".equals(curly)) {
079 printMathTillEnd(curly);
080 } else {
081 print(token + "{" + curly + "}");
082 }
083 } else if ("$$".equals(token)) {
084 println();
085 println("<MATH>");
086 printMathTillToken(token);
087 println("\\,</MATH>");
088 println();
089 } else if ("$".equals(token)) {
090 print("<MATH>");
091 printMathTillToken(token);
092 print("\\,</MATH>");
093 } else {
094 print(token);
095 }
096 }
097 return output.toString();
098 }
099
100 private void printMathTillEnd(final String curly) {
101 final StringBuffer buffer = new StringBuffer();
102 do {
103 final String item = readToken();
104 if ("\\end".equals(item)) {
105 final String curly2 = readCurlyBraceContents();
106 if (curly.equals(curly2)) {
107 break;
108 }
109 buffer.append(item + "{" + curly2 + "}");
110 } else {
111 buffer.append(item);
112 }
113 } while (true);
114
115 /*
116 println("\\begin{" + curly + "}");
117 println(buffer);
118 println("\\end{" + curly + "}");
119 println();
120 */
121 printMath(buffer);
122 }
123
124 /**
125 * Print math content till <code>token</code> occurs.
126 *
127 * @param token Terminator token.
128 */
129 private void printMathTillToken(final String token) {
130 final StringBuffer buffer = new StringBuffer();
131 do {
132 final String item = readToken();
133 if (token.equals(item)) {
134 break;
135 } else {
136 buffer.append(item);
137 }
138 } while (true);
139 printMath(buffer);
140 }
141
142 /**
143 * Print math content.
144 *
145 * @param buffer This should be printed as mathematical content.
146 */
147 private void printMath(final StringBuffer buffer) {
148 print(buffer.toString());
149 }
150
151 /**
152 * Read next token from input stream.
153 *
154 * @return Read token.
155 */
156 protected final String readToken() {
157 final String method = "readToken()";
158 Trace.begin(CLASS, this, method);
159 StringBuffer token = new StringBuffer();
160 try {
161 do {
162 if (eof()) {
163 if (token.length() <= 0) {
164 token = null;
165 }
166 break;
167 }
168 final int c = getChar();
169 if (Character.isDigit((char) c)) {
170 token.append((char) readChar());
171 if (Character.isDigit((char) getChar())) {
172 continue;
173 }
174 break;
175 }
176 if (Character.isLetter((char) c)) {
177 token.append((char) readChar());
178 if (Character.isLetter((char) getChar())) {
179 continue;
180 }
181 break;
182 }
183 if (SPECIALCHARACTERS.indexOf(c) >= 0) {
184 switch (c) {
185 case '&':
186 case '%':
187 case '{':
188 case '}':
189 case '~':
190 token.append((char) readChar());
191 break;
192 case '$':
193 token.append((char) readChar());
194 if ('$' == getChar()) {
195 continue;
196 }
197 break;
198 case '\\':
199 final String t = readBackslashToken();
200 token.append(t);
201 if ('_' == getChar() || '^' == getChar()) {
202 token.append((char) readChar());
203 continue;
204 }
205 break;
206 default:
207 readChar();
208 token.append((char) c);
209 }
210 break;
211 }
212 token.append((char) readChar());
213 if ('_' == getChar() || '^' == getChar()) {
214 token.append((char) readChar());
215 continue;
216 }
217 break;
218 } while (!eof());
219 Trace.param(CLASS, this, method, "Read token", token);
220 return (token != null ? token.toString() : null);
221 } finally {
222 Trace.end(CLASS, this, method);
223 }
224 }
225
226 /**
227 * Get token that starts with a backlash. The backslash itself is removed from the token.
228 *
229 * @return Token (without backslash).
230 */
231 private String readBackslashToken() {
232 final String method = "readBackslashToken()";
233 Trace.begin(CLASS, this, method);
234 if (getChar() != '\\') {
235 throw new IllegalArgumentException("\\ expected");
236 }
237 readChar(); // read \
238 if (eof()) {
239 Trace.param(CLASS, this, method, "return", null);
240 Trace.end(CLASS, this, method);
241 return null;
242 }
243 if (!Character.isLetter((char) getChar())) {
244 Trace.param(CLASS, this, method, "return", (char) getChar());
245 Trace.end(CLASS, this, method);
246 return "\\" + ((char) readChar());
247 }
248 final StringBuffer buffer = new StringBuffer("\\");
249 do {
250 buffer.append((char) readChar());
251 } while (!eof() && Character.isLetter((char) getChar()));
252 Trace.param(CLASS, this, method, "return", buffer.toString());
253 Trace.end(CLASS, this, method);
254 return buffer.toString();
255 }
256
257 /**
258 * Read contents that is within { .. }.
259 *
260 * @return Contents.
261 */
262 private String readCurlyBraceContents() {
263 final String first = readToken();
264 if (!"{".equals(first)) {
265 throw new IllegalArgumentException("\"{\" expected, but was: \"" + first + "\"");
266 }
267 final StringBuffer buffer = new StringBuffer();
268 String next;
269 int level = 1;
270 while (level > 0) {
271 next = readToken();
272 if ("{".equals(next)) {
273 level++;
274 } else if ("}".equals(next)) {
275 level--;
276 }
277 if (level <= 0) {
278 break;
279 }
280 buffer.append(next);
281 }
282 return buffer.toString();
283 }
284
285 /**
286 * Print <code>line</code> to output stream.
287 *
288 * @param line Print this.
289 */
290 private final void print(final String line) {
291 output.append(line);
292 }
293
294 /**
295 * Print end of line.
296 */
297 private final void println() {
298 println("");
299 }
300
301 /**
302 * Print <code>line</code> and start new line to output stream.
303 *
304 * @param line Print this.
305 */
306 private final void println(final String line) {
307 print(line);
308 print("\n");
309 }
310
311 /**
312 * Read next token from input but don't move reading position.
313 *
314 * @return Token read, is <code>null</code> if end of data reached.
315 */
316 public final String getToken() {
317 markPosition();
318 final String result = readToken();
319 rewindPosition();
320 return result;
321 }
322
323 /**
324 * Remember current position.
325 */
326 protected final void markPosition() {
327 input.markPosition();
328 }
329
330 /**
331 * Rewind to previous marked position. Also clears the mark.
332 *
333 * @return Current position before pop.
334 */
335 protected final long rewindPosition() {
336 return input.rewindPosition();
337 }
338
339 /**
340 * Forget last remembered position.
341 */
342 protected final void clearMark() {
343 input.clearMark();
344 }
345
346 /**
347 * Get byte position.
348 *
349 * @return Position.
350 */
351 protected long getPosition() {
352 return input.getPosition();
353 }
354
355 /**
356 * Reads a single character and does not change the reading
357 * position.
358 *
359 * @return character read, if there are no more chars
360 * <code>Character.MAX_VALUE</code> is returned
361 */
362 protected final int getChar() {
363 return input.getChar();
364 }
365
366 /**
367 * Reads a single character and increments the reading position
368 * by one.
369 *
370 * @return character read, if there are no more chars
371 * <code>Character.MAX_VALUE</code> is returned
372 */
373 protected final int readChar() {
374 return input.read();
375 }
376
377 /**
378 * Are there still any characters to read?
379 *
380 * @return Anything left for reading further?
381 */
382 public final boolean eof() {
383 return input.isEmpty();
384 }
385
386 /**
387 * Get rewind stack size.
388 *
389 * @return Rewind stack size.
390 */
391 public final int getRewindStackSize() {
392 return input.getRewindStackSize();
393 }
394
395 }
|