Clover Coverage Report
Coverage timestamp: Sa Aug 2 2008 13:56:27 CEST
../../../../../img/srcFileCovDistChart8.png 47% of files have more coverage
108   285   49   13,5
54   163   0,45   8
8     6,12  
1    
 
  LatexMathParser       Line # 78 108 49 78,2% 0.7823529
 
  (11)
 
1    /* $Id: LatexMathParser.java,v 1.1 2008/07/26 07:58:30 m31 Exp $
2    *
3    * This file is part of the project "Hilbert II" - http://www.qedeq.org
4    *
5    * Copyright 2000-2008, Michael Meyling <mime@qedeq.org>.
6    *
7    * "Hilbert II" is free software; you can redistribute
8    * it and/or modify it under the terms of the GNU General Public
9    * License as published by the Free Software Foundation; either
10    * version 2 of the License, or (at your option) any later version.
11    *
12    * This program is distributed in the hope that it will be useful,
13    * but WITHOUT ANY WARRANTY; without even the implied warranty of
14    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15    * GNU General Public License for more details.
16    */
17   
18    package org.qedeq.kernel.bo.parser;
19   
20    import java.util.ArrayList;
21    import java.util.List;
22   
23    import org.qedeq.base.io.TextInput;
24    import org.qedeq.base.trace.Trace;
25   
26    /*
27    * TODO mime 20080118: refactor
28    *
29    * Whitespace LaTeX form, could be eaten
30    * \t
31    * \r
32    * \n
33    * \\
34    * \\,
35    * &
36    * \\\\
37    * \\par
38    * \\quad
39    * \\qquad
40    *
41    * Separator only one allowed, before and after only whitespace is possible
42    * ,
43    * |
44    * $$
45    * Separator should be read as tokens.
46    *
47    * Problem: If an atom is followed by "(" it should be treated as a
48    * (function) operator. But if we start with readToken we do not see the "("
49    * character.
50    *
51    * Problem: Could whitespace be recognized?
52    * Translating whitespace tokens into spaces is not easy, one has to know the
53    * end of the whitespace token.
54    * Possible solution:
55    * function read token (LaTeX specific)
56    * skip real whitespace (" ", "\t", "\r", "\n")
57    * read char
58    * case char
59    * "\\" read characters or numbers (check LaTeX Syntax)
60    * "{", "}", "(", ")" are also allowed
61    * resulting string is token
62    * LaTeX command definition modifies above:
63    * Most LaTeX commands have one of the following two formats: Either they begin
64    * with a backslash (\) and then have a name consisting of letters only, which is terminated by
65    * one or more spaces, by a following special character or by a digit;
66    * or they consist of a backslash and exactly one special character or one digit.
67    * Upper and lower case letters are also distinct in command names. If a space is wanted after
68    * a command name, one has to use "{}" to terminate the command name
69    * or a separate command for the space.
70    */
71   
72    /**
73    * Parse LaTeX term or formula data into {@link org.qedeq.kernel.bo.parser.Term}s.
74    *
75    * @version $Revision: 1.1 $
76    * @author Michael Meyling
77    */
 
78    public class LatexMathParser extends MathParser {
79   
80    /** This class; handed to every Trace call of this parser. */
81    private static final Class CLASS = LatexMathParser.class;
82   
83    /** Characters with special LaTeX meaning; readToken() handles them apart from ordinary characters and no "VAR" operator is created for them. */
84    private static final String SPECIALCHARACTERS = "(),{}\\~%$&";
85   
86    /** Number of line feeds skipped as pure whitespace during the current readToken() call; reset to 0 when readToken() starts. */
87    private int tokenWhiteSpaceLines;
88   
/**
 * Creates a parser that reads from the given text input. The input is wrapped into a
 * {@link MementoTextInput} before it is handed to the super class.
 *
 * @param   input       Text input to parse.
 * @param   operators   List of operators, passed on unchanged to the super class.
 */
public LatexMathParser(final TextInput input, final List operators) {
    super(new MementoTextInput(input), operators);
}
98   
99   
/**
 * Creates a parser that reads from the given buffer. The buffer is wrapped into a
 * {@link TextInput} and the work is delegated to the {@link TextInput} based constructor.
 *
 * @param   buffer      Character data to parse.
 * @param   operators   List of operators.
 */
public LatexMathParser(final StringBuffer buffer, final List operators) {
    this(new TextInput(buffer), operators);
}
109   
 
110  658 toggle protected final String readToken() {
111  658 final String method = "readToken()";
112  658 Trace.begin(CLASS, this, method);
113  658 StringBuffer token = new StringBuffer();
114  658 tokenWhiteSpaceLines = 0;
115  658 try {
116  658 do {
117  836 tokenWhiteSpaceLines += readPureWhitespace();
118  836 if (tokenWhiteSpaceLines > 1) {
119  0 break;
120    }
121  836 if (eof()) {
122  32 if (token.length() <= 0) {
123  32 token = null;
124    }
125  32 break;
126    }
127  804 final int c = getChar();
128  804 if (Character.isDigit((char) c)) {
129  8 token.append((char) readChar());
130  8 if (Character.isDigit((char) getChar())) {
131  0 continue;
132    }
133  8 break;
134    }
135  796 if (SPECIALCHARACTERS.indexOf(c) >= 0) {
136  697 switch (c) {
137  0 case '&':
138  0 case '%':
139  22 case '~':
140  134 case '$': // TODO mime 20060504 or break in this case?
141  156 readChar();
142  156 continue;
143  252 case '\\':
144  252 final String t = readBackslashToken();
145  252 if (t.equals(" ") || t.equals("quad") || t.equals("qquad")) {
146  34 continue;
147    }
148  218 token.append(t);
149  218 if ('_' == getChar() || '^' == getChar()) {
150  0 token.append((char) readChar());
151  0 continue;
152    }
153  218 break;
154  0 case '{':
155  0 readChar();
156  0 token.append("(");
157  0 break;
158  0 case '}':
159  0 readChar();
160  0 token.append(")");
161  0 break;
162  289 default:
163  289 readChar();
164  289 token.append((char) c);
165  289 if ('_' == getChar() || '^' == getChar()) {
166  0 token.append((char) readChar());
167  0 continue;
168    }
169    }
170  507 break;
171    }
172  99 token.append((char) readChar());
173  99 if ('_' == getChar() || '^' == getChar()) {
174  8 token.append((char) readChar());
175  8 continue;
176    }
177  91 break;
178    /*
179    String operator = null;
180    markPosition();
181    while (!eof() && (Character.isLetterOrDigit((char) getChar()) || '_' == getChar()
182    || '^' == getChar())) {
183    token.append((char) readChar());
184    if (null != getOperator(token.toString())) {
185    operator = token.toString();
186    clearMark();
187    markPosition();
188    }
189    }
190    if (operator != null) {
191    rewindPosition();
192    token.setLength(0);
193    token.append(operator);
194    } else {
195    clearMark();
196    }
197    */
198  198 } while (!eof());
199  658 Trace.param(CLASS, this, method, "return token", token);
200  658 return (token != null ? token.toString() : null);
201    } finally {
202  658 Trace.end(CLASS, this, method);
203    }
204    }
205   
 
206  252 toggle private String readBackslashToken() {
207  252 final String method = "readBackslashToken()";
208  252 Trace.begin(CLASS, this, method);
209  252 if (getChar() != '\\') {
210  0 throw new IllegalArgumentException("\\ expected");
211    }
212  252 readChar(); // read \
213  252 if (eof()) {
214  0 Trace.param(CLASS, this, method, "return", null);
215  0 Trace.end(CLASS, this, method);
216  0 return null;
217    }
218  252 if (!Character.isLetter((char) getChar())) {
219  34 Trace.param(CLASS, this, method, "return", (char) getChar());
220  34 Trace.end(CLASS, this, method);
221  34 return "" + ((char) readChar());
222    }
223  218 final StringBuffer buffer = new StringBuffer();
224  218 do {
225  880 buffer.append((char) readChar());
226  880 } while (!eof() && Character.isLetter((char) getChar()));
227  218 Trace.param(CLASS, this, method, "return", buffer.toString());
228  218 Trace.end(CLASS, this, method);
229  218 return buffer.toString();
230    }
231   
 
232  836 toggle private int readPureWhitespace() {
233  836 int lines = 0;
234  1130 while (getChar() != -1 && Character.isWhitespace((char) getChar())) {
235  294 if ('\n' == (char) getChar()) {
236  60 lines++;
237    }
238  294 readChar();
239    }
240  836 return lines;
241    }
242   
 
243  177 toggle protected final Operator getOperator(final String token) {
244  177 Operator result = null;
245  177 if (token == null) {
246  0 return null;
247    }
248  3249 for (int i = 0; i < getOperators().size(); i++) {
249  3156 if (token.equals(((Operator) getOperators().get(i)).getStartSymbol())) {
250  84 result = (Operator) getOperators().get(i);
251  84 break;
252    }
253    }
254  177 if (result != null) {
255  84 return result;
256    }
257    // mime 20080725: no operator found -> return subject variable
258  93 if (SPECIALCHARACTERS.indexOf(token) < 0) {
259  0 return new Operator(token, null, null, "VAR", token, 200, 0, 0);
260    }
261  93 return null;
262    }
263   
 
264  140 toggle protected final List getOperators(final String token) {
265  140 final List result = new ArrayList();
266  140 if (token == null) {
267  0 return result;
268    }
269  3780 for (int i = 0; i < getOperators().size(); i++) {
270  3640 if (token.equals(((Operator) getOperators().get(i)).getStartSymbol())) {
271  102 result.add(getOperators().get(i));
272    }
273    }
274    // mime 20080725: no operator found -> return subject variable
275  140 if (result.size() <= 0 && SPECIALCHARACTERS.indexOf(token) < 0) {
276  0 result.add(new Operator(token, null, null, "VAR", token, 200, 0, 0));
277    }
278  140 return result;
279    }
280   
 
281  162 toggle protected boolean eot(final String token) {
282  162 return token == null || token.trim().length() == 0;
283    }
284   
285    }