Clover Coverage Report
Coverage timestamp: Fri May 24 2013 13:47:27 UTC
../../../../../img/srcFileCovDistChart8.png 62% of files have more coverage
107   268   49   15.29
54   159   0.46   7
7     7  
1    
 
  LatexMathParser       Line # 74 107 49 79.2% 0.7916667
 
  (11)
 
1    /* This file is part of the project "Hilbert II" - http://www.qedeq.org
2    *
3    * Copyright 2000-2013, Michael Meyling <mime@qedeq.org>.
4    *
5    * "Hilbert II" is free software; you can redistribute
6    * it and/or modify it under the terms of the GNU General Public
7    * License as published by the Free Software Foundation; either
8    * version 2 of the License, or (at your option) any later version.
9    *
10    * This program is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13    * GNU General Public License for more details.
14    */
15   
16    package org.qedeq.kernel.bo.parser;
17   
18    import java.util.ArrayList;
19    import java.util.List;
20   
21    import org.qedeq.base.trace.Trace;
22   
23    /*
24    * TODO mime 20080118: refactor
25    *
26    * Whitespace LaTeX form, could be eaten
27    * \t
28    * \r
29    * \n
30    * \\
31    * \\,
32    * &
33    * \\\\
34    * \\par
35    * \\quad
36    * \\qquad
37    *
38    * Separator only one allowed, before and after only whitespace is possible
39    * ,
40    * |
41    * $$
42    * Separator should be read as tokens.
43    *
44    * Problem: If an atom is directly followed by "(" it should be taken as a
45    * (function) operator. But if we start with readToken we don't see the "("
46    * character.
47    *
48    * Problem: Could whitespace be recognized?
49    * Translating whitespace tokens into spaces is not easy, one has to know the
50    * end of the whitespace token.
51    * Possible solution:
52    * function read token (LaTeX specific)
53    * skip real whitespace (" ", "\t", "\r", "\n")
54    * read char
55    * case char
56    * "\\" read characters or numbers (check LaTeX Syntax)
57    * "{", "}", "(", ")" are also allowed
58    * resulting string is token
59    * LaTeX command definition modifies above:
60    * Most LaTeX commands have one of the following two formats: either they begin
61    * with a backslash (\) followed by a name that consists only of letters and is terminated by
62    * one or more spaces, by a following special character, or by a digit;
63    * or they consist of a backslash and exactly one special character or one digit.
64    * Upper and lower case letters are distinct in command names as well. If a space is wanted
65    * directly after a command name, the command name must be terminated with "{}",
66    * or a dedicated command for the space must be used.
67    */
68   
69    /**
70    * Parse LaTeX term or formula data into {@link org.qedeq.kernel.bo.parser.Term}s.
71    *
72    * @author Michael Meyling
73    */
 
74    public class LatexMathParser extends MathParser {
75   
76    /** This class. */
77    private static final Class CLASS = LatexMathParser.class;
78   
79    /** Characters with special LaTeX meaning. */
80    private static final String SPECIALCHARACTERS = "(),{}\\~%$&";
81   
82    /** Counter for token whitespace lines. */
83    private int tokenWhiteSpaceLines;
84   
85    /**
86    * Constructor.
87    *
88    */
 
89  11 toggle public LatexMathParser() {
90  11 super();
91    }
92   
 
93  658 toggle protected final String readToken() {
94  658 final String method = "readToken()";
95  658 Trace.begin(CLASS, this, method);
96  658 StringBuffer token = new StringBuffer();
97  658 tokenWhiteSpaceLines = 0;
98  658 try {
99  658 do {
100  836 tokenWhiteSpaceLines += readPureWhitespace();
101  836 if (tokenWhiteSpaceLines > 1) {
102  0 break;
103    }
104  836 if (eof()) {
105  32 if (token.length() <= 0) {
106  32 token = null;
107    }
108  32 break;
109    }
110  804 final int c = getChar();
111  804 if (Character.isDigit((char) c)) {
112  8 token.append((char) readChar());
113  8 if (Character.isDigit((char) getChar())) {
114  0 continue;
115    }
116  8 break;
117    }
118  796 if (SPECIALCHARACTERS.indexOf(c) >= 0) {
119  697 switch (c) {
120  0 case '&':
121  0 case '%':
122  22 case '~':
123  134 case '$': // TODO mime 20060504 or break in this case?
124  156 readChar();
125  156 continue;
126  252 case '\\':
127  252 final String t = readBackslashToken();
128  252 if (t.equals(" ") || t.equals("quad") || t.equals("qquad")) {
129  34 continue;
130    }
131  218 token.append(t);
132  218 if ('_' == getChar() || '^' == getChar()) {
133  0 token.append((char) readChar());
134  0 continue;
135    }
136  218 break;
137  0 case '{':
138  0 readChar();
139  0 token.append("(");
140  0 break;
141  0 case '}':
142  0 readChar();
143  0 token.append(")");
144  0 break;
145  289 default:
146  289 readChar();
147  289 token.append((char) c);
148  289 if ('_' == getChar() || '^' == getChar()) {
149  0 token.append((char) readChar());
150  0 continue;
151    }
152    }
153  507 break;
154    }
155  99 token.append((char) readChar());
156  99 if ('_' == getChar() || '^' == getChar()) {
157  8 token.append((char) readChar());
158  8 continue;
159    }
160  91 break;
161    /*
162    String operator = null;
163    markPosition();
164    while (!eof() && (Character.isLetterOrDigit((char) getChar()) || '_' == getChar()
165    || '^' == getChar())) {
166    token.append((char) readChar());
167    if (null != getOperator(token.toString())) {
168    operator = token.toString();
169    clearMark();
170    markPosition();
171    }
172    }
173    if (operator != null) {
174    rewindPosition();
175    token.setLength(0);
176    token.append(operator);
177    } else {
178    clearMark();
179    }
180    */
181  198 } while (!eof());
182  658 Trace.param(CLASS, this, method, "return token", token);
183  658 return (token != null ? token.toString() : null);
184    } finally {
185  658 Trace.end(CLASS, this, method);
186    }
187    }
188   
 
189  252 toggle private String readBackslashToken() {
190  252 final String method = "readBackslashToken()";
191  252 Trace.begin(CLASS, this, method);
192  252 if (getChar() != '\\') {
193  0 throw new IllegalArgumentException("\\ expected");
194    }
195  252 readChar(); // read \
196  252 if (eof()) {
197  0 Trace.param(CLASS, this, method, "return", null);
198  0 Trace.end(CLASS, this, method);
199  0 return null;
200    }
201  252 if (!Character.isLetter((char) getChar())) {
202  34 Trace.param(CLASS, this, method, "return", (char) getChar());
203  34 Trace.end(CLASS, this, method);
204  34 return "" + ((char) readChar());
205    }
206  218 final StringBuffer buffer = new StringBuffer();
207  218 do {
208  880 buffer.append((char) readChar());
209  880 } while (!eof() && Character.isLetter((char) getChar()));
210  218 Trace.param(CLASS, this, method, "return", buffer.toString());
211  218 Trace.end(CLASS, this, method);
212  218 return buffer.toString();
213    }
214   
 
215  836 toggle private int readPureWhitespace() {
216  836 int lines = 0;
217  1130 while (getChar() != -1 && Character.isWhitespace((char) getChar())) {
218  294 if ('\n' == (char) getChar()) {
219  60 lines++;
220    }
221  294 readChar();
222    }
223  836 return lines;
224    }
225   
 
226  177 toggle protected final Operator getOperator(final String token) {
227  177 Operator result = null;
228  177 if (token == null) {
229  0 return null;
230    }
231  3249 for (int i = 0; i < getOperators().size(); i++) {
232  3156 if (token.equals(((Operator) getOperators().get(i)).getStartSymbol())) {
233  84 result = (Operator) getOperators().get(i);
234  84 break;
235    }
236    }
237  177 if (result != null) {
238  84 return result;
239    }
240    // mime 20080725: no operator found -> return subject variable
241  93 if (SPECIALCHARACTERS.indexOf(token) < 0) {
242  0 return new Operator(token, null, null, "VAR", token, 200, 0, 0);
243    }
244  93 return null;
245    }
246   
 
247  140 toggle protected final List getOperators(final String token) {
248  140 final List result = new ArrayList();
249  140 if (token == null) {
250  0 return result;
251    }
252  3780 for (int i = 0; i < getOperators().size(); i++) {
253  3640 if (token.equals(((Operator) getOperators().get(i)).getStartSymbol())) {
254  102 result.add(getOperators().get(i));
255    }
256    }
257    // mime 20080725: no operator found -> return subject variable
258  140 if (result.size() <= 0 && SPECIALCHARACTERS.indexOf(token) < 0) {
259  0 result.add(new Operator(token, null, null, "VAR", token, 200, 0, 0));
260    }
261  140 return result;
262    }
263   
 
264  162 toggle protected boolean eot(final String token) {
265  162 return token == null || token.trim().length() == 0;
266    }
267   
268    }