1 | /* This file is part of the project "Hilbert II" - http://www.qedeq.org |
2 | * |
3 | * Copyright 2000-2014, Michael Meyling <mime@qedeq.org>. |
4 | * |
5 | * "Hilbert II" is free software; you can redistribute |
6 | * it and/or modify it under the terms of the GNU General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2 of the License, or (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | */ |
15 | package org.qedeq.kernel.xml.tracker; |
16 | |
17 | import java.io.File; |
18 | import java.io.FileInputStream; |
19 | import java.io.IOException; |
20 | import java.io.InputStream; |
21 | import java.io.Reader; |
22 | import java.util.ArrayList; |
23 | import java.util.HashMap; |
24 | import java.util.List; |
25 | import java.util.Map; |
26 | |
27 | import javax.xml.parsers.ParserConfigurationException; |
28 | import javax.xml.parsers.SAXParser; |
29 | import javax.xml.parsers.SAXParserFactory; |
30 | |
31 | import org.qedeq.base.io.IoUtility; |
32 | import org.qedeq.base.io.SourceArea; |
33 | import org.qedeq.base.io.SourcePosition; |
34 | import org.qedeq.base.io.TextInput; |
35 | import org.qedeq.base.trace.Trace; |
36 | import org.qedeq.base.utility.Enumerator; |
37 | import org.qedeq.kernel.xml.handler.common.SimpleHandler; |
38 | import org.xml.sax.Attributes; |
39 | import org.xml.sax.InputSource; |
40 | import org.xml.sax.SAXException; |
41 | import org.xml.sax.XMLReader; |
42 | |
43 | import com.sun.syndication.io.XmlReader; |
44 | |
45 | /** |
46 | * Parser for XML files. Search simple XPath within an XML file. |
47 | * Usage: |
48 | * <pre> |
49 | * final XPathLocationParser parser = new XPathLocationParser(xpath); |
50 | * parser.parse(xmlFile, original); |
51 | * return parser.getFind(); |
52 | * |
53 | * </pre> |
54 | * |
55 | * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime |
56 | * exception is thrown if the path is not found. |
57 | * |
58 | * @author Michael Meyling |
59 | */ |
60 | public final class XPathLocationParser extends SimpleHandler { |
61 | |
62 | /** This class. */ |
63 | private static final Class CLASS = XPathLocationParser.class; |
64 | |
65 | /** Namespaces feature id (http://xml.org/sax/features/namespaces). */ |
66 | private static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces"; |
67 | |
68 | /** Validation feature id (http://xml.org/sax/features/validation). */ |
69 | private static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation"; |
70 | |
71 | /** SAX parser. */ |
72 | private final XMLReader reader; |
73 | |
74 | /** Search for this simple XPath expression. */ |
75 | private final SimpleXPath find; |
76 | |
77 | /** We are currently at this position. */ |
78 | private SimpleXPath current; |
79 | |
80 | /** We are currently at this position if we count only occurrences and take every element. The |
81 | * elements are all named "*". */ |
82 | private SimpleXPath summary; |
83 | |
84 | /** This object is parsed. */ |
85 | private File xmlFile; |
86 | |
87 | /** Element stack. */ |
88 | private final List elements; |
89 | |
90 | /** Current stack level. */ |
91 | private int level; |
92 | |
93 | /** Add this to found position. */ |
94 | private SourcePosition startDelta; |
95 | |
96 | /** Add this to found position. */ |
97 | private SourcePosition endDelta; |
98 | |
99 | /** Here the found element starts. */ |
100 | private SourcePosition start; |
101 | |
102 | /** Here the found element ends. */ |
103 | private SourcePosition end; |
104 | |
105 | /** |
106 | * Search simple XPath within an XML file. |
107 | * |
108 | * @param xmlFile Search this file. |
109 | * @param xpath Search for this simple XPath. |
110 | * @return Source position information. |
111 | * @throws ParserConfigurationException Parser configuration problem. |
112 | * @throws SAXException XML problem. |
113 | * @throws IOException IO problem. |
114 | */ |
115 | // public static final SimpleXPath getXPathLocation(final File xmlFile, final String xpath) |
116 | // throws ParserConfigurationException, SAXException, IOException { |
117 | // return getXPathLocation(xmlFile, new SimpleXPath(xpath)); |
118 | // } |
119 | |
120 | /** |
121 | * Search simple XPath within an XML file. |
122 | * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime |
123 | * exception is thrown if the path is not found. |
124 | * |
125 | * @param address Name description (for example URL) for this XML file. |
126 | * @param xpath Search for this simple XPath. |
127 | * @param startDelta Skip position (relative to location start). Could be |
128 | * <code>null</code>. |
129 | * @param endDelta Mark until this column (relative to location start). Could |
130 | * be <code>null</code>. |
131 | * @param file Search this file. |
132 | * @return Source position information. |
133 | */ |
134 | public static SourceArea findSourceArea(final String address, final SimpleXPath xpath, |
135 | final SourcePosition startDelta, final SourcePosition endDelta, final File file) { |
136 | final String method = "findSourceArea(String, SimpleXPath, SourcePosition, SourcePosition, File)"; |
137 | final String message = "Could not find \"" + xpath + "\" within \"" + file + "\""; |
138 | try { |
139 | XPathLocationParser parser = new XPathLocationParser(xpath, startDelta, endDelta); |
140 | parser.parse(file); |
141 | if (parser.getStart() == null || parser.getEnd() == null) { |
142 | Trace.fatal(CLASS, method, message, null); |
143 | if (Boolean.TRUE.toString().equalsIgnoreCase( |
144 | System.getProperty("qedeq.test.xmlLocationFailures"))) { |
145 | throw new RuntimeException(message); |
146 | } |
147 | return new SourceArea(address); |
148 | } |
149 | return new SourceArea(address, parser.getStart(), parser.getEnd()); |
150 | } catch (ParserConfigurationException e) { |
151 | Trace.fatal(CLASS, method, message, e); |
152 | } catch (SAXException e) { |
153 | Trace.fatal(CLASS, method, message, e); |
154 | } catch (IOException e) { |
155 | Trace.fatal(CLASS, method, message, e); |
156 | } catch (RuntimeException e) { |
157 | Trace.fatal(CLASS, method, message, e); |
158 | } |
159 | return null; |
160 | } |
161 | |
162 | /** |
163 | * Search simple XPath within an XML file. |
164 | * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime |
165 | * exception is thrown if the path is not found. |
166 | * |
167 | * @param file Search this file. |
168 | * @param xpath Search for this simple XPath. |
169 | * @return Source position information. |
170 | */ |
171 | public static SourceArea findSourceArea(final File file, final SimpleXPath xpath) { |
172 | return findSourceArea(file.toString(), xpath, null, null, file); |
173 | } |
174 | |
175 | /** |
176 | * Constructor. |
177 | * |
178 | * @param xpath XML file path. |
179 | * @param startDelta Skip position (relative to location start). Could be |
180 | * <code>null</code>. |
181 | * @param endDelta Mark until this column (relative to location start). Could |
182 | * be <code>null</code>. |
183 | * @throws ParserConfigurationException Severe parser configuration problem. |
184 | * @throws SAXException XML problem. |
185 | */ |
186 | public XPathLocationParser(final SimpleXPath xpath, final SourcePosition startDelta, |
187 | final SourcePosition endDelta) throws ParserConfigurationException, |
188 | SAXException { |
189 | super(); |
190 | |
191 | this.find = xpath; |
192 | this.startDelta = startDelta; |
193 | this.endDelta = endDelta; |
194 | elements = new ArrayList(20); |
195 | level = 0; |
196 | |
197 | final String factoryImpl = System.getProperty("javax.xml.parsers.SAXParserFactory"); |
198 | if (factoryImpl == null) { |
199 | System.setProperty("javax.xml.parsers.SAXParserFactory", |
200 | "org.apache.xerces.jaxp.SAXParserFactoryImpl"); |
201 | } |
202 | SAXParserFactory factory = SAXParserFactory.newInstance(); |
203 | factory.setNamespaceAware(false); |
204 | factory.setValidating(false); |
205 | |
206 | factory.setFeature(NAMESPACES_FEATURE_ID, false); |
207 | factory.setFeature(VALIDATION_FEATURE_ID, false); |
208 | |
209 | final SAXParser parser = factory.newSAXParser(); |
210 | |
211 | reader = parser.getXMLReader(); |
212 | |
213 | // set parser features |
214 | reader.setFeature(NAMESPACES_FEATURE_ID, false); |
215 | reader.setFeature(VALIDATION_FEATURE_ID, false); |
216 | } |
217 | |
218 | /** |
219 | * Parses XML file. |
220 | * |
221 | * @param file Parse this input. |
222 | * @throws IOException Technical problem occurred. |
223 | * @throws SAXException Parsing problem. |
224 | */ |
225 | public final void parse(final File file) throws IOException, SAXException { |
226 | xmlFile = file; |
227 | elements.clear(); |
228 | level = 0; |
229 | InputStream stream = null; |
230 | try { |
231 | current = new SimpleXPath(); |
232 | summary = new SimpleXPath(); |
233 | reader.setContentHandler(this); |
234 | // LATER 20110316 m31: this seems to have no effect, the error handler don't get the Exceptions! Why? |
235 | // reader.setErrorHandler(new ErrorHandler() { |
236 | // |
237 | // public void error(SAXParseException exception) throws SAXException { |
238 | // exception.printStackTrace(System.out); |
239 | //// throw exception; |
240 | // } |
241 | // |
242 | // public void fatalError(SAXParseException exception) { |
243 | // exception.printStackTrace(System.out); |
244 | // } |
245 | // |
246 | // public void warning(SAXParseException exception) |
247 | // throws SAXException { |
248 | // exception.printStackTrace(System.out); |
249 | // }}); |
250 | stream = new FileInputStream(file); |
251 | reader.parse(new InputSource(stream)); |
252 | } catch (XPathLocationFoundException e) { |
253 | // this is what we want!!! |
254 | } catch (SAXException e) { |
255 | Trace.trace(CLASS, this, "parse", e); |
256 | throw e; |
257 | } finally { |
258 | IoUtility.close(stream); |
259 | } |
260 | } |
261 | |
262 | /* |
263 | * (non-Javadoc) |
264 | * |
265 | * @see org.xml.sax.ContentHandler#endDocument() |
266 | */ |
267 | public void endDocument() throws SAXException { |
268 | elements.clear(); |
269 | level = 0; |
270 | } |
271 | |
272 | /* |
273 | * (non-Javadoc) |
274 | * |
275 | * @see org.xml.sax.ContentHandler#startDocument() |
276 | */ |
277 | public void startDocument() throws SAXException { |
278 | elements.clear(); |
279 | level = 0; |
280 | } |
281 | |
282 | /* |
283 | * (non-Javadoc) |
284 | * |
285 | * @see org.xml.sax.ContentHandler#characters(char[], int, int) |
286 | */ |
287 | public void characters(final char[] ch, final int start, final int length) throws SAXException { |
288 | // nothing to do |
289 | } |
290 | |
291 | /* |
292 | * (non-Javadoc) |
293 | * |
294 | * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int) |
295 | */ |
296 | public void ignorableWhitespace(final char[] ch, final int start, final int length) |
297 | throws SAXException { |
298 | // nothing to do |
299 | } |
300 | |
301 | /* |
302 | * (non-Javadoc) |
303 | * |
304 | * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String) |
305 | */ |
306 | public void endPrefixMapping(final String prefix) throws SAXException { |
307 | // nothing to do |
308 | } |
309 | |
310 | /* |
311 | * (non-Javadoc) |
312 | * |
313 | * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String) |
314 | */ |
315 | public void skippedEntity(final String name) throws SAXException { |
316 | // nothing to do |
317 | } |
318 | |
319 | /* |
320 | * (non-Javadoc) |
321 | * |
322 | * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String) |
323 | */ |
324 | public void processingInstruction(final String target, final String data) throws SAXException { |
325 | // nothing to do |
326 | } |
327 | |
328 | /* |
329 | * (non-Javadoc) |
330 | * |
331 | * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String) |
332 | */ |
333 | public void startPrefixMapping(final String prefix, final String uri) throws SAXException { |
334 | // nothing to do |
335 | } |
336 | |
337 | /* |
338 | * (non-Javadoc) |
339 | * |
340 | * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, |
341 | * java.lang.String, org.xml.sax.Attributes) |
342 | */ |
343 | public void startElement(final String namespaceURI, final String localName, final String qName, |
344 | final Attributes atts) throws SAXException { |
345 | final String method = "startElement(String, String, Attributes)"; |
346 | level++; |
347 | summary.addElement("*", addOccurence("*")); |
348 | current.addElement(qName, addOccurence(qName)); |
349 | |
350 | // LATER mime 20070109: just for testing is the next if |
351 | /* |
352 | if (find.matchesElementsBegining(current, summary)) { |
353 | System.out.println("part match " + qName); |
354 | xml.setRow(locator.getLineNumber()); |
355 | xml.setColumn(locator.getColumnNumber()); |
356 | try { |
357 | xml.skipBackToBeginOfXmlTag(); |
358 | } catch (RuntimeException e) { |
359 | Trace.trace(this, method, e); |
360 | } |
361 | find.setStartLocation(new SourcePosition(xml.getLocalAddress(), xml.getRow(), xml |
362 | .getColumn())); |
363 | } |
364 | */ |
365 | if (getLocator() == null) { |
366 | throw new SAXException("Locator unexpectedly null"); |
367 | } |
368 | if (find.matchesElements(current, summary)) { |
369 | Trace.trace(CLASS, this, method, "matching elements"); |
370 | Trace.param(CLASS, this, method, qName, current); |
371 | TextInput xml = null; |
372 | Reader xmlReader = null; |
373 | try { |
374 | xmlReader = new XmlReader(xmlFile); |
375 | xml = new TextInput(xmlReader); |
376 | // LATER mime 20080608: old code |
377 | // xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding())); |
378 | } catch (IOException io) { |
379 | Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io); |
380 | if (getLocator() == null) { |
381 | throw new SAXException("Locator unexpectedly null"); |
382 | } |
383 | // at least we can set the current location as find location |
384 | start = new SourcePosition( |
385 | getLocator().getLineNumber(), getLocator().getColumnNumber()); |
386 | return; |
387 | } |
388 | try { |
389 | xml.setRow(getLocator().getLineNumber()); |
390 | xml.setColumn(getLocator().getColumnNumber()); |
391 | if (startDelta != null) { |
392 | xml.skipWhiteSpace(); |
393 | final String cdata = "<![CDATA["; |
394 | final String read = xml.readString(cdata.length()); |
395 | final int cdataLength = (cdata.equals(read) ? cdata.length() : 0); |
396 | start = addDelta(xml, cdataLength, startDelta); |
397 | end = addDelta(xml, cdataLength, endDelta); |
398 | return; |
399 | } |
400 | try { |
401 | xml.skipBackToBeginOfXmlTag(); |
402 | } catch (RuntimeException e) { |
403 | Trace.trace(CLASS, this, method, e); |
404 | } |
405 | start = new SourcePosition(xml.getRow(), xml.getColumn()); |
406 | if (find.getAttribute() != null) { |
407 | xml.read(); // skip < |
408 | xml.readNextXmlName(); // must be element name |
409 | String tag; |
410 | do { |
411 | xml.skipWhiteSpace(); |
412 | int row = xml.getRow(); |
413 | int col = xml.getColumn(); |
414 | try { |
415 | tag = xml.readNextXmlName(); |
416 | } catch (IllegalArgumentException e) { |
417 | break; |
418 | } |
419 | if (tag.equals(find.getAttribute())) { |
420 | start = new SourcePosition(row, col); |
421 | xml.readNextAttributeValue(); |
422 | end = new SourcePosition(xml.getRow(), xml.getColumn()); |
423 | throw new XPathLocationFoundException(); |
424 | } |
425 | try { |
426 | xml.readNextAttributeValue(); |
427 | } catch (IllegalArgumentException e) { |
428 | break; |
429 | } |
430 | } while (true); |
431 | // did we found the attribute? if not we point to the complete xml tag |
432 | if (end == null) { |
433 | end = new SourcePosition(xml.getRow(), xml.getColumn()); |
434 | throw new XPathLocationFoundException(); |
435 | } |
436 | } |
437 | } finally { |
438 | IoUtility.close(xml); // findbugs |
439 | } |
440 | } |
441 | } |
442 | |
443 | /** |
444 | * Set text input position according to locator and add delta plus tag length. |
445 | * |
446 | * @param xml This is the stream we work on. |
447 | * @param cdataLength Length of extra skip data. |
448 | * @param delta Add this delta |
449 | * @return Resulting source position. |
450 | */ |
451 | private SourcePosition addDelta(final TextInput xml, final int cdataLength, |
452 | final SourcePosition delta) { |
453 | xml.setRow(getLocator().getLineNumber()); |
454 | xml.setColumn(getLocator().getColumnNumber()); |
455 | if (delta.getRow() == 1 && cdataLength > 0) { |
456 | xml.addColumn(cdataLength + delta.getColumn() - 1); |
457 | } else { |
458 | xml.addPosition(delta); |
459 | } |
460 | return new SourcePosition(xml.getRow(), xml.getColumn()); |
461 | } |
462 | |
463 | /** |
464 | * Add element occurrence. |
465 | * |
466 | * @param name Element that occurred. |
467 | * @return Number of occurrences including this one. |
468 | */ |
469 | private int addOccurence(final String name) { |
470 | while (level < elements.size()) { |
471 | elements.remove(elements.size() - 1); |
472 | } |
473 | while (level > elements.size()) { |
474 | elements.add(new HashMap()); |
475 | } |
476 | final Map levelMap = (Map) elements.get(level - 1); |
477 | final Enumerator counter; |
478 | if (levelMap.containsKey(name)) { |
479 | counter = (Enumerator) levelMap.get(name); |
480 | counter.increaseNumber(); |
481 | } else { |
482 | counter = new Enumerator(1); |
483 | levelMap.put(name, counter); |
484 | } |
485 | return counter.getNumber(); |
486 | } |
487 | |
488 | /* |
489 | * (non-Javadoc) |
490 | * |
491 | * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, |
492 | * java.lang.String) |
493 | */ |
494 | public void endElement(final String namespaceURI, final String localName, final String qName) |
495 | throws SAXException { |
496 | final String method = "endElement(String, String, Attributes)"; |
497 | level--; |
498 | if (getLocator() == null) { |
499 | current.deleteLastElement(); |
500 | summary.deleteLastElement(); |
501 | throw new SAXException("Locator unexpectly null"); |
502 | } |
503 | if (find.matchesElements(current, summary) && find.getAttribute() == null |
504 | && startDelta == null) { |
505 | TextInput xml = null; |
506 | Reader xmlReader = null; |
507 | try { |
508 | xmlReader = new XmlReader(xmlFile); |
509 | xml = new TextInput(xmlReader); |
510 | // LATER mime 20080608: old code |
511 | // xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding())); |
512 | } catch (IOException io) { |
513 | Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io); |
514 | if (getLocator() == null) { |
515 | throw new SAXException("Locator unexpectedly null"); |
516 | } |
517 | // at least we can set the current location as find location |
518 | start = new SourcePosition(getLocator().getLineNumber(), |
519 | getLocator().getColumnNumber()); |
520 | return; |
521 | } finally { |
522 | IoUtility.close(xmlReader); |
523 | } |
524 | try { |
525 | xml.setRow(getLocator().getLineNumber()); |
526 | xml.setColumn(getLocator().getColumnNumber()); |
527 | // xml.skipForwardToEndOfXmlTag(); // LATER mime 20050810: remove? comment in? |
528 | end = new SourcePosition(xml.getRow(), xml.getColumn()); |
529 | throw new XPathLocationFoundException(); |
530 | } finally { |
531 | IoUtility.close(xml); // findbugs |
532 | } |
533 | } |
534 | current.deleteLastElement(); |
535 | summary.deleteLastElement(); |
536 | } |
537 | |
538 | /** |
539 | * Get starting source position of found element. Could be <code>null</code>. |
540 | * |
541 | * @return Start position. |
542 | */ |
543 | private SourcePosition getStart() { |
544 | return start; |
545 | } |
546 | |
547 | /** |
548 | * Get ending source position of found element. Could be <code>null</code>. |
549 | * |
550 | * @return End position. |
551 | */ |
552 | private SourcePosition getEnd() { |
553 | return end; |
554 | } |
555 | |
556 | } |