001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org
002 *
003 * Copyright 2000-2013, Michael Meyling <mime@qedeq.org>.
004 *
005 * "Hilbert II" is free software; you can redistribute
006 * it and/or modify it under the terms of the GNU General Public
007 * License as published by the Free Software Foundation; either
008 * version 2 of the License, or (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 * GNU General Public License for more details.
014 */
015 package org.qedeq.kernel.xml.tracker;
016
017 import java.io.File;
018 import java.io.FileInputStream;
019 import java.io.IOException;
020 import java.io.InputStream;
021 import java.io.Reader;
022 import java.util.ArrayList;
023 import java.util.HashMap;
024 import java.util.List;
025 import java.util.Map;
026
027 import javax.xml.parsers.ParserConfigurationException;
028 import javax.xml.parsers.SAXParser;
029 import javax.xml.parsers.SAXParserFactory;
030
031 import org.qedeq.base.io.IoUtility;
032 import org.qedeq.base.io.SourceArea;
033 import org.qedeq.base.io.SourcePosition;
034 import org.qedeq.base.io.TextInput;
035 import org.qedeq.base.trace.Trace;
036 import org.qedeq.base.utility.Enumerator;
037 import org.qedeq.kernel.xml.handler.common.SimpleHandler;
038 import org.xml.sax.Attributes;
039 import org.xml.sax.InputSource;
040 import org.xml.sax.SAXException;
041 import org.xml.sax.XMLReader;
042
043 import com.sun.syndication.io.XmlReader;
044
045 /**
046 * Parser for XML files. Search simple XPath within an XML file.
047 * Usage:
048 * <pre>
049 * final XPathLocationParser parser = new XPathLocationParser(xpath);
050 * parser.parse(xmlFile, original);
051 * return parser.getFind();
052 *
053 * </pre>
054 *
055 * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime
056 * exception is thrown if the path is not found.
057 *
058 * @author Michael Meyling
059 */
060 public final class XPathLocationParser extends SimpleHandler {
061
062 /** This class. */
063 private static final Class CLASS = XPathLocationParser.class;
064
065 /** Namespaces feature id (http://xml.org/sax/features/namespaces). */
066 private static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces";
067
068 /** Validation feature id (http://xml.org/sax/features/validation). */
069 private static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation";
070
071 /** SAX parser. */
072 private final XMLReader reader;
073
074 /** Search for this simple XPath expression. */
075 private final SimpleXPath find;
076
077 /** We are currently at this position. */
078 private SimpleXPath current;
079
080 /** We are currently at this position if we count only occurrences and take every element. The
081 * elements are all named "*". */
082 private SimpleXPath summary;
083
084 /** This object is parsed. */
085 private File xmlFile;
086
087 /** Element stack. */
088 private final List elements;
089
090 /** Current stack level. */
091 private int level;
092
093 /** Add this to found position. */
094 private SourcePosition startDelta;
095
096 /** Add this to found position. */
097 private SourcePosition endDelta;
098
099 /** Here the found element starts. */
100 private SourcePosition start;
101
102 /** Here the found element ends. */
103 private SourcePosition end;
104
105 /**
106 * Search simple XPath within an XML file.
107 *
108 * @param xmlFile Search this file.
109 * @param xpath Search for this simple XPath.
110 * @return Source position information.
111 * @throws ParserConfigurationException Parser configuration problem.
112 * @throws SAXException XML problem.
113 * @throws IOException IO problem.
114 */
115 // public static final SimpleXPath getXPathLocation(final File xmlFile, final String xpath)
116 // throws ParserConfigurationException, SAXException, IOException {
117 // return getXPathLocation(xmlFile, new SimpleXPath(xpath));
118 // }
119
120 /**
121 * Search simple XPath within an XML file.
122 * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime
123 * exception is thrown if the path is not found.
124 *
125 * @param address Name description (for example URL) for this XML file.
126 * @param xpath Search for this simple XPath.
127 * @param startDelta Skip position (relative to location start). Could be
128 * <code>null</code>.
129 * @param endDelta Mark until this column (relative to location start). Could
130 * be <code>null</code>.
131 * @param file Search this file.
132 * @return Source position information.
133 */
134 public static SourceArea findSourceArea(final String address, final SimpleXPath xpath,
135 final SourcePosition startDelta, final SourcePosition endDelta, final File file) {
136 final String method = "findSourceArea(String, SimpleXPath, SourcePosition, SourcePosition, File)";
137 final String message = "Could not find \"" + xpath + "\" within \"" + file + "\"";
138 try {
139 XPathLocationParser parser = new XPathLocationParser(xpath, startDelta, endDelta);
140 parser.parse(file);
141 if (parser.getStart() == null || parser.getEnd() == null) {
142 Trace.fatal(CLASS, method, message, null);
143 if (Boolean.TRUE.toString().equalsIgnoreCase(
144 System.getProperty("qedeq.test.xmlLocationFailures"))) {
145 throw new RuntimeException(message);
146 }
147 return new SourceArea(address);
148 }
149 return new SourceArea(address, parser.getStart(), parser.getEnd());
150 } catch (ParserConfigurationException e) {
151 Trace.fatal(CLASS, method, message, e);
152 } catch (SAXException e) {
153 Trace.fatal(CLASS, method, message, e);
154 } catch (IOException e) {
155 Trace.fatal(CLASS, method, message, e);
156 } catch (RuntimeException e) {
157 Trace.fatal(CLASS, method, message, e);
158 }
159 return null;
160 }
161
162 /**
163 * Search simple XPath within an XML file.
164 * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime
165 * exception is thrown if the path is not found.
166 *
167 * @param file Search this file.
168 * @param xpath Search for this simple XPath.
169 * @return Source position information.
170 */
171 public static SourceArea findSourceArea(final File file, final SimpleXPath xpath) {
172 return findSourceArea(file.toString(), xpath, null, null, file);
173 }
174
175 /**
176 * Constructor.
177 *
178 * @param xpath XML file path.
179 * @param startDelta Skip position (relative to location start). Could be
180 * <code>null</code>.
181 * @param endDelta Mark until this column (relative to location start). Could
182 * be <code>null</code>.
183 * @throws ParserConfigurationException Severe parser configuration problem.
184 * @throws SAXException XML problem.
185 */
186 public XPathLocationParser(final SimpleXPath xpath, final SourcePosition startDelta,
187 final SourcePosition endDelta) throws ParserConfigurationException,
188 SAXException {
189 super();
190
191 this.find = xpath;
192 this.startDelta = startDelta;
193 this.endDelta = endDelta;
194 elements = new ArrayList(20);
195 level = 0;
196
197 final String factoryImpl = System.getProperty("javax.xml.parsers.SAXParserFactory");
198 if (factoryImpl == null) {
199 System.setProperty("javax.xml.parsers.SAXParserFactory",
200 "org.apache.xerces.jaxp.SAXParserFactoryImpl");
201 }
202 SAXParserFactory factory = SAXParserFactory.newInstance();
203 factory.setNamespaceAware(false);
204 factory.setValidating(false);
205
206 factory.setFeature(NAMESPACES_FEATURE_ID, false);
207 factory.setFeature(VALIDATION_FEATURE_ID, false);
208
209 final SAXParser parser = factory.newSAXParser();
210
211 reader = parser.getXMLReader();
212
213 // set parser features
214 reader.setFeature(NAMESPACES_FEATURE_ID, false);
215 reader.setFeature(VALIDATION_FEATURE_ID, false);
216 }
217
218 /**
219 * Parses XML file.
220 *
221 * @param file Parse this input.
222 * @throws IOException Technical problem occurred.
223 * @throws SAXException Parsing problem.
224 */
225 public final void parse(final File file) throws IOException, SAXException {
226 xmlFile = file;
227 elements.clear();
228 level = 0;
229 InputStream stream = null;
230 try {
231 current = new SimpleXPath();
232 summary = new SimpleXPath();
233 reader.setContentHandler(this);
234 // LATER 20110316 m31: this seems to have no effect, the error handler don't get the Exceptions! Why?
235 // reader.setErrorHandler(new ErrorHandler() {
236 //
237 // public void error(SAXParseException exception) throws SAXException {
238 // exception.printStackTrace(System.out);
239 //// throw exception;
240 // }
241 //
242 // public void fatalError(SAXParseException exception) {
243 // exception.printStackTrace(System.out);
244 // }
245 //
246 // public void warning(SAXParseException exception)
247 // throws SAXException {
248 // exception.printStackTrace(System.out);
249 // }});
250 stream = new FileInputStream(file);
251 reader.parse(new InputSource(stream));
252 } catch (LocationFoundException e) {
253 // this is what we want!!!
254 } catch (SAXException e) {
255 Trace.trace(CLASS, this, "parse", e);
256 throw e;
257 } finally {
258 IoUtility.close(stream);
259 }
260 }
261
262 /*
263 * (non-Javadoc)
264 *
265 * @see org.xml.sax.ContentHandler#endDocument()
266 */
267 public void endDocument() throws SAXException {
268 elements.clear();
269 level = 0;
270 }
271
272 /*
273 * (non-Javadoc)
274 *
275 * @see org.xml.sax.ContentHandler#startDocument()
276 */
277 public void startDocument() throws SAXException {
278 elements.clear();
279 level = 0;
280 }
281
282 /*
283 * (non-Javadoc)
284 *
285 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
286 */
287 public void characters(final char[] ch, final int start, final int length) throws SAXException {
288 }
289
290 /*
291 * (non-Javadoc)
292 *
293 * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
294 */
295 public void ignorableWhitespace(final char[] ch, final int start, final int length)
296 throws SAXException {
297 }
298
299 /*
300 * (non-Javadoc)
301 *
302 * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
303 */
304 public void endPrefixMapping(final String prefix) throws SAXException {
305 }
306
307 /*
308 * (non-Javadoc)
309 *
310 * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
311 */
312 public void skippedEntity(final String name) throws SAXException {
313 }
314
315 /*
316 * (non-Javadoc)
317 *
318 * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
319 */
320 public void processingInstruction(final String target, final String data) throws SAXException {
321 }
322
323 /*
324 * (non-Javadoc)
325 *
326 * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
327 */
328 public void startPrefixMapping(final String prefix, final String uri) throws SAXException {
329 }
330
331 /*
332 * (non-Javadoc)
333 *
334 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
335 * java.lang.String, org.xml.sax.Attributes)
336 */
337 public void startElement(final String namespaceURI, final String localName, final String qName,
338 final Attributes atts) throws SAXException {
339 final String method = "startElement(String, String, Attributes)";
340 level++;
341 summary.addElement("*", addOccurence("*"));
342 current.addElement(qName, addOccurence(qName));
343
344 // LATER mime 20070109: just for testing is the next if
345 /*
346 if (find.matchesElementsBegining(current, summary)) {
347 System.out.println("part match " + qName);
348 xml.setRow(locator.getLineNumber());
349 xml.setColumn(locator.getColumnNumber());
350 try {
351 xml.skipBackToBeginOfXmlTag();
352 } catch (RuntimeException e) {
353 Trace.trace(this, method, e);
354 }
355 find.setStartLocation(new SourcePosition(xml.getLocalAddress(), xml.getRow(), xml
356 .getColumn()));
357 }
358 */
359 if (getLocator() == null) {
360 throw new SAXException("Locator unexpectedly null");
361 }
362 if (find.matchesElements(current, summary)) {
363 Trace.trace(CLASS, this, method, "matching elements");
364 Trace.param(CLASS, this, method, qName, current);
365 TextInput xml = null;
366 Reader xmlReader = null;
367 try {
368 xmlReader = new XmlReader(xmlFile);
369 xml = new TextInput(xmlReader);
370 // LATER mime 20080608: old code
371 // xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding()));
372 } catch (IOException io) {
373 Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io);
374 if (getLocator() == null) {
375 throw new SAXException("Locator unexpectedly null");
376 }
377 // at least we can set the current location as find location
378 start = new SourcePosition(
379 getLocator().getLineNumber(), getLocator().getColumnNumber());
380 return;
381 }
382 try {
383 xml.setRow(getLocator().getLineNumber());
384 xml.setColumn(getLocator().getColumnNumber());
385 if (startDelta != null) {
386 xml.skipWhiteSpace();
387 final String cdata = "<![CDATA[";
388 final String read = xml.readString(cdata.length());
389 final int cdataLength = (cdata.equals(read) ? cdata.length() : 0);
390 start = addDelta(xml, cdataLength, startDelta);
391 end = addDelta(xml, cdataLength, endDelta);
392 return;
393 }
394 try {
395 xml.skipBackToBeginOfXmlTag();
396 } catch (RuntimeException e) {
397 Trace.trace(CLASS, this, method, e);
398 }
399 start = new SourcePosition(xml.getRow(), xml.getColumn());
400 if (find.getAttribute() != null) {
401 xml.read(); // skip <
402 xml.readNextXmlName(); // must be element name
403 String tag;
404 do {
405 xml.skipWhiteSpace();
406 int row = xml.getRow();
407 int col = xml.getColumn();
408 try {
409 tag = xml.readNextXmlName();
410 } catch (IllegalArgumentException e) {
411 break;
412 }
413 if (tag.equals(find.getAttribute())) {
414 start = new SourcePosition(row, col);
415 xml.readNextAttributeValue();
416 end = new SourcePosition(xml.getRow(), xml.getColumn());
417 throw new LocationFoundException();
418 }
419 try {
420 xml.readNextAttributeValue();
421 } catch (IllegalArgumentException e) {
422 break;
423 }
424 } while (true);
425 // did we found the attribute? if not we point to the complete xml tag
426 if (end == null) {
427 end = new SourcePosition(xml.getRow(), xml.getColumn());
428 throw new LocationFoundException();
429 }
430 }
431 } finally {
432 IoUtility.close(xml); // findbugs
433 }
434 }
435 }
436
437 /**
438 * Set text input position according to locator and add delta plus tag length.
439 *
440 * @param xml This is the stream we work on.
441 * @param cdataLength Length of extra skip data.
442 * @param delta Add this delta
443 * @return Resulting source position.
444 */
445 private SourcePosition addDelta(final TextInput xml, final int cdataLength,
446 final SourcePosition delta) {
447 xml.setRow(getLocator().getLineNumber());
448 xml.setColumn(getLocator().getColumnNumber());
449 if (delta.getRow() == 1 && cdataLength > 0) {
450 xml.addColumn(cdataLength + delta.getColumn() - 1);
451 } else {
452 xml.addPosition(delta);
453 }
454 return new SourcePosition(xml.getRow(), xml.getColumn());
455 }
456
457 /**
458 * Add element occurrence.
459 *
460 * @param name Element that occurred.
461 * @return Number of occurrences including this one.
462 */
463 private int addOccurence(final String name) {
464 while (level < elements.size()) {
465 elements.remove(elements.size() - 1);
466 }
467 while (level > elements.size()) {
468 elements.add(new HashMap());
469 }
470 final Map levelMap = (Map) elements.get(level - 1);
471 final Enumerator counter;
472 if (levelMap.containsKey(name)) {
473 counter = (Enumerator) levelMap.get(name);
474 counter.increaseNumber();
475 } else {
476 counter = new Enumerator(1);
477 levelMap.put(name, counter);
478 }
479 return counter.getNumber();
480 }
481
482 /*
483 * (non-Javadoc)
484 *
485 * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String,
486 * java.lang.String)
487 */
488 public void endElement(final String namespaceURI, final String localName, final String qName)
489 throws SAXException {
490 final String method = "endElement(String, String, Attributes)";
491 level--;
492 if (getLocator() == null) {
493 current.deleteLastElement();
494 summary.deleteLastElement();
495 throw new SAXException("Locator unexpectly null");
496 }
497 if (find.matchesElements(current, summary) && find.getAttribute() == null
498 && startDelta == null) {
499 TextInput xml = null;
500 Reader xmlReader = null;
501 try {
502 xmlReader = new XmlReader(xmlFile);
503 xml = new TextInput(xmlReader);
504 // LATER mime 20080608: old code
505 // xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding()));
506 } catch (IOException io) {
507 Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io);
508 if (getLocator() == null) {
509 throw new SAXException("Locator unexpectedly null");
510 }
511 // at least we can set the current location as find location
512 start = new SourcePosition(getLocator().getLineNumber(),
513 getLocator().getColumnNumber());
514 return;
515 } finally {
516 IoUtility.close(xmlReader);
517 }
518 try {
519 xml.setRow(getLocator().getLineNumber());
520 xml.setColumn(getLocator().getColumnNumber());
521 // xml.skipForwardToEndOfXmlTag(); // LATER mime 20050810: remove? comment in?
522 end = new SourcePosition(xml.getRow(), xml.getColumn());
523 throw new LocationFoundException();
524 } finally {
525 IoUtility.close(xml); // findbugs
526 }
527 }
528 current.deleteLastElement();
529 summary.deleteLastElement();
530 }
531
532 /**
533 * Get starting source position of found element. Could be <code>null</code>.
534 *
535 * @return Start position.
536 */
537 private SourcePosition getStart() {
538 return start;
539 }
540
541 /**
542 * Get ending source position of found element. Could be <code>null</code>.
543 *
544 * @return End position.
545 */
546 private SourcePosition getEnd() {
547 return end;
548 }
549
550 }
|