001 /* This file is part of the project "Hilbert II" - http://www.qedeq.org
002 *
003 * Copyright 2000-2013, Michael Meyling <mime@qedeq.org>.
004 *
005 * "Hilbert II" is free software; you can redistribute
006 * it and/or modify it under the terms of the GNU General Public
007 * License as published by the Free Software Foundation; either
008 * version 2 of the License, or (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 * GNU General Public License for more details.
014 */
015 package org.qedeq.kernel.xml.tracker;
016
017 import java.io.File;
018 import java.io.FileInputStream;
019 import java.io.IOException;
020 import java.io.InputStream;
021 import java.io.Reader;
022 import java.util.ArrayList;
023 import java.util.HashMap;
024 import java.util.List;
025 import java.util.Map;
026
027 import javax.xml.parsers.ParserConfigurationException;
028 import javax.xml.parsers.SAXParser;
029 import javax.xml.parsers.SAXParserFactory;
030
031 import org.qedeq.base.io.IoUtility;
032 import org.qedeq.base.io.SourceArea;
033 import org.qedeq.base.io.SourcePosition;
034 import org.qedeq.base.io.TextInput;
035 import org.qedeq.base.trace.Trace;
036 import org.qedeq.base.utility.Enumerator;
037 import org.qedeq.kernel.xml.handler.common.SimpleHandler;
038 import org.xml.sax.Attributes;
039 import org.xml.sax.InputSource;
040 import org.xml.sax.SAXException;
041 import org.xml.sax.XMLReader;
042
043 import com.sun.syndication.io.XmlReader;
044
045 /**
046 * Parser for XML files. Search simple XPath within an XML file.
047 * Usage:
048 * <pre>
049 * final XPathLocationParser parser = new XPathLocationParser(xpath);
050 * parser.parse(xmlFile, original);
051 * return parser.getFind();
052 *
053 * </pre>
054 *
055 * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime
056 * exception is thrown if the path is not found.
057 *
058 * @author Michael Meyling
059 */
060 public final class XPathLocationParser extends SimpleHandler {
061
062 /** This class. */
063 private static final Class CLASS = XPathLocationParser.class;
064
065 /** Namespaces feature id (http://xml.org/sax/features/namespaces). */
066 private static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces";
067
068 /** Validation feature id (http://xml.org/sax/features/validation). */
069 private static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation";
070
071 /** SAX parser. */
072 private final XMLReader reader;
073
074 /** Search for this simple XPath expression. */
075 private final SimpleXPath find;
076
077 /** We are currently at this position. */
078 private SimpleXPath current;
079
080 /** We are currently at this position if we count only occurrences and take every element. The
081 * elements are all named "*". */
082 private SimpleXPath summary;
083
084 /** This object is parsed. */
085 private File xmlFile;
086
087 /** Element stack. */
088 private final List elements;
089
090 /** Current stack level. */
091 private int level;
092
093 /** Add this to found position. */
094 private SourcePosition startDelta;
095
096 /** Add this to found position. */
097 private SourcePosition endDelta;
098
099 /** Here the found element starts. */
100 private SourcePosition start;
101
102 /** Here the found element ends. */
103 private SourcePosition end;
104
105 /**
106 * Search simple XPath within an XML file.
107 *
108 * @param xmlFile Search this file.
109 * @param xpath Search for this simple XPath.
110 * @return Source position information.
111 * @throws ParserConfigurationException Parser configuration problem.
112 * @throws SAXException XML problem.
113 * @throws IOException IO problem.
114 */
115 // public static final SimpleXPath getXPathLocation(final File xmlFile, final String xpath)
116 // throws ParserConfigurationException, SAXException, IOException {
117 // return getXPathLocation(xmlFile, new SimpleXPath(xpath));
118 // }
119
120 /**
121 * Search simple XPath within an XML file.
122 * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime
123 * exception is thrown if the path is not found.
124 *
125 * @param address Name description (for example URL) for this XML file.
126 * @param xpath Search for this simple XPath.
127 * @param startDelta Skip position (relative to location start). Could be
128 * <code>null</code>.
129 * @param endDelta Mark until this column (relative to location start). Could
130 * be <code>null</code>.
131 * @param file Search this file.
132 * @return Source position information.
133 */
134 public static SourceArea findSourceArea(final String address, final SimpleXPath xpath,
135 final SourcePosition startDelta, final SourcePosition endDelta, final File file) {
136 final String method = "findSourceArea(String, SimpleXPath, SourcePosition, SourcePosition, File)";
137 final String message = "Could not find \"" + xpath + "\" within \"" + file + "\"";
138 try {
139 XPathLocationParser parser = new XPathLocationParser(xpath, startDelta, endDelta);
140 parser.parse(file);
141 if (parser.getStart() == null || parser.getEnd() == null) {
142 Trace.fatal(CLASS, method, message, null);
143 if (Boolean.TRUE.toString().equalsIgnoreCase(
144 System.getProperty("qedeq.test.xmlLocationFailures"))) {
145 throw new RuntimeException(message);
146 }
147 return new SourceArea(address);
148 }
149 return new SourceArea(address, parser.getStart(), parser.getEnd());
150 } catch (ParserConfigurationException e) {
151 Trace.fatal(CLASS, method, message, e);
152 } catch (SAXException e) {
153 Trace.fatal(CLASS, method, message, e);
154 } catch (IOException e) {
155 Trace.fatal(CLASS, method, message, e);
156 } catch (RuntimeException e) {
157 Trace.fatal(CLASS, method, message, e);
158 }
159 return null;
160 }
161
162 /**
163 * Search simple XPath within an XML file.
164 * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime
165 * exception is thrown if the path is not found.
166 *
167 * @param file Search this file.
168 * @param xpath Search for this simple XPath.
169 * @return Source position information.
170 */
171 public static SourceArea findSourceArea(final File file, final SimpleXPath xpath) {
172 return findSourceArea(file.toString(), xpath, null, null, file);
173 }
174
175 /**
176 * Constructor.
177 *
178 * @param xpath XML file path.
179 * @param startDelta Skip position (relative to location start). Could be
180 * <code>null</code>.
181 * @param endDelta Mark until this column (relative to location start). Could
182 * be <code>null</code>.
183 * @throws ParserConfigurationException Severe parser configuration problem.
184 * @throws SAXException XML problem.
185 */
186 public XPathLocationParser(final SimpleXPath xpath, final SourcePosition startDelta,
187 final SourcePosition endDelta) throws ParserConfigurationException,
188 SAXException {
189 super();
190
191 this.find = xpath;
192 this.startDelta = startDelta;
193 this.endDelta = endDelta;
194 elements = new ArrayList(20);
195 level = 0;
196
197 final String factoryImpl = System.getProperty("javax.xml.parsers.SAXParserFactory");
198 if (factoryImpl == null) {
199 System.setProperty("javax.xml.parsers.SAXParserFactory",
200 "org.apache.xerces.jaxp.SAXParserFactoryImpl");
201 }
202 SAXParserFactory factory = SAXParserFactory.newInstance();
203 factory.setNamespaceAware(false);
204 factory.setValidating(false);
205
206 factory.setFeature(NAMESPACES_FEATURE_ID, false);
207 factory.setFeature(VALIDATION_FEATURE_ID, false);
208
209 final SAXParser parser = factory.newSAXParser();
210
211 reader = parser.getXMLReader();
212
213 // set parser features
214 reader.setFeature(NAMESPACES_FEATURE_ID, false);
215 reader.setFeature(VALIDATION_FEATURE_ID, false);
216 }
217
218 /**
219 * Parses XML file.
220 *
221 * @param file Parse this input.
222 * @throws IOException Technical problem occurred.
223 * @throws SAXException Parsing problem.
224 */
225 public final void parse(final File file) throws IOException, SAXException {
226 xmlFile = file;
227 elements.clear();
228 level = 0;
229 InputStream stream = null;
230 try {
231 current = new SimpleXPath();
232 summary = new SimpleXPath();
233 reader.setContentHandler(this);
234 // LATER 20110316 m31: this seems to have no effect, the error handler don't get the Exceptions! Why?
235 // reader.setErrorHandler(new ErrorHandler() {
236 //
237 // public void error(SAXParseException exception) throws SAXException {
238 // exception.printStackTrace(System.out);
239 //// throw exception;
240 // }
241 //
242 // public void fatalError(SAXParseException exception) {
243 // exception.printStackTrace(System.out);
244 // }
245 //
246 // public void warning(SAXParseException exception)
247 // throws SAXException {
248 // exception.printStackTrace(System.out);
249 // }});
250 stream = new FileInputStream(file);
251 reader.parse(new InputSource(stream));
252 } catch (LocationFoundException e) {
253 // this is what we want!!!
254 } catch (SAXException e) {
255 Trace.trace(CLASS, this, "parse", e);
256 throw e;
257 } finally {
258 IoUtility.close(stream);
259 }
260 }
261
262 /*
263 * (non-Javadoc)
264 *
265 * @see org.xml.sax.ContentHandler#endDocument()
266 */
267 public void endDocument() throws SAXException {
268 elements.clear();
269 level = 0;
270 }
271
272 /*
273 * (non-Javadoc)
274 *
275 * @see org.xml.sax.ContentHandler#startDocument()
276 */
277 public void startDocument() throws SAXException {
278 elements.clear();
279 level = 0;
280 }
281
282 /*
283 * (non-Javadoc)
284 *
285 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
286 */
287 public void characters(final char[] ch, final int start, final int length) throws SAXException {
288 // nothing to do
289 }
290
291 /*
292 * (non-Javadoc)
293 *
294 * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
295 */
296 public void ignorableWhitespace(final char[] ch, final int start, final int length)
297 throws SAXException {
298 // nothing to do
299 }
300
301 /*
302 * (non-Javadoc)
303 *
304 * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
305 */
306 public void endPrefixMapping(final String prefix) throws SAXException {
307 // nothing to do
308 }
309
310 /*
311 * (non-Javadoc)
312 *
313 * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
314 */
315 public void skippedEntity(final String name) throws SAXException {
316 // nothing to do
317 }
318
319 /*
320 * (non-Javadoc)
321 *
322 * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
323 */
324 public void processingInstruction(final String target, final String data) throws SAXException {
325 // nothing to do
326 }
327
328 /*
329 * (non-Javadoc)
330 *
331 * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
332 */
333 public void startPrefixMapping(final String prefix, final String uri) throws SAXException {
334 // nothing to do
335 }
336
337 /*
338 * (non-Javadoc)
339 *
340 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
341 * java.lang.String, org.xml.sax.Attributes)
342 */
343 public void startElement(final String namespaceURI, final String localName, final String qName,
344 final Attributes atts) throws SAXException {
345 final String method = "startElement(String, String, Attributes)";
346 level++;
347 summary.addElement("*", addOccurence("*"));
348 current.addElement(qName, addOccurence(qName));
349
350 // LATER mime 20070109: just for testing is the next if
351 /*
352 if (find.matchesElementsBegining(current, summary)) {
353 System.out.println("part match " + qName);
354 xml.setRow(locator.getLineNumber());
355 xml.setColumn(locator.getColumnNumber());
356 try {
357 xml.skipBackToBeginOfXmlTag();
358 } catch (RuntimeException e) {
359 Trace.trace(this, method, e);
360 }
361 find.setStartLocation(new SourcePosition(xml.getLocalAddress(), xml.getRow(), xml
362 .getColumn()));
363 }
364 */
365 if (getLocator() == null) {
366 throw new SAXException("Locator unexpectedly null");
367 }
368 if (find.matchesElements(current, summary)) {
369 Trace.trace(CLASS, this, method, "matching elements");
370 Trace.param(CLASS, this, method, qName, current);
371 TextInput xml = null;
372 Reader xmlReader = null;
373 try {
374 xmlReader = new XmlReader(xmlFile);
375 xml = new TextInput(xmlReader);
376 // LATER mime 20080608: old code
377 // xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding()));
378 } catch (IOException io) {
379 Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io);
380 if (getLocator() == null) {
381 throw new SAXException("Locator unexpectedly null");
382 }
383 // at least we can set the current location as find location
384 start = new SourcePosition(
385 getLocator().getLineNumber(), getLocator().getColumnNumber());
386 return;
387 }
388 try {
389 xml.setRow(getLocator().getLineNumber());
390 xml.setColumn(getLocator().getColumnNumber());
391 if (startDelta != null) {
392 xml.skipWhiteSpace();
393 final String cdata = "<![CDATA[";
394 final String read = xml.readString(cdata.length());
395 final int cdataLength = (cdata.equals(read) ? cdata.length() : 0);
396 start = addDelta(xml, cdataLength, startDelta);
397 end = addDelta(xml, cdataLength, endDelta);
398 return;
399 }
400 try {
401 xml.skipBackToBeginOfXmlTag();
402 } catch (RuntimeException e) {
403 Trace.trace(CLASS, this, method, e);
404 }
405 start = new SourcePosition(xml.getRow(), xml.getColumn());
406 if (find.getAttribute() != null) {
407 xml.read(); // skip <
408 xml.readNextXmlName(); // must be element name
409 String tag;
410 do {
411 xml.skipWhiteSpace();
412 int row = xml.getRow();
413 int col = xml.getColumn();
414 try {
415 tag = xml.readNextXmlName();
416 } catch (IllegalArgumentException e) {
417 break;
418 }
419 if (tag.equals(find.getAttribute())) {
420 start = new SourcePosition(row, col);
421 xml.readNextAttributeValue();
422 end = new SourcePosition(xml.getRow(), xml.getColumn());
423 throw new LocationFoundException();
424 }
425 try {
426 xml.readNextAttributeValue();
427 } catch (IllegalArgumentException e) {
428 break;
429 }
430 } while (true);
431 // did we found the attribute? if not we point to the complete xml tag
432 if (end == null) {
433 end = new SourcePosition(xml.getRow(), xml.getColumn());
434 throw new LocationFoundException();
435 }
436 }
437 } finally {
438 IoUtility.close(xml); // findbugs
439 }
440 }
441 }
442
443 /**
444 * Set text input position according to locator and add delta plus tag length.
445 *
446 * @param xml This is the stream we work on.
447 * @param cdataLength Length of extra skip data.
448 * @param delta Add this delta
449 * @return Resulting source position.
450 */
451 private SourcePosition addDelta(final TextInput xml, final int cdataLength,
452 final SourcePosition delta) {
453 xml.setRow(getLocator().getLineNumber());
454 xml.setColumn(getLocator().getColumnNumber());
455 if (delta.getRow() == 1 && cdataLength > 0) {
456 xml.addColumn(cdataLength + delta.getColumn() - 1);
457 } else {
458 xml.addPosition(delta);
459 }
460 return new SourcePosition(xml.getRow(), xml.getColumn());
461 }
462
463 /**
464 * Add element occurrence.
465 *
466 * @param name Element that occurred.
467 * @return Number of occurrences including this one.
468 */
469 private int addOccurence(final String name) {
470 while (level < elements.size()) {
471 elements.remove(elements.size() - 1);
472 }
473 while (level > elements.size()) {
474 elements.add(new HashMap());
475 }
476 final Map levelMap = (Map) elements.get(level - 1);
477 final Enumerator counter;
478 if (levelMap.containsKey(name)) {
479 counter = (Enumerator) levelMap.get(name);
480 counter.increaseNumber();
481 } else {
482 counter = new Enumerator(1);
483 levelMap.put(name, counter);
484 }
485 return counter.getNumber();
486 }
487
488 /*
489 * (non-Javadoc)
490 *
491 * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String,
492 * java.lang.String)
493 */
494 public void endElement(final String namespaceURI, final String localName, final String qName)
495 throws SAXException {
496 final String method = "endElement(String, String, Attributes)";
497 level--;
498 if (getLocator() == null) {
499 current.deleteLastElement();
500 summary.deleteLastElement();
501 throw new SAXException("Locator unexpectly null");
502 }
503 if (find.matchesElements(current, summary) && find.getAttribute() == null
504 && startDelta == null) {
505 TextInput xml = null;
506 Reader xmlReader = null;
507 try {
508 xmlReader = new XmlReader(xmlFile);
509 xml = new TextInput(xmlReader);
510 // LATER mime 20080608: old code
511 // xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding()));
512 } catch (IOException io) {
513 Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io);
514 if (getLocator() == null) {
515 throw new SAXException("Locator unexpectedly null");
516 }
517 // at least we can set the current location as find location
518 start = new SourcePosition(getLocator().getLineNumber(),
519 getLocator().getColumnNumber());
520 return;
521 } finally {
522 IoUtility.close(xmlReader);
523 }
524 try {
525 xml.setRow(getLocator().getLineNumber());
526 xml.setColumn(getLocator().getColumnNumber());
527 // xml.skipForwardToEndOfXmlTag(); // LATER mime 20050810: remove? comment in?
528 end = new SourcePosition(xml.getRow(), xml.getColumn());
529 throw new LocationFoundException();
530 } finally {
531 IoUtility.close(xml); // findbugs
532 }
533 }
534 current.deleteLastElement();
535 summary.deleteLastElement();
536 }
537
538 /**
539 * Get starting source position of found element. Could be <code>null</code>.
540 *
541 * @return Start position.
542 */
543 private SourcePosition getStart() {
544 return start;
545 }
546
547 /**
548 * Get ending source position of found element. Could be <code>null</code>.
549 *
550 * @return End position.
551 */
552 private SourcePosition getEnd() {
553 return end;
554 }
555
556 }
|