View Javadoc

1   /* This file is part of the project "Hilbert II" - http://www.qedeq.org
2    *
3    * Copyright 2000-2014,  Michael Meyling <mime@qedeq.org>.
4    *
5    * "Hilbert II" is free software; you can redistribute
6    * it and/or modify it under the terms of the GNU General Public
7    * License as published by the Free Software Foundation; either
8    * version 2 of the License, or (at your option) any later version.
9    *
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13   * GNU General Public License for more details.
14   */
15  package org.qedeq.kernel.xml.tracker;
16  
17  import java.io.File;
18  import java.io.FileInputStream;
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.Reader;
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Map;
26  
27  import javax.xml.parsers.ParserConfigurationException;
28  import javax.xml.parsers.SAXParser;
29  import javax.xml.parsers.SAXParserFactory;
30  
31  import org.qedeq.base.io.IoUtility;
32  import org.qedeq.base.io.SourceArea;
33  import org.qedeq.base.io.SourcePosition;
34  import org.qedeq.base.io.TextInput;
35  import org.qedeq.base.trace.Trace;
36  import org.qedeq.base.utility.Enumerator;
37  import org.qedeq.kernel.xml.handler.common.SimpleHandler;
38  import org.xml.sax.Attributes;
39  import org.xml.sax.InputSource;
40  import org.xml.sax.SAXException;
41  import org.xml.sax.XMLReader;
42  
43  import com.sun.syndication.io.XmlReader;
44  
45  /**
46   * Parser for XML files. Search simple XPath within an XML file.
47   * Usage:
48   * <pre>
49   *      final SourceArea area = XPathLocationParser.findSourceArea(xmlFile, xpath);
50   *      // area is null if the file could not be parsed
51   *
52   *
53   * </pre>
54   *
55   * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime
56   * exception is thrown if the path is not found.
57   *
58   * @author  Michael Meyling
59   */
60  public final class XPathLocationParser extends SimpleHandler {
61  
62      /** This class. */
63      private static final Class CLASS = XPathLocationParser.class;
64  
65      /** Namespaces feature id (http://xml.org/sax/features/namespaces). */
66      private static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces";
67  
68      /** Validation feature id (http://xml.org/sax/features/validation). */
69      private static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation";
70  
71      /** SAX parser. */
72      private final XMLReader reader;
73  
74      /** Search for this simple XPath expression. */
75      private final SimpleXPath find;
76  
77      /** We are currently at this position. */
78      private SimpleXPath current;
79  
80      /** We are currently at this position if we count only occurrences and take every element. The
81       * elements are all named "*". */
82      private SimpleXPath summary;
83  
84      /** This object is parsed. */
85      private File xmlFile;
86  
87      /** Element stack. */
88      private final List elements;
89  
90      /** Current stack level. */
91      private int level;
92  
93      /** Add this to found position. */
94      private SourcePosition startDelta;
95  
96      /** Add this to found position. */
97      private SourcePosition endDelta;
98  
99      /** Here the found element starts. */
100     private SourcePosition start;
101 
102     /** Here the found element ends. */
103     private SourcePosition end;
104 
105     /**
106      * Search simple XPath within an XML file.
107      *
108      * @param   xmlFile Search this file.
109      * @param   xpath   Search for this simple XPath.
110      * @return  Source position information.
111      * @throws  ParserConfigurationException    Parser configuration problem.
112      * @throws  SAXException                    XML problem.
113      * @throws  IOException                     IO problem.
114      */
115 //    public static final SimpleXPath getXPathLocation(final File xmlFile, final String xpath)
116 //            throws ParserConfigurationException, SAXException, IOException {
117 //        return getXPathLocation(xmlFile, new SimpleXPath(xpath));
118 //    }
119 
120     /**
121      * Search simple XPath within an XML file.
122      * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime
123      * exception is thrown if the path is not found.
124      *
125      * @param   address     Name description (for example URL) for this XML file.
126      * @param   xpath       Search for this simple XPath.
127      * @param   startDelta  Skip position (relative to location start). Could be
128      *                      <code>null</code>.
129      * @param   endDelta    Mark until this column (relative to location start). Could
130      *                      be <code>null</code>.
131      * @param   file        Search this file.
132      * @return  Source position information.
133      */
134     public static SourceArea findSourceArea(final String address, final SimpleXPath xpath,
135             final SourcePosition startDelta, final SourcePosition endDelta,  final File file) {
136         final String method = "findSourceArea(String, SimpleXPath, SourcePosition, SourcePosition, File)";
137         final String message = "Could not find \"" + xpath + "\" within \"" + file + "\"";
138         try {
139             XPathLocationParser parser = new XPathLocationParser(xpath, startDelta, endDelta);
140             parser.parse(file);
141             if (parser.getStart() == null || parser.getEnd() == null) {
142                 Trace.fatal(CLASS, method, message, null);
143                 if (Boolean.TRUE.toString().equalsIgnoreCase(
144                         System.getProperty("qedeq.test.xmlLocationFailures"))) {
145                     throw new RuntimeException(message);
146                 }
147                 return new SourceArea(address);
148             }
149             return new SourceArea(address, parser.getStart(), parser.getEnd());
150         } catch (ParserConfigurationException e) {
151             Trace.fatal(CLASS, method, message, e);
152         } catch (SAXException e) {
153             Trace.fatal(CLASS, method, message, e);
154         } catch (IOException e) {
155             Trace.fatal(CLASS, method, message, e);
156         } catch (RuntimeException e) {
157             Trace.fatal(CLASS, method, message, e);
158         }
159         return null;
160     }
161 
162     /**
163      * Search simple XPath within an XML file.
164      * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime
165      * exception is thrown if the path is not found.
166      *
167      * @param   file        Search this file.
168      * @param   xpath       Search for this simple XPath.
169      * @return  Source position information.
170      */
171     public static SourceArea findSourceArea(final File file, final SimpleXPath xpath) {
172         return findSourceArea(file.toString(), xpath, null, null, file);
173     }
174 
/**
 * Creates a parser that searches for the given simple XPath. A non validating, non
 * namespace aware SAX reader is prepared. If the system property
 * "javax.xml.parsers.SAXParserFactory" is not set, it is set to the Xerces factory
 * implementation before the factory is created.
 *
 * @param   xpath                   Search for this simple XPath.
 * @param   startDelta              Skip position (relative to location start). Could be
 *                                  <code>null</code>.
 * @param   endDelta                Mark until this column (relative to location start). Could
 *                                  be <code>null</code>.
 * @throws  ParserConfigurationException    Severe parser configuration problem.
 * @throws  SAXException                    XML problem.
 */
public XPathLocationParser(final SimpleXPath xpath, final SourcePosition startDelta,
        final SourcePosition endDelta) throws ParserConfigurationException, SAXException {
    super();

    this.find = xpath;
    this.startDelta = startDelta;
    this.endDelta = endDelta;
    this.elements = new ArrayList(20);
    this.level = 0;

    // fall back to Xerces only if nobody configured a factory
    final String factoryKey = "javax.xml.parsers.SAXParserFactory";
    if (System.getProperty(factoryKey) == null) {
        System.setProperty(factoryKey, "org.apache.xerces.jaxp.SAXParserFactoryImpl");
    }

    final SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    saxFactory.setNamespaceAware(false);
    saxFactory.setValidating(false);
    saxFactory.setFeature(NAMESPACES_FEATURE_ID, false);
    saxFactory.setFeature(VALIDATION_FEATURE_ID, false);

    // switch the same features off at the reader itself
    final XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader();
    xmlReader.setFeature(NAMESPACES_FEATURE_ID, false);
    xmlReader.setFeature(VALIDATION_FEATURE_ID, false);
    this.reader = xmlReader;
}
217 
218     /**
219      * Parses XML file.
220      *
221      * @param   file            Parse this input.
222      * @throws  IOException     Technical problem occurred.
223      * @throws  SAXException    Parsing problem.
224      */
225     public final void parse(final File file) throws IOException,  SAXException {
226         xmlFile = file;
227         elements.clear();
228         level = 0;
229         InputStream stream = null;
230         try {
231             current = new SimpleXPath();
232             summary = new SimpleXPath();
233             reader.setContentHandler(this);
234 // LATER 20110316 m31: this seems to have no effect, the error handler don't get the Exceptions! Why?
235 //            reader.setErrorHandler(new ErrorHandler() {
236 //
237 //                public void error(SAXParseException exception) throws SAXException {
238 //                    exception.printStackTrace(System.out);
239 ////                    throw exception;
240 //                }
241 //
242 //                public void fatalError(SAXParseException exception) {
243 //                    exception.printStackTrace(System.out);
244 //                }
245 //
246 //                public void warning(SAXParseException exception)
247 //                        throws SAXException {
248 //                    exception.printStackTrace(System.out);
249 //                }});
250             stream = new FileInputStream(file);
251             reader.parse(new InputSource(stream));
252         } catch (XPathLocationFoundException e) {
253             // this is what we want!!!
254         } catch (SAXException e) {
255             Trace.trace(CLASS, this, "parse", e);
256             throw e;
257         } finally {
258             IoUtility.close(stream);
259         }
260     }
261 
262     /*
263      * (non-Javadoc)
264      *
265      * @see org.xml.sax.ContentHandler#endDocument()
266      */
267     public void endDocument() throws SAXException {
268         elements.clear();
269         level = 0;
270     }
271 
272     /*
273      * (non-Javadoc)
274      *
275      * @see org.xml.sax.ContentHandler#startDocument()
276      */
277     public void startDocument() throws SAXException {
278         elements.clear();
279         level = 0;
280     }
281 
282     /*
283      * (non-Javadoc)
284      *
285      * @see org.xml.sax.ContentHandler#characters(char[], int, int)
286      */
287     public void characters(final char[] ch, final int start, final int length) throws SAXException {
288         // nothing to do
289     }
290 
291     /*
292      * (non-Javadoc)
293      *
294      * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
295      */
296     public void ignorableWhitespace(final char[] ch, final int start, final int length)
297             throws SAXException {
298         // nothing to do
299     }
300 
301     /*
302      * (non-Javadoc)
303      *
304      * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
305      */
306     public void endPrefixMapping(final String prefix) throws SAXException {
307         // nothing to do
308     }
309 
310     /*
311      * (non-Javadoc)
312      *
313      * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
314      */
315     public void skippedEntity(final String name) throws SAXException {
316         // nothing to do
317     }
318 
319     /*
320      * (non-Javadoc)
321      *
322      * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
323      */
324     public void processingInstruction(final String target, final String data) throws SAXException {
325         // nothing to do
326     }
327 
328     /*
329      * (non-Javadoc)
330      *
331      * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
332      */
333     public void startPrefixMapping(final String prefix, final String uri) throws SAXException {
334         // nothing to do
335     }
336 
337     /*
338      * (non-Javadoc)
339      *
340      * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
341      *      java.lang.String, org.xml.sax.Attributes)
342      */
343     public void startElement(final String namespaceURI, final String localName, final String qName,
344             final Attributes atts) throws SAXException {
345         final String method = "startElement(String, String, Attributes)";
346         level++;
347         summary.addElement("*", addOccurence("*"));
348         current.addElement(qName, addOccurence(qName));
349 
350         // LATER mime 20070109: just for testing is the next if
351 /*
352         if (find.matchesElementsBegining(current, summary)) {
353             System.out.println("part match " + qName);
354             xml.setRow(locator.getLineNumber());
355             xml.setColumn(locator.getColumnNumber());
356             try {
357                 xml.skipBackToBeginOfXmlTag();
358             } catch (RuntimeException e) {
359                 Trace.trace(this, method, e);
360             }
361             find.setStartLocation(new SourcePosition(xml.getLocalAddress(), xml.getRow(), xml
362                 .getColumn()));
363         }
364 */
365         if (getLocator() == null) {
366             throw new SAXException("Locator unexpectedly null");
367         }
368         if (find.matchesElements(current, summary)) {
369             Trace.trace(CLASS, this, method, "matching elements");
370             Trace.param(CLASS, this, method, qName, current);
371             TextInput xml = null;
372             Reader xmlReader = null;
373             try {
374                 xmlReader = new XmlReader(xmlFile);
375                 xml = new TextInput(xmlReader);
376 // LATER mime 20080608: old code
377 //                xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding()));
378             } catch (IOException io) {
379                 Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io);
380                 if (getLocator() == null) {
381                     throw new SAXException("Locator unexpectedly null");
382                 }
383                 // at least we can set the current location as find location
384                 start = new SourcePosition(
385                     getLocator().getLineNumber(), getLocator().getColumnNumber());
386                 return;
387             }
388             try {
389                 xml.setRow(getLocator().getLineNumber());
390                 xml.setColumn(getLocator().getColumnNumber());
391                 if (startDelta != null) {
392                     xml.skipWhiteSpace();
393                     final String cdata = "<![CDATA[";
394                     final String read = xml.readString(cdata.length());
395                     final int cdataLength = (cdata.equals(read) ? cdata.length() : 0);
396                     start = addDelta(xml, cdataLength, startDelta);
397                     end = addDelta(xml, cdataLength, endDelta);
398                     return;
399                 }
400                 try {
401                     xml.skipBackToBeginOfXmlTag();
402                 } catch (RuntimeException e) {
403                     Trace.trace(CLASS, this, method, e);
404                 }
405                 start = new SourcePosition(xml.getRow(), xml.getColumn());
406                 if (find.getAttribute() != null) {
407                     xml.read(); // skip <
408                     xml.readNextXmlName(); // must be element name
409                     String tag;
410                     do {
411                         xml.skipWhiteSpace();
412                         int row = xml.getRow();
413                         int col = xml.getColumn();
414                         try {
415                             tag = xml.readNextXmlName();
416                         } catch (IllegalArgumentException e) {
417                             break;
418                         }
419                         if (tag.equals(find.getAttribute())) {
420                             start = new SourcePosition(row, col);
421                             xml.readNextAttributeValue();
422                             end = new SourcePosition(xml.getRow(), xml.getColumn());
423                             throw new XPathLocationFoundException();
424                         }
425                         try {
426                             xml.readNextAttributeValue();
427                         } catch (IllegalArgumentException e) {
428                             break;
429                         }
430                     } while (true);
431                     // did we found the attribute? if not we point to the complete xml tag
432                     if (end == null) {
433                         end = new SourcePosition(xml.getRow(), xml.getColumn());
434                         throw new XPathLocationFoundException();
435                     }
436                 }
437             } finally {
438                 IoUtility.close(xml);   // findbugs
439             }
440         }
441     }
442 
443     /**
444      * Set text input position according to locator and add delta plus tag length.
445      *
446      * @param   xml         This is the stream we work on.
447      * @param   cdataLength Length of extra skip data.
448      * @param   delta       Add this delta
449      * @return  Resulting source position.
450      */
451     private SourcePosition addDelta(final TextInput xml, final int cdataLength,
452             final SourcePosition delta) {
453         xml.setRow(getLocator().getLineNumber());
454         xml.setColumn(getLocator().getColumnNumber());
455         if (delta.getRow() == 1 && cdataLength > 0) {
456             xml.addColumn(cdataLength + delta.getColumn() - 1);
457         } else {
458             xml.addPosition(delta);
459         }
460         return new SourcePosition(xml.getRow(), xml.getColumn());
461     }
462 
463     /**
464      * Add element occurrence.
465      *
466      * @param name Element that occurred.
467      * @return Number of occurrences including this one.
468      */
469     private int addOccurence(final String name) {
470         while (level < elements.size()) {
471             elements.remove(elements.size() - 1);
472         }
473         while (level > elements.size()) {
474             elements.add(new HashMap());
475         }
476         final Map levelMap = (Map) elements.get(level - 1);
477         final Enumerator counter;
478         if (levelMap.containsKey(name)) {
479             counter = (Enumerator) levelMap.get(name);
480             counter.increaseNumber();
481         } else {
482             counter = new Enumerator(1);
483             levelMap.put(name, counter);
484         }
485         return counter.getNumber();
486     }
487 
488     /*
489      * (non-Javadoc)
490      *
491      * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String,
492      *      java.lang.String)
493      */
494     public void endElement(final String namespaceURI, final String localName, final String qName)
495             throws SAXException {
496         final String method = "endElement(String, String, Attributes)";
497         level--;
498         if (getLocator() == null) {
499             current.deleteLastElement();
500             summary.deleteLastElement();
501             throw new SAXException("Locator unexpectly null");
502         }
503         if (find.matchesElements(current, summary) && find.getAttribute() == null
504                 && startDelta == null) {
505             TextInput xml = null;
506             Reader xmlReader = null;
507             try {
508                 xmlReader = new XmlReader(xmlFile);
509                 xml = new TextInput(xmlReader);
510 // LATER mime 20080608: old code
511 //                xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding()));
512             } catch (IOException io) {
513                 Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io);
514                 if (getLocator() == null) {
515                     throw new SAXException("Locator unexpectedly null");
516                 }
517                 // at least we can set the current location as find location
518                 start = new SourcePosition(getLocator().getLineNumber(),
519                     getLocator().getColumnNumber());
520                 return;
521             } finally {
522                 IoUtility.close(xmlReader);
523             }
524             try {
525                 xml.setRow(getLocator().getLineNumber());
526                 xml.setColumn(getLocator().getColumnNumber());
527                 // xml.skipForwardToEndOfXmlTag(); // LATER mime 20050810: remove? comment in?
528                 end = new SourcePosition(xml.getRow(), xml.getColumn());
529                 throw new XPathLocationFoundException();
530             } finally {
531                 IoUtility.close(xml);   // findbugs
532             }
533         }
534         current.deleteLastElement();
535         summary.deleteLastElement();
536     }
537 
538     /**
539      * Get starting source position of found element. Could be <code>null</code>.
540      *
541      * @return  Start position.
542      */
543     private SourcePosition getStart() {
544         return start;
545     }
546 
547     /**
548      * Get ending source position of found element. Could be <code>null</code>.
549      *
550      * @return  End position.
551      */
552     private SourcePosition getEnd() {
553         return end;
554     }
555 
556 }