| 1 | /* This file is part of the project "Hilbert II" - http://www.qedeq.org |
| 2 | * |
| 3 | * Copyright 2000-2014, Michael Meyling <mime@qedeq.org>. |
| 4 | * |
| 5 | * "Hilbert II" is free software; you can redistribute |
| 6 | * it and/or modify it under the terms of the GNU General Public |
| 7 | * License as published by the Free Software Foundation; either |
| 8 | * version 2 of the License, or (at your option) any later version. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * GNU General Public License for more details. |
| 14 | */ |
| 15 | package org.qedeq.kernel.xml.tracker; |
| 16 | |
| 17 | import java.io.File; |
| 18 | import java.io.FileInputStream; |
| 19 | import java.io.IOException; |
| 20 | import java.io.InputStream; |
| 21 | import java.io.Reader; |
| 22 | import java.util.ArrayList; |
| 23 | import java.util.HashMap; |
| 24 | import java.util.List; |
| 25 | import java.util.Map; |
| 26 | |
| 27 | import javax.xml.parsers.ParserConfigurationException; |
| 28 | import javax.xml.parsers.SAXParser; |
| 29 | import javax.xml.parsers.SAXParserFactory; |
| 30 | |
| 31 | import org.qedeq.base.io.IoUtility; |
| 32 | import org.qedeq.base.io.SourceArea; |
| 33 | import org.qedeq.base.io.SourcePosition; |
| 34 | import org.qedeq.base.io.TextInput; |
| 35 | import org.qedeq.base.trace.Trace; |
| 36 | import org.qedeq.base.utility.Enumerator; |
| 37 | import org.qedeq.kernel.xml.handler.common.SimpleHandler; |
| 38 | import org.xml.sax.Attributes; |
| 39 | import org.xml.sax.InputSource; |
| 40 | import org.xml.sax.SAXException; |
| 41 | import org.xml.sax.XMLReader; |
| 42 | |
| 43 | import com.sun.syndication.io.XmlReader; |
| 44 | |
| 45 | /** |
| 46 | * Parser for XML files. Search simple XPath within an XML file. |
| 47 | * Usage: |
| 48 | * <pre> |
| 49 | * final XPathLocationParser parser = new XPathLocationParser(xpath); |
| 50 | * parser.parse(xmlFile, original); |
| 51 | * return parser.getFind(); |
| 52 | * |
| 53 | * </pre> |
| 54 | * |
| 55 | * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime |
| 56 | * exception is thrown if the path is not found. |
| 57 | * |
| 58 | * @author Michael Meyling |
| 59 | */ |
| 60 | public final class XPathLocationParser extends SimpleHandler { |
| 61 | |
| 62 | /** This class. */ |
| 63 | private static final Class CLASS = XPathLocationParser.class; |
| 64 | |
| 65 | /** Namespaces feature id (http://xml.org/sax/features/namespaces). */ |
| 66 | private static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces"; |
| 67 | |
| 68 | /** Validation feature id (http://xml.org/sax/features/validation). */ |
| 69 | private static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation"; |
| 70 | |
| 71 | /** SAX parser. */ |
| 72 | private final XMLReader reader; |
| 73 | |
| 74 | /** Search for this simple XPath expression. */ |
| 75 | private final SimpleXPath find; |
| 76 | |
| 77 | /** We are currently at this position. */ |
| 78 | private SimpleXPath current; |
| 79 | |
| 80 | /** We are currently at this position if we count only occurrences and take every element. The |
| 81 | * elements are all named "*". */ |
| 82 | private SimpleXPath summary; |
| 83 | |
| 84 | /** This object is parsed. */ |
| 85 | private File xmlFile; |
| 86 | |
| 87 | /** Element stack. */ |
| 88 | private final List elements; |
| 89 | |
| 90 | /** Current stack level. */ |
| 91 | private int level; |
| 92 | |
| 93 | /** Add this to found position. */ |
| 94 | private SourcePosition startDelta; |
| 95 | |
| 96 | /** Add this to found position. */ |
| 97 | private SourcePosition endDelta; |
| 98 | |
| 99 | /** Here the found element starts. */ |
| 100 | private SourcePosition start; |
| 101 | |
| 102 | /** Here the found element ends. */ |
| 103 | private SourcePosition end; |
| 104 | |
| 105 | /** |
| 106 | * Search simple XPath within an XML file. |
| 107 | * |
| 108 | * @param xmlFile Search this file. |
| 109 | * @param xpath Search for this simple XPath. |
| 110 | * @return Source position information. |
| 111 | * @throws ParserConfigurationException Parser configuration problem. |
| 112 | * @throws SAXException XML problem. |
| 113 | * @throws IOException IO problem. |
| 114 | */ |
| 115 | // public static final SimpleXPath getXPathLocation(final File xmlFile, final String xpath) |
| 116 | // throws ParserConfigurationException, SAXException, IOException { |
| 117 | // return getXPathLocation(xmlFile, new SimpleXPath(xpath)); |
| 118 | // } |
| 119 | |
| 120 | /** |
| 121 | * Search simple XPath within an XML file. |
| 122 | * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime |
| 123 | * exception is thrown if the path is not found. |
| 124 | * |
| 125 | * @param address Name description (for example URL) for this XML file. |
| 126 | * @param xpath Search for this simple XPath. |
| 127 | * @param startDelta Skip position (relative to location start). Could be |
| 128 | * <code>null</code>. |
| 129 | * @param endDelta Mark until this column (relative to location start). Could |
| 130 | * be <code>null</code>. |
| 131 | * @param file Search this file. |
| 132 | * @return Source position information. |
| 133 | */ |
| 134 | public static SourceArea findSourceArea(final String address, final SimpleXPath xpath, |
| 135 | final SourcePosition startDelta, final SourcePosition endDelta, final File file) { |
| 136 | final String method = "findSourceArea(String, SimpleXPath, SourcePosition, SourcePosition, File)"; |
| 137 | final String message = "Could not find \"" + xpath + "\" within \"" + file + "\""; |
| 138 | try { |
| 139 | XPathLocationParser parser = new XPathLocationParser(xpath, startDelta, endDelta); |
| 140 | parser.parse(file); |
| 141 | if (parser.getStart() == null || parser.getEnd() == null) { |
| 142 | Trace.fatal(CLASS, method, message, null); |
| 143 | if (Boolean.TRUE.toString().equalsIgnoreCase( |
| 144 | System.getProperty("qedeq.test.xmlLocationFailures"))) { |
| 145 | throw new RuntimeException(message); |
| 146 | } |
| 147 | return new SourceArea(address); |
| 148 | } |
| 149 | return new SourceArea(address, parser.getStart(), parser.getEnd()); |
| 150 | } catch (ParserConfigurationException e) { |
| 151 | Trace.fatal(CLASS, method, message, e); |
| 152 | } catch (SAXException e) { |
| 153 | Trace.fatal(CLASS, method, message, e); |
| 154 | } catch (IOException e) { |
| 155 | Trace.fatal(CLASS, method, message, e); |
| 156 | } catch (RuntimeException e) { |
| 157 | Trace.fatal(CLASS, method, message, e); |
| 158 | } |
| 159 | return null; |
| 160 | } |
| 161 | |
| 162 | /** |
| 163 | * Search simple XPath within an XML file. |
| 164 | * If the system property "qedeq.test.xmlLocationFailures" is set to "true" a runtime |
| 165 | * exception is thrown if the path is not found. |
| 166 | * |
| 167 | * @param file Search this file. |
| 168 | * @param xpath Search for this simple XPath. |
| 169 | * @return Source position information. |
| 170 | */ |
| 171 | public static SourceArea findSourceArea(final File file, final SimpleXPath xpath) { |
| 172 | return findSourceArea(file.toString(), xpath, null, null, file); |
| 173 | } |
| 174 | |
| 175 | /** |
| 176 | * Constructor. |
| 177 | * |
| 178 | * @param xpath XML file path. |
| 179 | * @param startDelta Skip position (relative to location start). Could be |
| 180 | * <code>null</code>. |
| 181 | * @param endDelta Mark until this column (relative to location start). Could |
| 182 | * be <code>null</code>. |
| 183 | * @throws ParserConfigurationException Severe parser configuration problem. |
| 184 | * @throws SAXException XML problem. |
| 185 | */ |
| 186 | public XPathLocationParser(final SimpleXPath xpath, final SourcePosition startDelta, |
| 187 | final SourcePosition endDelta) throws ParserConfigurationException, |
| 188 | SAXException { |
| 189 | super(); |
| 190 | |
| 191 | this.find = xpath; |
| 192 | this.startDelta = startDelta; |
| 193 | this.endDelta = endDelta; |
| 194 | elements = new ArrayList(20); |
| 195 | level = 0; |
| 196 | |
| 197 | final String factoryImpl = System.getProperty("javax.xml.parsers.SAXParserFactory"); |
| 198 | if (factoryImpl == null) { |
| 199 | System.setProperty("javax.xml.parsers.SAXParserFactory", |
| 200 | "org.apache.xerces.jaxp.SAXParserFactoryImpl"); |
| 201 | } |
| 202 | SAXParserFactory factory = SAXParserFactory.newInstance(); |
| 203 | factory.setNamespaceAware(false); |
| 204 | factory.setValidating(false); |
| 205 | |
| 206 | factory.setFeature(NAMESPACES_FEATURE_ID, false); |
| 207 | factory.setFeature(VALIDATION_FEATURE_ID, false); |
| 208 | |
| 209 | final SAXParser parser = factory.newSAXParser(); |
| 210 | |
| 211 | reader = parser.getXMLReader(); |
| 212 | |
| 213 | // set parser features |
| 214 | reader.setFeature(NAMESPACES_FEATURE_ID, false); |
| 215 | reader.setFeature(VALIDATION_FEATURE_ID, false); |
| 216 | } |
| 217 | |
| 218 | /** |
| 219 | * Parses XML file. |
| 220 | * |
| 221 | * @param file Parse this input. |
| 222 | * @throws IOException Technical problem occurred. |
| 223 | * @throws SAXException Parsing problem. |
| 224 | */ |
| 225 | public final void parse(final File file) throws IOException, SAXException { |
| 226 | xmlFile = file; |
| 227 | elements.clear(); |
| 228 | level = 0; |
| 229 | InputStream stream = null; |
| 230 | try { |
| 231 | current = new SimpleXPath(); |
| 232 | summary = new SimpleXPath(); |
| 233 | reader.setContentHandler(this); |
| 234 | // LATER 20110316 m31: this seems to have no effect, the error handler don't get the Exceptions! Why? |
| 235 | // reader.setErrorHandler(new ErrorHandler() { |
| 236 | // |
| 237 | // public void error(SAXParseException exception) throws SAXException { |
| 238 | // exception.printStackTrace(System.out); |
| 239 | //// throw exception; |
| 240 | // } |
| 241 | // |
| 242 | // public void fatalError(SAXParseException exception) { |
| 243 | // exception.printStackTrace(System.out); |
| 244 | // } |
| 245 | // |
| 246 | // public void warning(SAXParseException exception) |
| 247 | // throws SAXException { |
| 248 | // exception.printStackTrace(System.out); |
| 249 | // }}); |
| 250 | stream = new FileInputStream(file); |
| 251 | reader.parse(new InputSource(stream)); |
| 252 | } catch (XPathLocationFoundException e) { |
| 253 | // this is what we want!!! |
| 254 | } catch (SAXException e) { |
| 255 | Trace.trace(CLASS, this, "parse", e); |
| 256 | throw e; |
| 257 | } finally { |
| 258 | IoUtility.close(stream); |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | /* |
| 263 | * (non-Javadoc) |
| 264 | * |
| 265 | * @see org.xml.sax.ContentHandler#endDocument() |
| 266 | */ |
| 267 | public void endDocument() throws SAXException { |
| 268 | elements.clear(); |
| 269 | level = 0; |
| 270 | } |
| 271 | |
| 272 | /* |
| 273 | * (non-Javadoc) |
| 274 | * |
| 275 | * @see org.xml.sax.ContentHandler#startDocument() |
| 276 | */ |
| 277 | public void startDocument() throws SAXException { |
| 278 | elements.clear(); |
| 279 | level = 0; |
| 280 | } |
| 281 | |
| 282 | /* |
| 283 | * (non-Javadoc) |
| 284 | * |
| 285 | * @see org.xml.sax.ContentHandler#characters(char[], int, int) |
| 286 | */ |
| 287 | public void characters(final char[] ch, final int start, final int length) throws SAXException { |
| 288 | // nothing to do |
| 289 | } |
| 290 | |
| 291 | /* |
| 292 | * (non-Javadoc) |
| 293 | * |
| 294 | * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int) |
| 295 | */ |
| 296 | public void ignorableWhitespace(final char[] ch, final int start, final int length) |
| 297 | throws SAXException { |
| 298 | // nothing to do |
| 299 | } |
| 300 | |
| 301 | /* |
| 302 | * (non-Javadoc) |
| 303 | * |
| 304 | * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String) |
| 305 | */ |
| 306 | public void endPrefixMapping(final String prefix) throws SAXException { |
| 307 | // nothing to do |
| 308 | } |
| 309 | |
| 310 | /* |
| 311 | * (non-Javadoc) |
| 312 | * |
| 313 | * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String) |
| 314 | */ |
| 315 | public void skippedEntity(final String name) throws SAXException { |
| 316 | // nothing to do |
| 317 | } |
| 318 | |
| 319 | /* |
| 320 | * (non-Javadoc) |
| 321 | * |
| 322 | * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String) |
| 323 | */ |
| 324 | public void processingInstruction(final String target, final String data) throws SAXException { |
| 325 | // nothing to do |
| 326 | } |
| 327 | |
| 328 | /* |
| 329 | * (non-Javadoc) |
| 330 | * |
| 331 | * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String) |
| 332 | */ |
| 333 | public void startPrefixMapping(final String prefix, final String uri) throws SAXException { |
| 334 | // nothing to do |
| 335 | } |
| 336 | |
| 337 | /* |
| 338 | * (non-Javadoc) |
| 339 | * |
| 340 | * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, |
| 341 | * java.lang.String, org.xml.sax.Attributes) |
| 342 | */ |
| 343 | public void startElement(final String namespaceURI, final String localName, final String qName, |
| 344 | final Attributes atts) throws SAXException { |
| 345 | final String method = "startElement(String, String, Attributes)"; |
| 346 | level++; |
| 347 | summary.addElement("*", addOccurence("*")); |
| 348 | current.addElement(qName, addOccurence(qName)); |
| 349 | |
| 350 | // LATER mime 20070109: just for testing is the next if |
| 351 | /* |
| 352 | if (find.matchesElementsBegining(current, summary)) { |
| 353 | System.out.println("part match " + qName); |
| 354 | xml.setRow(locator.getLineNumber()); |
| 355 | xml.setColumn(locator.getColumnNumber()); |
| 356 | try { |
| 357 | xml.skipBackToBeginOfXmlTag(); |
| 358 | } catch (RuntimeException e) { |
| 359 | Trace.trace(this, method, e); |
| 360 | } |
| 361 | find.setStartLocation(new SourcePosition(xml.getLocalAddress(), xml.getRow(), xml |
| 362 | .getColumn())); |
| 363 | } |
| 364 | */ |
| 365 | if (getLocator() == null) { |
| 366 | throw new SAXException("Locator unexpectedly null"); |
| 367 | } |
| 368 | if (find.matchesElements(current, summary)) { |
| 369 | Trace.trace(CLASS, this, method, "matching elements"); |
| 370 | Trace.param(CLASS, this, method, qName, current); |
| 371 | TextInput xml = null; |
| 372 | Reader xmlReader = null; |
| 373 | try { |
| 374 | xmlReader = new XmlReader(xmlFile); |
| 375 | xml = new TextInput(xmlReader); |
| 376 | // LATER mime 20080608: old code |
| 377 | // xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding())); |
| 378 | } catch (IOException io) { |
| 379 | Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io); |
| 380 | if (getLocator() == null) { |
| 381 | throw new SAXException("Locator unexpectedly null"); |
| 382 | } |
| 383 | // at least we can set the current location as find location |
| 384 | start = new SourcePosition( |
| 385 | getLocator().getLineNumber(), getLocator().getColumnNumber()); |
| 386 | return; |
| 387 | } |
| 388 | try { |
| 389 | xml.setRow(getLocator().getLineNumber()); |
| 390 | xml.setColumn(getLocator().getColumnNumber()); |
| 391 | if (startDelta != null) { |
| 392 | xml.skipWhiteSpace(); |
| 393 | final String cdata = "<![CDATA["; |
| 394 | final String read = xml.readString(cdata.length()); |
| 395 | final int cdataLength = (cdata.equals(read) ? cdata.length() : 0); |
| 396 | start = addDelta(xml, cdataLength, startDelta); |
| 397 | end = addDelta(xml, cdataLength, endDelta); |
| 398 | return; |
| 399 | } |
| 400 | try { |
| 401 | xml.skipBackToBeginOfXmlTag(); |
| 402 | } catch (RuntimeException e) { |
| 403 | Trace.trace(CLASS, this, method, e); |
| 404 | } |
| 405 | start = new SourcePosition(xml.getRow(), xml.getColumn()); |
| 406 | if (find.getAttribute() != null) { |
| 407 | xml.read(); // skip < |
| 408 | xml.readNextXmlName(); // must be element name |
| 409 | String tag; |
| 410 | do { |
| 411 | xml.skipWhiteSpace(); |
| 412 | int row = xml.getRow(); |
| 413 | int col = xml.getColumn(); |
| 414 | try { |
| 415 | tag = xml.readNextXmlName(); |
| 416 | } catch (IllegalArgumentException e) { |
| 417 | break; |
| 418 | } |
| 419 | if (tag.equals(find.getAttribute())) { |
| 420 | start = new SourcePosition(row, col); |
| 421 | xml.readNextAttributeValue(); |
| 422 | end = new SourcePosition(xml.getRow(), xml.getColumn()); |
| 423 | throw new XPathLocationFoundException(); |
| 424 | } |
| 425 | try { |
| 426 | xml.readNextAttributeValue(); |
| 427 | } catch (IllegalArgumentException e) { |
| 428 | break; |
| 429 | } |
| 430 | } while (true); |
| 431 | // did we found the attribute? if not we point to the complete xml tag |
| 432 | if (end == null) { |
| 433 | end = new SourcePosition(xml.getRow(), xml.getColumn()); |
| 434 | throw new XPathLocationFoundException(); |
| 435 | } |
| 436 | } |
| 437 | } finally { |
| 438 | IoUtility.close(xml); // findbugs |
| 439 | } |
| 440 | } |
| 441 | } |
| 442 | |
| 443 | /** |
| 444 | * Set text input position according to locator and add delta plus tag length. |
| 445 | * |
| 446 | * @param xml This is the stream we work on. |
| 447 | * @param cdataLength Length of extra skip data. |
| 448 | * @param delta Add this delta |
| 449 | * @return Resulting source position. |
| 450 | */ |
| 451 | private SourcePosition addDelta(final TextInput xml, final int cdataLength, |
| 452 | final SourcePosition delta) { |
| 453 | xml.setRow(getLocator().getLineNumber()); |
| 454 | xml.setColumn(getLocator().getColumnNumber()); |
| 455 | if (delta.getRow() == 1 && cdataLength > 0) { |
| 456 | xml.addColumn(cdataLength + delta.getColumn() - 1); |
| 457 | } else { |
| 458 | xml.addPosition(delta); |
| 459 | } |
| 460 | return new SourcePosition(xml.getRow(), xml.getColumn()); |
| 461 | } |
| 462 | |
| 463 | /** |
| 464 | * Add element occurrence. |
| 465 | * |
| 466 | * @param name Element that occurred. |
| 467 | * @return Number of occurrences including this one. |
| 468 | */ |
| 469 | private int addOccurence(final String name) { |
| 470 | while (level < elements.size()) { |
| 471 | elements.remove(elements.size() - 1); |
| 472 | } |
| 473 | while (level > elements.size()) { |
| 474 | elements.add(new HashMap()); |
| 475 | } |
| 476 | final Map levelMap = (Map) elements.get(level - 1); |
| 477 | final Enumerator counter; |
| 478 | if (levelMap.containsKey(name)) { |
| 479 | counter = (Enumerator) levelMap.get(name); |
| 480 | counter.increaseNumber(); |
| 481 | } else { |
| 482 | counter = new Enumerator(1); |
| 483 | levelMap.put(name, counter); |
| 484 | } |
| 485 | return counter.getNumber(); |
| 486 | } |
| 487 | |
| 488 | /* |
| 489 | * (non-Javadoc) |
| 490 | * |
| 491 | * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, |
| 492 | * java.lang.String) |
| 493 | */ |
| 494 | public void endElement(final String namespaceURI, final String localName, final String qName) |
| 495 | throws SAXException { |
| 496 | final String method = "endElement(String, String, Attributes)"; |
| 497 | level--; |
| 498 | if (getLocator() == null) { |
| 499 | current.deleteLastElement(); |
| 500 | summary.deleteLastElement(); |
| 501 | throw new SAXException("Locator unexpectly null"); |
| 502 | } |
| 503 | if (find.matchesElements(current, summary) && find.getAttribute() == null |
| 504 | && startDelta == null) { |
| 505 | TextInput xml = null; |
| 506 | Reader xmlReader = null; |
| 507 | try { |
| 508 | xmlReader = new XmlReader(xmlFile); |
| 509 | xml = new TextInput(xmlReader); |
| 510 | // LATER mime 20080608: old code |
| 511 | // xml = new TextInput(xmlFile, IoUtility.getWorkingEncoding(getEncoding())); |
| 512 | } catch (IOException io) { |
| 513 | Trace.fatal(CLASS, this, method, "File \"" + xmlFile + "\" should be readable", io); |
| 514 | if (getLocator() == null) { |
| 515 | throw new SAXException("Locator unexpectedly null"); |
| 516 | } |
| 517 | // at least we can set the current location as find location |
| 518 | start = new SourcePosition(getLocator().getLineNumber(), |
| 519 | getLocator().getColumnNumber()); |
| 520 | return; |
| 521 | } finally { |
| 522 | IoUtility.close(xmlReader); |
| 523 | } |
| 524 | try { |
| 525 | xml.setRow(getLocator().getLineNumber()); |
| 526 | xml.setColumn(getLocator().getColumnNumber()); |
| 527 | // xml.skipForwardToEndOfXmlTag(); // LATER mime 20050810: remove? comment in? |
| 528 | end = new SourcePosition(xml.getRow(), xml.getColumn()); |
| 529 | throw new XPathLocationFoundException(); |
| 530 | } finally { |
| 531 | IoUtility.close(xml); // findbugs |
| 532 | } |
| 533 | } |
| 534 | current.deleteLastElement(); |
| 535 | summary.deleteLastElement(); |
| 536 | } |
| 537 | |
| 538 | /** |
| 539 | * Get starting source position of found element. Could be <code>null</code>. |
| 540 | * |
| 541 | * @return Start position. |
| 542 | */ |
| 543 | private SourcePosition getStart() { |
| 544 | return start; |
| 545 | } |
| 546 | |
| 547 | /** |
| 548 | * Get ending source position of found element. Could be <code>null</code>. |
| 549 | * |
| 550 | * @return End position. |
| 551 | */ |
| 552 | private SourcePosition getEnd() { |
| 553 | return end; |
| 554 | } |
| 555 | |
| 556 | } |