| 1 | /* This file is part of the project "Hilbert II" - http://www.qedeq.org |
| 2 | * |
| 3 | * Copyright 2000-2014, Michael Meyling <mime@qedeq.org>. |
| 4 | * |
| 5 | * "Hilbert II" is free software; you can redistribute |
| 6 | * it and/or modify it under the terms of the GNU General Public |
| 7 | * License as published by the Free Software Foundation; either |
| 8 | * version 2 of the License, or (at your option) any later version. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * GNU General Public License for more details. |
| 14 | */ |
| 15 | package org.qedeq.kernel.xml.parser; |
| 16 | |
| 17 | import java.io.File; |
| 18 | import java.io.FileInputStream; |
| 19 | import java.io.IOException; |
| 20 | import java.io.InputStream; |
| 21 | import java.util.MissingResourceException; |
| 22 | |
| 23 | import javax.xml.parsers.ParserConfigurationException; |
| 24 | import javax.xml.parsers.SAXParser; |
| 25 | import javax.xml.parsers.SAXParserFactory; |
| 26 | |
| 27 | import org.qedeq.base.trace.Trace; |
| 28 | import org.qedeq.kernel.se.common.ModuleService; |
| 29 | import org.qedeq.kernel.se.common.SourceFileException; |
| 30 | import org.qedeq.kernel.se.common.SourceFileExceptionList; |
| 31 | import org.qedeq.kernel.xml.common.XmlSyntaxException; |
| 32 | import org.qedeq.kernel.xml.handler.common.SaxDefaultHandler; |
| 33 | import org.qedeq.kernel.xml.handler.common.SimpleHandler; |
| 34 | import org.xml.sax.InputSource; |
| 35 | import org.xml.sax.SAXException; |
| 36 | import org.xml.sax.SAXNotRecognizedException; |
| 37 | import org.xml.sax.XMLReader; |
| 38 | |
| 39 | |
| 40 | /** |
| 41 | * Parser for XML files. This class uses features specific for Xerces. |
| 42 | * |
| 43 | * @author Michael Meyling |
| 44 | */ |
| 45 | public final class SaxParser { |
| 46 | |
| 47 | /** This class. */ |
| 48 | private static final Class CLASS = SaxParser.class; |
| 49 | |
| 50 | /** Namespaces feature id (http://xml.org/sax/features/namespaces). */ |
| 51 | private static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces"; |
| 52 | |
| 53 | /** Validation feature id (http://xml.org/sax/features/validation). */ |
| 54 | private static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation"; |
| 55 | |
| 56 | /** Schema validation feature id (http://apache.org/xml/features/validation/schema). */ |
| 57 | private static final String SCHEMA_VALIDATION_FEATURE_ID |
| 58 | = "http://apache.org/xml/features/validation/schema"; |
| 59 | |
| 60 | /** Schema full checking feature id |
| 61 | * (http://apache.org/xml/features/validation/schema-full-checking). */ |
| 62 | protected static final String SCHEMA_FULL_CHECKING_FEATURE_ID |
| 63 | = "http://apache.org/xml/features/validation/schema-full-checking"; |
| 64 | |
| 65 | /** Handler which deals with the XML contents. */ |
| 66 | private SaxDefaultHandler handler; |
| 67 | |
| 68 | /** SAX parser. */ |
| 69 | private XMLReader reader; |
| 70 | |
| 71 | /** Simple handler for validation purpose only. */ |
| 72 | private final SimpleHandler deflt; |
| 73 | |
| 74 | /** Saved errors of parsing. */ |
| 75 | private SourceFileExceptionList exceptionList; |
| 76 | |
| 77 | /** Plugin we work for. */ |
| 78 | private ModuleService plugin; |
| 79 | |
| 80 | /** |
| 81 | * Constructor. |
| 82 | * |
| 83 | * @param plugin We work for this plugin. |
| 84 | * @param handler Default handler for this application. |
| 85 | * @throws ParserConfigurationException Severe parser configuration problem. |
| 86 | * @throws SAXException Option not recognized or supported. |
| 87 | */ |
| 88 | public SaxParser(final ModuleService plugin, final SaxDefaultHandler handler) |
| 89 | throws ParserConfigurationException, SAXException { |
| 90 | super(); |
| 91 | |
| 92 | this.handler = handler; |
| 93 | this.deflt = new SimpleHandler(); |
| 94 | this.plugin = plugin; |
| 95 | |
| 96 | final String factoryImpl = System.getProperty("javax.xml.parsers.SAXParserFactory"); |
| 97 | if (factoryImpl == null) { |
| 98 | System.setProperty("javax.xml.parsers.SAXParserFactory", |
| 99 | "org.apache.xerces.jaxp.SAXParserFactoryImpl"); |
| 100 | } |
| 101 | SAXParserFactory factory = SAXParserFactory.newInstance(); |
| 102 | factory.setNamespaceAware(true); |
| 103 | factory.setValidating(true); |
| 104 | |
| 105 | factory.setFeature(NAMESPACES_FEATURE_ID, true); |
| 106 | factory.setFeature(VALIDATION_FEATURE_ID, true); |
| 107 | |
| 108 | try { |
| 109 | factory.setFeature(SCHEMA_VALIDATION_FEATURE_ID, true); |
| 110 | } catch (SAXNotRecognizedException e) { |
| 111 | Trace.trace(CLASS, this, "constructor", e); |
| 112 | // ignore |
| 113 | } |
| 114 | try { |
| 115 | factory.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, true); |
| 116 | } catch (SAXNotRecognizedException e) { |
| 117 | Trace.trace(CLASS, this, "constructor", e); |
| 118 | // ignore |
| 119 | } |
| 120 | |
| 121 | final SAXParser parser = factory.newSAXParser(); |
| 122 | if (!parser.isNamespaceAware()) { |
| 123 | throw new ParserConfigurationException( |
| 124 | "Current XML parser doesn't support namespaces."); |
| 125 | } |
| 126 | if (!parser.isValidating()) { |
| 127 | throw new ParserConfigurationException( |
| 128 | "Current XML parser doesn't support schema validation."); |
| 129 | } |
| 130 | |
| 131 | reader = parser.getXMLReader(); |
| 132 | reader.setEntityResolver(new SaxEntityResolver(handler)); |
| 133 | |
| 134 | // set parser features |
| 135 | reader.setFeature(NAMESPACES_FEATURE_ID, true); |
| 136 | reader.setFeature(VALIDATION_FEATURE_ID, true); |
| 137 | try { |
| 138 | reader.setFeature(SCHEMA_VALIDATION_FEATURE_ID, true); |
| 139 | } catch (SAXNotRecognizedException e) { |
| 140 | Trace.trace(CLASS, this, "constructor", e); |
| 141 | // ignore |
| 142 | } |
| 143 | try { |
| 144 | reader.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, true); |
| 145 | } catch (SAXNotRecognizedException e) { |
| 146 | Trace.trace(CLASS, this, "constructor", e); |
| 147 | // ignore |
| 148 | } |
| 149 | |
| 150 | } |
| 151 | |
| 152 | /** |
| 153 | * Parse input source. |
| 154 | * @param in Parse data from this file source. |
| 155 | * @param validateOnly validate with {@link #deflt} or parse with {@link #handler}. |
| 156 | * @param original Original URL for the file. If this is <code>null</code> same as |
| 157 | * file name. |
| 158 | * |
| 159 | * @throws SourceFileExceptionList Loading failed. |
| 160 | */ |
| 161 | private void parse(final File in, final boolean validateOnly, final String original) |
| 162 | throws SourceFileExceptionList { |
| 163 | final String method = "parse(URL, boolean, InputStream)"; |
| 164 | InputStream stream = null; |
| 165 | exceptionList = new SourceFileExceptionList(); |
| 166 | try { |
| 167 | stream = new FileInputStream(in); |
| 168 | final InputSource input = new InputSource(stream); |
| 169 | reader.setErrorHandler(new SaxErrorHandler(plugin, original, exceptionList)); |
| 170 | handler.setUrl(original); |
| 171 | deflt.setUrl(original); |
| 172 | if (validateOnly) { |
| 173 | try { |
| 174 | reader.setContentHandler(deflt); |
| 175 | reader.parse(input); |
| 176 | } catch (MissingResourceException ex) { |
| 177 | throw new SAXException("For " + ex.getClassName() + " we searched for value" |
| 178 | + " of " + ex.getKey(), ex); |
| 179 | } |
| 180 | } else { |
| 181 | handler.setExceptionList(exceptionList); |
| 182 | reader.setContentHandler(handler); |
| 183 | reader.parse(input); |
| 184 | } |
| 185 | } catch (SAXException e) { |
| 186 | if (exceptionList.size() <= 0) { // do we have already exceptions? |
| 187 | // no, we must add this one |
| 188 | final XmlSyntaxException xml = XmlSyntaxException.createBySAXException(e); |
| 189 | exceptionList.add(new SourceFileException(plugin, xml, handler.createSourceArea(), null)); |
| 190 | } |
| 191 | throw exceptionList; |
| 192 | } catch (IOException e) { |
| 193 | final XmlSyntaxException xml = XmlSyntaxException.createByIOException(e); |
| 194 | exceptionList.add(new SourceFileException(plugin, xml, handler.createSourceArea(), null)); |
| 195 | throw exceptionList; |
| 196 | } finally { |
| 197 | if (stream != null) { |
| 198 | try { |
| 199 | stream.close(); |
| 200 | } catch (Exception e) { |
| 201 | Trace.trace(CLASS, this, method, e); |
| 202 | } |
| 203 | } |
| 204 | } |
| 205 | if (exceptionList.size() > 0) { |
| 206 | throw exceptionList; |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | /** |
| 211 | * Parses XML file. |
| 212 | * |
| 213 | * @param fileName File name. |
| 214 | * @param original Original URL for the file. If this is <code>null</code> same as |
| 215 | * file name. |
| 216 | * @throws SourceFileExceptionList Loading failed. |
| 217 | */ |
| 218 | public final void parse(final String fileName, final String original) |
| 219 | throws SourceFileExceptionList { |
| 220 | final File file = new File(fileName); |
| 221 | parse(file.getAbsoluteFile(), original); |
| 222 | } |
| 223 | |
| 224 | /** |
| 225 | * Parses the XML file. |
| 226 | * |
| 227 | * @param file File to parse. |
| 228 | * @param original Original URL for the file. If this is <code>null</code> same as |
| 229 | * file. |
| 230 | * @throws SourceFileExceptionList Loading failed. |
| 231 | */ |
| 232 | public final void parse(final File file, final String original) throws SourceFileExceptionList { |
| 233 | String org = original; |
| 234 | if (org == null) { |
| 235 | org = "" + file; |
| 236 | } |
| 237 | parse(file, true, org); |
| 238 | parse(file, false, org); |
| 239 | } |
| 240 | |
| 241 | /** |
| 242 | * Get errors that occurred during last parsing. |
| 243 | * |
| 244 | * @return List with collected Exceptions. |
| 245 | */ |
| 246 | public SourceFileExceptionList getExceptionList() { |
| 247 | return exceptionList; |
| 248 | } |
| 249 | |
| 250 | /** |
| 251 | * Get encoding of XML document. This value is set during parsing the document. |
| 252 | * |
| 253 | * @return Encoding. Maybe <code>null</code>. |
| 254 | */ |
| 255 | public String getEncoding() { |
| 256 | return deflt.getEncoding(); |
| 257 | } |
| 258 | |
| 259 | } |