XmlReader.java


001 /*

002  * Copyright 2004 Sun Microsystems, Inc.

003  *

004  * Licensed under the Apache License, Version 2.0 (the "License");

005  * you may not use this file except in compliance with the License.

006  * You may obtain a copy of the License at

007  *

008  *     http://www.apache.org/licenses/LICENSE-2.0

009  *

010  * Unless required by applicable law or agreed to in writing, software

011  * distributed under the License is distributed on an "AS IS" BASIS,

012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

013  * See the License for the specific language governing permissions and

014  * limitations under the License.

015  *

016  */

017 package com.sun.syndication.io;

018 

019 import java.io.*;

020 import java.net.URL;

021 import java.net.URLConnection;

022 import java.net.HttpURLConnection;

023 import java.util.regex.Pattern;

024 import java.util.regex.Matcher;

025 import java.text.MessageFormat;

026 

027 /**

028  * Character stream that handles (or at least attemtps to) all the necessary Voodo to figure out

029  * the charset encoding of the XML document within the stream.

030  * <p>

031  * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a

032  * character stream.

033  * <p>

034  * All this has to be done without consuming characters from the stream, if not the XML parser

035  * will not recognized the document as a valid XML. This is not 100% true, but it's close enough

036  * (UTF-8 BOM is not handled by all parsers right now, XmlReader handles it and things work in all

037  * parsers).

038  * <p>

039  * The XmlReader class handles the charset encoding of XML documents in Files, raw streams and

040  * HTTP streams by offering a wide set of constructors.

041  * <P>

042  * By default the charset encoding detection is lenient, the constructor with the lenient flag

043  * can be used for an script (following HTTP MIME and XML specifications).

044  * All this is nicely explained by Mark Pilgrim in his blog,

045  * <a href="http://diveintomark.org/archives/2004/02/13/xml-media-types">

046  * Determining the character encoding of a feed</a>.

047  * <p>

048  * @author Alejandro Abdelnur

049  * @version revision 1.18 taken on 2008-03-06 from Rome (see

050  *          https://rome.dev.java.net/source/browse/rome/src/java/com/sun/syndication/io/XmlReader.java)

051  */

052 public class XmlReader extends Reader {

053     private static final int BUFFER_SIZE = 4096;

054 

055     private static final String UTF_8 = "UTF-8";

056     private static final String US_ASCII = "US-ASCII";

057     private static final String UTF_16BE = "UTF-16BE";

058     private static final String UTF_16LE = "UTF-16LE";

059     private static final String UTF_16 = "UTF-16";

060 

061     private static String _staticDefaultEncoding = null;

062 

063     private Reader _reader;

064     private String _encoding;

065     private String _defaultEncoding;

066 

067     /**

068      * Sets the default encoding to use if none is set in HTTP content-type,

069      * XML prolog and the rules based on content-type are not adequate.

070      * <p/>

071      * If it is set to NULL the content-type based rules are used.

072      * <p/>

073      * By default it is NULL.

074      * <p/>

075      *

076      * @param encoding charset encoding to default to.

077      */

078     public static void setDefaultEncoding(String encoding) {

079         _staticDefaultEncoding = encoding;

080     }

081 

082     /**

083      * Returns the default encoding to use if none is set in HTTP content-type,

084      * XML prolog and the rules based on content-type are not adequate.

085      * <p/>

086      * If it is NULL the content-type based rules are used.

087      * <p/>

088      *

089      * @return the default encoding to use.

090      */

091     public static String getDefaultEncoding() {

092         return _staticDefaultEncoding;

093     }

094 

095     /**

096      * Creates a Reader for a File.

097      * <p>

098      * It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also

099      * missing defaults to UTF-8.

100      * <p>

101      * It does a lenient charset encoding detection, check the constructor with the lenient parameter

102      * for details.

103      * <p>

104      * @param file File to create a Reader from.

105      * @throws IOException thrown if there is a problem reading the file.

106      *

107      */

108     public XmlReader(File file) throws IOException {

109         this(new FileInputStream(file));

110     }

111 

112     /**

113      * Creates a Reader for a raw InputStream.

114      * <p>

115      * It follows the same logic used for files.

116      * <p>

117      * It does a lenient charset encoding detection, check the constructor with the lenient parameter

118      * for details.

119      * <p>

120      * @param is InputStream to create a Reader from.

121      * @throws IOException thrown if there is a problem reading the stream.

122      *

123      */

124     public XmlReader(InputStream is) throws IOException {

125         this(is,true);

126     }

127 

128     /**

129      * Creates a Reader for a raw InputStream.

130      * <p>

131      * It follows the same logic used for files.

132      * <p>

133      * If lenient detection is indicated and the detection above fails as per specifications it then attempts

134      * the following:

135      * <p>

136      * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.

137      * <p>

138      * Else if the XML prolog had a charset encoding that encoding is used.

139      * <p>

140      * Else if the content type had a charset encoding that encoding is used.

141      * <p>

142      * Else 'UTF-8' is used.

143      * <p>

144      * If lenient detection is indicated an XmlReaderException is never thrown.

145      * <p>

146      * @param is InputStream to create a Reader from.

147      * @param lenient indicates if the charset encoding detection should be relaxed.

148      * @throws IOException thrown if there is a problem reading the stream.

149      * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.

150      *

151      */

152     public XmlReader(InputStream is,boolean lenient) throws IOException, XmlReaderException {

153         _defaultEncoding = _staticDefaultEncoding;

154         try {

155             doRawStream(is,lenient);

156         }

157         catch (XmlReaderException ex) {

158             if (!lenient) {

159                 throw ex;

160             }

161             else {

162                 doLenientDetection(null,ex);

163             }

164         }

165     }

166 

167     /**

168      * Creates a Reader using the InputStream of a URL.

169      * <p>

170      * If the URL is not of type HTTP and there is not 'content-type' header in the fetched

171      * data it uses the same logic used for Files.

172      * <p>

173      * If the URL is a HTTP Url or there is a 'content-type' header in the fetched

174      * data it uses the same logic used for an InputStream with content-type.

175      * <p>

176      * It does a lenient charset encoding detection, check the constructor with the lenient parameter

177      * for details.

178      * <p>

179      * @param url URL to create a Reader from.

180      * @throws IOException thrown if there is a problem reading the stream of the URL.

181      *

182      */

183     public XmlReader(URL url) throws IOException {

184         this(url.openConnection());

185     }

186 

187     /**

188      * Creates a Reader using the InputStream of a URLConnection.

189      * <p>

190      * If the URLConnection is not of type HttpURLConnection and there is not

191      * 'content-type' header in the fetched data it uses the same logic used for files.

192      * <p>

193      * If the URLConnection is a HTTP Url or there is a 'content-type' header in the fetched

194      * data it uses the same logic used for an InputStream with content-type.

195      * <p>

196      * It does a lenient charset encoding detection, check the constructor with the lenient parameter

197      * for details.

198      * <p>

199      * @param conn URLConnection to create a Reader from.

200      * @throws IOException thrown if there is a problem reading the stream of the URLConnection.

201      *

202      */

203     public XmlReader(URLConnection conn) throws IOException {

204         _defaultEncoding = _staticDefaultEncoding;

205         boolean lenient = true;

206         if (conn instanceof HttpURLConnection) {

207             try {

208                 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);

209             }

210             catch (XmlReaderException ex) {

211                 doLenientDetection(conn.getContentType(),ex);

212             }

213         }

214         else

215         if (conn.getContentType()!=null) {

216             try {

217                 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);

218             }

219             catch (XmlReaderException ex) {

220                 doLenientDetection(conn.getContentType(),ex);

221             }

222         }

223         else {

224             try {

225                 doRawStream(conn.getInputStream(),lenient);

226             }

227             catch (XmlReaderException ex) {

228                 doLenientDetection(null,ex);

229             }

230         }

231     }

232 

233     /**

234      * Creates a Reader using an InputStream an the associated content-type header.

235      * <p>

236      * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.

237      * If there is not content-type encoding checks the XML prolog encoding. If there is not XML

238      * prolog encoding uses the default encoding mandated by the content-type MIME type.

239      * <p>

240      * It does a lenient charset encoding detection, check the constructor with the lenient parameter

241      * for details.

242      * <p>

243      * @param is InputStream to create the reader from.

244      * @param httpContentType content-type header to use for the resolution of the charset encoding.

245      * @throws IOException thrown if there is a problem reading the file.

246      *

247      */

248     public XmlReader(InputStream is,String httpContentType) throws IOException {

249         this(is,httpContentType,true);

250     }

251 

252     /**

253      * Creates a Reader using an InputStream an the associated content-type header. This constructor is

254      * lenient regarding the encoding detection.

255      * <p>

256      * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.

257      * If there is not content-type encoding checks the XML prolog encoding. If there is not XML

258      * prolog encoding uses the default encoding mandated by the content-type MIME type.

259      * <p>

260      * If lenient detection is indicated and the detection above fails as per specifications it then attempts

261      * the following:

262      * <p>

263      * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.

264      * <p>

265      * Else if the XML prolog had a charset encoding that encoding is used.

266      * <p>

267      * Else if the content type had a charset encoding that encoding is used.

268      * <p>

269      * Else 'UTF-8' is used.

270      * <p>

271      * If lenient detection is indicated an XmlReaderException is never thrown.

272      * <p>

273      * @param is InputStream to create the reader from.

274      * @param httpContentType content-type header to use for the resolution of the charset encoding.

275      * @param lenient indicates if the charset encoding detection should be relaxed.

276      * @throws IOException thrown if there is a problem reading the file.

277      * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.

278      *

279      */

280     public XmlReader(InputStream is,String httpContentType,boolean lenient, String defaultEncoding)

281         throws IOException, XmlReaderException {

282         _defaultEncoding = (defaultEncoding == null) ? _staticDefaultEncoding : defaultEncoding;

283         try {

284             doHttpStream(is,httpContentType,lenient);

285         }

286         catch (XmlReaderException ex) {

287             if (!lenient) {

288                 throw ex;

289             }

290             else {

291                 doLenientDetection(httpContentType,ex);

292             }

293         }

294     }

295 

296     /**

297      * Creates a Reader using an InputStream an the associated content-type header. This constructor is

298      * lenient regarding the encoding detection.

299      * <p>

300      * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.

301      * If there is not content-type encoding checks the XML prolog encoding. If there is not XML

302      * prolog encoding uses the default encoding mandated by the content-type MIME type.

303      * <p>

304      * If lenient detection is indicated and the detection above fails as per specifications it then attempts

305      * the following:

306      * <p>

307      * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.

308      * <p>

309      * Else if the XML prolog had a charset encoding that encoding is used.

310      * <p>

311      * Else if the content type had a charset encoding that encoding is used.

312      * <p>

313      * Else 'UTF-8' is used.

314      * <p>

315      * If lenient detection is indicated an XmlReaderException is never thrown.

316      * <p>

317      * @param is InputStream to create the reader from.

318      * @param httpContentType content-type header to use for the resolution of the charset encoding.

319      * @param lenient indicates if the charset encoding detection should be relaxed.

320      * @throws IOException thrown if there is a problem reading the file.

321      * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.

322      *

323      */

324     public XmlReader(InputStream is, String httpContentType, boolean lenient)

325         throws IOException, XmlReaderException {

326         this(is, httpContentType, lenient, null);

327     }

328 

329     private void doLenientDetection(String httpContentType,XmlReaderException ex) throws IOException {

330         if (httpContentType!=null) {

331             if (httpContentType.startsWith("text/html")) {

332                 httpContentType = httpContentType.substring("text/html".length());

333                 httpContentType = "text/xml" + httpContentType;

334                 try {

335                     doHttpStream(ex.getInputStream(),httpContentType,true);

336                     ex = null;

337                 }

338                 catch (XmlReaderException ex2) {

339                     ex = ex2;

340                 }

341             }

342         }

343         if (ex!=null) {

344             String encoding = ex.getXmlEncoding();

345             if (encoding==null) {

346                 encoding = ex.getContentTypeEncoding();

347             }

348             if (encoding==null) {

349               encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;

350             }

351             prepareReader(ex.getInputStream(),encoding);

352         }

353     }

354 

355     /**

356      * Returns the charset encoding of the XmlReader.

357      * <p>

358      * @return charset encoding.

359      *

360      */

361     public String getEncoding() {

362         return _encoding;

363     }

364 

365     public int read(char[] buf,int offset,int len) throws IOException {

366         return _reader.read(buf,offset,len);

367     }

368 

369     /**

370      * Closes the XmlReader stream.

371      * <p>

372      * @throws IOException thrown if there was a problem closing the stream.

373      *

374      */

375     public void close() throws IOException {

376         _reader.close();

377     }

378 

379     private void doRawStream(InputStream is,boolean lenient) throws IOException {

380         BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);

381         String bomEnc = getBOMEncoding(pis);

382         String xmlGuessEnc =  getXMLGuessEncoding(pis);

383         String xmlEnc = getXmlProlog(pis,xmlGuessEnc);

384         String encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, pis);

385         prepareReader(pis,encoding);

386     }

387 

388     private void doHttpStream(InputStream is,String httpContentType,boolean lenient) throws IOException {

389         BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);

390         String cTMime = getContentTypeMime(httpContentType);

391         String cTEnc  = getContentTypeEncoding(httpContentType);

392         String bomEnc = getBOMEncoding(pis);

393         String xmlGuessEnc =  getXMLGuessEncoding(pis);

394         String xmlEnc = getXmlProlog(pis,xmlGuessEnc);

395         String encoding = calculateHttpEncoding(cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, pis,lenient);

396         prepareReader(pis,encoding);

397     }

398 

399     private void prepareReader(InputStream is,String encoding) throws IOException {

400         _reader = new InputStreamReader(is,encoding);

401         _encoding = encoding;

402     }

403 

404     // InputStream is passed for XmlReaderException creation only

405     private String calculateRawEncoding(String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is) throws IOException {

406         String encoding;

407         if (bomEnc==null) {

408             if (xmlGuessEnc==null || xmlEnc==null) {

409                 encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;

410             }

411             else

412             if (xmlEnc.equals(UTF_16) && (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc.equals(UTF_16LE))) {

413                 encoding = xmlGuessEnc;

414             }

415             else {

416                 encoding = xmlEnc;

417             }

418         }

419         else

420         if (bomEnc.equals(UTF_8)) {

421             if (xmlGuessEnc!=null && !xmlGuessEnc.equals(UTF_8)) {

422                 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),

423                                              bomEnc,xmlGuessEnc,xmlEnc,is);

424             }

425             if (xmlEnc!=null && !xmlEnc.equals(UTF_8)) {

426                 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),

427                                              bomEnc,xmlGuessEnc,xmlEnc,is);

428             }

429             encoding = UTF_8;

430         }

431         else

432         if (bomEnc.equals(UTF_16BE) || bomEnc.equals(UTF_16LE)) {

433             if (xmlGuessEnc!=null && !xmlGuessEnc.equals(bomEnc)) {

434                 throw new IOException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}));

435             }

436             if (xmlEnc!=null && !xmlEnc.equals(UTF_16) && !xmlEnc.equals(bomEnc)) {

437                 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),

438                                              bomEnc,xmlGuessEnc,xmlEnc,is);

439             }

440             encoding =bomEnc;

441         }

442         else {

443             throw new XmlReaderException(RAW_EX_2.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),

444                                          bomEnc,xmlGuessEnc,xmlEnc,is);

445         }

446         return encoding;

447     }

448 

449     // InputStream is passed for XmlReaderException creation only

450     private String calculateHttpEncoding(String cTMime, String cTEnc, String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is,boolean lenient) throws IOException {

451         String encoding;

452         if (lenient & xmlEnc!=null) {

453             encoding = xmlEnc;

454         }

455         else {

456             boolean appXml = isAppXml(cTMime);

457             boolean textXml = isTextXml(cTMime);

458             if (appXml || textXml) {

459                 if (cTEnc==null) {

460                     if (appXml) {

461                         encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, is);

462                     }

463                     else {

464                         encoding = (_defaultEncoding == null) ? US_ASCII : _defaultEncoding;

465                     }

466                 }

467                 else

468                 if (bomEnc!=null && (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE))) {

469                     throw new XmlReaderException(HTTP_EX_1.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),

470                                                  cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);

471                 }

472                 else

473                 if (cTEnc.equals(UTF_16)) {

474                     if (bomEnc!=null && bomEnc.startsWith(UTF_16)) {

475                         encoding = bomEnc;

476                     }

477                     else {

478                         throw new XmlReaderException(HTTP_EX_2.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),

479                                                      cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);

480                     }

481                 }

482                 else {

483                     encoding = cTEnc;

484                 }

485             }

486             else {

487                 throw new XmlReaderException(HTTP_EX_3.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),

488                                              cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);

489             }

490         }

491         return encoding;

492     }

493 

494     // returns MIME type or NULL if httpContentType is NULL

495     private static String getContentTypeMime(String httpContentType) {

496         String mime = null;

497         if (httpContentType!=null) {

498             int i = httpContentType.indexOf(";");

499             mime = ((i==-1) ? httpContentType : httpContentType.substring(0,i)).trim();

500         }

501         return mime;

502     }

503 

504     private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=([.[^; ]]*)");

505 

506     // returns charset parameter value, NULL if not present, NULL if httpContentType is NULL

507     private static String getContentTypeEncoding(String httpContentType) {

508         String encoding = null;

509         if (httpContentType!=null) {

510             int i = httpContentType.indexOf(";");

511             if (i>-1) {

512                 String postMime = httpContentType.substring(i+1);

513                 Matcher m = CHARSET_PATTERN.matcher(postMime);

514                 encoding = (m.find()) ? m.group(1) : null;

515                 encoding = (encoding!=null) ? encoding.toUpperCase() : null;

516             }

517             if (encoding != null &&

518                     ((encoding.startsWith("\"") && encoding.endsWith("\"")) ||

519                      (encoding.startsWith("'") && encoding.endsWith("'"))

520                     )) {

521                 encoding = encoding.substring(1, encoding.length() - 1);

522             }

523         }

524         return encoding;

525     }

526 

527     // returns the BOM in the stream, NULL if not present,

528     // if there was BOM the in the stream it is consumed

529     private static String getBOMEncoding(BufferedInputStream is) throws IOException {

530         String encoding = null;

531         int[] bytes = new int[3];

532         is.mark(3);

533         bytes[0] = is.read();

534         bytes[1] = is.read();

535         bytes[2] = is.read();

536 

537         if (bytes[0] == 0xFE && bytes[1] == 0xFF) {

538             encoding = UTF_16BE;

539             is.reset();

540             is.read();

541             is.read();

542         }

543         else

544         if (bytes[0] == 0xFF && bytes[1] == 0xFE) {

545             encoding = UTF_16LE;

546             is.reset();

547             is.read();

548             is.read();

549         }

550         else

551         if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {

552             encoding = UTF_8;

553         }

554         else {

555             is.reset();

556         }

557         return encoding;

558     }

559 

560     // returns the best guess for the encoding by looking the first bytes of the stream, '<?'

561     private static String getXMLGuessEncoding(BufferedInputStream is) throws IOException {

562         String encoding = null;

563         int[] bytes = new int[4];

564         is.mark(4);

565         bytes[0] = is.read();

566         bytes[1] = is.read();

567         bytes[2] = is.read();

568         bytes[3] = is.read();

569         is.reset();

570 

571         if (bytes[0] == 0x00 && bytes[1] == 0x3C && bytes[2] == 0x00 && bytes[3] == 0x3F) {

572                 encoding = UTF_16BE;

573         }

574         else

575         if (bytes[0] == 0x3C && bytes[1] == 0x00 && bytes[2] == 0x3F && bytes[3] == 0x00) {

576                 encoding = UTF_16LE;

577         }

578         else

579         if (bytes[0] == 0x3C && bytes[1] == 0x3F && bytes[2] == 0x78 && bytes[3] == 0x6D) {

580             encoding = UTF_8;

581         }

582         return encoding;

583     }

584 

585 

586     private static final Pattern ENCODING_PATTERN =

587         Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);

588 

589     // returns the encoding declared in the <?xml encoding=...?>,  NULL if none

590     private static String getXmlProlog(BufferedInputStream is,String guessedEnc) throws IOException {

591         String encoding = null;

592         if (guessedEnc!=null) {

593             byte[] bytes = new byte[BUFFER_SIZE];

594             is.mark(BUFFER_SIZE);

595             int offset = 0;

596             int max = BUFFER_SIZE;

597             int c = is.read(bytes,offset,max);

598             int firstGT = -1;

599             while (c!=-1 && firstGT==-1 && offset< BUFFER_SIZE) {

600                 offset += c;

601                 max -= c;

602                 c = is.read(bytes,offset,max);

603                 firstGT = new String(bytes, 0, offset).indexOf(">");

604             }

605             if (firstGT == -1) {

606                 if (c == -1) {

607                     throw new IOException("Unexpected end of XML stream");

608                 }

609                 else {

610                     throw new IOException("XML prolog or ROOT element not found on first " + offset + " bytes");

611                 }

612             }

613             int bytesRead = offset;

614             if (bytesRead>0) {

615                 is.reset();

616                 Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes,0,firstGT + 1), guessedEnc);

617                 BufferedReader bReader = new BufferedReader(reader);

618                 StringBuffer prolog = new StringBuffer();

619                 String line = bReader.readLine();

620                 while (line != null) {

621                     prolog.append(line);

622                     line = bReader.readLine();

623                 }

624                 Matcher m = ENCODING_PATTERN.matcher(prolog);

625                 if (m.find()) {

626                     encoding = m.group(1).toUpperCase();

627                     encoding = encoding.substring(1,encoding.length()-1);

628                 }

629             }

630         }

631         return encoding;

632     }

633 

634     // indicates if the MIME type belongs to the APPLICATION XML family

635     private static boolean isAppXml(String mime) {

636         return mime!=null &&

637                (mime.equals("application/xml") ||

638                 mime.equals("application/xml-dtd") ||

639                 mime.equals("application/xml-external-parsed-entity") ||

640                 (mime.startsWith("application/") && mime.endsWith("+xml")));

641     }

642 

643     // indicates if the MIME type belongs to the TEXT XML family

644     private static boolean isTextXml(String mime) {

645         return mime!=null &&

646                (mime.equals("text/xml") ||

647                 mime.equals("text/xml-external-parsed-entity") ||

648                 (mime.startsWith("text/") && mime.endsWith("+xml")));

649     }

650 

651     private static final MessageFormat RAW_EX_1 = new MessageFormat(

652             "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch");

653 

654     private static final MessageFormat RAW_EX_2 = new MessageFormat(

655             "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM");

656 

657     private static final MessageFormat HTTP_EX_1 = new MessageFormat(

658             "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL");

659 

660     private static final MessageFormat HTTP_EX_2 = new MessageFormat(

661             "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch");

662 

663     private static final MessageFormat HTTP_EX_3 = new MessageFormat(

664             "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME");

665 

666 }