001 /*
002 * Copyright 2004 Sun Microsystems, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 */
017 package com.sun.syndication.io;
018
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.text.MessageFormat;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
026
/**
 * Character stream that handles (or at least attempts to handle) all the necessary voodoo to
 * figure out the charset encoding of the XML document within the stream.
 * <p>
 * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a
 * character stream.
 * <p>
 * All this has to be done without consuming characters from the stream; otherwise the XML parser
 * will not recognize the document as valid XML. This is not 100% true, but it's close enough
 * (the UTF-8 BOM is not handled by all parsers right now; XmlReader handles it and things work in
 * all parsers).
 * <p>
 * The XmlReader class handles the charset encoding of XML documents in Files, raw streams and
 * HTTP streams by offering a wide set of constructors.
 * <p>
 * By default the charset encoding detection is lenient; the constructors with the lenient flag
 * can be used for strict detection (following the HTTP MIME and XML specifications).
 * All this is nicely explained by Mark Pilgrim in his blog,
 * <a href="http://diveintomark.org/archives/2004/02/13/xml-media-types">
 * Determining the character encoding of a feed</a>.
 * <p>
 * @author Alejandro Abdelnur
 * @version revision 1.18 taken on 2008-03-06 from Rome (see
 * https://rome.dev.java.net/source/browse/rome/src/java/com/sun/syndication/io/XmlReader.java)
 */
052 public class XmlReader extends Reader {
053 private static final int BUFFER_SIZE = 4096;
054
055 private static final String UTF_8 = "UTF-8";
056 private static final String US_ASCII = "US-ASCII";
057 private static final String UTF_16BE = "UTF-16BE";
058 private static final String UTF_16LE = "UTF-16LE";
059 private static final String UTF_16 = "UTF-16";
060
061 private static String _staticDefaultEncoding = null;
062
063 private Reader _reader;
064 private String _encoding;
065 private String _defaultEncoding;
066
067 /**
068 * Sets the default encoding to use if none is set in HTTP content-type,
069 * XML prolog and the rules based on content-type are not adequate.
070 * <p/>
071 * If it is set to NULL the content-type based rules are used.
072 * <p/>
073 * By default it is NULL.
074 * <p/>
075 *
076 * @param encoding charset encoding to default to.
077 */
078 public static void setDefaultEncoding(String encoding) {
079 _staticDefaultEncoding = encoding;
080 }
081
082 /**
083 * Returns the default encoding to use if none is set in HTTP content-type,
084 * XML prolog and the rules based on content-type are not adequate.
085 * <p/>
086 * If it is NULL the content-type based rules are used.
087 * <p/>
088 *
089 * @return the default encoding to use.
090 */
091 public static String getDefaultEncoding() {
092 return _staticDefaultEncoding;
093 }
094
095 /**
096 * Creates a Reader for a File.
097 * <p>
098 * It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also
099 * missing defaults to UTF-8.
100 * <p>
101 * It does a lenient charset encoding detection, check the constructor with the lenient parameter
102 * for details.
103 * <p>
104 * @param file File to create a Reader from.
105 * @throws IOException thrown if there is a problem reading the file.
106 *
107 */
108 public XmlReader(File file) throws IOException {
109 this(new FileInputStream(file));
110 }
111
112 /**
113 * Creates a Reader for a raw InputStream.
114 * <p>
115 * It follows the same logic used for files.
116 * <p>
117 * It does a lenient charset encoding detection, check the constructor with the lenient parameter
118 * for details.
119 * <p>
120 * @param is InputStream to create a Reader from.
121 * @throws IOException thrown if there is a problem reading the stream.
122 *
123 */
124 public XmlReader(InputStream is) throws IOException {
125 this(is,true);
126 }
127
128 /**
129 * Creates a Reader for a raw InputStream.
130 * <p>
131 * It follows the same logic used for files.
132 * <p>
133 * If lenient detection is indicated and the detection above fails as per specifications it then attempts
134 * the following:
135 * <p>
136 * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
137 * <p>
138 * Else if the XML prolog had a charset encoding that encoding is used.
139 * <p>
140 * Else if the content type had a charset encoding that encoding is used.
141 * <p>
142 * Else 'UTF-8' is used.
143 * <p>
144 * If lenient detection is indicated an XmlReaderException is never thrown.
145 * <p>
146 * @param is InputStream to create a Reader from.
147 * @param lenient indicates if the charset encoding detection should be relaxed.
148 * @throws IOException thrown if there is a problem reading the stream.
149 * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
150 *
151 */
152 public XmlReader(InputStream is,boolean lenient) throws IOException, XmlReaderException {
153 _defaultEncoding = _staticDefaultEncoding;
154 try {
155 doRawStream(is,lenient);
156 }
157 catch (XmlReaderException ex) {
158 if (!lenient) {
159 throw ex;
160 }
161 else {
162 doLenientDetection(null,ex);
163 }
164 }
165 }
166
167 /**
168 * Creates a Reader using the InputStream of a URL.
169 * <p>
170 * If the URL is not of type HTTP and there is not 'content-type' header in the fetched
171 * data it uses the same logic used for Files.
172 * <p>
173 * If the URL is a HTTP Url or there is a 'content-type' header in the fetched
174 * data it uses the same logic used for an InputStream with content-type.
175 * <p>
176 * It does a lenient charset encoding detection, check the constructor with the lenient parameter
177 * for details.
178 * <p>
179 * @param url URL to create a Reader from.
180 * @throws IOException thrown if there is a problem reading the stream of the URL.
181 *
182 */
183 public XmlReader(URL url) throws IOException {
184 this(url.openConnection());
185 }
186
187 /**
188 * Creates a Reader using the InputStream of a URLConnection.
189 * <p>
190 * If the URLConnection is not of type HttpURLConnection and there is not
191 * 'content-type' header in the fetched data it uses the same logic used for files.
192 * <p>
193 * If the URLConnection is a HTTP Url or there is a 'content-type' header in the fetched
194 * data it uses the same logic used for an InputStream with content-type.
195 * <p>
196 * It does a lenient charset encoding detection, check the constructor with the lenient parameter
197 * for details.
198 * <p>
199 * @param conn URLConnection to create a Reader from.
200 * @throws IOException thrown if there is a problem reading the stream of the URLConnection.
201 *
202 */
203 public XmlReader(URLConnection conn) throws IOException {
204 _defaultEncoding = _staticDefaultEncoding;
205 boolean lenient = true;
206 if (conn instanceof HttpURLConnection) {
207 try {
208 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
209 }
210 catch (XmlReaderException ex) {
211 doLenientDetection(conn.getContentType(),ex);
212 }
213 }
214 else
215 if (conn.getContentType()!=null) {
216 try {
217 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
218 }
219 catch (XmlReaderException ex) {
220 doLenientDetection(conn.getContentType(),ex);
221 }
222 }
223 else {
224 try {
225 doRawStream(conn.getInputStream(),lenient);
226 }
227 catch (XmlReaderException ex) {
228 doLenientDetection(null,ex);
229 }
230 }
231 }
232
233 /**
234 * Creates a Reader using an InputStream an the associated content-type header.
235 * <p>
236 * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
237 * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
238 * prolog encoding uses the default encoding mandated by the content-type MIME type.
239 * <p>
240 * It does a lenient charset encoding detection, check the constructor with the lenient parameter
241 * for details.
242 * <p>
243 * @param is InputStream to create the reader from.
244 * @param httpContentType content-type header to use for the resolution of the charset encoding.
245 * @throws IOException thrown if there is a problem reading the file.
246 *
247 */
248 public XmlReader(InputStream is,String httpContentType) throws IOException {
249 this(is,httpContentType,true);
250 }
251
252 /**
253 * Creates a Reader using an InputStream an the associated content-type header. This constructor is
254 * lenient regarding the encoding detection.
255 * <p>
256 * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
257 * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
258 * prolog encoding uses the default encoding mandated by the content-type MIME type.
259 * <p>
260 * If lenient detection is indicated and the detection above fails as per specifications it then attempts
261 * the following:
262 * <p>
263 * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
264 * <p>
265 * Else if the XML prolog had a charset encoding that encoding is used.
266 * <p>
267 * Else if the content type had a charset encoding that encoding is used.
268 * <p>
269 * Else 'UTF-8' is used.
270 * <p>
271 * If lenient detection is indicated an XmlReaderException is never thrown.
272 * <p>
273 * @param is InputStream to create the reader from.
274 * @param httpContentType content-type header to use for the resolution of the charset encoding.
275 * @param lenient indicates if the charset encoding detection should be relaxed.
276 * @throws IOException thrown if there is a problem reading the file.
277 * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
278 *
279 */
280 public XmlReader(InputStream is,String httpContentType,boolean lenient, String defaultEncoding)
281 throws IOException, XmlReaderException {
282 _defaultEncoding = (defaultEncoding == null) ? _staticDefaultEncoding : defaultEncoding;
283 try {
284 doHttpStream(is,httpContentType,lenient);
285 }
286 catch (XmlReaderException ex) {
287 if (!lenient) {
288 throw ex;
289 }
290 else {
291 doLenientDetection(httpContentType,ex);
292 }
293 }
294 }
295
296 /**
297 * Creates a Reader using an InputStream an the associated content-type header. This constructor is
298 * lenient regarding the encoding detection.
299 * <p>
300 * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
301 * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
302 * prolog encoding uses the default encoding mandated by the content-type MIME type.
303 * <p>
304 * If lenient detection is indicated and the detection above fails as per specifications it then attempts
305 * the following:
306 * <p>
307 * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
308 * <p>
309 * Else if the XML prolog had a charset encoding that encoding is used.
310 * <p>
311 * Else if the content type had a charset encoding that encoding is used.
312 * <p>
313 * Else 'UTF-8' is used.
314 * <p>
315 * If lenient detection is indicated an XmlReaderException is never thrown.
316 * <p>
317 * @param is InputStream to create the reader from.
318 * @param httpContentType content-type header to use for the resolution of the charset encoding.
319 * @param lenient indicates if the charset encoding detection should be relaxed.
320 * @throws IOException thrown if there is a problem reading the file.
321 * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
322 *
323 */
324 public XmlReader(InputStream is, String httpContentType, boolean lenient)
325 throws IOException, XmlReaderException {
326 this(is, httpContentType, lenient, null);
327 }
328
329 private void doLenientDetection(String httpContentType,XmlReaderException ex) throws IOException {
330 if (httpContentType!=null) {
331 if (httpContentType.startsWith("text/html")) {
332 httpContentType = httpContentType.substring("text/html".length());
333 httpContentType = "text/xml" + httpContentType;
334 try {
335 doHttpStream(ex.getInputStream(),httpContentType,true);
336 ex = null;
337 }
338 catch (XmlReaderException ex2) {
339 ex = ex2;
340 }
341 }
342 }
343 if (ex!=null) {
344 String encoding = ex.getXmlEncoding();
345 if (encoding==null) {
346 encoding = ex.getContentTypeEncoding();
347 }
348 if (encoding==null) {
349 encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
350 }
351 prepareReader(ex.getInputStream(),encoding);
352 }
353 }
354
355 /**
356 * Returns the charset encoding of the XmlReader.
357 * <p>
358 * @return charset encoding.
359 *
360 */
361 public String getEncoding() {
362 return _encoding;
363 }
364
365 public int read(char[] buf,int offset,int len) throws IOException {
366 return _reader.read(buf,offset,len);
367 }
368
369 /**
370 * Closes the XmlReader stream.
371 * <p>
372 * @throws IOException thrown if there was a problem closing the stream.
373 *
374 */
375 public void close() throws IOException {
376 _reader.close();
377 }
378
379 private void doRawStream(InputStream is,boolean lenient) throws IOException {
380 BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
381 String bomEnc = getBOMEncoding(pis);
382 String xmlGuessEnc = getXMLGuessEncoding(pis);
383 String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
384 String encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, pis);
385 prepareReader(pis,encoding);
386 }
387
388 private void doHttpStream(InputStream is,String httpContentType,boolean lenient) throws IOException {
389 BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
390 String cTMime = getContentTypeMime(httpContentType);
391 String cTEnc = getContentTypeEncoding(httpContentType);
392 String bomEnc = getBOMEncoding(pis);
393 String xmlGuessEnc = getXMLGuessEncoding(pis);
394 String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
395 String encoding = calculateHttpEncoding(cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, pis,lenient);
396 prepareReader(pis,encoding);
397 }
398
399 private void prepareReader(InputStream is,String encoding) throws IOException {
400 _reader = new InputStreamReader(is,encoding);
401 _encoding = encoding;
402 }
403
404 // InputStream is passed for XmlReaderException creation only
405 private String calculateRawEncoding(String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is) throws IOException {
406 String encoding;
407 if (bomEnc==null) {
408 if (xmlGuessEnc==null || xmlEnc==null) {
409 encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
410 }
411 else
412 if (xmlEnc.equals(UTF_16) && (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc.equals(UTF_16LE))) {
413 encoding = xmlGuessEnc;
414 }
415 else {
416 encoding = xmlEnc;
417 }
418 }
419 else
420 if (bomEnc.equals(UTF_8)) {
421 if (xmlGuessEnc!=null && !xmlGuessEnc.equals(UTF_8)) {
422 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
423 bomEnc,xmlGuessEnc,xmlEnc,is);
424 }
425 if (xmlEnc!=null && !xmlEnc.equals(UTF_8)) {
426 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
427 bomEnc,xmlGuessEnc,xmlEnc,is);
428 }
429 encoding = UTF_8;
430 }
431 else
432 if (bomEnc.equals(UTF_16BE) || bomEnc.equals(UTF_16LE)) {
433 if (xmlGuessEnc!=null && !xmlGuessEnc.equals(bomEnc)) {
434 throw new IOException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}));
435 }
436 if (xmlEnc!=null && !xmlEnc.equals(UTF_16) && !xmlEnc.equals(bomEnc)) {
437 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
438 bomEnc,xmlGuessEnc,xmlEnc,is);
439 }
440 encoding =bomEnc;
441 }
442 else {
443 throw new XmlReaderException(RAW_EX_2.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
444 bomEnc,xmlGuessEnc,xmlEnc,is);
445 }
446 return encoding;
447 }
448
449 // InputStream is passed for XmlReaderException creation only
450 private String calculateHttpEncoding(String cTMime, String cTEnc, String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is,boolean lenient) throws IOException {
451 String encoding;
452 if (lenient & xmlEnc!=null) {
453 encoding = xmlEnc;
454 }
455 else {
456 boolean appXml = isAppXml(cTMime);
457 boolean textXml = isTextXml(cTMime);
458 if (appXml || textXml) {
459 if (cTEnc==null) {
460 if (appXml) {
461 encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, is);
462 }
463 else {
464 encoding = (_defaultEncoding == null) ? US_ASCII : _defaultEncoding;
465 }
466 }
467 else
468 if (bomEnc!=null && (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE))) {
469 throw new XmlReaderException(HTTP_EX_1.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
470 cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
471 }
472 else
473 if (cTEnc.equals(UTF_16)) {
474 if (bomEnc!=null && bomEnc.startsWith(UTF_16)) {
475 encoding = bomEnc;
476 }
477 else {
478 throw new XmlReaderException(HTTP_EX_2.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
479 cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
480 }
481 }
482 else {
483 encoding = cTEnc;
484 }
485 }
486 else {
487 throw new XmlReaderException(HTTP_EX_3.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
488 cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
489 }
490 }
491 return encoding;
492 }
493
494 // returns MIME type or NULL if httpContentType is NULL
495 private static String getContentTypeMime(String httpContentType) {
496 String mime = null;
497 if (httpContentType!=null) {
498 int i = httpContentType.indexOf(";");
499 mime = ((i==-1) ? httpContentType : httpContentType.substring(0,i)).trim();
500 }
501 return mime;
502 }
503
504 private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=([.[^; ]]*)");
505
506 // returns charset parameter value, NULL if not present, NULL if httpContentType is NULL
507 private static String getContentTypeEncoding(String httpContentType) {
508 String encoding = null;
509 if (httpContentType!=null) {
510 int i = httpContentType.indexOf(";");
511 if (i>-1) {
512 String postMime = httpContentType.substring(i+1);
513 Matcher m = CHARSET_PATTERN.matcher(postMime);
514 encoding = (m.find()) ? m.group(1) : null;
515 encoding = (encoding!=null) ? encoding.toUpperCase() : null;
516 }
517 if (encoding != null &&
518 ((encoding.startsWith("\"") && encoding.endsWith("\"")) ||
519 (encoding.startsWith("'") && encoding.endsWith("'"))
520 )) {
521 encoding = encoding.substring(1, encoding.length() - 1);
522 }
523 }
524 return encoding;
525 }
526
527 // returns the BOM in the stream, NULL if not present,
528 // if there was BOM the in the stream it is consumed
529 private static String getBOMEncoding(BufferedInputStream is) throws IOException {
530 String encoding = null;
531 int[] bytes = new int[3];
532 is.mark(3);
533 bytes[0] = is.read();
534 bytes[1] = is.read();
535 bytes[2] = is.read();
536
537 if (bytes[0] == 0xFE && bytes[1] == 0xFF) {
538 encoding = UTF_16BE;
539 is.reset();
540 is.read();
541 is.read();
542 }
543 else
544 if (bytes[0] == 0xFF && bytes[1] == 0xFE) {
545 encoding = UTF_16LE;
546 is.reset();
547 is.read();
548 is.read();
549 }
550 else
551 if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
552 encoding = UTF_8;
553 }
554 else {
555 is.reset();
556 }
557 return encoding;
558 }
559
560 // returns the best guess for the encoding by looking the first bytes of the stream, '<?'
561 private static String getXMLGuessEncoding(BufferedInputStream is) throws IOException {
562 String encoding = null;
563 int[] bytes = new int[4];
564 is.mark(4);
565 bytes[0] = is.read();
566 bytes[1] = is.read();
567 bytes[2] = is.read();
568 bytes[3] = is.read();
569 is.reset();
570
571 if (bytes[0] == 0x00 && bytes[1] == 0x3C && bytes[2] == 0x00 && bytes[3] == 0x3F) {
572 encoding = UTF_16BE;
573 }
574 else
575 if (bytes[0] == 0x3C && bytes[1] == 0x00 && bytes[2] == 0x3F && bytes[3] == 0x00) {
576 encoding = UTF_16LE;
577 }
578 else
579 if (bytes[0] == 0x3C && bytes[1] == 0x3F && bytes[2] == 0x78 && bytes[3] == 0x6D) {
580 encoding = UTF_8;
581 }
582 return encoding;
583 }
584
585
586 private static final Pattern ENCODING_PATTERN =
587 Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
588
589 // returns the encoding declared in the <?xml encoding=...?>, NULL if none
590 private static String getXmlProlog(BufferedInputStream is,String guessedEnc) throws IOException {
591 String encoding = null;
592 if (guessedEnc!=null) {
593 byte[] bytes = new byte[BUFFER_SIZE];
594 is.mark(BUFFER_SIZE);
595 int offset = 0;
596 int max = BUFFER_SIZE;
597 int c = is.read(bytes,offset,max);
598 int firstGT = -1;
599 while (c!=-1 && firstGT==-1 && offset< BUFFER_SIZE) {
600 offset += c;
601 max -= c;
602 c = is.read(bytes,offset,max);
603 firstGT = new String(bytes, 0, offset).indexOf(">");
604 }
605 if (firstGT == -1) {
606 if (c == -1) {
607 throw new IOException("Unexpected end of XML stream");
608 }
609 else {
610 throw new IOException("XML prolog or ROOT element not found on first " + offset + " bytes");
611 }
612 }
613 int bytesRead = offset;
614 if (bytesRead>0) {
615 is.reset();
616 Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes,0,firstGT + 1), guessedEnc);
617 BufferedReader bReader = new BufferedReader(reader);
618 StringBuffer prolog = new StringBuffer();
619 String line = bReader.readLine();
620 while (line != null) {
621 prolog.append(line);
622 line = bReader.readLine();
623 }
624 Matcher m = ENCODING_PATTERN.matcher(prolog);
625 if (m.find()) {
626 encoding = m.group(1).toUpperCase();
627 encoding = encoding.substring(1,encoding.length()-1);
628 }
629 }
630 }
631 return encoding;
632 }
633
634 // indicates if the MIME type belongs to the APPLICATION XML family
635 private static boolean isAppXml(String mime) {
636 return mime!=null &&
637 (mime.equals("application/xml") ||
638 mime.equals("application/xml-dtd") ||
639 mime.equals("application/xml-external-parsed-entity") ||
640 (mime.startsWith("application/") && mime.endsWith("+xml")));
641 }
642
643 // indicates if the MIME type belongs to the TEXT XML family
644 private static boolean isTextXml(String mime) {
645 return mime!=null &&
646 (mime.equals("text/xml") ||
647 mime.equals("text/xml-external-parsed-entity") ||
648 (mime.startsWith("text/") && mime.endsWith("+xml")));
649 }
650
651 private static final MessageFormat RAW_EX_1 = new MessageFormat(
652 "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch");
653
654 private static final MessageFormat RAW_EX_2 = new MessageFormat(
655 "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM");
656
657 private static final MessageFormat HTTP_EX_1 = new MessageFormat(
658 "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL");
659
660 private static final MessageFormat HTTP_EX_2 = new MessageFormat(
661 "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch");
662
663 private static final MessageFormat HTTP_EX_3 = new MessageFormat(
664 "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME");
665
666 }
|