View Javadoc

1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.io;
18  
19  import java.io.*;
20  import java.net.URL;
21  import java.net.URLConnection;
22  import java.net.HttpURLConnection;
23  import java.util.regex.Pattern;
24  import java.util.regex.Matcher;
25  import java.text.MessageFormat;
26  
27  /**
28   * Character stream that handles (or at least attemtps to) all the necessary Voodo to figure out
29   * the charset encoding of the XML document within the stream.
30   * <p>
31   * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a
32   * character stream.
33   * <p>
34   * All this has to be done without consuming characters from the stream, if not the XML parser
35   * will not recognized the document as a valid XML. This is not 100% true, but it's close enough
36   * (UTF-8 BOM is not handled by all parsers right now, XmlReader handles it and things work in all
37   * parsers).
38   * <p>
39   * The XmlReader class handles the charset encoding of XML documents in Files, raw streams and
40   * HTTP streams by offering a wide set of constructors.
41   * <P>
42   * By default the charset encoding detection is lenient, the constructor with the lenient flag
43   * can be used for an script (following HTTP MIME and XML specifications).
44   * All this is nicely explained by Mark Pilgrim in his blog,
45   * <a href="http://diveintomark.org/archives/2004/02/13/xml-media-types">
46   * Determining the character encoding of a feed</a>.
47   * <p>
48   * @author Alejandro Abdelnur
49   * @version revision 1.18 taken on 2008-03-06 from Rome (see
50   *          https://rome.dev.java.net/source/browse/rome/src/java/com/sun/syndication/io/XmlReader.java)
51   */
52  public class XmlReader extends Reader {
53      private static final int BUFFER_SIZE = 4096;
54  
55      private static final String UTF_8 = "UTF-8";
56      private static final String US_ASCII = "US-ASCII";
57      private static final String UTF_16BE = "UTF-16BE";
58      private static final String UTF_16LE = "UTF-16LE";
59      private static final String UTF_16 = "UTF-16";
60  
61      private static String _staticDefaultEncoding = null;
62  
63      private Reader _reader;
64      private String _encoding;
65      private String _defaultEncoding;
66  
67      /**
68       * Sets the default encoding to use if none is set in HTTP content-type,
69       * XML prolog and the rules based on content-type are not adequate.
70       * <p/>
71       * If it is set to NULL the content-type based rules are used.
72       * <p/>
73       * By default it is NULL.
74       * <p/>
75       *
76       * @param encoding charset encoding to default to.
77       */
78      public static void setDefaultEncoding(String encoding) {
79          _staticDefaultEncoding = encoding;
80      }
81  
82      /**
83       * Returns the default encoding to use if none is set in HTTP content-type,
84       * XML prolog and the rules based on content-type are not adequate.
85       * <p/>
86       * If it is NULL the content-type based rules are used.
87       * <p/>
88       *
89       * @return the default encoding to use.
90       */
91      public static String getDefaultEncoding() {
92          return _staticDefaultEncoding;
93      }
94  
95      /**
96       * Creates a Reader for a File.
97       * <p>
98       * It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also
99       * missing defaults to UTF-8.
100      * <p>
101      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
102      * for details.
103      * <p>
104      * @param file File to create a Reader from.
105      * @throws IOException thrown if there is a problem reading the file.
106      *
107      */
108     public XmlReader(File file) throws IOException {
109         this(new FileInputStream(file));
110     }
111 
112     /**
113      * Creates a Reader for a raw InputStream.
114      * <p>
115      * It follows the same logic used for files.
116      * <p>
117      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
118      * for details.
119      * <p>
120      * @param is InputStream to create a Reader from.
121      * @throws IOException thrown if there is a problem reading the stream.
122      *
123      */
124     public XmlReader(InputStream is) throws IOException {
125         this(is,true);
126     }
127 
128     /**
129      * Creates a Reader for a raw InputStream.
130      * <p>
131      * It follows the same logic used for files.
132      * <p>
133      * If lenient detection is indicated and the detection above fails as per specifications it then attempts
134      * the following:
135      * <p>
136      * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
137      * <p>
138      * Else if the XML prolog had a charset encoding that encoding is used.
139      * <p>
140      * Else if the content type had a charset encoding that encoding is used.
141      * <p>
142      * Else 'UTF-8' is used.
143      * <p>
144      * If lenient detection is indicated an XmlReaderException is never thrown.
145      * <p>
146      * @param is InputStream to create a Reader from.
147      * @param lenient indicates if the charset encoding detection should be relaxed.
148      * @throws IOException thrown if there is a problem reading the stream.
149      * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
150      *
151      */
152     public XmlReader(InputStream is,boolean lenient) throws IOException, XmlReaderException {
153         _defaultEncoding = _staticDefaultEncoding;
154         try {
155             doRawStream(is,lenient);
156         }
157         catch (XmlReaderException ex) {
158             if (!lenient) {
159                 throw ex;
160             }
161             else {
162                 doLenientDetection(null,ex);
163             }
164         }
165     }
166 
167     /**
168      * Creates a Reader using the InputStream of a URL.
169      * <p>
170      * If the URL is not of type HTTP and there is not 'content-type' header in the fetched
171      * data it uses the same logic used for Files.
172      * <p>
173      * If the URL is a HTTP Url or there is a 'content-type' header in the fetched
174      * data it uses the same logic used for an InputStream with content-type.
175      * <p>
176      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
177      * for details.
178      * <p>
179      * @param url URL to create a Reader from.
180      * @throws IOException thrown if there is a problem reading the stream of the URL.
181      *
182      */
183     public XmlReader(URL url) throws IOException {
184         this(url.openConnection());
185     }
186 
187     /**
188      * Creates a Reader using the InputStream of a URLConnection.
189      * <p>
190      * If the URLConnection is not of type HttpURLConnection and there is not
191      * 'content-type' header in the fetched data it uses the same logic used for files.
192      * <p>
193      * If the URLConnection is a HTTP Url or there is a 'content-type' header in the fetched
194      * data it uses the same logic used for an InputStream with content-type.
195      * <p>
196      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
197      * for details.
198      * <p>
199      * @param conn URLConnection to create a Reader from.
200      * @throws IOException thrown if there is a problem reading the stream of the URLConnection.
201      *
202      */
203     public XmlReader(URLConnection conn) throws IOException {
204         _defaultEncoding = _staticDefaultEncoding;
205         boolean lenient = true;
206         if (conn instanceof HttpURLConnection) {
207             try {
208                 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
209             }
210             catch (XmlReaderException ex) {
211                 doLenientDetection(conn.getContentType(),ex);
212             }
213         }
214         else
215         if (conn.getContentType()!=null) {
216             try {
217                 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
218             }
219             catch (XmlReaderException ex) {
220                 doLenientDetection(conn.getContentType(),ex);
221             }
222         }
223         else {
224             try {
225                 doRawStream(conn.getInputStream(),lenient);
226             }
227             catch (XmlReaderException ex) {
228                 doLenientDetection(null,ex);
229             }
230         }
231     }
232 
233     /**
234      * Creates a Reader using an InputStream an the associated content-type header.
235      * <p>
236      * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
237      * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
238      * prolog encoding uses the default encoding mandated by the content-type MIME type.
239      * <p>
240      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
241      * for details.
242      * <p>
243      * @param is InputStream to create the reader from.
244      * @param httpContentType content-type header to use for the resolution of the charset encoding.
245      * @throws IOException thrown if there is a problem reading the file.
246      *
247      */
248     public XmlReader(InputStream is,String httpContentType) throws IOException {
249         this(is,httpContentType,true);
250     }
251 
252     /**
253      * Creates a Reader using an InputStream an the associated content-type header. This constructor is
254      * lenient regarding the encoding detection.
255      * <p>
256      * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
257      * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
258      * prolog encoding uses the default encoding mandated by the content-type MIME type.
259      * <p>
260      * If lenient detection is indicated and the detection above fails as per specifications it then attempts
261      * the following:
262      * <p>
263      * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
264      * <p>
265      * Else if the XML prolog had a charset encoding that encoding is used.
266      * <p>
267      * Else if the content type had a charset encoding that encoding is used.
268      * <p>
269      * Else 'UTF-8' is used.
270      * <p>
271      * If lenient detection is indicated an XmlReaderException is never thrown.
272      * <p>
273      * @param is InputStream to create the reader from.
274      * @param httpContentType content-type header to use for the resolution of the charset encoding.
275      * @param lenient indicates if the charset encoding detection should be relaxed.
276      * @throws IOException thrown if there is a problem reading the file.
277      * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
278      *
279      */
280     public XmlReader(InputStream is,String httpContentType,boolean lenient, String defaultEncoding)
281         throws IOException, XmlReaderException {
282         _defaultEncoding = (defaultEncoding == null) ? _staticDefaultEncoding : defaultEncoding;
283         try {
284             doHttpStream(is,httpContentType,lenient);
285         }
286         catch (XmlReaderException ex) {
287             if (!lenient) {
288                 throw ex;
289             }
290             else {
291                 doLenientDetection(httpContentType,ex);
292             }
293         }
294     }
295 
296     /**
297      * Creates a Reader using an InputStream an the associated content-type header. This constructor is
298      * lenient regarding the encoding detection.
299      * <p>
300      * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
301      * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
302      * prolog encoding uses the default encoding mandated by the content-type MIME type.
303      * <p>
304      * If lenient detection is indicated and the detection above fails as per specifications it then attempts
305      * the following:
306      * <p>
307      * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
308      * <p>
309      * Else if the XML prolog had a charset encoding that encoding is used.
310      * <p>
311      * Else if the content type had a charset encoding that encoding is used.
312      * <p>
313      * Else 'UTF-8' is used.
314      * <p>
315      * If lenient detection is indicated an XmlReaderException is never thrown.
316      * <p>
317      * @param is InputStream to create the reader from.
318      * @param httpContentType content-type header to use for the resolution of the charset encoding.
319      * @param lenient indicates if the charset encoding detection should be relaxed.
320      * @throws IOException thrown if there is a problem reading the file.
321      * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
322      *
323      */
324     public XmlReader(InputStream is, String httpContentType, boolean lenient)
325         throws IOException, XmlReaderException {
326         this(is, httpContentType, lenient, null);
327     }
328 
329     private void doLenientDetection(String httpContentType,XmlReaderException ex) throws IOException {
330         if (httpContentType!=null) {
331             if (httpContentType.startsWith("text/html")) {
332                 httpContentType = httpContentType.substring("text/html".length());
333                 httpContentType = "text/xml" + httpContentType;
334                 try {
335                     doHttpStream(ex.getInputStream(),httpContentType,true);
336                     ex = null;
337                 }
338                 catch (XmlReaderException ex2) {
339                     ex = ex2;
340                 }
341             }
342         }
343         if (ex!=null) {
344             String encoding = ex.getXmlEncoding();
345             if (encoding==null) {
346                 encoding = ex.getContentTypeEncoding();
347             }
348             if (encoding==null) {
349               encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
350             }
351             prepareReader(ex.getInputStream(),encoding);
352         }
353     }
354 
355     /**
356      * Returns the charset encoding of the XmlReader.
357      * <p>
358      * @return charset encoding.
359      *
360      */
361     public String getEncoding() {
362         return _encoding;
363     }
364 
365     public int read(char[] buf,int offset,int len) throws IOException {
366         return _reader.read(buf,offset,len);
367     }
368 
369     /**
370      * Closes the XmlReader stream.
371      * <p>
372      * @throws IOException thrown if there was a problem closing the stream.
373      *
374      */
375     public void close() throws IOException {
376         _reader.close();
377     }
378 
379     private void doRawStream(InputStream is,boolean lenient) throws IOException {
380         BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
381         String bomEnc = getBOMEncoding(pis);
382         String xmlGuessEnc =  getXMLGuessEncoding(pis);
383         String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
384         String encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, pis);
385         prepareReader(pis,encoding);
386     }
387 
388     private void doHttpStream(InputStream is,String httpContentType,boolean lenient) throws IOException {
389         BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
390         String cTMime = getContentTypeMime(httpContentType);
391         String cTEnc  = getContentTypeEncoding(httpContentType);
392         String bomEnc = getBOMEncoding(pis);
393         String xmlGuessEnc =  getXMLGuessEncoding(pis);
394         String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
395         String encoding = calculateHttpEncoding(cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, pis,lenient);
396         prepareReader(pis,encoding);
397     }
398 
399     private void prepareReader(InputStream is,String encoding) throws IOException {
400         _reader = new InputStreamReader(is,encoding);
401         _encoding = encoding;
402     }
403 
404     // InputStream is passed for XmlReaderException creation only
405     private String calculateRawEncoding(String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is) throws IOException {
406         String encoding;
407         if (bomEnc==null) {
408             if (xmlGuessEnc==null || xmlEnc==null) {
409                 encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
410             }
411             else
412             if (xmlEnc.equals(UTF_16) && (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc.equals(UTF_16LE))) {
413                 encoding = xmlGuessEnc;
414             }
415             else {
416                 encoding = xmlEnc;
417             }
418         }
419         else
420         if (bomEnc.equals(UTF_8)) {
421             if (xmlGuessEnc!=null && !xmlGuessEnc.equals(UTF_8)) {
422                 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
423                                              bomEnc,xmlGuessEnc,xmlEnc,is);
424             }
425             if (xmlEnc!=null && !xmlEnc.equals(UTF_8)) {
426                 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
427                                              bomEnc,xmlGuessEnc,xmlEnc,is);
428             }
429             encoding = UTF_8;
430         }
431         else
432         if (bomEnc.equals(UTF_16BE) || bomEnc.equals(UTF_16LE)) {
433             if (xmlGuessEnc!=null && !xmlGuessEnc.equals(bomEnc)) {
434                 throw new IOException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}));
435             }
436             if (xmlEnc!=null && !xmlEnc.equals(UTF_16) && !xmlEnc.equals(bomEnc)) {
437                 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
438                                              bomEnc,xmlGuessEnc,xmlEnc,is);
439             }
440             encoding =bomEnc;
441         }
442         else {
443             throw new XmlReaderException(RAW_EX_2.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
444                                          bomEnc,xmlGuessEnc,xmlEnc,is);
445         }
446         return encoding;
447     }
448 
449     // InputStream is passed for XmlReaderException creation only
450     private String calculateHttpEncoding(String cTMime, String cTEnc, String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is,boolean lenient) throws IOException {
451         String encoding;
452         if (lenient & xmlEnc!=null) {
453             encoding = xmlEnc;
454         }
455         else {
456             boolean appXml = isAppXml(cTMime);
457             boolean textXml = isTextXml(cTMime);
458             if (appXml || textXml) {
459                 if (cTEnc==null) {
460                     if (appXml) {
461                         encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, is);
462                     }
463                     else {
464                         encoding = (_defaultEncoding == null) ? US_ASCII : _defaultEncoding;
465                     }
466                 }
467                 else
468                 if (bomEnc!=null && (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE))) {
469                     throw new XmlReaderException(HTTP_EX_1.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
470                                                  cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
471                 }
472                 else
473                 if (cTEnc.equals(UTF_16)) {
474                     if (bomEnc!=null && bomEnc.startsWith(UTF_16)) {
475                         encoding = bomEnc;
476                     }
477                     else {
478                         throw new XmlReaderException(HTTP_EX_2.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
479                                                      cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
480                     }
481                 }
482                 else {
483                     encoding = cTEnc;
484                 }
485             }
486             else {
487                 throw new XmlReaderException(HTTP_EX_3.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
488                                              cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
489             }
490         }
491         return encoding;
492     }
493 
494     // returns MIME type or NULL if httpContentType is NULL
495     private static String getContentTypeMime(String httpContentType) {
496         String mime = null;
497         if (httpContentType!=null) {
498             int i = httpContentType.indexOf(";");
499             mime = ((i==-1) ? httpContentType : httpContentType.substring(0,i)).trim();
500         }
501         return mime;
502     }
503 
504     private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=([.[^; ]]*)");
505 
506     // returns charset parameter value, NULL if not present, NULL if httpContentType is NULL
507     private static String getContentTypeEncoding(String httpContentType) {
508         String encoding = null;
509         if (httpContentType!=null) {
510             int i = httpContentType.indexOf(";");
511             if (i>-1) {
512                 String postMime = httpContentType.substring(i+1);
513                 Matcher m = CHARSET_PATTERN.matcher(postMime);
514                 encoding = (m.find()) ? m.group(1) : null;
515                 encoding = (encoding!=null) ? encoding.toUpperCase() : null;
516             }
517             if (encoding != null &&
518                     ((encoding.startsWith("\"") && encoding.endsWith("\"")) ||
519                      (encoding.startsWith("'") && encoding.endsWith("'"))
520                     )) {
521                 encoding = encoding.substring(1, encoding.length() - 1);
522             }
523         }
524         return encoding;
525     }
526 
527     // returns the BOM in the stream, NULL if not present,
528     // if there was BOM the in the stream it is consumed
529     private static String getBOMEncoding(BufferedInputStream is) throws IOException {
530         String encoding = null;
531         int[] bytes = new int[3];
532         is.mark(3);
533         bytes[0] = is.read();
534         bytes[1] = is.read();
535         bytes[2] = is.read();
536 
537         if (bytes[0] == 0xFE && bytes[1] == 0xFF) {
538             encoding = UTF_16BE;
539             is.reset();
540             is.read();
541             is.read();
542         }
543         else
544         if (bytes[0] == 0xFF && bytes[1] == 0xFE) {
545             encoding = UTF_16LE;
546             is.reset();
547             is.read();
548             is.read();
549         }
550         else
551         if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
552             encoding = UTF_8;
553         }
554         else {
555             is.reset();
556         }
557         return encoding;
558     }
559 
560     // returns the best guess for the encoding by looking the first bytes of the stream, '<?'
561     private static String getXMLGuessEncoding(BufferedInputStream is) throws IOException {
562         String encoding = null;
563         int[] bytes = new int[4];
564         is.mark(4);
565         bytes[0] = is.read();
566         bytes[1] = is.read();
567         bytes[2] = is.read();
568         bytes[3] = is.read();
569         is.reset();
570 
571         if (bytes[0] == 0x00 && bytes[1] == 0x3C && bytes[2] == 0x00 && bytes[3] == 0x3F) {
572                 encoding = UTF_16BE;
573         }
574         else
575         if (bytes[0] == 0x3C && bytes[1] == 0x00 && bytes[2] == 0x3F && bytes[3] == 0x00) {
576                 encoding = UTF_16LE;
577         }
578         else
579         if (bytes[0] == 0x3C && bytes[1] == 0x3F && bytes[2] == 0x78 && bytes[3] == 0x6D) {
580             encoding = UTF_8;
581         }
582         return encoding;
583     }
584 
585 
586     private static final Pattern ENCODING_PATTERN =
587         Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
588 
589     // returns the encoding declared in the <?xml encoding=...?>,  NULL if none
590     private static String getXmlProlog(BufferedInputStream is,String guessedEnc) throws IOException {
591         String encoding = null;
592         if (guessedEnc!=null) {
593             byte[] bytes = new byte[BUFFER_SIZE];
594             is.mark(BUFFER_SIZE);
595             int offset = 0;
596             int max = BUFFER_SIZE;
597             int c = is.read(bytes,offset,max);
598             int firstGT = -1;
599             while (c!=-1 && firstGT==-1 && offset< BUFFER_SIZE) {
600                 offset += c;
601                 max -= c;
602                 c = is.read(bytes,offset,max);
603                 firstGT = new String(bytes, 0, offset).indexOf(">");
604             }
605             if (firstGT == -1) {
606                 if (c == -1) {
607                     throw new IOException("Unexpected end of XML stream");
608                 }
609                 else {
610                     throw new IOException("XML prolog or ROOT element not found on first " + offset + " bytes");
611                 }
612             }
613             int bytesRead = offset;
614             if (bytesRead>0) {
615                 is.reset();
616                 Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes,0,firstGT + 1), guessedEnc);
617                 BufferedReader bReader = new BufferedReader(reader);
618                 StringBuffer prolog = new StringBuffer();
619                 String line = bReader.readLine();
620                 while (line != null) {
621                     prolog.append(line);
622                     line = bReader.readLine();
623                 }
624                 Matcher m = ENCODING_PATTERN.matcher(prolog);
625                 if (m.find()) {
626                     encoding = m.group(1).toUpperCase();
627                     encoding = encoding.substring(1,encoding.length()-1);
628                 }
629             }
630         }
631         return encoding;
632     }
633 
634     // indicates if the MIME type belongs to the APPLICATION XML family
635     private static boolean isAppXml(String mime) {
636         return mime!=null &&
637                (mime.equals("application/xml") ||
638                 mime.equals("application/xml-dtd") ||
639                 mime.equals("application/xml-external-parsed-entity") ||
640                 (mime.startsWith("application/") && mime.endsWith("+xml")));
641     }
642 
643     // indicates if the MIME type belongs to the TEXT XML family
644     private static boolean isTextXml(String mime) {
645         return mime!=null &&
646                (mime.equals("text/xml") ||
647                 mime.equals("text/xml-external-parsed-entity") ||
648                 (mime.startsWith("text/") && mime.endsWith("+xml")));
649     }
650 
651     private static final MessageFormat RAW_EX_1 = new MessageFormat(
652             "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch");
653 
654     private static final MessageFormat RAW_EX_2 = new MessageFormat(
655             "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM");
656 
657     private static final MessageFormat HTTP_EX_1 = new MessageFormat(
658             "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL");
659 
660     private static final MessageFormat HTTP_EX_2 = new MessageFormat(
661             "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch");
662 
663     private static final MessageFormat HTTP_EX_3 = new MessageFormat(
664             "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME");
665 
666 }