Clover Coverage Report
Coverage timestamp: Sa Aug 2 2008 13:56:27 CEST
../../../../img/srcFileCovDistChart4.png 82% of files have more coverage
191   666   106   7,35
98   389   0,55   26
26     4,08  
1    
 
  XmlReader       Line # 52 191 106 33,7% 0.33650795
 
  (30)
 
1    /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10    * Unless required by applicable law or agreed to in writing, software
11    * distributed under the License is distributed on an "AS IS" BASIS,
12    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13    * See the License for the specific language governing permissions and
14    * limitations under the License.
15    *
16    */
17    package com.sun.syndication.io;
18   
19    import java.io.*;
20    import java.net.URL;
21    import java.net.URLConnection;
22    import java.net.HttpURLConnection;
23    import java.util.regex.Pattern;
24    import java.util.regex.Matcher;
25    import java.text.MessageFormat;
26   
27    /**
28    * Character stream that handles (or at least attempts to) all the necessary Voodoo to figure out
29    * the charset encoding of the XML document within the stream.
30    * <p>
31    * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a
32    * character stream.
33    * <p>
34    * All this has to be done without consuming characters from the stream, if not the XML parser
35    * will not recognize the document as valid XML. This is not 100% true, but it's close enough
36    * (UTF-8 BOM is not handled by all parsers right now, XmlReader handles it and things work in all
37    * parsers).
38    * <p>
39    * The XmlReader class handles the charset encoding of XML documents in Files, raw streams and
40    * HTTP streams by offering a wide set of constructors.
41    * <P>
42    * By default the charset encoding detection is lenient, the constructor with the lenient flag
43    * can be used for a strict detection (following HTTP MIME and XML specifications).
44    * All this is nicely explained by Mark Pilgrim in his blog,
45    * <a href="http://diveintomark.org/archives/2004/02/13/xml-media-types">
46    * Determining the character encoding of a feed</a>.
47    * <p>
48    * @author Alejandro Abdelnur
49    * @version revision 1.18 taken on 2008-03-06 from Rome (see
50    * https://rome.dev.java.net/source/browse/rome/src/java/com/sun/syndication/io/XmlReader.java)
51    */
 
52    public class XmlReader extends Reader {
// Size in bytes of the BufferedInputStream wrapped around the source stream and
// the maximum number of bytes scanned when looking for the XML prolog.
53    private static final int BUFFER_SIZE = 4096;
54   
// Charset names compared against detected BOM / prolog / content-type encodings.
55    private static final String UTF_8 = "UTF-8";
56    private static final String US_ASCII = "US-ASCII";
57    private static final String UTF_16BE = "UTF-16BE";
58    private static final String UTF_16LE = "UTF-16LE";
59    private static final String UTF_16 = "UTF-16";
60   
// Class-wide fallback encoding; changed through setDefaultEncoding(), copied into
// _defaultEncoding by the constructors. null means "use the built-in rules".
61    private static String _staticDefaultEncoding = null;
62   
// Decoding reader that read() and close() delegate to; assigned in prepareReader().
63    private Reader _reader;
// Name of the charset finally chosen for _reader; assigned in prepareReader().
64    private String _encoding;
// Per-instance fallback encoding used when detection yields nothing (may be null).
65    private String _defaultEncoding;
66   
67    /**
68    * Sets the default encoding to use if none is set in HTTP content-type,
69    * XML prolog and the rules based on content-type are not adequate.
70    * <p/>
71    * If it is set to NULL the content-type based rules are used.
72    * <p/>
73    * By default it is NULL.
74    * <p/>
75    *
76    * @param encoding charset encoding to default to.
77    */
 
78  0 toggle public static void setDefaultEncoding(String encoding) {
79  0 _staticDefaultEncoding = encoding;
80    }
81   
82    /**
83    * Returns the default encoding to use if none is set in HTTP content-type,
84    * XML prolog and the rules based on content-type are not adequate.
85    * <p/>
86    * If it is NULL the content-type based rules are used.
87    * <p/>
88    *
89    * @return the default encoding to use.
90    */
 
91  0 toggle public static String getDefaultEncoding() {
92  0 return _staticDefaultEncoding;
93    }
94   
95    /**
96    * Creates a Reader for a File.
97    * <p>
98    * It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also
99    * missing defaults to UTF-8.
100    * <p>
101    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
102    * for details.
103    * <p>
104    * @param file File to create a Reader from.
105    * @throws IOException thrown if there is a problem reading the file.
106    *
107    */
 
108  61693 toggle public XmlReader(File file) throws IOException {
109  61693 this(new FileInputStream(file));
110    }
111   
112    /**
113    * Creates a Reader for a raw InputStream.
114    * <p>
115    * It follows the same logic used for files.
116    * <p>
117    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
118    * for details.
119    * <p>
120    * @param is InputStream to create a Reader from.
121    * @throws IOException thrown if there is a problem reading the stream.
122    *
123    */
 
public XmlReader(InputStream is) throws IOException {
    // lenient detection is the default for raw streams
    this(is, true);
}
127   
128    /**
129    * Creates a Reader for a raw InputStream.
130    * <p>
131    * It follows the same logic used for files.
132    * <p>
133    * If lenient detection is indicated and the detection above fails as per specifications it then attempts
134    * the following:
135    * <p>
136    * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
137    * <p>
138    * Else if the XML prolog had a charset encoding that encoding is used.
139    * <p>
140    * Else if the content type had a charset encoding that encoding is used.
141    * <p>
142    * Else 'UTF-8' is used.
143    * <p>
144    * If lenient detection is indicated an XmlReaderException is never thrown.
145    * <p>
146    * @param is InputStream to create a Reader from.
147    * @param lenient indicates if the charset encoding detection should be relaxed.
148    * @throws IOException thrown if there is a problem reading the stream.
149    * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
150    *
151    */
 
public XmlReader(InputStream is,boolean lenient) throws IOException, XmlReaderException {
    _defaultEncoding = _staticDefaultEncoding;
    try {
        doRawStream(is, lenient);
    }
    catch (XmlReaderException detectionFailure) {
        if (lenient) {
            // no content-type is available for a raw stream
            doLenientDetection(null, detectionFailure);
        }
        else {
            throw detectionFailure;
        }
    }
}
166   
167    /**
168    * Creates a Reader using the InputStream of a URL.
169    * <p>
170    * If the URL is not of type HTTP and there is not 'content-type' header in the fetched
171    * data it uses the same logic used for Files.
172    * <p>
173    * If the URL is a HTTP Url or there is a 'content-type' header in the fetched
174    * data it uses the same logic used for an InputStream with content-type.
175    * <p>
176    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
177    * for details.
178    * <p>
179    * @param url URL to create a Reader from.
180    * @throws IOException thrown if there is a problem reading the stream of the URL.
181    *
182    */
 
public XmlReader(URL url) throws IOException {
    // the URLConnection constructor does the content-type based dispatch
    this(url.openConnection());
}
186   
187    /**
188    * Creates a Reader using the InputStream of a URLConnection.
189    * <p>
190    * If the URLConnection is not of type HttpURLConnection and there is not
191    * 'content-type' header in the fetched data it uses the same logic used for files.
192    * <p>
193    * If the URLConnection is a HTTP Url or there is a 'content-type' header in the fetched
194    * data it uses the same logic used for an InputStream with content-type.
195    * <p>
196    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
197    * for details.
198    * <p>
199    * @param conn URLConnection to create a Reader from.
200    * @throws IOException thrown if there is a problem reading the stream of the URLConnection.
201    *
202    */
 
203  0 toggle public XmlReader(URLConnection conn) throws IOException {
204  0 _defaultEncoding = _staticDefaultEncoding;
205  0 boolean lenient = true;
206  0 if (conn instanceof HttpURLConnection) {
207  0 try {
208  0 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
209    }
210    catch (XmlReaderException ex) {
211  0 doLenientDetection(conn.getContentType(),ex);
212    }
213    }
214    else
215  0 if (conn.getContentType()!=null) {
216  0 try {
217  0 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
218    }
219    catch (XmlReaderException ex) {
220  0 doLenientDetection(conn.getContentType(),ex);
221    }
222    }
223    else {
224  0 try {
225  0 doRawStream(conn.getInputStream(),lenient);
226    }
227    catch (XmlReaderException ex) {
228  0 doLenientDetection(null,ex);
229    }
230    }
231    }
232   
233    /**
234    * Creates a Reader using an InputStream and the associated content-type header.
235    * <p>
236    * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
237    * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
238    * prolog encoding uses the default encoding mandated by the content-type MIME type.
239    * <p>
240    * It does a lenient charset encoding detection, check the constructor with the lenient parameter
241    * for details.
242    * <p>
243    * @param is InputStream to create the reader from.
244    * @param httpContentType content-type header to use for the resolution of the charset encoding.
245    * @throws IOException thrown if there is a problem reading the file.
246    *
247    */
 
public XmlReader(InputStream is,String httpContentType) throws IOException {
    // lenient detection is the default when only a content-type is supplied
    this(is, httpContentType, true);
}
251   
252    /**
253    * Creates a Reader using an InputStream and the associated content-type header. This constructor is
254    * lenient regarding the encoding detection.
255    * <p>
256    * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
257    * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
258    * prolog encoding uses the default encoding mandated by the content-type MIME type.
259    * <p>
260    * If lenient detection is indicated and the detection above fails as per specifications it then attempts
261    * the following:
262    * <p>
263    * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
264    * <p>
265    * Else if the XML prolog had a charset encoding that encoding is used.
266    * <p>
267    * Else if the content type had a charset encoding that encoding is used.
268    * <p>
269    * Else 'UTF-8' is used.
270    * <p>
271    * If lenient detection is indicated an XmlReaderException is never thrown.
272    * <p>
273    * @param is InputStream to create the reader from.
274    * @param httpContentType content-type header to use for the resolution of the charset encoding.
275    * @param lenient indicates if the charset encoding detection should be relaxed.
276    * @throws IOException thrown if there is a problem reading the file.
277    * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
278    *
279    */
 
public XmlReader(InputStream is,String httpContentType,boolean lenient, String defaultEncoding)
        throws IOException, XmlReaderException {
    // an explicit default wins over the class-wide one
    if (defaultEncoding == null) {
        _defaultEncoding = _staticDefaultEncoding;
    }
    else {
        _defaultEncoding = defaultEncoding;
    }
    try {
        doHttpStream(is, httpContentType, lenient);
    }
    catch (XmlReaderException detectionFailure) {
        if (lenient) {
            doLenientDetection(httpContentType, detectionFailure);
        }
        else {
            throw detectionFailure;
        }
    }
}
295   
296    /**
297    * Creates a Reader using an InputStream and the associated content-type header. This constructor is
298    * lenient regarding the encoding detection.
299    * <p>
300    * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
301    * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
302    * prolog encoding uses the default encoding mandated by the content-type MIME type.
303    * <p>
304    * If lenient detection is indicated and the detection above fails as per specifications it then attempts
305    * the following:
306    * <p>
307    * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
308    * <p>
309    * Else if the XML prolog had a charset encoding that encoding is used.
310    * <p>
311    * Else if the content type had a charset encoding that encoding is used.
312    * <p>
313    * Else 'UTF-8' is used.
314    * <p>
315    * If lenient detection is indicated an XmlReaderException is never thrown.
316    * <p>
317    * @param is InputStream to create the reader from.
318    * @param httpContentType content-type header to use for the resolution of the charset encoding.
319    * @param lenient indicates if the charset encoding detection should be relaxed.
320    * @throws IOException thrown if there is a problem reading the file.
321    * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
322    *
323    */
 
public XmlReader(InputStream is, String httpContentType, boolean lenient)
        throws IOException, XmlReaderException {
    // null default encoding: fall back to the class-wide default
    this(is, httpContentType, lenient, null);
}
328   
 
329  0 toggle private void doLenientDetection(String httpContentType,XmlReaderException ex) throws IOException {
330  0 if (httpContentType!=null) {
331  0 if (httpContentType.startsWith("text/html")) {
332  0 httpContentType = httpContentType.substring("text/html".length());
333  0 httpContentType = "text/xml" + httpContentType;
334  0 try {
335  0 doHttpStream(ex.getInputStream(),httpContentType,true);
336  0 ex = null;
337    }
338    catch (XmlReaderException ex2) {
339  0 ex = ex2;
340    }
341    }
342    }
343  0 if (ex!=null) {
344  0 String encoding = ex.getXmlEncoding();
345  0 if (encoding==null) {
346  0 encoding = ex.getContentTypeEncoding();
347    }
348  0 if (encoding==null) {
349  0 encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
350    }
351  0 prepareReader(ex.getInputStream(),encoding);
352    }
353    }
354   
355    /**
356    * Returns the charset encoding of the XmlReader.
357    * <p>
358    * @return charset encoding.
359    *
360    */
 
361  0 toggle public String getEncoding() {
362  0 return _encoding;
363    }
364   
 
365    toggle public int read(char[] buf,int offset,int len) throws IOException {
366    return _reader.read(buf,offset,len);
367    }
368   
369    /**
370    * Closes the XmlReader stream.
371    * <p>
372    * @throws IOException thrown if there was a problem closing the stream.
373    *
374    */
 
375  61691 toggle public void close() throws IOException {
376  61691 _reader.close();
377    }
378   
 
379  61693 toggle private void doRawStream(InputStream is,boolean lenient) throws IOException {
380  61693 BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
381  61693 String bomEnc = getBOMEncoding(pis);
382  61693 String xmlGuessEnc = getXMLGuessEncoding(pis);
383  61693 String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
384  61693 String encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, pis);
385  61693 prepareReader(pis,encoding);
386    }
387   
 
388  0 toggle private void doHttpStream(InputStream is,String httpContentType,boolean lenient) throws IOException {
389  0 BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
390  0 String cTMime = getContentTypeMime(httpContentType);
391  0 String cTEnc = getContentTypeEncoding(httpContentType);
392  0 String bomEnc = getBOMEncoding(pis);
393  0 String xmlGuessEnc = getXMLGuessEncoding(pis);
394  0 String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
395  0 String encoding = calculateHttpEncoding(cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, pis,lenient);
396  0 prepareReader(pis,encoding);
397    }
398   
 
399  61693 toggle private void prepareReader(InputStream is,String encoding) throws IOException {
400  61693 _reader = new InputStreamReader(is,encoding);
401  61693 _encoding = encoding;
402    }
403   
404    // InputStream is passed for XmlReaderException creation only
 
405  61693 toggle private String calculateRawEncoding(String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is) throws IOException {
406  61693 String encoding;
407  61693 if (bomEnc==null) {
408  61693 if (xmlGuessEnc==null || xmlEnc==null) {
409  0 encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
410    }
411    else
412  61693 if (xmlEnc.equals(UTF_16) && (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc.equals(UTF_16LE))) {
413  5 encoding = xmlGuessEnc;
414    }
415    else {
416  61688 encoding = xmlEnc;
417    }
418    }
419    else
420  0 if (bomEnc.equals(UTF_8)) {
421  0 if (xmlGuessEnc!=null && !xmlGuessEnc.equals(UTF_8)) {
422  0 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
423    bomEnc,xmlGuessEnc,xmlEnc,is);
424    }
425  0 if (xmlEnc!=null && !xmlEnc.equals(UTF_8)) {
426  0 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
427    bomEnc,xmlGuessEnc,xmlEnc,is);
428    }
429  0 encoding = UTF_8;
430    }
431    else
432  0 if (bomEnc.equals(UTF_16BE) || bomEnc.equals(UTF_16LE)) {
433  0 if (xmlGuessEnc!=null && !xmlGuessEnc.equals(bomEnc)) {
434  0 throw new IOException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}));
435    }
436  0 if (xmlEnc!=null && !xmlEnc.equals(UTF_16) && !xmlEnc.equals(bomEnc)) {
437  0 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
438    bomEnc,xmlGuessEnc,xmlEnc,is);
439    }
440  0 encoding =bomEnc;
441    }
442    else {
443  0 throw new XmlReaderException(RAW_EX_2.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
444    bomEnc,xmlGuessEnc,xmlEnc,is);
445    }
446  61693 return encoding;
447    }
448   
449    // InputStream is passed for XmlReaderException creation only
 
450  0 toggle private String calculateHttpEncoding(String cTMime, String cTEnc, String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is,boolean lenient) throws IOException {
451  0 String encoding;
452  0 if (lenient & xmlEnc!=null) {
453  0 encoding = xmlEnc;
454    }
455    else {
456  0 boolean appXml = isAppXml(cTMime);
457  0 boolean textXml = isTextXml(cTMime);
458  0 if (appXml || textXml) {
459  0 if (cTEnc==null) {
460  0 if (appXml) {
461  0 encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, is);
462    }
463    else {
464  0 encoding = (_defaultEncoding == null) ? US_ASCII : _defaultEncoding;
465    }
466    }
467    else
468  0 if (bomEnc!=null && (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE))) {
469  0 throw new XmlReaderException(HTTP_EX_1.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
470    cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
471    }
472    else
473  0 if (cTEnc.equals(UTF_16)) {
474  0 if (bomEnc!=null && bomEnc.startsWith(UTF_16)) {
475  0 encoding = bomEnc;
476    }
477    else {
478  0 throw new XmlReaderException(HTTP_EX_2.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
479    cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
480    }
481    }
482    else {
483  0 encoding = cTEnc;
484    }
485    }
486    else {
487  0 throw new XmlReaderException(HTTP_EX_3.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
488    cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
489    }
490    }
491  0 return encoding;
492    }
493   
494    // returns MIME type or NULL if httpContentType is NULL
 
495  0 toggle private static String getContentTypeMime(String httpContentType) {
496  0 String mime = null;
497  0 if (httpContentType!=null) {
498  0 int i = httpContentType.indexOf(";");
499  0 mime = ((i==-1) ? httpContentType : httpContentType.substring(0,i)).trim();
500    }
501  0 return mime;
502    }
503   
504    private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=([.[^; ]]*)");
505   
506    // returns charset parameter value, NULL if not present, NULL if httpContentType is NULL
 
507  0 toggle private static String getContentTypeEncoding(String httpContentType) {
508  0 String encoding = null;
509  0 if (httpContentType!=null) {
510  0 int i = httpContentType.indexOf(";");
511  0 if (i>-1) {
512  0 String postMime = httpContentType.substring(i+1);
513  0 Matcher m = CHARSET_PATTERN.matcher(postMime);
514  0 encoding = (m.find()) ? m.group(1) : null;
515  0 encoding = (encoding!=null) ? encoding.toUpperCase() : null;
516    }
517  0 if (encoding != null &&
518    ((encoding.startsWith("\"") && encoding.endsWith("\"")) ||
519    (encoding.startsWith("'") && encoding.endsWith("'"))
520    )) {
521  0 encoding = encoding.substring(1, encoding.length() - 1);
522    }
523    }
524  0 return encoding;
525    }
526   
527    // returns the BOM in the stream, NULL if not present,
528    // if there was a BOM in the stream it is consumed
 
529  61693 toggle private static String getBOMEncoding(BufferedInputStream is) throws IOException {
530  61693 String encoding = null;
531  61693 int[] bytes = new int[3];
532  61693 is.mark(3);
533  61693 bytes[0] = is.read();
534  61693 bytes[1] = is.read();
535  61693 bytes[2] = is.read();
536   
537  61693 if (bytes[0] == 0xFE && bytes[1] == 0xFF) {
538  0 encoding = UTF_16BE;
539  0 is.reset();
540  0 is.read();
541  0 is.read();
542    }
543    else
544  61693 if (bytes[0] == 0xFF && bytes[1] == 0xFE) {
545  0 encoding = UTF_16LE;
546  0 is.reset();
547  0 is.read();
548  0 is.read();
549    }
550    else
551  61693 if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
552  0 encoding = UTF_8;
553    }
554    else {
555  61693 is.reset();
556    }
557  61693 return encoding;
558    }
559   
560    // returns the best guess for the encoding by looking the first bytes of the stream, '<?'
 
561  61693 toggle private static String getXMLGuessEncoding(BufferedInputStream is) throws IOException {
562  61693 String encoding = null;
563  61693 int[] bytes = new int[4];
564  61693 is.mark(4);
565  61693 bytes[0] = is.read();
566  61693 bytes[1] = is.read();
567  61693 bytes[2] = is.read();
568  61693 bytes[3] = is.read();
569  61693 is.reset();
570   
571  61693 if (bytes[0] == 0x00 && bytes[1] == 0x3C && bytes[2] == 0x00 && bytes[3] == 0x3F) {
572  5 encoding = UTF_16BE;
573    }
574    else
575  61688 if (bytes[0] == 0x3C && bytes[1] == 0x00 && bytes[2] == 0x3F && bytes[3] == 0x00) {
576  0 encoding = UTF_16LE;
577    }
578    else
579  61688 if (bytes[0] == 0x3C && bytes[1] == 0x3F && bytes[2] == 0x78 && bytes[3] == 0x6D) {
580  61688 encoding = UTF_8;
581    }
582  61693 return encoding;
583    }
584   
585   
586    private static final Pattern ENCODING_PATTERN =
587    Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
588   
589    // returns the encoding declared in the <?xml encoding=...?>, NULL if none
 
590  61693 toggle private static String getXmlProlog(BufferedInputStream is,String guessedEnc) throws IOException {
591  61693 String encoding = null;
592  61693 if (guessedEnc!=null) {
593  61693 byte[] bytes = new byte[BUFFER_SIZE];
594  61693 is.mark(BUFFER_SIZE);
595  61693 int offset = 0;
596  61693 int max = BUFFER_SIZE;
597  61693 int c = is.read(bytes,offset,max);
598  61693 int firstGT = -1;
599  123386 while (c!=-1 && firstGT==-1 && offset< BUFFER_SIZE) {
600  61693 offset += c;
601  61693 max -= c;
602  61693 c = is.read(bytes,offset,max);
603  61693 firstGT = new String(bytes, 0, offset).indexOf(">");
604    }
605  61693 if (firstGT == -1) {
606  0 if (c == -1) {
607  0 throw new IOException("Unexpected end of XML stream");
608    }
609    else {
610  0 throw new IOException("XML prolog or ROOT element not found on first " + offset + " bytes");
611    }
612    }
613  61693 int bytesRead = offset;
614  61693 if (bytesRead>0) {
615  61693 is.reset();
616  61693 Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes,0,firstGT + 1), guessedEnc);
617  61693 BufferedReader bReader = new BufferedReader(reader);
618  61693 StringBuffer prolog = new StringBuffer();
619  61693 String line = bReader.readLine();
620  123386 while (line != null) {
621  61693 prolog.append(line);
622  61693 line = bReader.readLine();
623    }
624  61693 Matcher m = ENCODING_PATTERN.matcher(prolog);
625  61693 if (m.find()) {
626  61693 encoding = m.group(1).toUpperCase();
627  61693 encoding = encoding.substring(1,encoding.length()-1);
628    }
629    }
630    }
631  61693 return encoding;
632    }
633   
634    // indicates if the MIME type belongs to the APPLICATION XML family
 
635  0 toggle private static boolean isAppXml(String mime) {
636  0 return mime!=null &&
637    (mime.equals("application/xml") ||
638    mime.equals("application/xml-dtd") ||
639    mime.equals("application/xml-external-parsed-entity") ||
640    (mime.startsWith("application/") && mime.endsWith("+xml")));
641    }
642   
643    // indicates if the MIME type belongs to the TEXT XML family
 
644  0 toggle private static boolean isTextXml(String mime) {
645  0 return mime!=null &&
646    (mime.equals("text/xml") ||
647    mime.equals("text/xml-external-parsed-entity") ||
648    (mime.startsWith("text/") && mime.endsWith("+xml")));
649    }
650   
// Message templates for the XmlReaderException / IOException raised by
// calculateRawEncoding() ({0} BOM, {1} guess, {2} prolog encoding) and by
// calculateHttpEncoding() ({0} MIME, {1} content-type charset, {2} BOM, {3} guess, {4} prolog encoding).
// NOTE(review): these MessageFormat instances are shared statics and java.text.MessageFormat
// is documented as not synchronized — confirm whether readers are built concurrently.

// raw stream: BOM, guess and prolog disagree
651    private static final MessageFormat RAW_EX_1 = new MessageFormat(
652    "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch");
653   
// raw stream: BOM is none of the handled encodings
654    private static final MessageFormat RAW_EX_2 = new MessageFormat(
655    "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM");
656   
// HTTP: content-type charset is UTF-16BE/LE but a BOM is present
657    private static final MessageFormat HTTP_EX_1 = new MessageFormat(
658    "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL");
659   
// HTTP: content-type charset is UTF-16 but there is no UTF-16 BOM
660    private static final MessageFormat HTTP_EX_2 = new MessageFormat(
661    "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch");
662   
// HTTP: MIME type is neither an application XML nor a text XML type
663    private static final MessageFormat HTTP_EX_3 = new MessageFormat(
664    "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME");
665   
666    }