139 lines
4.4 KiB
Java
139 lines
4.4 KiB
Java
/*
|
|
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
* published by the Free Software Foundation. Oracle designates this
|
|
* particular file as subject to the "Classpath" exception as provided
|
|
* by Oracle in the LICENSE file that accompanied this code.
|
|
*
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
* accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU General Public License version
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
* questions.
|
|
*/
|
|
|
|
package jdk.internal.util.xml.impl;
|
|
|
|
import java.io.Reader;
|
|
import java.io.InputStream;
|
|
import java.io.IOException;
|
|
import java.io.UnsupportedEncodingException;
|
|
|
|
/**
 * UTF-8 transformed UCS-2 character stream reader.
 *
 * <p>This reader converts UTF-8 transformed UCS-2 characters to Java characters.
 * The UCS-2 subset of the UTF-8 transformation is described in RFC 2279,
 * section 2 "UTF-8 definition":
 * <pre>
 * 0000 0000-0000 007F   0xxxxxxx
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 * </pre>
 *
 * <p>The decoder is deliberately lenient: continuation bytes are not validated
 * and an end of stream in the middle of a multi-byte sequence is not detected,
 * so a broken or truncated UTF-8 stream yields an incorrect character instead
 * of an error. Bytes in the range 0x80-0xBF that appear where a lead byte is
 * expected are passed through unchanged.
 */
public class ReaderUTF8 extends Reader {

    /** Message used whenever a four-byte (UCS-4) lead byte is encountered. */
    private static final String UCS4_NOT_SUPPORTED =
            "UTF-32 (or UCS-4) encoding not supported.";

    /** Underlying byte stream; read one byte at a time. */
    private final InputStream is;

    /**
     * Constructor.
     *
     * @param is A byte input stream.
     */
    public ReaderUTF8(InputStream is) {
        this.is = is;
    }

    /**
     * Reads characters into a portion of an array.
     *
     * @param cbuf Destination buffer.
     * @param off Offset at which to start storing characters.
     * @param len Maximum number of characters to read.
     * @return The number of characters stored, or -1 if the end of the stream
     *         was reached before any character could be read. Returns 0 when
     *         {@code len} is not positive.
     * @exception IOException If any IO errors occur.
     * @exception UnsupportedEncodingException If UCS-4 character occur in the stream.
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        int num = 0;
        while (num < len) {
            int lead = is.read();
            if (lead < 0) {
                // End of stream: report what was decoded so far, or EOF if nothing.
                return (num != 0) ? num : -1;
            }
            // Decoded values never exceed 0xFFFF, so the narrowing cast is lossless.
            cbuf[off + num] = (char) decode(lead);
            num++;
        }
        return num;
    }

    /**
     * Reads a single character.
     *
     * @return The character read, as an integer in the range 0 to 65535
     *         (0x00-0xffff), or -1 if the end of the stream has been reached.
     * @exception IOException If any IO errors occur.
     * @exception UnsupportedEncodingException If UCS-4 character occur in the stream.
     */
    @Override
    public int read() throws IOException {
        int lead = is.read();
        if (lead < 0) {
            return -1;
        }
        return decode(lead);
    }

    /**
     * Closes the stream.
     *
     * @exception IOException If any IO errors occur.
     */
    @Override
    public void close() throws IOException {
        is.close();
    }

    /**
     * Decodes one character given its already-read lead byte, pulling any
     * continuation bytes from the underlying stream.
     *
     * @param lead The first byte of the sequence, in the range 0 to 255.
     * @return The decoded character value in the range 0 to 65535.
     * @exception IOException If any IO errors occur.
     * @exception UnsupportedEncodingException If the lead byte starts a
     *            four-byte (UCS-4) sequence.
     */
    private int decode(int lead) throws IOException {
        // The high nibble of the lead byte selects the sequence length.
        switch (lead & 0xf0) {
            case 0xc0:
            case 0xd0:
                // 110xxxxx 10xxxxxx
                return ((lead & 0x1f) << 6) | (is.read() & 0x3f);

            case 0xe0:
                // 1110xxxx 10xxxxxx 10xxxxxx; operands are evaluated left to
                // right, so the middle byte is read before the last one.
                return ((lead & 0x0f) << 12)
                        | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f);

            case 0xf0: // UCS-4 character
                throw new UnsupportedEncodingException(UCS4_NOT_SUPPORTED);

            default:
                // Single-byte value; also covers stray bytes 0x80-0xBF, which
                // are returned as-is rather than rejected.
                return lead;
        }
    }
}
|