feat(jdk8): move files to new folder to avoid resources compiled.

This commit is contained in:
2025-09-07 15:25:52 +08:00
parent 3f0047bf6f
commit 8c35cfb1c0
17415 changed files with 217 additions and 213 deletions

View File

@@ -0,0 +1,344 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import java.util.EventListener;
/**
 * Listener through which every DTD parsing event is reported.
 *
 * <p>The {@code short} constants declared here are the values passed to the
 * content-model related callbacks.</p>
 */
public interface DTDEventListener extends EventListener {

    // ---- content model types (see startContentModel / endContentModel) ----

    /** Content model type: the element has an EMPTY content model. */
    short CONTENT_MODEL_EMPTY = 0;

    /** Content model type: the element has an ANY content model. */
    short CONTENT_MODEL_ANY = 1;

    /** Content model type: the element has a mixed content model. */
    short CONTENT_MODEL_MIXED = 2;

    /** Content model type: the element has a child (element) content model. */
    short CONTENT_MODEL_CHILDREN = 3;

    // ---- attribute "use" values -------------------------------------------
    // Presumably the values passed as attributeUse to attributeDecl; the
    // exact mapping to DTD default declarations is not spelled out here.

    short USE_NORMAL = 0;

    short USE_IMPLIED = 1;

    short USE_FIXED = 2;

    short USE_REQUIRED = 3;

    // ---- connector types (see connector) ----------------------------------

    /** Connector type for a choice, as in {@code (a|b)}. */
    short CHOICE = 0;

    /** Connector type for a sequence, as in {@code (a,b)}. */
    short SEQUENCE = 1;

    // ---- occurrence indicators --------------------------------------------
    // Presumably the values passed as "occurence" to childElement and
    // endModelGroup (spelling kept as declared).

    short OCCURENCE_ZERO_OR_MORE = 0;

    short OCCURENCE_ONE_OR_MORE = 1;

    short OCCURENCE_ZERO_OR_ONE = 2;

    short OCCURENCE_ONCE = 3;

    /**
     * Hands a {@link Locator} to the listener.
     *
     * @param loc the locator; presumably describes where subsequent events
     *            originate, as in SAX (to be confirmed against the parser)
     */
    void setDocumentLocator(Locator loc);

    /**
     * Receive notification of a processing instruction.
     * Processing instructions contain information meaningful
     * to the application.
     *
     * @param target The target of the processing instruction,
     *               which should have meaning to the application.
     * @param data   The instruction itself, which should contain
     *               valid XML characters.
     * @throws SAXException if the listener reports a failure
     */
    void processingInstruction(String target, String data)
            throws SAXException;

    /**
     * Receive notification of a notation declaration.
     * Notation declarations are used by elements and entities
     * for identifying embedded non-XML data.
     *
     * @param name     The notation name, referred to by entities and
     *                 elements.
     * @param publicId The public identifier
     * @param systemId The system identifier
     * @throws SAXException if the listener reports a failure
     */
    void notationDecl(String name, String publicId, String systemId)
            throws SAXException;

    /**
     * Receive notification of an unparsed entity declaration.
     * Unparsed entities are non-XML data.
     *
     * @param name         The name of the unparsed entity.
     * @param publicId     The public identifier
     * @param systemId     The system identifier
     * @param notationName The associated notation
     * @throws SAXException if the listener reports a failure
     */
    void unparsedEntityDecl(String name, String publicId,
                            String systemId, String notationName)
            throws SAXException;

    /**
     * Receive notification of an internal general entity declaration event.
     *
     * @param name  The internal general entity name.
     * @param value The value of the entity, which may include unexpanded
     *              entity references. Character references will have been
     *              expanded.
     * @throws SAXException if the listener reports a failure
     * @see #externalGeneralEntityDecl(String, String, String)
     */
    void internalGeneralEntityDecl(String name, String value)
            throws SAXException;

    /**
     * Receive notification of an external parsed general entity
     * declaration event.
     *
     * <p>If a system identifier is present, and it is a relative URL, the
     * parser will have resolved it fully before passing it through this
     * method to a listener.</p>
     *
     * @param name     The entity name.
     * @param publicId The entity's public identifier, or null if
     *                 none was given.
     * @param systemId The entity's system identifier.
     * @throws SAXException if the listener reports a failure
     * @see #unparsedEntityDecl(String, String, String, String)
     */
    void externalGeneralEntityDecl(String name, String publicId,
                                   String systemId)
            throws SAXException;

    /**
     * Receive notification of an internal parameter entity declaration
     * event.
     *
     * @param name  The internal parameter entity name.
     * @param value The value of the entity, which may include unexpanded
     *              entity references. Character references will have been
     *              expanded.
     * @throws SAXException if the listener reports a failure
     * @see #externalParameterEntityDecl(String, String, String)
     */
    void internalParameterEntityDecl(String name, String value)
            throws SAXException;

    /**
     * Receive notification of an external parameter entity declaration
     * event.
     *
     * <p>If a system identifier is present, and it is a relative URL, the
     * parser will have resolved it fully before passing it through this
     * method to a listener.</p>
     *
     * @param name     The parameter entity name.
     * @param publicId The entity's public identifier, or null if
     *                 none was given.
     * @param systemId The entity's system identifier.
     * @throws SAXException if the listener reports a failure
     * @see #unparsedEntityDecl(String, String, String, String)
     */
    void externalParameterEntityDecl(String name, String publicId,
                                     String systemId)
            throws SAXException;

    /**
     * Receive notification of the beginning of the DTD.
     *
     * @param in Current input entity.
     * @throws SAXException if the listener reports a failure
     * @see #endDTD()
     */
    void startDTD(InputEntity in)
            throws SAXException;

    /**
     * Receive notification of the end of a DTD. The parser will invoke
     * this method only once.
     *
     * @throws SAXException if the listener reports a failure
     * @see #startDTD(InputEntity)
     */
    void endDTD()
            throws SAXException;

    /**
     * Receive notification that a comment has been read.
     *
     * <p>Note that processing instructions are the mechanism designed
     * to hold information for consumption by applications, not comments.
     * XML systems may rely on applications being able to access information
     * found in processing instructions; this is not true of comments, which
     * are typically discarded.</p>
     *
     * @param text the text within the comment delimiters.
     * @throws SAXException if the listener reports a failure
     */
    void comment(String text)
            throws SAXException;

    /**
     * Receive notification of character data.
     *
     * <p>The parser will call this method to report each chunk of
     * character data. SAX parsers may return all contiguous character
     * data in a single chunk, or they may split it into several
     * chunks; however, all of the characters in any single event
     * must come from the same external entity, so that the Locator
     * provides useful information.</p>
     *
     * <p>The application must not attempt to read from the array
     * outside of the specified range.</p>
     *
     * <p>Note that some parsers will report whitespace using the
     * ignorableWhitespace() method rather than this one (validating
     * parsers must do so).</p>
     *
     * @param ch     The characters from the DTD.
     * @param start  The start position in the array.
     * @param length The number of characters to read from the array.
     * @throws SAXException if the listener reports a failure
     * @see #ignorableWhitespace(char[], int, int)
     */
    void characters(char[] ch, int start, int length)
            throws SAXException;

    /**
     * Receive notification of ignorable whitespace in element content.
     *
     * <p>Validating parsers must use this method to report each chunk
     * of ignorable whitespace (see the W3C XML 1.0 recommendation,
     * section 2.10): non-validating parsers may also use this method
     * if they are capable of parsing and using content models.</p>
     *
     * <p>SAX parsers may return all contiguous whitespace in a single
     * chunk, or they may split it into several chunks; however, all of
     * the characters in any single event must come from the same
     * external entity, so that the Locator provides useful
     * information.</p>
     *
     * <p>The application must not attempt to read from the array
     * outside of the specified range.</p>
     *
     * @param ch     The characters from the DTD.
     * @param start  The start position in the array.
     * @param length The number of characters to read from the array.
     * @throws SAXException if the listener reports a failure
     * @see #characters(char[], int, int)
     */
    void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException;

    /**
     * Receive notification that a CDATA section is beginning. Data in a
     * CDATA section is reported through the appropriate event, either
     * <em>characters()</em> or <em>ignorableWhitespace</em>.
     *
     * @throws SAXException if the listener reports a failure
     * @see #endCDATA()
     */
    void startCDATA() throws SAXException;

    /**
     * Receive notification that the CDATA section finished.
     *
     * @throws SAXException if the listener reports a failure
     * @see #startCDATA()
     */
    void endCDATA() throws SAXException;

    /**
     * Receives a fatal parsing error.
     *
     * @param e the exception describing the problem
     * @throws SAXException if the listener chooses to propagate the problem
     */
    void fatalError(SAXParseException e)
            throws SAXException;

    /**
     * Receives a (non-fatal) parsing error.
     *
     * @param e the exception describing the problem
     * @throws SAXException if the listener chooses to propagate the problem
     */
    void error(SAXParseException e) throws SAXException;

    /**
     * Receives a parsing warning.
     *
     * @param err the exception describing the problem
     * @throws SAXException if the listener chooses to propagate the problem
     */
    void warning(SAXParseException err) throws SAXException;

    /**
     * Receives notification that parsing of a content model is beginning.
     *
     * <p>Meaning of {@code contentModelType}:</p>
     * <ul>
     * <li>{@link #CONTENT_MODEL_EMPTY}: this element has an EMPTY content
     * model. This notification will be immediately followed by the
     * corresponding endContentModel.</li>
     * <li>{@link #CONTENT_MODEL_ANY}: this element has an ANY content
     * model. This notification will be immediately followed by the
     * corresponding endContentModel.</li>
     * <li>{@link #CONTENT_MODEL_MIXED}: this element has a mixed content
     * model. #PCDATA will not be reported; each child element will be
     * reported by the mixedElement method.</li>
     * <li>{@link #CONTENT_MODEL_CHILDREN}: this element has a child content
     * model. The actual content model will be reported by the childElement,
     * startModelGroup, endModelGroup, and connector methods. Possible call
     * sequences are:
     * <pre>
     * START       := MODEL_GROUP
     * MODEL_GROUP := startModelGroup TOKEN (connector TOKEN)* endModelGroup
     * TOKEN       := childElement
     *              | MODEL_GROUP
     * </pre></li>
     * </ul>
     *
     * @param elementName      name of the element whose content model is
     *                         going to be defined.
     * @param contentModelType one of the {@code CONTENT_MODEL_*} constants
     * @throws SAXException if the listener reports a failure
     */
    void startContentModel(String elementName, short contentModelType) throws SAXException;

    /**
     * Receives notification that parsing of a content model is finished.
     *
     * @param elementName      name of the element
     * @param contentModelType content model type; presumably the same value
     *                         given to the matching startContentModel call
     * @throws SAXException if the listener reports a failure
     */
    void endContentModel(String elementName, short contentModelType) throws SAXException;

    /**
     * Fired for each entry in an ATTLIST declaration.
     *
     * <p>A DTD allows the same attribute to be declared more than once, and
     * in that case the first declaration wins. The original author believed
     * this method is fired only for the first one, but noted that this was
     * still to be checked.</p>
     *
     * @throws SAXException if the listener reports a failure
     */
    void attributeDecl(String elementName, String attributeName, String attributeType,
                       String[] enumeration, short attributeUse, String defaultValue) throws SAXException;

    /**
     * Reports a child element token of a child content model
     * (see {@link #startContentModel(String, short)}).
     *
     * @param elementName name of the child element
     * @param occurence   presumably one of the {@code OCCURENCE_*} constants
     * @throws SAXException if the listener reports a failure
     */
    void childElement(String elementName, short occurence) throws SAXException;

    /**
     * Receives notification of a child element of a mixed content model.
     * This method is called for each child element.
     *
     * @throws SAXException if the listener reports a failure
     * @see #startContentModel(String, short)
     */
    void mixedElement(String elementName) throws SAXException;

    /**
     * Marks the start of a model group
     * (see {@link #startContentModel(String, short)}).
     *
     * @throws SAXException if the listener reports a failure
     */
    void startModelGroup() throws SAXException;

    /**
     * Marks the end of a model group
     * (see {@link #startContentModel(String, short)}).
     *
     * @param occurence presumably one of the {@code OCCURENCE_*} constants
     * @throws SAXException if the listener reports a failure
     */
    void endModelGroup(short occurence) throws SAXException;

    /**
     * Reports a connector between two tokens of a model group.
     *
     * <p>Connectors in one model group are guaranteed to be the same; in
     * other words, you'll never see an event sequence like (a|b,c).</p>
     *
     * @param connectorType {@link #CHOICE} or {@link #SEQUENCE}.
     * @throws SAXException if the listener reports a failure
     */
    void connector(short connectorType) throws SAXException;
}

View File

@@ -0,0 +1,120 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Do-nothing implementation of {@link DTDEventListener}.
 *
 * <p>Every callback is empty except {@link #fatalError} and {@link #error},
 * which rethrow the exception they are given. Members are listed in the
 * same order as in the interface.</p>
 */
public class DTDHandlerBase implements DTDEventListener {

    // ---- locator and declarations ----

    public void setDocumentLocator(Locator loc) {
        // intentionally empty
    }

    public void processingInstruction(String target, String data) throws SAXException {
        // intentionally empty
    }

    public void notationDecl(String name, String publicId, String systemId)
            throws SAXException {
        // intentionally empty
    }

    public void unparsedEntityDecl(String name, String publicId, String systemId,
                                   String notationName)
            throws SAXException {
        // intentionally empty
    }

    public void internalGeneralEntityDecl(String n, String v) throws SAXException {
        // intentionally empty
    }

    public void externalGeneralEntityDecl(String n, String p, String s) throws SAXException {
        // intentionally empty
    }

    public void internalParameterEntityDecl(String n, String v) throws SAXException {
        // intentionally empty
    }

    public void externalParameterEntityDecl(String n, String p, String s) throws SAXException {
        // intentionally empty
    }

    // ---- DTD boundaries and lexical content ----

    public void startDTD(InputEntity in) throws SAXException {
        // intentionally empty
    }

    public void endDTD() throws SAXException {
        // intentionally empty
    }

    public void comment(String n) throws SAXException {
        // intentionally empty
    }

    public void characters(char[] ch, int start, int length) throws SAXException {
        // intentionally empty
    }

    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        // intentionally empty
    }

    public void startCDATA() throws SAXException {
        // intentionally empty
    }

    public void endCDATA() throws SAXException {
        // intentionally empty
    }

    // ---- error reporting ----

    /** Rethrows the given exception. */
    public void fatalError(SAXParseException e) throws SAXException {
        throw e;
    }

    /** Rethrows the given exception. */
    public void error(SAXParseException e) throws SAXException {
        throw e;
    }

    /** Ignores the warning. */
    public void warning(SAXParseException err) throws SAXException {
        // intentionally empty
    }

    // ---- content models and attributes ----

    public void startContentModel(String elementName, short contentModelType)
            throws SAXException {
        // intentionally empty
    }

    public void endContentModel(String elementName, short contentModelType)
            throws SAXException {
        // intentionally empty
    }

    public void attributeDecl(String elementName, String attributeName, String attributeType,
                              String[] enumeration, short attributeUse, String defaultValue)
            throws SAXException {
        // intentionally empty
    }

    public void childElement(String elementName, short occurence) throws SAXException {
        // intentionally empty
    }

    public void mixedElement(String elementName) throws SAXException {
        // intentionally empty
    }

    public void startModelGroup() throws SAXException {
        // intentionally empty
    }

    public void endModelGroup(short occurence) throws SAXException {
        // intentionally empty
    }

    public void connector(short connectorType) throws SAXException {
        // intentionally empty
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
import java.io.IOException;
/**
 * Package-private {@link IOException} subtype carrying no message and no
 * cause; {@code InputEntity.getc()} throws it when no character is available.
 */
class EndOfInputException extends IOException {

    /** Creates the exception without a detail message. */
    EndOfInputException() {
        super();
    }
}

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
/**
 * Base class for entity declarations as used by the parser.
 * It is a plain holder of package-visible fields.
 *
 * @author David Brownell
 * @author Janet Koenig
 * @version 1.3 00/02/24
 */
class EntityDecl {

    /** Entity name, as in {@code <!ENTITY name ... >}. */
    String name;

    /**
     * Presumably set when the declaration was read from the internal
     * subset; this class itself never assigns it.
     */
    boolean isFromInternalSubset;

    /** Presumably set for parameter entities ("PE"); never assigned here. */
    boolean isPE;
}

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.net.URL;
/**
 * Entity declaration that refers to external content through a system
 * identifier and, optionally, a public identifier.
 */
final class ExternalEntity extends EntityDecl {

    /** System identifier: a resolved URI (not relative). */
    String systemId;

    /** Public identifier, e.g. "-//xyz//....//en". */
    String publicId;

    /** Notation associated with the entity; never assigned in this class. */
    String notation;

    /**
     * Creates an empty declaration.
     *
     * @param in not used by this constructor
     */
    public ExternalEntity(InputEntity in) {
    }

    /**
     * Obtains the input source for this entity.
     *
     * @param r resolver asked first, with this entity's public and system
     *          identifiers
     * @return the source supplied by the resolver, or, when the resolver
     *         returns null, one created by {@code Resolver} from the system
     *         identifier interpreted as a URL
     * @throws IOException  from the resolver, from parsing the system
     *                      identifier as a URL, or from {@code Resolver}
     * @throws SAXException from the resolver
     */
    public InputSource getInputSource(EntityResolver r)
            throws IOException, SAXException {
        InputSource resolved = r.resolveEntity(publicId, systemId);
        if (resolved != null) {
            return resolved;
        }
        // SAX says: if the resolver returns null, use the URI directly
        return Resolver.createInputSource(new URL(systemId), false);
    }
}

View File

@@ -0,0 +1,990 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.Locale;
/**
* This is how the parser talks to its input entities, of all kinds.
* The entities are in a stack.
* <p/>
* <P> For internal entities, the character arrays are referenced here,
* and read from as needed (they're read-only). External entities have
* mutable buffers, that are read into as needed.
* <p/>
* <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
* whether it's in an external (parsed) entity or not. The XML 1.0 spec
* is inconsistent in explaining EOL handling; this is the sensible way.
*
* @author David Brownell
* @author Janet Koenig
* @version 1.4 00/08/05
*/
public class InputEntity {
// Read window into buf: buf[start] is the next character to hand out and
// finish is the end of the available data (for internal entities it is
// set to the array length).
private int start, finish;
// Character buffer. Internal entities reference the caller's array
// (treated as read-only); external entities get a BUFSIZ-sized array.
private char buf [];
// Line counter, starting at 1; only adjusted for external entities.
private int lineNumber = 1;
// Set by getc() after it returned the first half of a surrogate pair.
private boolean returnedFirstHalf = false;
// Set by getc() while it looks ahead past a CR for a following LF.
private boolean maybeInCRLF = false;
// name of entity (never main document or unnamed DTD PE)
private String name;
// Next entity on the entity stack; null for the toplevel document
// (see isDocument()).
private InputEntity next;
// for system and public IDs in diagnostics
private InputSource input;
// this is a buffer; some buffers can be replenished.
// A null reader marks an internal entity (see isInternal()).
private Reader reader;
// NOTE(review): presumably set once the reader has been closed; the code
// that assigns it is not in this part of the file.
private boolean isClosed;
// Listener supplied to getInputEntity().
private DTDEventListener errHandler;
// Locale supplied to getInputEntity().
private Locale locale;
// NOTE(review): rememberedText / startRemember are not used in this part
// of the file; their role should be confirmed against the rest of the class.
private StringBuffer rememberedText;
private int startRemember;
// record if this is a PE, so endParsedEntity won't be called
private boolean isPE;
// InputStreamReader throws an internal per-read exception, so
// we minimize reads. We also add a byte to compensate for the
// "ungetc" byte we keep, so that our downstream reads are as
// nicely sized as we can make them.
final private static int BUFSIZ = 8 * 1024 + 1;
// One-character array holding LF; parsedContent() reports it in place
// of a CR found in an external entity.
final private static char newline [] = {'\n'};
/**
 * Factory for input entities.
 *
 * @param h listener stored as the new entity's error handler
 * @param l locale stored in the new entity
 * @return a new entity that still has to be set up with one of the
 *         {@code init} methods
 */
public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
    final InputEntity entity = new InputEntity();
    entity.locale = l;
    entity.errHandler = h;
    return entity;
}
// Private and empty: in the visible code, instances are created only
// through getInputEntity().
private InputEntity() {
}
/**
 * Predicate: true iff this is an internal entity reader, and so may
 * safely be "popped" as needed. External entities have syntax to uphold;
 * internal parameter entities have at most validity constraints to
 * monitor. Also, only external entities get decent location diagnostics.
 *
 * @return true when this entity has no reader
 */
public boolean isInternal() {
    final boolean hasReader = (reader != null);
    return !hasReader;
}
/**
 * Predicate: true iff this is the toplevel document.
 *
 * @return true when there is no further entity below this one on the stack
 */
public boolean isDocument() {
    return null == next;
}
/**
 * Predicate: true iff this is a PE expansion (so that
 * LexicalEventListener.endParsedEntity won't be called).
 *
 * @return the flag that was passed to {@code init}
 */
public boolean isParameterEntity() {
    return this.isPE;
}
/**
 * Returns the name of the current entity.
 *
 * @return the name given to {@code init}; may be null
 */
public String getName() {
    return this.name;
}
/**
 * Sets this entity up as an external parsed entity.
 *
 * <p>The reader is taken from the source's character stream when there is
 * one. Otherwise an {@code XmlReader} is created over the source's byte
 * stream (with the source's encoding when it declares one), or, when there
 * is no byte stream either, over the stream opened from the source's system
 * identifier interpreted as a URL.</p>
 *
 * @param in    source to read from; also kept for diagnostics
 * @param name  entity name
 * @param stack entity that becomes the next one on the stack, or null
 * @param isPE  whether this is a parameter entity
 * @throws IOException  if opening the URL or creating the reader fails
 * @throws SAXException from the recursion check
 */
public void init(InputSource in, String name, InputEntity stack,
                 boolean isPE)
        throws IOException, SAXException {
    this.input = in;
    this.isPE = isPE;
    this.reader = in.getCharacterStream();

    if (this.reader == null) {
        InputStream bytes = in.getByteStream();
        if (bytes != null) {
            if (in.getEncoding() == null) {
                this.reader = XmlReader.createReader(in.getByteStream());
            } else {
                this.reader = XmlReader.createReader(in.getByteStream(),
                        in.getEncoding());
            }
        } else {
            // no stream of any kind supplied: open the system identifier
            this.reader = XmlReader.createReader(new URL(in.getSystemId())
                    .openStream());
        }
    }

    this.next = stack;
    this.buf = new char[BUFSIZ];
    this.name = name;
    checkRecursion(stack);
}
/**
 * Sets this entity up as an internal parsed entity; the buffer is
 * treated as read-only.
 *
 * @param b     characters of the entity; the array is referenced, not
 *              copied, and its whole length is made available for reading
 * @param name  entity name
 * @param stack entity that becomes the next one on the stack, or null
 * @param isPE  whether this is a parameter entity
 * @throws SAXException from the recursion check
 */
public void init(char[] b, String name, InputEntity stack, boolean isPE)
        throws SAXException {
    this.name = name;
    this.isPE = isPE;
    this.next = stack;
    this.buf = b;
    this.finish = b.length;
    checkRecursion(stack);
}
/**
 * Walks the entity stack, starting at the entry after {@code stack}, and
 * reports fatal error "P-069" for every entry that carries the same name
 * as this entity.
 *
 * @param stack entity below this one on the stack; null means nothing to check
 * @throws SAXException from {@code fatal}
 */
private void checkRecursion(InputEntity stack)
        throws SAXException {
    if (stack != null) {
        InputEntity ancestor = stack.next;
        while (ancestor != null) {
            if (ancestor.name != null && ancestor.name.equals(name)) {
                fatal("P-069", new Object[]{name});
            }
            ancestor = ancestor.next;
        }
    }
}
/**
 * Closes this entity and hands back the next one on the stack.
 * The caller has ensured there's nothing left to read.
 *
 * @return the next entity on the stack; null for the toplevel document
 * @throws IOException from {@code close()}
 */
public InputEntity pop() throws IOException {
    close();
    final InputEntity below = next;
    return below;
}
/**
 * Returns true iff there's no more data to consume.
 *
 * <p>Called to ensure well-formedness of included entities and to pop
 * input entities appropriately; EOF is not always legal. When the buffer
 * is exhausted, a refill is attempted before answering.</p>
 */
public boolean isEOF() throws IOException, SAXException {
    if (start < finish) {
        return false;
    }
    fillbuf();
    return start >= finish;
}
/**
 * Returns the name of the encoding in use, else null; the name
 * returned is in as standard a form as we can get.
 *
 * @return the encoding reported by the reader when it is an
 *         {@code XmlReader} or an {@link InputStreamReader}; null when
 *         there is no reader or it is of any other type
 */
public String getEncoding() {
    String encoding = null;
    // instanceof is false for a null reader, so no separate null check
    if (reader instanceof XmlReader) {
        encoding = ((XmlReader) reader).getEncoding();
    } else if (reader instanceof InputStreamReader) {
        // XXX prefer a java2std() call to normalize names...
        encoding = ((InputStreamReader) reader).getEncoding();
    }
    return encoding;
}
/**
 * Returns the next name char, or NUL ... faster than getc(),
 * and the common "name or nmtoken must be next" case won't
 * need ungetc().
 *
 * @return the next character, consumed, when {@code XmlChars.isNameChar}
 *         accepts it; otherwise 0 with nothing consumed (also when no
 *         data is available after a refill attempt)
 */
public char getNameChar() throws IOException, SAXException {
    if (finish <= start) {
        fillbuf();
    }
    if (finish <= start) {
        return 0;
    }
    final char candidate = buf[start];
    if (!XmlChars.isNameChar(candidate)) {
        return 0;
    }
    start++;
    return candidate;
}
/**
 * gets the next Java character -- might be part of an XML
 * text character represented by a surrogate pair, or be
 * the end of the entity.
 *
 * <p>For external entities a CR, optionally followed by LF, is returned
 * as a single LF. Characters that are neither legal XML characters nor
 * part of a surrogate pair are reported through {@code fatal}.
 *
 * @return the next character
 * @throws EndOfInputException (an IOException) when no character is
 * available even after trying to refill the buffer
 */
public char getc() throws IOException, SAXException {
// refill when the buffer is exhausted
if (finish <= start)
fillbuf();
if (finish > start) {
char c = buf[start++];
// [2] Char ::= #x0009 | #x000A | #x000D
// | [#x0020-#xD7FF]
// | [#xE000-#xFFFD]
// plus surrogate _pairs_ representing [#x10000-#x10ffff]
// the previous call returned a high surrogate, so this character
// has to be a low surrogate; anything else is error P-070
if (returnedFirstHalf) {
if (c >= 0xdc00 && c <= 0xdfff) {
returnedFirstHalf = false;
return c;
} else
fatal("P-070", new Object[]{Integer.toHexString(c)});
}
// ordinary characters are returned as they are
if ((c >= 0x0020 && c <= 0xD7FF)
|| c == 0x0009
// no surrogates!
|| (c >= 0xE000 && c <= 0xFFFD))
return c;
//
// CRLF and CR are both line ends; map both to LF, and
// keep line count correct.
//
else if (c == '\r' && !isInternal()) {
// look one character ahead; maybeInCRLF keeps the nested call
// from counting the LF of a CRLF as a second line
// NOTE(review): when the CR is the last available character the
// nested getc() throws EndOfInputException -- confirm callers
// expect that
maybeInCRLF = true;
c = getc();
if (c != '\n')
ungetc();
maybeInCRLF = false;
lineNumber++;
return '\n';
} else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
if (!isInternal() && !maybeInCRLF)
lineNumber++;
return c;
}
// surrogates...
// high surrogate: return it and remember that its partner must follow
if (c >= 0xd800 && c < 0xdc00) {
returnedFirstHalf = true;
return c;
}
// anything else is reported as error P-071
fatal("P-071", new Object[]{Integer.toHexString(c)});
}
// reached when no data is available (or if fatal() above returns)
throw new EndOfInputException();
}
/**
 * Lookahead of one character: consumes the next character only when it
 * equals {@code c}.
 *
 * @param c the character expected next
 * @return true when the next character matched and was consumed; false
 *         when it differs or when no data is available after a refill
 *         attempt
 */
public boolean peekc(char c) throws IOException, SAXException {
    if (finish <= start) {
        fillbuf();
    }
    if (finish > start && buf[start] == c) {
        start++;
        return true;
    }
    return false;
}
/**
 * Pushes the most recently read character back into the buffer.
 *
 * <p>The original documentation states that two characters of pushback
 * are guaranteed; that guarantee depends on buffer handling outside this
 * method. Pushing back a line end of an external entity also takes the
 * line count back by one; pushing back any other character clears the
 * pending-surrogate flag.</p>
 *
 * @throws InternalError when the read position is already at the start
 *                       of the buffer
 */
public void ungetc() {
    if (start == 0) {
        throw new InternalError("ungetc");
    }
    start--;
    final char pushedBack = buf[start];
    final boolean lineEnd = (pushedBack == '\n' || pushedBack == '\r');
    if (lineEnd) {
        if (!isInternal()) {
            lineNumber--;
        }
    } else {
        // same effect as clearing the flag only when it is set
        returnedFirstHalf = false;
    }
}
/**
 * Consumes optional grammatical whitespace (discarded).
 *
 * <p>Implements production [3] S ::= #20 | #09 | #0D | #0A. For external
 * entities the line count is advanced once per line end, where CR, LF and
 * the pair CR LF each count as one line end.</p>
 *
 * <p>Fix over the previous version: the "saw CR" state now only describes
 * the character immediately before the current one. Previously it was
 * never cleared after the LF of a CRLF pair (or after an intervening space
 * or tab), so inputs such as CR LF LF or CR SPACE LF were counted as a
 * single line.</p>
 *
 * @return true when at least one whitespace character was consumed
 * @throws IOException  from refilling the buffer
 * @throws SAXException from refilling the buffer
 */
public boolean maybeWhitespace()
        throws IOException, SAXException {
    boolean isSpace = false;
    boolean previousWasCR = false;

    for (; ;) {
        if (finish <= start) {
            fillbuf();
        }
        if (finish <= start) {
            return isSpace;
        }

        char c = buf[start];
        if (c != 0x20 && c != 0x09 && c != '\n' && c != '\r') {
            // not whitespace: leave it for the caller
            return isSpace;
        }
        start++;
        isSpace = true;

        //
        // CR, LF are line endings ... CRLF is one, not two!
        //
        if ((c == '\n' || c == '\r') && !isInternal()) {
            boolean secondHalfOfCRLF = (c == '\n' && previousWasCR);
            if (!secondHalfOfCRLF) {
                lineNumber++;
            }
        }
        previousWasCR = (c == '\r');
    }
}
/**
 * normal content; whitespace in markup may be handled
 * specially if the parser uses the content model.
 * <p/>
 * <P> content terminates with markup delimiter characters,
 * namely ampersand (&amp;amp;) and left angle bracket (&amp;lt;).
 * <p/>
 * <P> the document handler's characters() method is called
 * on all the content found
 *
 * @param docHandler receives the character data through characters()
 * @return true if any characters were delivered to docHandler during
 * this call, false otherwise
 */
public boolean parsedContent(DTDEventListener docHandler
/*ElementValidator validator*/)
throws IOException, SAXException {
// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
int first; // first char to return
int last; // last char to return
boolean sawContent; // sent any chars?
char c;
// deliver right out of the buffer, until delimiter, EOF,
// or error, refilling as we go
for (first = last = start, sawContent = false; ; last++) {
// buffer empty?
if (last >= finish) {
// hand over what has been scanned so far before refilling
if (last > first) {
// validator.text ();
docHandler.characters(buf, first, last - first);
sawContent = true;
start = last;
}
if (isEOF()) // calls fillbuf
return sawContent;
// continue scanning at the read position after the refill
first = start;
last = first - 1; // incremented in loop
continue;
}
c = buf[last];
//
// pass most chars through ASAP; this inlines the code of
// [2] !XmlChars.isChar(c) leaving only characters needing
// special treatment ... line ends, surrogates, and:
// 0x0026 == '&'
// 0x003C == '<'
// 0x005D == ']'
// Comparisons ordered for speed on 'typical' text
//
if ((c > 0x005D && c <= 0xD7FF) // a-z and more
|| (c < 0x0026 && c >= 0x0020) // space & punct
|| (c > 0x003C && c < 0x005D) // A-Z & punct
|| (c > 0x0026 && c < 0x003C) // 0-9 & punct
|| c == 0x0009
|| (c >= 0xE000 && c <= 0xFFFD)
)
continue;
// terminate on markup delimiters
if (c == '<' || c == '&')
break;
// count lines
if (c == '\n') {
if (!isInternal())
lineNumber++;
continue;
}
// External entities get CR, CRLF --> LF mapping
// Internal ones got it already, and we can't repeat
// else we break char ref handling!!
if (c == '\r') {
if (isInternal())
continue;
// deliver the text before the CR, then a single LF in its place
docHandler.characters(buf, first, last - first);
docHandler.characters(newline, 0, 1);
sawContent = true;
lineNumber++;
// swallow the LF of a CRLF pair when it is already buffered
if (finish > (last + 1)) {
if (buf[last + 1] == '\n')
last++;
} else { // CR at end of buffer
// XXX case not yet handled: CRLF here will look like two lines
}
first = start = last + 1;
continue;
}
// ']]>' is a WF error -- must fail if we see it
if (c == ']') {
// finish - last is the number of buffered characters from this
// ']' to the end of the data
switch (finish - last) {
// for suspicious end-of-buffer cases, get more data
// into the buffer to rule out this sequence.
case 2:
if (buf[last + 1] != ']')
continue;
// FALLTHROUGH
case 1:
// no reader, or reader already closed: no more data can arrive
if (reader == null || isClosed)
continue;
if (last == first)
throw new InternalError("fillbuf");
last--;
if (last > first) {
// validator.text ();
docHandler.characters(buf, first, last - first);
sawContent = true;
start = last;
}
fillbuf();
first = last = start;
continue;
// otherwise any "]]>" would be buffered, and we can
// see right away if that's what we have
default:
if (buf[last + 1] == ']' && buf[last + 2] == '>')
fatal("P-072", null);
continue;
}
}
// correctly paired surrogates are OK
if (c >= 0xd800 && c <= 0xdfff) {
// the surrogate is the last buffered character: its partner, if
// any, only becomes visible after a refill
if ((last + 1) >= finish) {
if (last > first) {
// validator.text ();
docHandler.characters(buf, first, last - first);
sawContent = true;
start = last + 1;
}
if (isEOF()) { // calls fillbuf
fatal("P-081",
new Object[]{Integer.toHexString(c)});
}
first = start;
last = first;
continue;
}
if (checkSurrogatePair(last))
last++;
else {
last--;
// also terminate on surrogate pair oddities
break;
}
continue;
}
// any remaining character is reported as error P-071
fatal("P-071", new Object[]{Integer.toHexString(c)});
}
// the loop was left at a delimiter (or surrogate oddity): deliver
// whatever was scanned up to that point
if (last == first)
return sawContent;
// validator.text ();
docHandler.characters(buf, first, last - first);
start = last;
return true;
}
/**
* CDATA -- character data, terminated by "]]>" and optionally
* including unescaped markup delimiters (ampersand and left angle
* bracket). This should otherwise be exactly like character data,
* modulo differences in error report details.
* <p/>
* <P> The document handler's characters() or ignorableWhitespace()
* methods are invoked on all the character data found
*
* @param docHandler gets callbacks for character data
* @param ignorableWhitespace if true, whitespace characters will
* be reported using docHandler.ignorableWhitespace(); implicitly,
* non-whitespace characters will cause validation errors
* @param whitespaceInvalidMessage if non-null, the ID of the message used
* to report a validity error when ignorable whitespace is found; the
* whitespace is still delivered through the callback as well
*/
public boolean unparsedContent(DTDEventListener docHandler,
/*ElementValidator validator,*/
boolean ignorableWhitespace,
String whitespaceInvalidMessage)
throws IOException, SAXException {
// Parses one CDATA section at the current position. Returns false
// (before any callback is made) when the input does not continue with
// "![CDATA["; otherwise brackets the reported text with startCDATA() /
// endCDATA() and returns true once the closing "]]>" has been consumed.
// [18] CDSect ::= CDStart CData CDEnd
// [19] CDStart ::= '<![CDATA['
// [20] CData ::= (Char* - (Char* ']]>' Char*))
// [21] CDEnd ::= ']]>'
// caller peeked the leading '<' ...
if (!peek("![CDATA[", null))
return false;
docHandler.startCDATA();
// only a literal ']]>' stops this ...
int last;
// Each pass of the outer loop scans what is currently buffered, reports
// it, and (unless the terminator was found) asks for more input.
for (; ;) { // until ']]>' seen
boolean done = false;
char c;
// don't report ignorable whitespace as "text" for
// validation purposes.
// 'white' stays true only while every character scanned since the last
// report is whitespace AND the caller asked for ignorable whitespace.
boolean white = ignorableWhitespace;
for (last = start; last < finish; last++) {
c = buf[last];
//
// Reject illegal characters.
//
if (!XmlChars.isChar(c)) {
white = false;
if (c >= 0xd800 && c <= 0xdfff) {
// checkSurrogatePair() returns true for a valid pair, raises a
// fatal error for an invalid one, and returns false only when
// the second half is not in the buffer yet.
if (checkSurrogatePair(last)) {
last++;
continue;
} else {
// Back up and leave the scan so the outer loop can flush and
// refill.
// NOTE(review): if the surrogate is the first unreported
// character (last == start), this leaves last == start - 1 and
// the flush below is called with a length of -1 -- confirm
// whether that case can occur and is handled by the listener.
last--;
break;
}
}
fatal("P-071", new Object[]
{Integer.toHexString(buf[last])});
}
if (c == '\n') {
if (!isInternal())
lineNumber++;
continue;
}
if (c == '\r') {
// As above, we can't repeat CR/CRLF --> LF mapping
if (isInternal())
continue;
// Report the text before the CR, then a single newline in place
// of the CR (or CRLF) itself.
if (white) {
if (whitespaceInvalidMessage != null)
errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
whitespaceInvalidMessage), null));
docHandler.ignorableWhitespace(buf, start,
last - start);
docHandler.ignorableWhitespace(newline, 0, 1);
} else {
// validator.text ();
docHandler.characters(buf, start, last - start);
docHandler.characters(newline, 0, 1);
}
lineNumber++;
if (finish > (last + 1)) {
// swallow the LF of a CRLF pair
if (buf[last + 1] == '\n')
last++;
} else { // CR at end of buffer
// XXX case not yet handled ... as above
}
// the next report starts after the line end just handled
start = last + 1;
continue;
}
if (c != ']') {
if (c != ' ' && c != '\t')
white = false;
continue;
}
// c is ']': it ends the section only as part of a literal "]]>".
if ((last + 2) < finish) {
if (buf[last + 1] == ']' && buf[last + 2] == '>') {
done = true;
break;
}
white = false;
continue;
} else {
// Fewer than two characters follow in the buffer, so "]]>" cannot
// be ruled in or out yet; stop here and let the outer loop refill.
//last--;
break;
}
}
// Flush everything scanned in this pass, i.e. buf[start .. last-1].
if (white) {
if (whitespaceInvalidMessage != null)
errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
whitespaceInvalidMessage), null));
docHandler.ignorableWhitespace(buf, start, last - start);
} else {
// validator.text ();
docHandler.characters(buf, start, last - start);
}
if (done) {
// skip the three characters of "]]>"
start = last + 3;
break;
}
start = last;
// No terminator yet: running out of input inside a CDATA section is
// reported as fatal error P-073.
if (isEOF())
fatal("P-073", null);
}
docHandler.endCDATA();
return true;
}
// returns false (so the caller can back up) when the pair would run past the end of the buffer
/**
 * Checks that the character at {@code offset} and the one after it form a
 * high-surrogate/low-surrogate pair.
 *
 * @param offset index in {@code buf} of the first half of the pair
 * @return true for a well-formed pair; false when the second half lies at
 *         or beyond {@code finish} and so cannot be examined yet
 * @throws SAXException fatal error P-074 when both characters are buffered
 *         but do not form a valid pair
 */
private boolean checkSurrogatePair(int offset)
        throws SAXException {
    final int partner = offset + 1;
    if (partner >= finish) {
        return false;
    }

    final char high = buf[offset];
    final char low = buf[partner];
    if (Character.isHighSurrogate(high) && Character.isLowSurrogate(low)) {
        return true;
    }

    Object[] details = {
        Integer.toHexString(high & 0x0ffff),
        Integer.toHexString(low & 0x0ffff)
    };
    fatal("P-074", details);
    // fatal() always throws; this line only satisfies the compiler
    return false;
}
/**
* whitespace in markup (flagged to app, discardable)
* <p/>
* <P> the document handler's ignorableWhitespace() method
* is called on all the whitespace found
*/
public boolean ignorableWhitespace(DTDEventListener handler)
        throws IOException, SAXException {
    // [3] S ::= #20 | #09 | #0D | #0A
    boolean sawSpace = false;
    // index of the first whitespace character not yet reported
    int pending = start;

    while (true) {
        // Buffer exhausted: report what was collected, then try to refill.
        if (finish <= start) {
            if (sawSpace) {
                handler.ignorableWhitespace(buf, pending, start - pending);
            }
            fillbuf();
            pending = start;
        }
        // Still nothing to read after the refill attempt.
        if (finish <= start) {
            return sawSpace;
        }

        final char c = buf[start++];

        if (c == '\n') {
            if (!isInternal()) {
                lineNumber++;
            }
            // XXX handles Macintosh line endings wrong
            sawSpace = true;
        } else if (c == 0x09 || c == 0x20) {
            sawSpace = true;
        } else if (c == '\r') {
            sawSpace = true;
            if (!isInternal()) {
                lineNumber++;
            }
            // Report the run before the CR, then one newline standing in
            // for the CR (or CRLF when the LF is already buffered).
            handler.ignorableWhitespace(buf, pending, (start - 1) - pending);
            handler.ignorableWhitespace(newline, 0, 1);
            if (start < finish && buf[start] == '\n') {
                ++start;
            }
            pending = start;
        } else {
            // Not whitespace: push the character back and report the run.
            ungetc();
            if (sawSpace) {
                handler.ignorableWhitespace(buf, pending, start - pending);
            }
            return sawSpace;
        }
    }
}
/**
* returns false iff 'next' string isn't as provided,
* else skips that text and returns true.
* <p/>
* <P> NOTE: two alternative string representations are
* both passed in, since one is faster.
*/
public boolean peek(String next, char chars [])
        throws IOException, SAXException {
    // chars, when supplied, takes precedence over next
    final boolean useArray = (chars != null);
    final int wanted = useArray ? chars.length : next.length();

    // buffer should hold the whole thing ... give it a
    // chance for the end-of-buffer case and cope with EOF
    // by letting fillbuf compact and fill
    if (finish <= start || (finish - start) < wanted) {
        fillbuf();
    }

    // can't peek past EOF
    if (finish <= start) {
        return false;
    }

    // compare as much of the string as the buffer currently holds
    int matched = 0;
    while (matched < wanted && (start + matched) < finish) {
        char expected = useArray ? chars[matched] : next.charAt(matched);
        if (buf[start + matched] != expected) {
            return false;
        }
        matched++;
    }

    // everything matched: consume it
    if (matched >= wanted) {
        start += wanted;
        return true;
    }

    // Only a prefix was available. Without a live reader there is no
    // more data to come, so this cannot match.
    if (reader == null || isClosed) {
        return false;
    }

    //
    // This diagnostic "knows" that the only way big strings would
    // fail to be peeked is where it's a symbol ... e.g. for an
    // </EndTag> construct. That knowledge could also be applied
    // to get rid of the symbol length constraint, since having
    // the wrong symbol is a fatal error anyway ...
    //
    if (wanted > buf.length) {
        fatal("P-077", new Object[]{new Integer(buf.length)});
    }

    // give fillbuf another chance to read, then start over
    fillbuf();
    return peek(next, chars);
}
//
// Support for reporting the internal DTD subset, so <!DOCTYPE...>
// declarations can be recreated. This is collected as a single
// string; such subsets are normally small, and many applications
// don't even care about this.
//
/**
 * Marks the current read position as the beginning of the text to be
 * returned later by {@code rememberText()}.
 *
 * @throws InternalError if a remembering session is already in progress
 *         (a non-zero {@code startRemember})
 */
public void startRemembering() {
    if (startRemember == 0) {
        startRemember = start;
        return;
    }
    throw new InternalError();
}
/**
 * Returns the text read since {@code startRemembering()} and ends the
 * remembering session (both bookkeeping fields are reset).
 *
 * @return the remembered text
 */
public String rememberText() {
    final int tailLength = start - startRemember;
    final String text;

    if (rememberedText == null) {
        // Nothing was spilled to a side buffer: the text is one slice of buf.
        text = new String(buf, startRemember, tailLength);
    } else {
        // If the internal subset crossed a buffer boundary, the earlier
        // part sits in a temporary buffer; add the part still in buf.
        rememberedText.append(buf, startRemember, tailLength);
        text = rememberedText.toString();
    }

    rememberedText = null;
    startRemember = 0;
    return text;
}
/**
 * Finds the entity whose location should be reported: the first entity,
 * starting with this one and following {@code next}, that has a non-null
 * {@code input}. Falls back to this entity when the chain has none.
 */
private InputEntity getTopEntity() {
    // don't report locations within internal entities!
    for (InputEntity candidate = this; candidate != null; candidate = candidate.next) {
        if (candidate.input != null) {
            return candidate;
        }
    }
    return this;
}
/**
* Returns the public ID of this input source, if known
*/
public String getPublicId() {
    InputEntity where = getTopEntity();
    if (where != this)
        return where.getPublicId();
    // getTopEntity() also answers "this" when no entity in the chain has
    // an input source; the ID is unknown then, so report null instead of
    // dereferencing a null input.
    return (input == null) ? null : input.getPublicId();
}
/**
* Returns the system ID of this input source, if known
*/
public String getSystemId() {
    InputEntity where = getTopEntity();
    if (where != this)
        return where.getSystemId();
    // getTopEntity() also answers "this" when no entity in the chain has
    // an input source; the ID is unknown then, so report null instead of
    // dereferencing a null input.
    return (input == null) ? null : input.getSystemId();
}
/**
* Returns the current line number in this input source
*/
public int getLineNumber() {
    // Lines are counted by the entity that getTopEntity() selects; use our
    // own counter only when that entity is this one.
    final InputEntity top = getTopEntity();
    return (top == this) ? lineNumber : top.getLineNumber();
}
/**
* returns -1; maintaining column numbers hurts performance
*/
public int getColumnNumber() {
// The same constant is returned on every call; nothing in this method
// reads any column state.
return -1; // not maintained (speed)
}
//
// n.b. for non-EOF end-of-buffer cases, reader should return
// at least a handful of bytes so various lookaheads behave.
//
// two character pushback exists except at first; characters
// represented by surrogate pairs can't be pushed back (they'd
// only be in character data anyway).
//
// DTD exception thrown on char conversion problems; line number
// will be low, as a rule.
//
private void fillbuf() throws IOException, SAXException {
// Moves the unread part of buf to the front of the array and reads more
// characters from the reader into the space after it. Closes this entity
// when the reader reports end of input (a negative count).
// don't touch fixed buffers, that'll usually
// change entity values (and isn't needed anyway)
// likewise, ignore closed streams
if (reader == null || isClosed)
return;
// if remembering DTD text, copy!
// (the characters from startRemember up to the read position are saved
// now, before the buffer contents are shifted below)
if (startRemember != 0) {
if (rememberedText == null)
rememberedText = new StringBuffer(buf.length);
rememberedText.append(buf, startRemember,
start - startRemember);
}
// When something has already been consumed, keep the character just
// before the read position as well (the "extra pushback" character).
boolean extra = (finish > 0) && (start > 0);
int len;
if (extra) // extra pushback
start--;
// shift buf[start .. finish-1] down to index 0
len = finish - start;
System.arraycopy(buf, start, buf, 0, len);
start = 0;
finish = len;
try {
// fill whatever room is left after the retained characters
len = buf.length - len;
len = reader.read(buf, finish, len);
} catch (UnsupportedEncodingException e) {
fatal("P-075", new Object[]{e.getMessage()});
} catch (CharConversionException e) {
fatal("P-076", new Object[]{e.getMessage()});
}
if (len >= 0)
finish += len;
else
close();
// step back over the retained pushback character, so the read position
// is index 1 in that case
if (extra) // extra pushback
start++;
// Text before the read position was saved above, so remembering resumes
// at the new read position, which is 1 when a pushback character was kept.
if (startRemember != 0)
// assert extra == true
startRemember = 1;
}
/**
 * Closes the underlying reader, if there is one and it is still open, and
 * marks this entity as closed.
 *
 * <P> A failure while closing the reader is deliberately ignored (closing
 * is best effort), but the entity is marked closed regardless, so later
 * calls never try to read from or re-close a reader in an unknown state.
 */
public void close() {
    try {
        if (reader != null && !isClosed)
            reader.close();
    } catch (IOException ignored) {
        /* best effort: nothing useful can be done here */
    } finally {
        isClosed = true;
    }
}
/**
 * Reports a non-recoverable error and aborts parsing of this entity.
 * The entity is closed, the error handler is notified, and the same
 * exception is then thrown, so this method never returns normally.
 *
 * @param messageId key of the localized message to report
 * @param params    message parameters, or null when there are none
 * @throws SAXException always (the exception given to the error handler,
 *         or whatever the handler itself throws)
 */
private void fatal(String messageId, Object params [])
        throws SAXException {
    String text = DTDParser.messages.getMessage(locale, messageId, params);
    SAXParseException failure = new SAXParseException(text, null);

    // not continuable ... e.g. WF errors
    close();
    errHandler.fatalError(failure);
    throw failure;
}
}

View File

@@ -0,0 +1,36 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
/**
 * An entity declaration whose value is held directly as characters.
 */
final class InternalEntity extends EntityDecl {

    /** The entity's value; the array given to the constructor is stored as is, not copied. */
    char[] buf;

    /**
     * @param name  entity name, stored in the {@code name} field (not
     *              declared in this class, so presumably inherited from
     *              EntityDecl)
     * @param value the entity's value characters
     */
    InternalEntity(String name, char[] value) {
        this.name = name;
        buf = value;
    }
}

View File

@@ -0,0 +1,516 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
import java.io.InputStream;
import java.text.FieldPosition;
import java.text.MessageFormat;
import java.util.Hashtable;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
/**
* This class provides support for multi-language string lookup, as needed
* to localize messages from applications supporting multiple languages
* at the same time. One class of such applications is network services,
* such as HTTP servers, which talk to clients who may not be from the
* same locale as the server. This class supports a form of negotiation
* for the language used in presenting a message from some package, where
* both user (client) preferences and application (server) support are
* accounted for when choosing locales and formatting messages.
* <p/>
* <P> Each package should have a singleton package-private message catalog
* class. This ensures that the correct class loader will always be used to
* access message resources, and minimizes use of memory: <PRE>
* package <em>some.package</em>;
* <p/>
* // "foo" might be public
* class foo {
* ...
* // package private
* static final Catalog messages = new Catalog ();
* static final class Catalog extends MessageCatalog {
* Catalog () { super (Catalog.class); }
* }
* ...
* }
* </PRE>
* <p/>
* <P> Messages for a known client could be generated using code
* something like this: <PRE>
* String clientLanguages [];
* Locale clientLocale;
* String clientMessage;
* <p/>
* // client languages will probably be provided by client,
* // e.g. by an HTTP/1.1 "Accept-Language" header.
* clientLanguages = new String [] { "en-ca", "fr-ca", "ja", "zh" };
* clientLocale = foo.messages.chooseLocale (clientLanguages);
* clientMessage = foo.messages.getMessage (clientLocale,
* "fileCount",
* new Object [] { new Integer (numberOfFiles) }
* );
* </PRE>
* <p/>
* <P> At this time, this class does not include functionality permitting
* messages to be passed around and localized after-the-fact. The consequence
* of this is that the locale for messages must be passed down through layers
* which have no normal reason to support such passdown, or else the system
* default locale must be used instead of the one the client needs.
* <p/>
* <P> <hr> The following guidelines should be used when constructing
* multi-language applications: <OL>
* <p/>
* <LI> Always use <a href=#chooseLocale>chooseLocale</a> to select the
* locale you pass to your <code>getMessage</code> call. This lets your
* applications use IETF standard locale names, and avoids needless
* use of system defaults.
* <p/>
* <LI> The localized messages for a given package should always go in
* a separate <em>resources</em> sub-package. There are security
* implications; see below.
* <p/>
* <LI> Make sure that a language name is included in each bundle name,
* so that the developer's locale will not be inadvertently used. That
* is, don't create defaults like <em>resources/Messages.properties</em>
* or <em>resources/Messages.class</em>, since ResourceBundle will choose
* such defaults rather than giving software a chance to choose a more
* appropriate language for its messages. Your message bundles should
* have names like <em>Messages_en.properties</em> (for the "en", or
* English, language) or <em>Messages_ja.class</em> ("ja" indicates the
* Japanese language).
* <p/>
* <LI> Only use property files for messages in languages which can
* be limited to the ISO Latin/1 (8859-1) characters supported by the
* property file format. (This is mostly Western European languages.)
* Otherwise, subclass ResourceBundle to provide your messages; it is
* simplest to subclass <code>java.util.ListResourceBundle</code>.
* <p/>
* <LI> Never use another package's message catalog or resource bundles.
* It should not be possible for a change internal to one package (such
* as eliminating or improving messages) to break another package.
* <p/>
* </OL>
* <p/>
* <P> The "resources" sub-package can be treated separately from the
* package with which it is associated. That main package may be sealed
* and possibly signed, preventing other software from adding classes to
* the package which would be able to access methods and data which are
* not designed to be publicly accessible. On the other hand, resources
* such as localized messages are often provided after initial product
* shipment, without a full release cycle for the product. Such files
* (text and class files) need to be added to some package. Since they
* should not be added to the main package, the "resources" subpackage is
* used without risking the security or integrity of that main package
* as distributed in its JAR file.
*
* @author David Brownell
* @version 1.1, 00/08/05
* @see java.util.Locale
* @see java.util.ListResourceBundle
* @see java.text.MessageFormat
*/
// leave this as "abstract" -- each package needs its own subclass,
// else it's not always going to be using the right class loader.
abstract public class MessageCatalog {

    /** Base name of the resource bundles: "[package.]resources.[bundle]". */
    private String bundleName;

    /**
     * Cache for {@link #isLocaleSupported}: the key is a language or locale
     * name, the value says whether a bundle was found for it.
     */
    private Hashtable<String, Boolean> cache = new Hashtable<String, Boolean>(5);

    /**
     * Create a message catalog for use by classes in the same package
     * as the specified class.  This uses <em>Messages</em> resource
     * bundles in the <em>resources</em> sub-package of class passed as
     * a parameter.
     *
     * @param packageMember Class whose package has localized messages
     */
    protected MessageCatalog(Class<?> packageMember) {
        this(packageMember, "Messages");
    }

    /**
     * Create a message catalog for use by classes in the same package
     * as the specified class.  This uses the specified resource
     * bundle name in the <em>resources</em> sub-package of class passed
     * as a parameter; for example, <em>resources.Messages</em>.
     *
     * @param packageMember Class whose package has localized messages
     * @param bundle        Name of a group of resource bundles
     */
    private MessageCatalog(Class<?> packageMember, String bundle) {
        String className = packageMember.getName();
        int index = className.lastIndexOf('.');
        // "ClassName" has no package prefix; "some.package.ClassName" does
        String prefix = (index == -1) ? "" : className.substring(0, index) + ".";
        bundleName = prefix + "resources." + bundle;
    }

    /**
     * Get a message localized to the specified locale.  The locale is
     * normally that of the client of a service, chosen with knowledge that
     * both the client and this server support that locale.  When the locale
     * is null the default locale is used.  When <code>ResourceBundle</code>
     * finds no bundle for the locale, the English bundle is tried instead.
     *
     * @param locale    The locale of the message to use.  If this is null,
     *                  the default locale will be used.
     * @param messageId The ID of the message to use.
     * @return The localized message.
     * @throws MissingResourceException if no bundle can be found at all, or
     *         the bundle has no entry for <code>messageId</code>
     */
    public String getMessage(Locale locale,
                             String messageId) {
        // cope with unsupported locale...
        if (locale == null)
            locale = Locale.getDefault();
        return findBundle(locale).getString(messageId);
    }

    /**
     * Format a message localized to the specified locale.  The locale is
     * normally that of the client of a service, chosen with knowledge that
     * both the client and this server support that locale.  When the locale
     * is null the default locale is used.  When <code>ResourceBundle</code>
     * finds no bundle for the locale, the English bundle is tried instead.
     *
     * @param locale     The locale of the message to use.  If this is null,
     *                   the default locale will be used.
     * @param messageId  The ID of the message format to use.
     * @param parameters Used when formatting the message.  Objects in
     *                   this list are turned to strings if they are not Strings, Numbers,
     *                   or Dates (that is, if MessageFormat would treat them as errors);
     *                   null entries are shown as "(null)".  The array itself is never
     *                   modified.  If the array is null, the message is returned unformatted.
     * @return The formatted, localized message.
     * @throws MissingResourceException if no bundle can be found at all, or
     *         the bundle has no entry for <code>messageId</code>
     * @see java.text.MessageFormat
     */
    public String getMessage(Locale locale,
                             String messageId,
                             Object parameters []) {
        if (parameters == null)
            return getMessage(locale, messageId);

        // since most messages won't be tested (sigh), be friendly to
        // the inevitable developer errors of passing random data types
        // to the message formatting code.  Work on a copy so the caller's
        // array is left untouched.
        Object[] args = new Object[parameters.length];
        for (int i = 0; i < parameters.length; i++) {
            Object p = parameters[i];
            if (p instanceof String
                    || p instanceof Number
                    || p instanceof java.util.Date)
                args[i] = p;
            else if (p == null)
                args[i] = "(null)";
            else
                args[i] = p.toString();
        }

        // similarly, cope with unsupported locale...
        if (locale == null)
            locale = Locale.getDefault();

        // Pass the locale to the constructor: calling setLocale() after the
        // pattern has been applied does not affect subformats that were
        // already created for typed elements such as {0,number}.
        MessageFormat format =
                new MessageFormat(findBundle(locale).getString(messageId), locale);

        // return the formatted message
        StringBuffer result = new StringBuffer();
        format.format(args, result, new FieldPosition(0));
        return result.toString();
    }

    /**
     * Looks up this catalog's bundle for the locale, trying the English
     * bundle when the regular ResourceBundle search finds nothing.
     */
    private ResourceBundle findBundle(Locale locale) {
        try {
            return ResourceBundle.getBundle(bundleName, locale);
        } catch (MissingResourceException e) {
            return ResourceBundle.getBundle(bundleName, Locale.ENGLISH);
        }
    }

    /**
     * Chooses a client locale to use, using the first language specified in
     * the list that is supported by this catalog.  If none of the specified
     * languages is supported, a null value is returned.  Such a list of
     * languages might be provided in an HTTP/1.1 "Accept-Language" header
     * field, or through some other content negotiation mechanism.
     * <p/>
     * <P> The language specifiers recognized are RFC 1766 style ("fr" for
     * all French, "fr-ca" for Canadian French), although only the strict
     * ISO subset (two letter language and country specifiers) is currently
     * supported.  Java-style locale strings ("fr_CA") are also supported.
     * Null entries and entries of any other length are ignored.
     *
     * @param languages Array of language specifiers, ordered with the most
     *                  preferable one at the front.  For example, "en-ca" then "fr-ca",
     *                  followed by "zh_CN".  May be null.  The array is not modified.
     * @return The most preferable supported locale, or null.
     * @see java.util.Locale
     */
    public Locale chooseLocale(String languages []) {
        String[] candidates = canonicalize(languages);
        if (candidates != null) {
            for (int i = 0; i < candidates.length; i++) {
                if (isLocaleSupported(candidates[i]))
                    return getLocale(candidates[i]);
            }
        }
        return null;
    }

    //
    // Canonicalizes the RFC 1766 style language strings ("en-in") to
    // match standard Java usage ("en_IN"), removing null entries and
    // strings that don't use two character ISO language and country
    // codes.  Avoids all memory allocations possible, so that if the
    // strings passed in are just lowercase ISO codes (a common case)
    // the input is returned.  The input array is never modified.
    //
    private String[] canonicalize(String languages []) {
        if (languages == null)
            return languages;

        String[] result = languages;    // cloned lazily, on the first change
        int dropped = 0;

        for (int i = 0; i < languages.length; i++) {
            String lang = languages[i];
            int len = (lang == null) ? -1 : lang.length();
            String canonical;

            if (len == 2) {
                // language code; lower-cased independently of the default
                // locale (e.g. "IT" must not become a dotless-i form)
                canonical = lang.toLowerCase(Locale.ENGLISH);
            } else if (len == 5) {
                // language_country ... fixup case, force "_"
                char buf [] = new char[5];
                buf[0] = Character.toLowerCase(lang.charAt(0));
                buf[1] = Character.toLowerCase(lang.charAt(1));
                buf[2] = '_';
                buf[3] = Character.toUpperCase(lang.charAt(3));
                buf[4] = Character.toUpperCase(lang.charAt(4));
                canonical = new String(buf);
            } else {
                // no RFC1766 extensions allowed; "zh" and "zh-tw" (etc) are
                // OK as are regular locale names with no variant ("de_CH").
                canonical = null;
                dropped++;
            }

            boolean changed = (canonical == null)
                    ? (lang != null)
                    : !canonical.equals(lang);
            if (changed) {
                if (result == languages)
                    result = (String[]) languages.clone();
                result[i] = canonical;
            }
        }

        // purge any shadows of deleted entries
        if (dropped != 0) {
            String[] kept = new String[result.length - dropped];
            int next = 0;
            for (int i = 0; i < result.length; i++) {
                if (result[i] != null)
                    kept[next++] = result[i];
            }
            result = kept;
        }
        return result;
    }

    //
    // Returns a locale object for the specified locale name, using the
    // JDK's predefined constants for some common languages to reduce the
    // needless allocation of memory.
    //
    private Locale getLocale(String localeName) {
        String language, country;
        int index = localeName.indexOf('_');

        if (index == -1) {
            //
            // Special case the builtin JDK languages
            //
            if (localeName.equals("de"))
                return Locale.GERMAN;
            if (localeName.equals("en"))
                return Locale.ENGLISH;
            if (localeName.equals("fr"))
                return Locale.FRENCH;
            if (localeName.equals("it"))
                return Locale.ITALIAN;
            if (localeName.equals("ja"))
                return Locale.JAPANESE;
            if (localeName.equals("ko"))
                return Locale.KOREAN;
            if (localeName.equals("zh"))
                return Locale.CHINESE;

            language = localeName;
            country = "";
        } else {
            if (localeName.equals("zh_CN"))
                return Locale.SIMPLIFIED_CHINESE;
            if (localeName.equals("zh_TW"))
                return Locale.TRADITIONAL_CHINESE;

            //
            // JDK also has constants for countries:  en_GB, en_US, en_CA,
            // fr_FR, fr_CA, de_DE, ja_JP, ko_KR.  We don't use those.
            //
            language = localeName.substring(0, index);
            country = localeName.substring(index + 1);
        }
        return new Locale(language, country);
    }

    /**
     * Returns true iff the specified locale has explicit language support.
     * For example, the traditional Chinese locale "zh_TW" has such support
     * if there are message bundles suffixed with either "zh_TW" or "zh".
     * <p/>
     * <P> This method is used to bypass part of the search path mechanism
     * of the <code>ResourceBundle</code> class, specifically the parts which
     * force use of default locales and bundles.  Such bypassing is required
     * in order to enable use of a client's preferred languages.  Following
     * the above example, if a client prefers "zh_TW" but can also accept
     * "ja", this method would be used to detect that there are no "zh_TW"
     * resource bundles and hence that "ja" messages should be used.  This
     * bypasses the ResourceBundle mechanism which will return messages in
     * some other locale (picking some hard-to-anticipate default) instead
     * of reporting an error and letting the client choose another locale.
     *
     * @param localeName A standard Java locale name, using two character
     *                   language codes optionally suffixed by country codes.
     * @return True iff the language of that locale is supported.
     * @see java.util.Locale
     */
    public boolean isLocaleSupported(String localeName) {
        //
        // Use previous results if possible.  We expect that the codebase
        // is immutable, so we never worry about changing the cache.
        //
        Boolean known = cache.get(localeName);
        if (known != null)
            return known.booleanValue();

        //
        // Try "language_country_variant", then "language_country",
        // then finally "language" ... assuming the longest locale name
        // is passed.  If not, we'll try fewer options.
        //
        final String requested = localeName;
        ClassLoader loader = null;

        for (; ;) {
            String name = bundleName + "_" + localeName;

            // look up classes ...
            try {
                Class.forName(name);
                return remember(requested, localeName, true);
            } catch (Exception ignored) {
                // no usable bundle class under this name; try a property file
            }

            // ... then property files (only for ISO Latin/1 messages)
            InputStream in;

            if (loader == null)
                loader = getClass().getClassLoader();

            name = name.replace('.', '/') + ".properties";
            if (loader == null)
                in = ClassLoader.getSystemResourceAsStream(name);
            else
                in = loader.getResourceAsStream(name);
            if (in != null) {
                // only the existence matters; don't leak the stream
                try {
                    in.close();
                } catch (java.io.IOException ignored) {
                    // the resource exists, which is all we needed to know
                }
                return remember(requested, localeName, true);
            }

            // strip the last component ("_variant", then "_country")
            int index = localeName.lastIndexOf('_');

            if (index > 0)
                localeName = localeName.substring(0, index);
            else
                break;
        }

        //
        // If we got this far, we failed.  Remember for later.
        //
        return remember(requested, localeName, false);
    }

    /**
     * Caches a lookup result under both the name the caller asked about and
     * the (possibly shortened) name that was probed last, then returns it.
     */
    private boolean remember(String requested, String probed, boolean supported) {
        Boolean value = supported ? Boolean.TRUE : Boolean.FALSE;
        cache.put(requested, value);
        cache.put(probed, value);
        return supported;
    }
}

View File

@@ -0,0 +1,448 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Hashtable;
/**
* This entity resolver class provides a number of utilities which can help
* management of external parsed entities in XML. These are commonly used
* to hold markup declarations that are to be used as part of a Document
* Type Declaration (DTD), or to hold text marked up with XML.
* <p/>
* <P> Features include: <UL>
* <p/>
* <LI> Static factory methods are provided for constructing SAX InputSource
* objects from Files, URLs, or MIME objects. This eliminates a class of
* error-prone coding in applications.
* <p/>
* <LI> Character encodings for XML documents are correctly supported: <UL>
* <p/>
* <LI> The encodings defined in the RFCs for MIME content types
* (2046 for general MIME, and 2376 for XML in particular), are
* supported, handling <em>charset=...</em> attributes and accepting
* content types which are known to be safe for use with XML;
* <p/>
* <LI> The character encoding autodetection algorithm identified
* in the XML specification is used, and leverages all of
* the JDK 1.1 (and later) character encoding support.
* <p/>
* <LI> The use of MIME typing may optionally be disabled, forcing the
* use of autodetection, to support web servers which don't correctly
* report MIME types for XML. For example, they may report text that
* is encoded in EUC-JP as being US-ASCII text, leading to fatal
* errors during parsing.
* <p/>
* <LI> The InputSource objects returned by this class always
* have a <code>java.io.Reader</code> available as the "character
* stream" property.
* <p/>
* </UL>
* <p/>
* <LI> Catalog entries can map public identifiers to Java resources or
* to local URLs. These are used to reduce network dependencies and loads,
* and will often be used for external DTD components. For example, packages
* shipping DTD files as resources in JAR files can eliminate network traffic
* when accessing them, and sites may provide local caches of common DTDs.
* Note that no particular catalog syntax is supported by this class, only
* the notion of a set of entries.
* <p/>
* </UL>
* <p/>
* <P> Subclasses can perform tasks such as supporting new URI schemes for
* URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
* MIME entities which are part of a <em>multipart/related</em> group
* (see RFC 2387). They may also be used to support particular catalog
* syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
* SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
* Public Identifiers" (FPIs).
*
* @author David Brownell
* @author Janet Koenig
* @version 1.3 00/02/24
*/
public class Resolver implements EntityResolver {

    // when set, resolveEntity() does not consult the MIME content type
    // reported for a URL and builds the reader from the byte stream alone
    private boolean ignoringMIME;

    // table mapping public IDs to (local) URIs
    private Hashtable id2uri;

    // tables mapping public IDs to resources and classloaders
    private Hashtable id2resource;
    private Hashtable id2loader;

    //
    // table of MIME content types (less attributes!) known
    // to be mostly "OK" to use with XML MIME entities.  the
    // idea is to rule out obvious braindamage ("image/jpg")
    // not the subtle stuff ("text/html") that might actually
    // be (or become) safe.
    //
    private static final String types [] = {
        "application/xml",
        "text/xml",
        "text/plain",
        "text/html", // commonly mis-inferred
        "application/x-netcdf", // this is often illegal XML
        "content/unknown"
    };

    /**
     * Constructs a resolver.
     */
    public Resolver() {
    }

    /**
     * Returns an input source, using the MIME type information and URL
     * scheme to statically determine the correct character encoding if
     * possible and otherwise autodetecting it.  MIME carefully specifies
     * the character encoding defaults, and how attributes of the content
     * type can change it.  XML further specifies two mandatory encodings
     * (UTF-8 and UTF-16), and includes an XML declaration which can be
     * used to internally label most documents encoded using US-ASCII
     * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
     * more).
     * <p/>
     * <P> This method can be used to access XML documents which do not
     * have URIs (such as servlet input streams, or most JavaMail message
     * entities) and to support access methods such as HTTP POST or PUT.
     * (URLs normally return content using the GET method.)
     * <p/>
     * <P> <em> The caller should set the system ID in order for relative URIs
     * found in this document to be interpreted correctly.</em> In some cases,
     * a custom resolver will need to be used; for example, documents
     * may be grouped in a single MIME "multipart/related" bundle, and
     * relative URLs would refer to other documents in that bundle.
     * <p/>
     * <P> When <code>contentType</code> is null, no type check is made
     * (even if <code>checkType</code> is true) and no charset is passed
     * to the reader.
     *
     * @param contentType The MIME content type for the source for which
     *                    an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
     * @param stream      The input byte stream for the input source.
     * @param checkType   If true, this verifies that the content type is known
     *                    to support XML documents, such as <em>application/xml</em>.
     * @param scheme      Unless this is "file", unspecified MIME types
     *                    default to US-ASCII.  Files are always autodetected since most
     *                    file systems discard character encoding information.
     * @throws IOException if <code>checkType</code> is set and the content
     *                     type is not one of the known-acceptable types, or if the reader
     *                     cannot be created.
     */
    public static InputSource createInputSource(String contentType,
                                                InputStream stream,
                                                boolean checkType,
                                                String scheme) throws IOException {
        InputSource retval;
        String charset = null;

        if (contentType != null) {
            int index;

            // MIME type and attribute names are case-insensitive.  Use a
            // fixed locale so that e.g. a Turkish default locale cannot
            // turn "CHARSET" into something that no longer matches.
            contentType = contentType.toLowerCase(java.util.Locale.ROOT);
            index = contentType.indexOf(';');
            if (index != -1) {
                String attributes = contentType.substring(index + 1);

                contentType = contentType.substring(0, index);
                // use "charset=..." if it's available
                charset = parseCharset(attributes);
            }

            // Whitespace around the type (as in "text/xml ; charset=x")
            // is not significant; strip it before any comparison.
            contentType = contentType.trim();

            //
            // Check MIME type.
            //
            if (checkType) {
                boolean isOK = false;
                for (int i = 0; i < types.length; i++) {
                    if (types[i].equals(contentType)) {
                        isOK = true;
                        break;
                    }
                }
                if (!isOK)
                    throw new IOException("Not XML: " + contentType);
            }

            //
            // "text/*" MIME types have hard-wired character set
            // defaults, as specified in the RFCs.  For XML, we
            // ignore the system "file.encoding" property since
            // autodetection is more correct.
            //
            if (charset == null) {
                if (contentType.startsWith("text/")) {
                    if (!"file".equalsIgnoreCase(scheme))
                        charset = "US-ASCII";
                }
                // "application/*" has no default
            }
        }

        retval = new InputSource(XmlReader.createReader(stream, charset));
        retval.setByteStream(stream);
        retval.setEncoding(charset);
        return retval;
    }

    // Extracts the value of the "charset" attribute from the (already
    // lower-cased) attribute section of a content type, i.e. everything
    // after the first ';'.  Returns null when there is no charset
    // attribute, no '=' after it, or the value is empty.
    private static String parseCharset(String attributes) {
        int index = attributes.indexOf("charset");

        if (index == -1)
            return null;
        attributes = attributes.substring(index + 7);

        // strip out subsequent attributes
        if ((index = attributes.indexOf(';')) != -1)
            attributes = attributes.substring(0, index);

        // find start of value
        if ((index = attributes.indexOf('=')) == -1)
            return null;
        attributes = attributes.substring(index + 1);

        // strip out rfc822 comments
        if ((index = attributes.indexOf('(')) != -1)
            attributes = attributes.substring(0, index);

        // double quotes are optional
        if ((index = attributes.indexOf('"')) != -1) {
            attributes = attributes.substring(index + 1);
            // a malformed header may lack the closing quote; in that
            // case keep the remainder rather than failing on index -1
            index = attributes.indexOf('"');
            if (index != -1)
                attributes = attributes.substring(0, index);
        }

        // XXX "\;", "\)" etc were mishandled above
        attributes = attributes.trim();

        // an empty value carries no information; treat it as absent so
        // the normal defaulting rules apply
        return attributes.length() == 0 ? null : attributes;
    }

    /**
     * Creates an input source from a given URI.
     * <p/>
     * <P> Note that when <code>checkType</code> is true the reported
     * content type is only used to choose the character encoding; the
     * list of known-acceptable document types is not enforced by this
     * method.
     *
     * @param uri       the URI (system ID) for the entity
     * @param checkType if true, the MIME content type reported for the
     *                  entity is consulted for its character set encoding;
     *                  if false the reader is built from the byte stream alone.
     */
    static public InputSource createInputSource(URL uri, boolean checkType)
            throws IOException {

        URLConnection conn = uri.openConnection();
        InputSource retval;

        if (checkType) {
            String contentType = conn.getContentType();

            retval = createInputSource(contentType, conn.getInputStream(),
                    false, uri.getProtocol());
        } else {
            retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
        }
        // use the connection's URL rather than the one passed in
        retval.setSystemId(conn.getURL().toString());
        return retval;
    }

    /**
     * Creates an input source from a given file, autodetecting
     * the character encoding.  The system ID is set to a "file:" URI
     * built from the file's absolute path.
     */
    static public InputSource createInputSource(File file)
            throws IOException {
        InputSource retval;
        String path;
        FileInputStream in = new FileInputStream(file);
        boolean wrapped = false;

        try {
            retval = new InputSource(XmlReader.createReader(in));
            wrapped = true;
        } finally {
            // don't leak the file handle if the reader could not be built
            if (!wrapped) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // the original failure is the one worth reporting
                }
            }
        }

        // On JDK 1.2 and later, simplify this:
        //    "path = file.toURL ().toString ()".
        path = file.getAbsolutePath();
        if (File.separatorChar != '/')
            path = path.replace(File.separatorChar, '/');
        if (!path.startsWith("/"))
            path = "/" + path;
        if (!path.endsWith("/") && file.isDirectory())
            path = path + "/";

        retval.setSystemId("file:" + path);
        return retval;
    }

    /**
     * <b>SAX:</b>
     * Resolve the given entity into an input source.  If the name can't
     * be mapped to a preferred form of the entity, the URI is used.  To
     * resolve the entity, first a local catalog mapping names to URIs is
     * consulted.  If no mapping is found there, a catalog mapping names
     * to java resources is consulted.  Finally, if neither mapping found
     * a copy of the entity, the specified URI is used.
     * <p/>
     * <P> When a URI is used, <a href="#createInputSource">
     * createInputSource</a> is used to correctly deduce the character
     * encoding used by this entity.  No MIME type checking is done.
     *
     * @param name Used to find alternate copies of the entity, when
     *             this value is non-null; this is the XML "public ID".
     * @param uri  Used when no alternate copy of the entity is found;
     *             this is the XML "system ID", normally a URI.
     * @return the input source, or null when no catalog entry matches
     *         and <code>uri</code> is null.
     */
    public InputSource resolveEntity(String name, String uri)
            throws IOException {
        InputSource retval;
        String mappedURI = name2uri(name);
        InputStream stream;

        // prefer explicit URI mappings, then bundled resources...
        if (mappedURI == null && (stream = mapResource(name)) != null) {
            uri = "java:resource:" + (String) id2resource.get(name);
            retval = new InputSource(XmlReader.createReader(stream));

            // ...and treat all URIs the same (as URLs for now).
        } else {
            URL url;
            URLConnection conn;

            if (mappedURI != null)
                uri = mappedURI;
            else if (uri == null)
                return null;

            url = new URL(uri);
            conn = url.openConnection();
            // report the connection's URL as the system ID
            uri = conn.getURL().toString();
            if (ignoringMIME)
                retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
            else {
                String contentType = conn.getContentType();
                retval = createInputSource(contentType,
                        conn.getInputStream(),
                        false, url.getProtocol());
            }
        }
        retval.setSystemId(uri);
        retval.setPublicId(name);
        return retval;
    }

    /**
     * Returns true if this resolver is ignoring MIME types in the documents
     * it returns, to work around bugs in how servers have reported the
     * documents' MIME types.
     */
    public boolean isIgnoringMIME() {
        return ignoringMIME;
    }

    /**
     * Tells the resolver whether to ignore MIME types in the documents it
     * retrieves.  Many web servers incorrectly assign text documents a
     * default character encoding, even when that is incorrect.  For example,
     * all HTTP text documents default to use ISO-8859-1 (used for Western
     * European languages), and other MIME sources default text documents
     * to use US-ASCII (a seven bit encoding).  For XML documents which
     * include text encoding declarations (as most should do), these server
     * bugs can be worked around by ignoring the MIME type entirely.
     */
    public void setIgnoringMIME(boolean value) {
        ignoringMIME = value;
    }

    // maps the public ID to an alternate URI, if one is registered
    private String name2uri(String publicId) {
        if (publicId == null || id2uri == null)
            return null;
        return (String) id2uri.get(publicId);
    }

    /**
     * Registers the given public ID as corresponding to a particular
     * URI, typically a local copy.  This URI will be used in preference
     * to ones provided as system IDs in XML entity declarations.  This
     * mechanism would most typically be used for Document Type Definitions
     * (DTDs), where the public IDs are formally managed and versioned.
     *
     * @param publicId The managed public ID being mapped
     * @param uri      The URI of the preferred copy of that entity
     */
    public void registerCatalogEntry(String publicId,
                                     String uri) {
        // the table is created lazily on first registration
        if (id2uri == null)
            id2uri = new Hashtable(17);
        id2uri.put(publicId, uri);
    }

    // return the resource as a stream, or null if the public ID has no
    // registered resource (or the resource cannot be found)
    private InputStream mapResource(String publicId) {
        if (publicId == null || id2resource == null)
            return null;

        String resourceName = (String) id2resource.get(publicId);
        ClassLoader loader = null;

        if (resourceName == null)
            return null;

        if (id2loader != null)
            loader = (ClassLoader) id2loader.get(publicId);
        // no loader registered: fall back to the system class loader
        if (loader == null)
            return ClassLoader.getSystemResourceAsStream(resourceName);
        return loader.getResourceAsStream(resourceName);
    }

    /**
     * Registers a given public ID as corresponding to a particular Java
     * resource in a given class loader, typically distributed with a
     * software package.  This resource will be preferred over system IDs
     * included in XML documents.  This mechanism should most typically be
     * used for Document Type Definitions (DTDs), where the public IDs are
     * formally managed and versioned.
     * <p/>
     * <P> If a mapping to a URI has been provided, that mapping takes
     * precedence over this one.
     *
     * @param publicId     The managed public ID being mapped
     * @param resourceName The name of the Java resource
     * @param loader       The class loader holding the resource, or null if
     *                     it is a system resource.
     */
    public void registerCatalogEntry(String publicId,
                                     String resourceName,
                                     ClassLoader loader) {
        if (id2resource == null)
            id2resource = new Hashtable(17);
        id2resource.put(publicId, resourceName);

        // a null loader is simply not recorded; lookups then use the
        // system class loader
        if (loader != null) {
            if (id2loader == null)
                id2loader = new Hashtable(17);
            id2loader.put(publicId, loader);
        }
    }
}

View File

@@ -0,0 +1,285 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
import java.util.Enumeration;
// This could be replaced by Collections class unless we want
// to be able to run on JDK 1.1
/**
* This class implements a special purpose hashtable. It works like a
* normal <code>java.util.Hashtable</code> except that: <OL>
* <p/>
* <LI> Keys to "get" are strings which are known to be interned,
* so that "==" is used instead of "String.equals". (Interning
* could be document-relative instead of global.)
* <p/>
* <LI> It's not synchronized, since it's to be used only by
* one thread at a time.
* <p/>
* <LI> The keys () enumerator allocates no memory, with live
* updates to the data disallowed.
* <p/>
* <LI> It's got fewer bells and whistles: fixed threshold and
* load factor, no JDK 1.2 collection support, only keys can be
* enumerated, things can't be removed, simpler inheritance; more.
* <p/>
* </OL>
* <p/>
* <P> The overall result is that it's less expensive to use these in
* performance-critical locations, in terms both of CPU and memory,
* than <code>java.util.Hashtable</code> instances. In this package
* it makes a significant difference when normalizing attributes,
* which is done for each start-element construct.
*
* @version $Revision: 1.2 $
*/
final class SimpleHashtable implements Enumeration {

    // entries ... one singly linked collision chain per bucket
    private Entry table[];

    // currently enumerated key, and the index of the next bucket to scan
    private Entry current = null;
    private int currentBucket = 0;

    // number of keys stored, and the count at which the table is grown
    private int count;
    private int threshold;

    private static final float loadFactor = 0.75f;

    /**
     * Constructs a new, empty hashtable with the specified initial
     * capacity.  A capacity of zero is treated as one.
     *
     * @param initialCapacity the initial capacity of the hashtable.
     * @throws IllegalArgumentException if the capacity is negative.
     */
    public SimpleHashtable(int initialCapacity) {
        if (initialCapacity < 0)
            throw new IllegalArgumentException("Illegal Capacity: " +
                    initialCapacity);
        if (initialCapacity == 0)
            initialCapacity = 1;
        table = new Entry[initialCapacity];
        threshold = (int) (initialCapacity * loadFactor);
    }

    /**
     * Constructs a new, empty hashtable with a default capacity.
     */
    public SimpleHashtable() {
        this(11);
    }

    /**
     * Removes every key from this hashtable and resets any enumeration
     * in progress.  The bucket array keeps its current size.
     */
    public void clear() {
        count = 0;
        currentBucket = 0;
        current = null;
        for (int i = 0; i < table.length; i++)
            table[i] = null;
    }

    /**
     * Returns the number of keys in this hashtable.
     *
     * @return the number of keys in this hashtable.
     */
    public int size() {
        return count;
    }

    /**
     * Returns an enumeration of the keys in this hashtable.  The
     * enumeration is this object itself, so only one enumeration can be
     * active at a time and calling this method restarts it.
     *
     * @return an enumeration of the keys in this hashtable.
     * @see Enumeration
     */
    public Enumeration keys() {
        currentBucket = 0;
        current = null;
        return this;
    }

    /**
     * Used to view this as an enumeration; returns true if there
     * are more keys to be enumerated.
     */
    public boolean hasMoreElements() {
        if (current != null)
            return true;
        // skip forward to the head of the next non-empty bucket
        while (currentBucket < table.length) {
            current = table[currentBucket++];
            if (current != null)
                return true;
        }
        return false;
    }

    /**
     * Used to view this as an enumeration; returns the next key
     * in the enumeration.  It is not necessary to call
     * <code>hasMoreElements</code> first.
     *
     * @throws IllegalStateException if no keys remain to be enumerated.
     */
    public Object nextElement() {
        Object retval;

        // Let hasMoreElements() position "current": it is null right
        // after keys() and whenever a collision chain has been used up,
        // even though later buckets may still hold keys.
        if (!hasMoreElements())
            throw new IllegalStateException();
        retval = current.key;
        current = current.next;
        return retval;
    }

    /**
     * Returns the value to which the specified key is mapped in this
     * hashtable, or null if there is none.  Keys are compared by
     * identity ("=="), so the key must be the same (interned) instance
     * that was used to store the value.
     */
    public Object get(String key) {
        Entry tab[] = table;
        int hash = key.hashCode();
        // mask off the sign bit so the bucket index is never negative
        int index = (hash & 0x7FFFFFFF) % tab.length;
        for (Entry e = tab[index]; e != null; e = e.next) {
            if ((e.hash == hash) && (e.key == key))
                return e.value;
        }
        return null;
    }

    /**
     * Returns the value to which the specified key is mapped in this
     * hashtable ... the key isn't necessarily interned, though, so it
     * is compared using <code>equals</code>.
     */
    public Object getNonInterned(String key) {
        Entry tab[] = table;
        int hash = key.hashCode();
        int index = (hash & 0x7FFFFFFF) % tab.length;
        for (Entry e = tab[index]; e != null; e = e.next) {
            if ((e.hash == hash) && e.key.equals(key))
                return e.value;
        }
        return null;
    }

    /**
     * Increases the capacity of and internally reorganizes this
     * hashtable, in order to accommodate and access its entries more
     * efficiently.  This method is called automatically when the
     * number of keys in the hashtable exceeds this hashtable's capacity
     * and load factor.
     */
    private void rehash() {
        int oldCapacity = table.length;
        Entry oldMap[] = table;

        int newCapacity = oldCapacity * 2 + 1;
        Entry newMap[] = new Entry[newCapacity];

        threshold = (int) (newCapacity * loadFactor);
        table = newMap;

        // relink every existing entry into its bucket in the new array
        for (int i = oldCapacity; i-- > 0;) {
            for (Entry old = oldMap[i]; old != null;) {
                Entry e = old;
                old = old.next;

                int index = (e.hash & 0x7FFFFFFF) % newCapacity;
                e.next = newMap[index];
                newMap[index] = e;
            }
        }
    }

    /**
     * Maps the specified <code>key</code> to the specified
     * <code>value</code> in this hashtable.  Neither the key nor the
     * value can be <code>null</code>.
     * <p/>
     * <P>The value can be retrieved by calling the <code>get</code> method
     * with the same key instance, or <code>getNonInterned</code> with an
     * equal key.
     *
     * @return the value previously stored under this key instance, or
     *         null if there was none.
     * @throws NullPointerException if the key or the value is null.
     */
    public Object put(Object key, Object value) {
        // Make sure the value is not null
        if (value == null) {
            throw new NullPointerException();
        }

        // Makes sure the key is not already in the hashtable.
        // Existing keys are matched by identity, as in get().
        Entry tab[] = table;
        int hash = key.hashCode();
        int index = (hash & 0x7FFFFFFF) % tab.length;
        for (Entry e = tab[index]; e != null; e = e.next) {
            if ((e.hash == hash) && (e.key == key)) {
                Object old = e.value;
                e.value = value;
                return old;
            }
        }

        if (count >= threshold) {
            // Rehash the table if the threshold is exceeded
            rehash();

            // the bucket index depends on the (new) table length
            tab = table;
            index = (hash & 0x7FFFFFFF) % tab.length;
        }

        // Creates the new entry at the head of its bucket's chain.
        Entry e = new Entry(hash, key, value, tab[index]);
        tab[index] = e;
        count++;
        return null;
    }

    /**
     * Hashtable collision list.
     */
    private static class Entry {
        int hash;
        Object key;
        Object value;
        Entry next;

        protected Entry(int hash, Object key, Object value, Entry next) {
            this.hash = hash;
            this.key = key;
            this.value = value;
            this.next = next;
        }
    }
}

View File

@@ -0,0 +1,387 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
/**
* Methods in this class are used to determine whether characters may
* appear in certain roles in XML documents. Such methods are used
* both to parse and to create such documents.
*
* @author David Brownell
* @version 1.1, 00/08/05
*/
public class XmlChars {

    // static helpers only; instances are never created
    private XmlChars() {
    }

    /**
     * Tests whether a UCS-4 character code may appear in an XML
     * document.  Codes up to 0xFFFF correspond to single Java
     * <code>char</code> values; larger codes are written in documents as
     * a pair of surrogate characters, or as a <em>character
     * reference</em>.  Surrogate code points themselves are not
     * characters and are rejected.
     *
     * @param ucs4char The 32-bit UCS-4 character being tested.
     * @return true if the code matches the XML <em>Char</em> production.
     */
    static public boolean isChar(int ucs4char) {
        // [2] Char ::= #x0009 | #x000A | #x000D
        //            | [#x0020-#xD7FF]
        //            | [#xE000-#xFFFD]
        //            | [#x10000-#x10ffff]
        if (ucs4char < 0x0020) {
            // of the control range only tab, LF and CR are legal
            return ucs4char == 0x0009
                    || ucs4char == 0x000A
                    || ucs4char == 0x000D;
        }
        if (ucs4char <= 0xD7FF)
            return true;
        if (ucs4char < 0xE000)
            return false;           // surrogates excluded!
        if (ucs4char <= 0xFFFD)
            return true;
        return ucs4char >= 0x10000 && ucs4char <= 0x10ffff;
    }

    /**
     * Tests whether a character may appear after the first position of
     * an XML name.
     *
     * @see #isNCNameChar(char)
     * @see #isLetter(char)
     */
    public static boolean isNameChar(char c) {
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
        //                | CombiningChar | Extender
        if (isLetter2(c))
            return true;

        switch (c) {
            case '.':
            case '-':
            case '_':
            case ':':
                return true;
            default:
                return isExtender(c);
        }
    }

    /**
     * Tests whether a character may appear after the first position of
     * an unscoped ("no colon") name, as in the XML Namespaces rules.
     * This is the same set as for {@link #isNameChar(char)}, minus the
     * colon that separates a name from its scope.
     *
     * @see #isNameChar(char)
     * @see #isLetter(char)
     */
    public static boolean isNCNameChar(char c) {
        // [NC 5] NCNameChar ::= Letter | Digit | '.' | '-' | '_'
        //                     | CombiningChar | Extender
        if (c == ':')
            return false;
        return isNameChar(c);
    }

    /**
     * Tests whether a character is one of the four XML whitespace
     * characters: space, tab, line feed or carriage return.
     */
    public static boolean isSpace(char c) {
        switch (c) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                return true;
            default:
                return false;
        }
    }

    // true when low <= c <= high
    private static boolean within(char c, int low, int high) {
        return c >= low && c <= high;
    }

    /**
     * Tests whether a character is an XML "letter".  The first
     * character of an XML Name must be a letter or one of a few other
     * characters; later characters only need to satisfy
     * <em>isNameChar</em>.
     *
     * @see #isNameChar(char)
     * @see #isNCNameChar(char)
     */
    public static boolean isLetter(char c) {
        // [84] Letter ::= BaseChar | Ideographic
        // [85] BaseChar ::= ... too much to repeat
        // [86] Ideographic ::= ... too much to repeat

        // Fast path for ASCII letters, plus a quick rejection of '/'.
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
            return true;
        if (c == '/')
            return false;

        // The spec's tables are impractical to reproduce, so the
        // Unicode general category drives the decision instead.
        switch (Character.getType(c)) {
            case Character.UPPERCASE_LETTER:        // Lu
            case Character.LOWERCASE_LETTER:        // Ll
            case Character.TITLECASE_LETTER:        // Lt
            case Character.OTHER_LETTER:            // Lo
            case Character.LETTER_NUMBER:           // Nl
                // a 'name start' category; exceptions are applied below
                break;
            default:
                // a few characters outside those categories still
                // count as "alphabetic"
                return within(c, 0x02bb, 0x02c1)
                        || c == 0x0559
                        || c == 0x06e5
                        || c == 0x06e6;
        }

        // exclude compatibility characters and, per "5.14 of Unicode",
        // some combiners
        return !(isCompatibilityChar(c) || within(c, 0x20dd, 0x20e0));
    }

    //
    // Reports whether the character is one of the "compatibility"
    // characters that XML 1.0 discourages in names.  Those exist to
    // carry information from older non-Unicode character sets and
    // always have alternative Unicode representations.
    //
    private static boolean isCompatibilityChar(char c) {
        // dispatch on the high byte so that only the comparisons
        // relevant to that 256-character page are evaluated
        int page = c >>> 8;

        // 0xf9 .. 0xff: the whole "compatibility" area
        if (page >= 0xf9)
            return true;

        switch (page) {
            case 0x00:
                // ISO Latin/1
                return c == 0x00aa || c == 0x00b5 || c == 0x00ba;

            case 0x01:
                // Latin Extended A and (parts of) B
                return within(c, 0x0132, 0x0133)
                        || within(c, 0x013f, 0x0140)
                        || c == 0x0149
                        || c == 0x017f
                        || within(c, 0x01c4, 0x01cc)
                        || within(c, 0x01f1, 0x01f3);

            case 0x02:
                // some spacing modifiers
                return within(c, 0x02b0, 0x02b8)
                        || within(c, 0x02e0, 0x02e4);

            case 0x03:
                // Greek
                return c == 0x037a;

            case 0x05:
                // Armenian
                return c == 0x0587;

            case 0x0e:
                // Laotian
                return within(c, 0x0edc, 0x0edd);

            case 0x11:
                // big chunks of Hangul Jamo
                return c == 0x1101
                        || c == 0x1104
                        || c == 0x1108
                        || c == 0x110a
                        || c == 0x110d
                        || within(c, 0x1113, 0x113b)
                        || c == 0x113d
                        || c == 0x113f
                        || within(c, 0x1141, 0x114b)
                        || c == 0x114d
                        || c == 0x114f
                        || within(c, 0x1151, 0x1153)
                        || within(c, 0x1156, 0x1158)
                        || c == 0x1162
                        || c == 0x1164
                        || c == 0x1166
                        || c == 0x1168
                        || within(c, 0x116a, 0x116c)
                        || within(c, 0x116f, 0x1171)
                        || c == 0x1174
                        || within(c, 0x1176, 0x119d)
                        || within(c, 0x119f, 0x11a2)
                        || within(c, 0x11a9, 0x11aa)
                        || within(c, 0x11ac, 0x11ad)
                        || within(c, 0x11b0, 0x11b6)
                        || c == 0x11b9
                        || c == 0x11bb
                        || within(c, 0x11c3, 0x11ea)
                        || within(c, 0x11ec, 0x11ef)
                        || within(c, 0x11f1, 0x11f8);

            case 0x20:
                // superscript
                return c == 0x207f;

            case 0x21:
                // various letterlike symbols ...
                return c == 0x2102
                        || c == 0x2107
                        || within(c, 0x210a, 0x2113)
                        || c == 0x2115
                        || within(c, 0x2118, 0x211d)
                        || c == 0x2124
                        || c == 0x2128
                        || within(c, 0x212c, 0x212d)
                        || within(c, 0x212f, 0x2138)
                        // ... and most Roman numerals (less 1K, 5K, 10K)
                        || within(c, 0x2160, 0x217f);

            case 0x30:
                // some Hiragana
                return within(c, 0x309b, 0x309c);

            case 0x31:
                // all Hangul Compatibility Jamo
                return within(c, 0x3131, 0x318e);

            default:
                // everything else is not flagged
                return false;
        }
    }

    // guts of isNameChar/isNCNameChar: letters plus the combining,
    // modifier and digit categories
    private static boolean isLetter2(char c) {
        // [84] Letter ::= BaseChar | Ideographic
        // [85] BaseChar ::= ... too much to repeat
        // [86] Ideographic ::= ... too much to repeat
        // [87] CombiningChar ::= ... too much to repeat

        // Fast path for ASCII letters, plus a quick rejection of '>'.
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
            return true;
        if (c == '>')
            return false;

        switch (Character.getType(c)) {
            // 'name start' categories ...
            case Character.UPPERCASE_LETTER:        // Lu
            case Character.LOWERCASE_LETTER:        // Ll
            case Character.TITLECASE_LETTER:        // Lt
            case Character.OTHER_LETTER:            // Lo
            case Character.LETTER_NUMBER:           // Nl
            // ... and name characters 'other than name start characters'
            case Character.MODIFIER_LETTER:         // Lm
            case Character.NON_SPACING_MARK:        // Mn
            case Character.ENCLOSING_MARK:          // Me
            case Character.COMBINING_SPACING_MARK:  // Mc
            case Character.DECIMAL_DIGIT_NUMBER:    // Nd
                break;
            default:
                // one extra character is accepted
                return c == 0x0387;
        }

        // exclude compatibility characters and, per "5.14 of Unicode",
        // some combiners
        return !(isCompatibilityChar(c) || within(c, 0x20dd, 0x20e0));
    }

    // [88] Digit ::= ...
    // Same as java.lang.Character.isDigit, except that the "fullwidth"
    // digits are rejected.
    private static boolean isDigit(char c) {
        if (within(c, 0xff10, 0xff19))
            return false;
        return Character.isDigit(c);
    }

    // [89] Extender ::= ...
    private static boolean isExtender(char c) {
        switch (c) {
            case 0x00b7:
            case 0x02d0:
            case 0x02d1:
            case 0x0387:
            case 0x0640:
            case 0x0e46:
            case 0x0ec6:
            case 0x3005:
                return true;
            default:
                return within(c, 0x3031, 0x3035)
                        || within(c, 0x309d, 0x309e)
                        || within(c, 0x30fc, 0x30fe);
        }
    }
}

View File

@@ -0,0 +1,147 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
/**
* This class contains static methods used to determine whether identifiers
* may appear in certain roles in XML documents. Such methods are used
* both to parse and to create such documents.
*
* @author David Brownell
* @version 1.1, 00/08/05
*/
public class XmlNames {

    // static helpers only; instances are never created
    private XmlNames() {
    }

    /**
     * Returns true if the value is a legal XML name: a letter,
     * underscore or colon followed by any number of name characters.
     *
     * @param value the string being tested; null and the empty string
     *              are not names.
     */
    public static boolean isName(String value) {
        // an empty string has no first character to examine
        if (value == null || value.length() == 0)
            return false;

        char c = value.charAt(0);
        if (!XmlChars.isLetter(c) && c != '_' && c != ':')
            return false;
        for (int i = 1; i < value.length(); i++)
            if (!XmlChars.isNameChar(value.charAt(i)))
                return false;
        return true;
    }

    /**
     * Returns true if the value is a legal "unqualified" XML name, as
     * defined in the XML Namespaces proposed recommendation.
     * These are normal XML names, except that they may not contain
     * a "colon" character.
     *
     * @param value the string being tested; null and the empty string
     *              are not names.
     */
    public static boolean isUnqualifiedName(String value) {
        if (value == null || value.length() == 0)
            return false;

        char c = value.charAt(0);
        if (!XmlChars.isLetter(c) && c != '_')
            return false;
        for (int i = 1; i < value.length(); i++)
            if (!XmlChars.isNCNameChar(value.charAt(i)))
                return false;
        return true;
    }

    /**
     * Returns true if the value is a legal "qualified" XML name, as defined
     * in the XML Namespaces proposed recommendation.  Qualified names are
     * composed of an optional prefix (an unqualified name), followed by a
     * colon, and a required "local part" (an unqualified name).  Prefixes are
     * declared, and correspond to particular URIs which scope the "local
     * part" of the name.  (This method cannot check whether the prefix of a
     * name has been declared.)
     *
     * @param value the string being tested
     */
    public static boolean isQualifiedName(String value) {
        if (value == null)
            return false;

        // [6] QName ::= (Prefix ':')? LocalPart
        // [7] Prefix ::= NCName
        // [8] LocalPart ::= NCName

        int first = value.indexOf(':');

        // no Prefix, only check LocalPart.  (A leading colon also ends
        // up here and is rejected, since ':' cannot start an
        // unqualified name.)
        if (first <= 0)
            return isUnqualifiedName(value);

        // Prefix exists, check everything; only one colon is allowed
        int last = value.lastIndexOf(':');
        if (last != first)
            return false;

        return isUnqualifiedName(value.substring(0, first))
                && isUnqualifiedName(value.substring(first + 1));
    }

    /**
     * This method returns true if the identifier is a "name token"
     * as defined in the XML specification.  Like names, these
     * may only contain "name characters"; however, they do not need
     * to have letters as their initial characters.  Attribute values
     * defined to be of type NMTOKEN(S) must satisfy this predicate.
     *
     * @param token the string being tested; null and the empty string
     *              are not name tokens, since a name token consists of
     *              one or more name characters.
     */
    public static boolean isNmtoken(String token) {
        // [7] Nmtoken ::= (NameChar)+
        if (token == null || token.length() == 0)
            return false;

        int length = token.length();
        for (int i = 0; i < length; i++)
            if (!XmlChars.isNameChar(token.charAt(i)))
                return false;
        return true;
    }

    /**
     * This method returns true if the identifier is a "name token" as
     * defined by the XML Namespaces proposed recommendation.
     * These are like XML "name tokens" but they may not contain the
     * "colon" character.
     *
     * @param token the string being tested
     * @see #isNmtoken
     */
    public static boolean isNCNmtoken(String token) {
        return isNmtoken(token) && token.indexOf(':') < 0;
    }
}

View File

@@ -0,0 +1,784 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.dtdparser;
import java.io.ByteArrayInputStream;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.io.Reader;
import java.util.Hashtable;
// NOTE: Add I18N support to this class when JDK gets the ability to
// defer selection of locale for exception messages ... use the same
// technique for both.
/**
* This handles several XML-related tasks that normal java.io Readers
* don't support, including use of IETF standard encoding names and
* automatic detection of most XML encodings. The former is needed
* for interoperability; the latter is needed to conform with the XML
* spec. This class also optimizes reading some common encodings by
* providing low-overhead unsynchronized Reader support.
* <p/>
* <P> Note that the autodetection facility should be used only on
* data streams which have an unknown character encoding. For example,
* it should never be used on MIME text/xml entities.
* <p/>
* <P> Note that XML processors are only required to support UTF-8 and
* UTF-16 character encodings. Autodetection permits the underlying Java
* implementation to provide support for many other encodings, such as
* US-ASCII, ISO-8859-5, Shift_JIS, EUC-JP, and ISO-2022-JP.
*
* @author David Brownell
* @author Janet Koenig
* @version 1.3 00/02/24
*/
// package private
final class XmlReader extends Reader {
// Size, in bytes, of the pushback window: used as the PushbackInputStream
// capacity and as the size of the buffer scanned for the XML declaration.
private static final int MAXPUSHBACK = 512;
// Delegate reader that does the actual decoding; assigned by setEncoding()
// and set to null by close().
private Reader in;
// Name of the encoding chosen for this stream; returned by getEncoding().
private String assignedEncoding;
// Set to true by close(); the read methods test it before touching "in".
private boolean closed;
//
// This class always delegates I/O to a reader, which gets
// its data from the very beginning of the XML text. It needs
// to use a pushback stream since (a) autodetection can read
// partial UTF-8 characters which need to be fully processed,
// (b) the "Unicode" readers swallow characters that they think
// are byte order marks, so tests fail if they don't see the
// real byte order mark.
//
// It's got to do this efficiently: character I/O is solidly on the
// critical path. (So keep buffer length over 2 Kbytes to avoid
// excess buffering. Many URL handlers stuff a BufferedInputStream
// between here and the real data source, and larger buffers keep
// that from slowing you down.)
//
/**
* Constructs the reader from an input stream, autodetecting
* the encoding to use according to the heuristic specified
* in the XML 1.0 recommendation.
*
* @param in the input stream from which the reader is constructed
* @throws IOException on error, such as unrecognized encoding
*/
public static Reader createReader(InputStream in) throws IOException {
    // The private constructor inspects the leading bytes of the stream
    // and installs a matching decoder.
    final Reader autodetecting = new XmlReader(in);
    return autodetecting;
}
/**
* Creates a reader supporting the given encoding, mapping
* from standard encoding names to ones that are understood by
* Java where necessary.
*
* @param in the input stream from which the reader is constructed
* @param encoding the IETF standard name of the encoding to use;
* if null, autodetection is used.
* @throws IOException on error, including unrecognized encoding
*/
public static Reader createReader(InputStream in, String encoding)
        throws IOException {
    // No encoding name given: let the autodetecting reader decide.
    if (encoding == null) {
        return new XmlReader(in);
    }

    // Dedicated readers for the most common encodings.
    final boolean wantsUtf8 = "UTF-8".equalsIgnoreCase(encoding)
            || "UTF8".equalsIgnoreCase(encoding);
    if (wantsUtf8) {
        return new Utf8Reader(in);
    }

    final boolean wantsAscii = "US-ASCII".equalsIgnoreCase(encoding)
            || "ASCII".equalsIgnoreCase(encoding);
    if (wantsAscii) {
        return new AsciiReader(in);
    }

    // ISO-8859-1 has numerous aliases; only the canonical name is
    // recognized here.
    if ("ISO-8859-1".equalsIgnoreCase(encoding)) {
        return new Iso8859_1Reader(in);
    }

    //
    // What we really want is an administerable resource mapping
    // encoding names/aliases to classnames -- for example a property
    // file resource such as "readers/mapping.props", plus a set of
    // readers in that (sub)package -- defaulting to this call only
    // if no better choice is available.
    //
    final String javaName = std2java(encoding);
    return new InputStreamReader(in, javaName);
}
//
// JDK doesn't know all of the standard encoding names, and
// in particular none of the EBCDIC ones IANA defines (and
// which IBM encourages).
//
private static final Hashtable charsets = new Hashtable(31);

static {
    // Each row is { standard encoding name (upper case), Java encoding name }.
    // NOTE: no support for ISO-10646-UCS-4 yet.
    final String[][] mappings = {
        {"UTF-16", "Unicode"},
        {"ISO-10646-UCS-2", "Unicode"},
        {"EBCDIC-CP-US", "cp037"},
        {"EBCDIC-CP-CA", "cp037"},
        {"EBCDIC-CP-NL", "cp037"},
        {"EBCDIC-CP-WT", "cp037"},
        {"EBCDIC-CP-DK", "cp277"},
        {"EBCDIC-CP-NO", "cp277"},
        {"EBCDIC-CP-FI", "cp278"},
        {"EBCDIC-CP-SE", "cp278"},
        {"EBCDIC-CP-IT", "cp280"},
        {"EBCDIC-CP-ES", "cp284"},
        {"EBCDIC-CP-GB", "cp285"},
        {"EBCDIC-CP-FR", "cp297"},
        {"EBCDIC-CP-AR1", "cp420"},
        {"EBCDIC-CP-HE", "cp424"},
        {"EBCDIC-CP-BE", "cp500"},
        {"EBCDIC-CP-CH", "cp500"},
        {"EBCDIC-CP-ROECE", "cp870"},
        {"EBCDIC-CP-YU", "cp870"},
        {"EBCDIC-CP-IS", "cp871"},
        {"EBCDIC-CP-AR2", "cp918"},
    };
    for (int row = 0; row < mappings.length; row++) {
        charsets.put(mappings[row][0], mappings[row][1]);
    }

    // IANA also defines two that JDK 1.2 doesn't handle:
    // EBCDIC-CP-GR --> CP423
    // EBCDIC-CP-TR --> CP905
}
// returns an encoding name supported by JDK >= 1.1.6
// for some cases required by the XML spec
private static String std2java(String encoding) {
    // The table keys are upper-case ASCII names. Fold the case with a fixed
    // locale: the no-argument toUpperCase() uses the default locale, and
    // in e.g. Turkish locales 'i' maps to a dotted capital I, which would
    // make names such as "ebcdic-cp-it" miss the table.
    String key = encoding.toUpperCase(java.util.Locale.ENGLISH);
    String javaName = (String) charsets.get(key);

    // Unknown names are passed through unchanged.
    return javaName != null ? javaName : encoding;
}
/**
* Returns the standard name of the encoding in use
*/
public String getEncoding() {
    // Whatever name setEncoding() last recorded for this stream.
    return this.assignedEncoding;
}
/**
 * Builds an autodetecting reader: peeks at up to four leading bytes of the
 * stream, pushes them back, and selects a decoder from the byte pattern.
 * Falls back to UTF-8 when no pattern matches or fewer than four bytes
 * were read.
 *
 * @param stream the byte stream positioned at the start of the XML text
 * @throws IOException if reading fails or the chosen encoding cannot be set up
 */
private XmlReader(InputStream stream) throws IOException {
    super(stream);

    // Reuse a caller-supplied pushback stream, otherwise wrap the input.
    final PushbackInputStream pushback = (stream instanceof PushbackInputStream)
            ? (PushbackInputStream) stream
            : new PushbackInputStream(stream, MAXPUSHBACK);

    //
    // Peek at the first few bytes, then put them back so the selected
    // decoder sees the stream from its very beginning.
    //
    final byte[] head = new byte[4];
    final int count = pushback.read(head);
    if (count > 0) {
        pushback.unread(head, 0, count);
    }

    if (count == 4) {
        final int b0 = head[0] & 0xff;
        final int b1 = head[1] & 0xff;
        final int b2 = head[2] & 0xff;
        final int b3 = head[3] & 0xff;

        if (b0 == 0x00) {
            // 00 3c 00 3f == illegal UTF-16 big-endian (no byte order mark);
            // anything else starting with 00 is probably UCS-4.
            if (b1 == 0x3c && b2 == 0x00 && b3 == 0x3f) {
                setEncoding(pushback, "UnicodeBig");
                return;
            }
        } else if (b0 == '<') {
            // 0x3c: the most common cases. A plain "<hello>" or "<!-- ..."
            // matches neither sub-case and falls through to UTF-8.
            if (b1 == 0x00) {
                // 3c 00 3f 00 == illegal UTF-16 little-endian;
                // otherwise probably UCS-4.
                if (b2 == 0x3f && b3 == 0x00) {
                    setEncoding(pushback, "UnicodeLittle");
                    return;
                }
            } else if (b1 == '?') {
                // 3c 3f 78 6d == "<?xm" in ASCII and its supersets
                // (Shift-JIS, ASCII, UTF-8, ISO-8859-*, etc): read the
                // declaration to find out which one.
                if (b2 == 'x' && b3 == 'm') {
                    useEncodingDecl(pushback, "UTF8");
                    return;
                }
            }
        } else if (b0 == 0x4c) {
            // 4c 6f a7 94 ... some EBCDIC code page; otherwise treat as UTF-8.
            if (b1 == 0x6f && b2 == 0xa7 && b3 == 0x94) {
                useEncodingDecl(pushback, "CP037");
                return;
            }
        } else if (b0 == 0xfe) {
            // fe ff: UTF-16 big-endian byte order mark
            if (b1 == 0xff) {
                setEncoding(pushback, "UTF-16");
                return;
            }
        } else if (b0 == 0xff) {
            // ff fe: UTF-16 little-endian byte order mark
            if (b1 == 0xfe) {
                setEncoding(pushback, "UTF-16");
                return;
            }
        }
    }

    //
    // If all else fails, assume XML without a declaration, and
    // using UTF-8 encoding.
    //
    setEncoding(pushback, "UTF-8");
}
/*
* Read the encoding decl on the stream, knowing that it should
* be readable using the specified encoding (basically, ASCII or
* EBCDIC). The body of the document may use a wider range of
* characters than the XML/Text decl itself, so we switch to use
* the specified encoding as soon as we can. (ASCII is a subset
* of UTF-8, ISO-8859-*, ISO-2022-JP, EUC-JP, and more; EBCDIC
* has a variety of "code pages" that have these characters as
* a common subset.)
*/
private void useEncodingDecl(PushbackInputStream pb, String encoding)
        throws IOException {
    byte buffer [] = new byte[MAXPUSHBACK];
    int len;
    Reader r;
    int c;

    //
    // Buffer up a bunch of input and push it straight back, so the decoder
    // chosen below still sees the whole stream.
    //
    len = pb.read(buffer, 0, buffer.length);
    if (len > 0)
        pb.unread(buffer, 0, len);

    // Fewer bytes than the "<?xm" prefix: nothing to parse.
    if (len < 4) {
        setEncoding(pb, "UTF-8");
        return;
    }

    //
    // Decode the buffered bytes using the provisional encoding. We can
    // skip the first four bytes since we know that "<?xm" was read to
    // determine what encoding to use. The third constructor argument is a
    // LENGTH, so it must be (len - 4); passing len would also decode up to
    // four bytes beyond the data that was actually read.
    //
    r = new InputStreamReader(new ByteArrayInputStream(buffer, 4, len - 4),
            encoding);

    //
    // Next must be "l" (and whitespace) else we conclude
    // error and choose UTF-8.
    //
    if ((c = r.read()) != 'l') {
        setEncoding(pb, "UTF-8");
        return;
    }

    //
    // Then, we'll skip any
    // S version="..." [or single quotes]
    // bit and get any subsequent
    // S encoding="..." [or single quotes]
    //
    // We put an arbitrary size limit on how far we read; lots
    // of space will break this algorithm.
    //
    StringBuffer buf = new StringBuffer();
    StringBuffer keyBuf = null;
    String key = null;
    boolean sawEq = false;
    char quoteChar = 0;
    boolean sawQuestion = false;

    XmlDecl:
    for (int i = 0; i < MAXPUSHBACK - 5; ++i) {
        if ((c = r.read()) == -1)
            break;

        // ignore whitespace before/between "key = 'value'"
        if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
            continue;

        // ... but require at least a little!
        if (i == 0)
            break;

        // terminate the loop ASAP: "?>" ends the declaration
        if (c == '?')
            sawQuestion = true;
        else if (sawQuestion) {
            if (c == '>')
                break;
            sawQuestion = false;
        }

        // did we get the "key =" bit yet?
        if (key == null || !sawEq) {
            if (keyBuf == null) {
                if (Character.isWhitespace((char) c))
                    continue;
                // first character of a new key; buf doubles as the key buffer
                keyBuf = buf;
                buf.setLength(0);
                buf.append((char) c);
                sawEq = false;
            } else if (Character.isWhitespace((char) c)) {
                key = keyBuf.toString();
            } else if (c == '=') {
                if (key == null)
                    key = keyBuf.toString();
                sawEq = true;
                keyBuf = null;
                quoteChar = 0;
            } else
                keyBuf.append((char) c);
            continue;
        }

        // space before quoted value
        if (Character.isWhitespace((char) c))
            continue;

        if (c == '"' || c == '\'') {
            if (quoteChar == 0) {
                // opening quote: start collecting the value
                quoteChar = (char) c;
                buf.setLength(0);
                continue;
            } else if (c == quoteChar) {
                // closing quote: the value is complete
                if ("encoding".equals(key)) {
                    String declared = buf.toString();

                    // map illegal (including empty) names to UTF-8 default
                    if (!isEncName(declared))
                        break XmlDecl;

                    setEncoding(pb, declared);
                    return;
                } else {
                    // some other pseudo-attribute (e.g. version): keep going
                    key = null;
                    continue;
                }
            }
        }
        buf.append((char) c);
    }

    setEncoding(pb, "UTF-8");
}

/**
 * Checks a declared encoding name against XML production
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 *
 * @param name the candidate encoding name; must not be null
 * @return true if the name is non-empty and matches the production
 */
private static boolean isEncName(String name) {
    if (name.length() == 0)
        return false;
    for (int i = 0; i < name.length(); i++) {
        char ch = name.charAt(i);

        if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))
            continue;
        // digits and punctuation are only allowed after the first letter
        if (i > 0 && (ch == '-'
                || (ch >= '0' && ch <= '9')
                || ch == '.' || ch == '_'))
            continue;
        return false;
    }
    return true;
}
private void setEncoding(InputStream stream, String encoding)
        throws IOException {
    // Record the name first, then build the decoder for it; the name is
    // therefore recorded even if creating the reader fails.
    this.assignedEncoding = encoding;
    final Reader decoder = createReader(stream, encoding);
    this.in = decoder;
}
/**
* Reads characters into the buffer; returns the number of characters read, or -1 on EOF.
*/
public int read(char buf [], int off, int len) throws IOException {
    // A closed reader reports EOF here rather than throwing.
    if (closed) {
        return -1; // throw new IOException ("closed");
    }

    final int count = in.read(buf, off, len);
    // Hitting end of input closes this reader (and its delegate).
    if (count == -1) {
        close();
    }
    return count;
}
/**
* Reads a single character.
*/
public int read() throws IOException {
    // Unlike the array form, the single-character read rejects a closed reader.
    if (closed) {
        throw new IOException("closed");
    }

    final int ch = in.read();
    // Hitting end of input closes this reader (and its delegate).
    if (ch == -1) {
        close();
    }
    return ch;
}
/**
* Returns true iff the reader supports mark/reset.
*/
public boolean markSupported() {
    // No delegate (already closed): nothing can be marked.
    if (in == null) {
        return false;
    }
    return in.markSupported();
}
/**
* Sets a mark allowing a limited number of characters to
* be "peeked", by reading and then resetting.
*
* @param value how many characters may be "peeked".
*/
public void mark(int value) throws IOException {
    // Silently ignored once the delegate is gone.
    if (in == null) {
        return;
    }
    in.mark(value);
}
/**
* Resets the current position to the last marked position.
*/
public void reset() throws IOException {
    // Silently ignored once the delegate is gone.
    if (in == null) {
        return;
    }
    in.reset();
}
/**
* Skips a specified number of characters.
*/
public long skip(long value) throws IOException {
    // With no delegate there is nothing to skip over.
    if (in == null) {
        return 0;
    }
    return in.skip(value);
}
/**
* Returns true iff input characters are known to be ready.
*/
public boolean ready() throws IOException {
    // With no delegate, report "not ready".
    if (in == null) {
        return false;
    }
    return in.ready();
}
/**
* Closes the reader.
*/
public void close() throws IOException {
    // Closing twice is a no-op.
    if (!closed) {
        in.close();
        // Drop the delegate and mark closed only after it closed cleanly.
        in = null;
        closed = true;
    }
}
//
// Delegating to a converter module will always be slower than
// direct conversion. Use a similar approach for any other
// readers that need to be particularly fast; only block I/O
// speed matters to this package. For UTF-16, separate readers
// for big and little endian streams make a difference, too;
// fewer conditionals in the critical path!
//
static abstract class BaseReader extends Reader {
    // Underlying byte source; null once close() has run.
    protected InputStream instream;
    // Raw bytes read from instream; released by close().
    protected byte buffer [];
    // buffer[start .. finish) holds the bytes not yet converted.
    protected int start;
    protected int finish;

    /**
     * Remembers the stream and allocates an 8 KB byte buffer.
     *
     * @param stream the byte source; also passed to Reader as the lock object
     */
    BaseReader(InputStream stream) {
        super(stream);
        instream = stream;
        buffer = new byte[8192];
    }

    /**
     * Returns true when the reader has been closed, when buffered bytes
     * remain, or when the stream reports available bytes.
     */
    public boolean ready() throws IOException {
        if (instream == null) {
            return true;
        }
        if (finish - start > 0) {
            return true;
        }
        return instream.available() != 0;
    }

    /**
     * Closes the underlying stream and releases the buffer; calling it
     * again is a no-op. The caller shouldn't read again.
     */
    public void close() throws IOException {
        if (instream == null) {
            return;
        }
        instream.close();
        finish = 0;
        start = 0;
        buffer = null;
        instream = null;
    }
}
//
// We want this reader, to make the default encoding be as fast
// as we can make it. JDK's "UTF8" (not "UTF-8" till JDK 1.2)
// InputStreamReader works, but 20+% slower speed isn't OK for
// the default/primary encoding.
//
static final class Utf8Reader extends BaseReader {
    // Low (second) half of a UTF-16 surrogate pair produced from a 4-byte
    // UTF-8 sequence, held until the caller's buffer has room for it;
    // zero when nothing is pending.
    private char nextChar;

    Utf8Reader(InputStream stream) {
        super(stream);
    }

    /**
     * Decodes UTF-8 bytes from the underlying stream into UTF-16 chars.
     *
     * @param buf    destination array
     * @param offset index in buf of the first char to store
     * @param len    maximum number of chars to store
     * @return the number of chars stored; 0 if len is not positive;
     *         -1 if the end of input was reached before any char was stored
     * @throws CharConversionException on a malformed, truncated, or
     *         out-of-range sequence
     * @throws IOException if the underlying stream fails
     */
    public int read(char buf [], int offset, int len) throws IOException {
        int i = 0, c = 0;

        if (len <= 0)
            return 0;

        // Consume remaining half of any surrogate pair immediately
        if (nextChar != 0) {
            buf[offset + i++] = nextChar;
            nextChar = 0;
        }

        while (i < len) {
            // stop or read data if needed
            if (finish <= start) {
                if (instream == null) {
                    c = -1;
                    break;
                }
                start = 0;
                finish = instream.read(buffer, 0, buffer.length);
                if (finish <= 0) {
                    this.close();
                    c = -1;
                    break;
                }
            }

            //
            // RFC 2279 describes UTF-8; there are six encodings.
            // Each encoding takes a fixed number of characters
            // (1-6 bytes) and is flagged by a bit pattern in the
            // first byte. The five and six byte-per-character
            // encodings address characters which are disallowed
            // in XML documents, as do some four byte ones.
            //

            //
            // Single byte == ASCII. Common; optimize.
            //
            c = buffer[start] & 0x0ff;
            if ((c & 0x80) == 0x00) {
                // 0x0000 <= c <= 0x007f
                start++;
                buf[offset + i++] = (char) c;
                continue;
            }

            //
            // Multibyte chars -- check offsets optimistically,
            // ditto the "10xx xxxx" format for subsequent bytes.
            // Bytes at or beyond "finish" may be stale data from an
            // earlier fill; the result is discarded below in that case.
            //
            int off = start;

            try {
                // 2 bytes
                if ((buffer[off] & 0x0E0) == 0x0C0) {
                    c = (buffer[off++] & 0x1f) << 6;
                    c += buffer[off++] & 0x3f;

                    // 0x0080 <= c <= 0x07ff

                    // 3 bytes
                } else if ((buffer[off] & 0x0F0) == 0x0E0) {
                    c = (buffer[off++] & 0x0f) << 12;
                    c += (buffer[off++] & 0x3f) << 6;
                    c += buffer[off++] & 0x3f;

                    // 0x0800 <= c <= 0xffff

                    // 4 bytes
                } else if ((buffer[off] & 0x0f8) == 0x0F0) {
                    c = (buffer[off++] & 0x07) << 18;
                    c += (buffer[off++] & 0x3f) << 12;
                    c += (buffer[off++] & 0x3f) << 6;
                    c += buffer[off++] & 0x3f;

                    // 0x0001 0000 <= c <= 0x001f ffff

                    // Unicode supports c <= 0x0010 ffff ...
                    // Only judge the value when all four bytes are real
                    // input (off <= finish); otherwise stale bytes past
                    // "finish" could trigger a spurious error before the
                    // partial-character handling below gets to run.
                    if (off <= finish && c > 0x0010ffff)
                        throw new CharConversionException("UTF-8 encoding of character 0x00"
                                + Integer.toHexString(c)
                                + " can't be converted to Unicode.");

                    // Convert UCS-4 char to surrogate pair (UTF-16)
                    c -= 0x10000;
                    nextChar = (char) (0xDC00 + (c & 0x03ff));
                    c = 0xD800 + (c >> 10);

                    // 5 and 6 byte versions are XML WF errors, but
                    // typically come from mislabeled encodings
                } else
                    throw new CharConversionException("Unconvertible UTF-8 character"
                            + " beginning with 0x"
                            + Integer.toHexString(buffer[start] & 0xff));

            } catch (ArrayIndexOutOfBoundsException e) {
                // off > length && length >= buffer.length
                c = 0;
            }

            //
            // if the buffer held only a partial character,
            // compact it and try to read the rest of the
            // character. worst case involves three
            // single-byte reads -- quite rare.
            //
            if (off > finish) {
                // discard any surrogate half computed from stale bytes;
                // the character is decoded again after the refill
                nextChar = 0;
                System.arraycopy(buffer, start,
                        buffer, 0, finish - start);
                finish -= start;
                start = 0;
                off = instream.read(buffer, finish,
                        buffer.length - finish);
                if (off < 0) {
                    this.close();
                    throw new CharConversionException("Partial UTF-8 char");
                }
                finish += off;
                continue;
            }

            //
            // check the format of the non-initial bytes
            // (this also advances "start" past the character)
            //
            for (start++; start < off; start++) {
                if ((buffer[start] & 0xC0) != 0x80) {
                    this.close();
                    throw new CharConversionException("Malformed UTF-8 char -- "
                            + "is an XML encoding declaration missing?");
                }
            }

            //
            // If this needed a surrogate pair, consume ASAP
            //
            buf[offset + i++] = (char) c;
            if (nextChar != 0 && i < len) {
                buf[offset + i++] = nextChar;
                nextChar = 0;
            }
        }
        if (i > 0)
            return i;
        return (c == -1) ? -1 : 0;
    }
}
//
// We want ASCII and ISO-8859 Readers since they're the most common
// encodings in the US and Europe, and we don't want performance
// regressions for them. They're also easy to implement efficiently,
// since they're bitmask subsets of UNICODE.
//
// XXX haven't benchmarked these readers vs what we get out of JDK.
//
static final class AsciiReader extends BaseReader {
    AsciiReader(InputStream in) {
        super(in);
    }

    /**
     * Converts 7-bit ASCII bytes from the underlying stream into chars.
     *
     * @param buf    destination array
     * @param offset index in buf of the first char to store
     * @param len    maximum number of chars to store
     * @return the number of chars stored; 0 if len is not positive;
     *         -1 if the reader is closed or the end of input was reached
     *         before any char was stored
     * @throws CharConversionException if a byte has its high bit set
     * @throws IOException if the underlying stream fails
     */
    public int read(char buf [], int offset, int len) throws IOException {
        if (instream == null)
            return -1;

        // A zero-length request is not end-of-input. Without this guard a
        // fresh reader (finish == 0) answered -1; Utf8Reader answers 0.
        if (len <= 0)
            return 0;

        int i;
        for (i = 0; i < len; i++) {
            // refill the byte buffer when it has been drained
            if (start >= finish) {
                start = 0;
                finish = instream.read(buffer, 0, buffer.length);
                if (finish <= 0) {
                    this.close();
                    break;
                }
            }
            int c = buffer[start++];
            if ((c & 0x80) != 0)
                throw new CharConversionException("Illegal ASCII character, 0x"
                        + Integer.toHexString(c & 0xff));
            buf[offset + i] = (char) c;
        }

        // With len > 0, nothing stored means the very first refill hit
        // the end of input.
        return (i == 0) ? -1 : i;
    }
}
static final class Iso8859_1Reader extends BaseReader {
    Iso8859_1Reader(InputStream in) {
        super(in);
    }

    /**
     * Converts ISO-8859-1 bytes from the underlying stream into chars;
     * each byte value 0x00-0xff becomes the char with the same value.
     *
     * @param buf    destination array
     * @param offset index in buf of the first char to store
     * @param len    maximum number of chars to store
     * @return the number of chars stored; 0 if len is not positive;
     *         -1 if the reader is closed or the end of input was reached
     *         before any char was stored
     * @throws IOException if the underlying stream fails
     */
    public int read(char buf [], int offset, int len) throws IOException {
        if (instream == null)
            return -1;

        // A zero-length request is not end-of-input. Without this guard a
        // fresh reader (finish == 0) answered -1; Utf8Reader answers 0.
        if (len <= 0)
            return 0;

        int i;
        for (i = 0; i < len; i++) {
            // refill the byte buffer when it has been drained
            if (start >= finish) {
                start = 0;
                finish = instream.read(buffer, 0, buffer.length);
                if (finish <= 0) {
                    this.close();
                    break;
                }
            }
            // mask to undo sign extension of the byte
            buf[offset + i] = (char) (0x0ff & buffer[start++]);
        }

        // With len > 0, nothing stored means the very first refill hit
        // the end of input.
        return (i == 0) ? -1 : i;
    }
}
}