feat(jdk8): move files to new folder to avoid resources compiled.
This commit is contained in:
344
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/DTDEventListener.java
Normal file
344
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/DTDEventListener.java
Normal file
@@ -0,0 +1,344 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
import org.xml.sax.Locator;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
import java.util.EventListener;
|
||||
|
||||
/**
|
||||
* All DTD parsing events are signaled through this interface.
|
||||
*/
|
||||
public interface DTDEventListener extends EventListener {
|
||||
|
||||
public void setDocumentLocator(Locator loc);
|
||||
|
||||
/**
|
||||
* Receive notification of a Processing Instruction.
|
||||
* Processing instructions contain information meaningful
|
||||
* to the application.
|
||||
*
|
||||
* @param target The target of the proceessing instruction
|
||||
* which should have meaning to the application.
|
||||
* @param data The instruction itself which should contain
|
||||
* valid XML characters.
|
||||
* @throws SAXException
|
||||
*/
|
||||
public void processingInstruction(String target, String data)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification of a Notation Declaration.
|
||||
* Notation declarations are used by elements and entities
|
||||
* for identifying embedded non-XML data.
|
||||
*
|
||||
* @param name The notation name, referred to by entities and
|
||||
* elements.
|
||||
* @param publicId The public identifier
|
||||
* @param systemId The system identifier
|
||||
*/
|
||||
public void notationDecl(String name, String publicId, String systemId)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification of an unparsed entity declaration.
|
||||
* Unparsed entities are non-XML data.
|
||||
*
|
||||
* @param name The name of the unparsed entity.
|
||||
* @param publicId The public identifier
|
||||
* @param systemId The system identifier
|
||||
* @param notationName The associated notation
|
||||
*/
|
||||
public void unparsedEntityDecl(String name, String publicId,
|
||||
String systemId, String notationName)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification of a internal general entity declaration event.
|
||||
*
|
||||
* @param name The internal general entity name.
|
||||
* @param value The value of the entity, which may include unexpanded
|
||||
* entity references. Character references will have been
|
||||
* expanded.
|
||||
* @throws SAXException
|
||||
* @see #externalGeneralEntityDecl(String, String, String)
|
||||
*/
|
||||
public void internalGeneralEntityDecl(String name, String value)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification of an external parsed general entity
|
||||
* declaration event.
|
||||
* <p/>
|
||||
* <p>If a system identifier is present, and it is a relative URL, the
|
||||
* parser will have resolved it fully before passing it through this
|
||||
* method to a listener.</p>
|
||||
*
|
||||
* @param name The entity name.
|
||||
* @param publicId The entity's public identifier, or null if
|
||||
* none was given.
|
||||
* @param systemId The entity's system identifier.
|
||||
* @throws SAXException
|
||||
* @see #unparsedEntityDecl(String, String, String, String)
|
||||
*/
|
||||
public void externalGeneralEntityDecl(String name, String publicId,
|
||||
String systemId)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification of a internal parameter entity declaration
|
||||
* event.
|
||||
*
|
||||
* @param name The internal parameter entity name.
|
||||
* @param value The value of the entity, which may include unexpanded
|
||||
* entity references. Character references will have been
|
||||
* expanded.
|
||||
* @throws SAXException
|
||||
* @see #externalParameterEntityDecl(String, String, String)
|
||||
*/
|
||||
public void internalParameterEntityDecl(String name, String value)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification of an external parameter entity declaration
|
||||
* event.
|
||||
* <p/>
|
||||
* <p>If a system identifier is present, and it is a relative URL, the
|
||||
* parser will have resolved it fully before passing it through this
|
||||
* method to a listener.</p>
|
||||
*
|
||||
* @param name The parameter entity name.
|
||||
* @param publicId The entity's public identifier, or null if
|
||||
* none was given.
|
||||
* @param systemId The entity's system identifier.
|
||||
* @throws SAXException
|
||||
* @see #unparsedEntityDecl(String, String, String, String)
|
||||
*/
|
||||
public void externalParameterEntityDecl(String name, String publicId,
|
||||
String systemId)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification of the beginning of the DTD.
|
||||
*
|
||||
* @param in Current input entity.
|
||||
* @see #endDTD()
|
||||
*/
|
||||
public void startDTD(InputEntity in)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification of the end of a DTD. The parser will invoke
|
||||
* this method only once.
|
||||
*
|
||||
* @throws SAXException
|
||||
* @see #startDTD(InputEntity)
|
||||
*/
|
||||
public void endDTD()
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification that a comment has been read.
|
||||
* <p/>
|
||||
* <P> Note that processing instructions are the mechanism designed
|
||||
* to hold information for consumption by applications, not comments.
|
||||
* XML systems may rely on applications being able to access information
|
||||
* found in processing instructions; this is not true of comments, which
|
||||
* are typically discarded.
|
||||
*
|
||||
* @param text the text within the comment delimiters.
|
||||
* @throws SAXException
|
||||
*/
|
||||
public void comment(String text)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification of character data.
|
||||
* <p/>
|
||||
* <p>The Parser will call this method to report each chunk of
|
||||
* character data. SAX parsers may return all contiguous character
|
||||
* data in a single chunk, or they may split it into several
|
||||
* chunks; however, all of the characters in any single event
|
||||
* must come from the same external entity, so that the Locator
|
||||
* provides useful information.</p>
|
||||
* <p/>
|
||||
* <p>The application must not attempt to read from the array
|
||||
* outside of the specified range.</p>
|
||||
* <p/>
|
||||
* <p>Note that some parsers will report whitespace using the
|
||||
* ignorableWhitespace() method rather than this one (validating
|
||||
* parsers must do so).</p>
|
||||
*
|
||||
* @param ch The characters from the DTD.
|
||||
* @param start The start position in the array.
|
||||
* @param length The number of characters to read from the array.
|
||||
* @throws SAXException
|
||||
* @see #ignorableWhitespace(char[], int, int)
|
||||
*/
|
||||
public void characters(char ch[], int start, int length)
|
||||
throws SAXException;
|
||||
|
||||
|
||||
/**
|
||||
* Receive notification of ignorable whitespace in element content.
|
||||
* <p/>
|
||||
* <p>Validating Parsers must use this method to report each chunk
|
||||
* of ignorable whitespace (see the W3C XML 1.0 recommendation,
|
||||
* section 2.10): non-validating parsers may also use this method
|
||||
* if they are capable of parsing and using content models.</p>
|
||||
* <p/>
|
||||
* <p>SAX parsers may return all contiguous whitespace in a single
|
||||
* chunk, or they may split it into several chunks; however, all of
|
||||
* the characters in any single event must come from the same
|
||||
* external entity, so that the Locator provides useful
|
||||
* information.</p>
|
||||
* <p/>
|
||||
* <p>The application must not attempt to read from the array
|
||||
* outside of the specified range.</p>
|
||||
*
|
||||
* @param ch The characters from the DTD.
|
||||
* @param start The start position in the array.
|
||||
* @param length The number of characters to read from the array.
|
||||
* @throws SAXException
|
||||
* @see #characters(char[], int, int)
|
||||
*/
|
||||
public void ignorableWhitespace(char ch[], int start, int length)
|
||||
throws SAXException;
|
||||
|
||||
/**
|
||||
* Receive notification that a CDATA section is beginning. Data in a
|
||||
* CDATA section is is reported through the appropriate event, either
|
||||
* <em>characters()</em> or <em>ignorableWhitespace</em>.
|
||||
*
|
||||
* @throws SAXException
|
||||
* @see #endCDATA()
|
||||
*/
|
||||
public void startCDATA() throws SAXException;
|
||||
|
||||
|
||||
/**
|
||||
* Receive notification that the CDATA section finished.
|
||||
*
|
||||
* @throws SAXException
|
||||
* @see #startCDATA()
|
||||
*/
|
||||
public void endCDATA() throws SAXException;
|
||||
|
||||
|
||||
public void fatalError(SAXParseException e)
|
||||
throws SAXException;
|
||||
|
||||
public void error(SAXParseException e) throws SAXException;
|
||||
|
||||
public void warning(SAXParseException err) throws SAXException;
|
||||
|
||||
public final short CONTENT_MODEL_EMPTY = 0;
|
||||
public final short CONTENT_MODEL_ANY = 1;
|
||||
public final short CONTENT_MODEL_MIXED = 2;
|
||||
public final short CONTENT_MODEL_CHILDREN = 3;
|
||||
|
||||
/**
|
||||
* receives notification that parsing of content model is beginning.
|
||||
*
|
||||
* @param elementName name of the element whose content model is going to be defined.
|
||||
* @param contentModelType {@link #CONTENT_MODEL_EMPTY}
|
||||
* this element has EMPTY content model. This notification
|
||||
* will be immediately followed by the corresponding endContentModel.
|
||||
* {@link #CONTENT_MODEL_ANY}
|
||||
* this element has ANY content model. This notification
|
||||
* will be immediately followed by the corresponding endContentModel.
|
||||
* {@link #CONTENT_MODEL_MIXED}
|
||||
* this element has mixed content model. #PCDATA will not be reported.
|
||||
* each child element will be reported by mixedElement method.
|
||||
* {@link #CONTENT_MODEL_CHILDREN}
|
||||
* this elemen has child content model. The actual content model will
|
||||
* be reported by childElement, startModelGroup, endModelGroup, and
|
||||
* connector methods. Possible call sequences are:
|
||||
* <p/>
|
||||
* START := MODEL_GROUP
|
||||
* MODEL_GROUP := startModelGroup TOKEN (connector TOKEN)* endModelGroup
|
||||
* TOKEN := childElement
|
||||
* | MODEL_GROUP
|
||||
*/
|
||||
public void startContentModel(String elementName, short contentModelType) throws SAXException;
|
||||
|
||||
/**
|
||||
* receives notification that parsing of content model is finished.
|
||||
*/
|
||||
public void endContentModel(String elementName, short contentModelType) throws SAXException;
|
||||
|
||||
public final short USE_NORMAL = 0;
|
||||
public final short USE_IMPLIED = 1;
|
||||
public final short USE_FIXED = 2;
|
||||
public final short USE_REQUIRED = 3;
|
||||
|
||||
/**
|
||||
* For each entry in an ATTLIST declaration,
|
||||
* this event will be fired.
|
||||
* <p/>
|
||||
* <p/>
|
||||
* DTD allows the same attributes to be declared more than
|
||||
* once, and in that case the first one wins. I think
|
||||
* this method will be only fired for the first one,
|
||||
* but I need to check.
|
||||
*/
|
||||
public void attributeDecl(String elementName, String attributeName, String attributeType,
|
||||
String[] enumeration, short attributeUse, String defaultValue) throws SAXException;
|
||||
|
||||
public void childElement(String elementName, short occurence) throws SAXException;
|
||||
|
||||
/**
|
||||
* receives notification of child element of mixed content model.
|
||||
* this method is called for each child element.
|
||||
*
|
||||
* @see #startContentModel(String, short)
|
||||
*/
|
||||
public void mixedElement(String elementName) throws SAXException;
|
||||
|
||||
public void startModelGroup() throws SAXException;
|
||||
|
||||
public void endModelGroup(short occurence) throws SAXException;
|
||||
|
||||
public final short CHOICE = 0;
|
||||
public final short SEQUENCE = 1;
|
||||
|
||||
/**
|
||||
* Connectors in one model group is guaranteed to be the same.
|
||||
* <p/>
|
||||
* <p/>
|
||||
* IOW, you'll never see an event sequence like (a|b,c)
|
||||
*
|
||||
* @return {@link #CHOICE} or {@link #SEQUENCE}.
|
||||
*/
|
||||
public void connector(short connectorType) throws SAXException;
|
||||
|
||||
public final short OCCURENCE_ZERO_OR_MORE = 0;
|
||||
public final short OCCURENCE_ONE_OR_MORE = 1;
|
||||
public final short OCCURENCE_ZERO_OR_ONE = 2;
|
||||
public final short OCCURENCE_ONCE = 3;
|
||||
}
|
||||
120
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/DTDHandlerBase.java
Normal file
120
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/DTDHandlerBase.java
Normal file
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
import org.xml.sax.Locator;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
/**
|
||||
* do-nothing implementation of DTDEventHandler.
|
||||
*/
|
||||
public class DTDHandlerBase implements DTDEventListener {
|
||||
|
||||
public void processingInstruction(String target, String data)
|
||||
throws SAXException {
|
||||
}
|
||||
|
||||
public void setDocumentLocator(Locator loc) {
|
||||
}
|
||||
|
||||
public void fatalError(SAXParseException e) throws SAXException {
|
||||
throw e;
|
||||
}
|
||||
|
||||
public void error(SAXParseException e) throws SAXException {
|
||||
throw e;
|
||||
}
|
||||
|
||||
public void warning(SAXParseException err) throws SAXException {
|
||||
}
|
||||
|
||||
public void notationDecl(String name, String publicId, String systemId) throws SAXException {
|
||||
}
|
||||
|
||||
public void unparsedEntityDecl(String name, String publicId,
|
||||
String systemId, String notationName) throws SAXException {
|
||||
}
|
||||
|
||||
public void endDTD() throws SAXException {
|
||||
}
|
||||
|
||||
public void externalGeneralEntityDecl(String n, String p, String s) throws SAXException {
|
||||
}
|
||||
|
||||
public void internalGeneralEntityDecl(String n, String v) throws SAXException {
|
||||
}
|
||||
|
||||
public void externalParameterEntityDecl(String n, String p, String s) throws SAXException {
|
||||
}
|
||||
|
||||
public void internalParameterEntityDecl(String n, String v) throws SAXException {
|
||||
}
|
||||
|
||||
public void startDTD(InputEntity in) throws SAXException {
|
||||
}
|
||||
|
||||
public void comment(String n) throws SAXException {
|
||||
}
|
||||
|
||||
public void characters(char ch[], int start, int length) throws SAXException {
|
||||
}
|
||||
|
||||
public void ignorableWhitespace(char ch[], int start, int length) throws SAXException {
|
||||
}
|
||||
|
||||
public void startCDATA() throws SAXException {
|
||||
}
|
||||
|
||||
public void endCDATA() throws SAXException {
|
||||
}
|
||||
|
||||
|
||||
public void startContentModel(String elementName, short contentModelType) throws SAXException {
|
||||
}
|
||||
|
||||
public void endContentModel(String elementName, short contentModelType) throws SAXException {
|
||||
}
|
||||
|
||||
public void attributeDecl(String elementName, String attributeName, String attributeType,
|
||||
String[] enumeration, short attributeUse, String defaultValue) throws SAXException {
|
||||
}
|
||||
|
||||
public void childElement(String elementName, short occurence) throws SAXException {
|
||||
}
|
||||
|
||||
public void mixedElement(String elementName) throws SAXException {
|
||||
}
|
||||
|
||||
public void startModelGroup() throws SAXException {
|
||||
}
|
||||
|
||||
public void endModelGroup(short occurence) throws SAXException {
|
||||
}
|
||||
|
||||
public void connector(short connectorType) throws SAXException {
|
||||
}
|
||||
}
|
||||
2350
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/DTDParser.java
Normal file
2350
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/DTDParser.java
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
 * Package-private {@link IOException} subtype with no state or message
 * of its own; it only adds a distinct exception type.
 */
class EndOfInputException extends IOException {
}
|
||||
40
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/EntityDecl.java
Normal file
40
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/EntityDecl.java
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
/**
 * Base class for entity declarations as used by the parser.
 *
 * @author David Brownell
 * @author Janet Koenig
 * @version 1.3 00/02/24
 */
class EntityDecl {
    /** Entity name, as in {@code <!ENTITY name ... >}. */
    String name;

    /** Flag named for "declared in the internal subset"; defaults to false. */
    boolean isFromInternalSubset;

    /** Flag named for "parameter entity"; defaults to false. */
    boolean isPE;
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
import org.xml.sax.EntityResolver;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
|
||||
final class ExternalEntity extends EntityDecl {
|
||||
String systemId; // resolved URI (not relative)
|
||||
String publicId; // "-//xyz//....//en"
|
||||
String notation;
|
||||
|
||||
public ExternalEntity(InputEntity in) {
|
||||
}
|
||||
|
||||
public InputSource getInputSource(EntityResolver r)
|
||||
throws IOException, SAXException {
|
||||
|
||||
InputSource retval;
|
||||
|
||||
retval = r.resolveEntity(publicId, systemId);
|
||||
// SAX sez if null is returned, use the URI directly
|
||||
if (retval == null)
|
||||
retval = Resolver.createInputSource(new URL(systemId), false);
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
990
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/InputEntity.java
Normal file
990
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/InputEntity.java
Normal file
@@ -0,0 +1,990 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
import java.io.CharConversionException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URL;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* This is how the parser talks to its input entities, of all kinds.
|
||||
* The entities are in a stack.
|
||||
* <p/>
|
||||
* <P> For internal entities, the character arrays are referenced here,
|
||||
* and read from as needed (they're read-only). External entities have
|
||||
* mutable buffers, that are read into as needed.
|
||||
* <p/>
|
||||
* <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
|
||||
* whether it's in an external (parsed) entity or not. The XML 1.0 spec
|
||||
* is inconsistent in explaining EOL handling; this is the sensible way.
|
||||
*
|
||||
* @author David Brownell
|
||||
* @author Janet Koenig
|
||||
* @version 1.4 00/08/05
|
||||
*/
|
||||
public class InputEntity {
|
||||
// Window of unread characters in buf: buf[start] is the next character
// to hand out, and finish is the end of the valid data (exclusive).
private int start, finish;
private char[] buf;
private int lineNumber = 1;
private boolean returnedFirstHalf = false;
private boolean maybeInCRLF = false;

// name of entity (never main document or unnamed DTD PE)
private String name;

// next entity on the stack; null for the toplevel document
private InputEntity next;

// for system and public IDs in diagnostics
private InputSource input;

// this is a buffer; some buffers can be replenished.
// reader is null for internal entities.
private Reader reader;
private boolean isClosed;

private DTDEventListener errHandler;
private Locale locale;

private StringBuffer rememberedText;
private int startRemember;

// record if this is a PE, so endParsedEntity won't be called
private boolean isPE;

// InputStreamReader throws an internal per-read exception, so
// we minimize reads.  We also add a byte to compensate for the
// "ungetc" byte we keep, so that our downstream reads are as
// nicely sized as we can make them.
private static final int BUFSIZ = 8 * 1024 + 1;

private static final char[] newline = {'\n'};
|
||||
|
||||
public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
|
||||
InputEntity retval = new InputEntity();
|
||||
retval.errHandler = h;
|
||||
retval.locale = l;
|
||||
return retval;
|
||||
}
|
||||
|
||||
/** Private: instances are obtained through {@link #getInputEntity}. */
private InputEntity() {
}
|
||||
|
||||
//
|
||||
// predicate: return true iff this is an internal entity reader,
|
||||
// and so may safely be "popped" as needed. external entities have
|
||||
// syntax to uphold; internal parameter entities have at most validity
|
||||
// constraints to monitor. also, only external entities get decent
|
||||
// location diagnostics.
|
||||
//
|
||||
public boolean isInternal() {
|
||||
return reader == null;
|
||||
}
|
||||
|
||||
//
|
||||
// predicate: return true iff this is the toplevel document
|
||||
//
|
||||
public boolean isDocument() {
|
||||
return next == null;
|
||||
}
|
||||
|
||||
//
|
||||
// predicate: return true iff this is a PE expansion (so that
|
||||
// LexicalEventListner.endParsedEntity won't be called)
|
||||
//
|
||||
public boolean isParameterEntity() {
|
||||
return isPE;
|
||||
}
|
||||
|
||||
//
|
||||
// return name of current entity
|
||||
//
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
//
|
||||
// use this for an external parsed entity
|
||||
//
|
||||
public void init(InputSource in, String name, InputEntity stack,
|
||||
boolean isPE)
|
||||
throws IOException, SAXException {
|
||||
|
||||
input = in;
|
||||
this.isPE = isPE;
|
||||
reader = in.getCharacterStream();
|
||||
|
||||
if (reader == null) {
|
||||
InputStream bytes = in.getByteStream();
|
||||
|
||||
if (bytes == null)
|
||||
reader = XmlReader.createReader(new URL(in.getSystemId())
|
||||
.openStream());
|
||||
else if (in.getEncoding() != null)
|
||||
reader = XmlReader.createReader(in.getByteStream(),
|
||||
in.getEncoding());
|
||||
else
|
||||
reader = XmlReader.createReader(in.getByteStream());
|
||||
}
|
||||
next = stack;
|
||||
buf = new char[BUFSIZ];
|
||||
this.name = name;
|
||||
checkRecursion(stack);
|
||||
}
|
||||
|
||||
//
|
||||
// use this for an internal parsed entity; buffer is readonly
|
||||
//
|
||||
public void init(char b [], String name, InputEntity stack, boolean isPE)
|
||||
throws SAXException {
|
||||
|
||||
next = stack;
|
||||
buf = b;
|
||||
finish = b.length;
|
||||
this.name = name;
|
||||
this.isPE = isPE;
|
||||
checkRecursion(stack);
|
||||
}
|
||||
|
||||
private void checkRecursion(InputEntity stack)
|
||||
throws SAXException {
|
||||
|
||||
if (stack == null)
|
||||
return;
|
||||
for (stack = stack.next; stack != null; stack = stack.next) {
|
||||
if (stack.name != null && stack.name.equals(name))
|
||||
fatal("P-069", new Object[]{name});
|
||||
}
|
||||
}
|
||||
|
||||
public InputEntity pop() throws IOException {
|
||||
|
||||
// caller has ensured there's nothing left to read
|
||||
close();
|
||||
return next;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true iff there's no more data to consume ...
|
||||
*/
|
||||
public boolean isEOF() throws IOException, SAXException {
|
||||
|
||||
// called to ensure WF-ness of included entities and to pop
|
||||
// input entities appropriately ... EOF is not always legal.
|
||||
if (start >= finish) {
|
||||
fillbuf();
|
||||
return start >= finish;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the name of the encoding in use, else null; the name
|
||||
* returned is in as standard a form as we can get.
|
||||
*/
|
||||
public String getEncoding() {
|
||||
|
||||
if (reader == null)
|
||||
return null;
|
||||
if (reader instanceof XmlReader)
|
||||
return ((XmlReader) reader).getEncoding();
|
||||
|
||||
// XXX prefer a java2std() call to normalize names...
|
||||
|
||||
if (reader instanceof InputStreamReader)
|
||||
return ((InputStreamReader) reader).getEncoding();
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* returns the next name char, or NUL ... faster than getc(),
|
||||
* and the common "name or nmtoken must be next" case won't
|
||||
* need ungetc().
|
||||
*/
|
||||
public char getNameChar() throws IOException, SAXException {
|
||||
|
||||
if (finish <= start)
|
||||
fillbuf();
|
||||
if (finish > start) {
|
||||
char c = buf[start++];
|
||||
if (XmlChars.isNameChar(c))
|
||||
return c;
|
||||
start--;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Gets the next Java character -- which might be one half of an XML
 * text character represented by a surrogate pair.
 * <p>
 * For external entities a CR or CRLF is returned as a single LF and the
 * line counter is advanced.  Characters that are not legal XML
 * characters are reported as fatal errors (P-070 for a high surrogate
 * not followed by a low surrogate, P-071 otherwise).
 *
 * @return the next character
 * @throws EndOfInputException when no more data is available
 */
public char getc() throws IOException, SAXException {

    if (finish <= start)
        fillbuf();
    if (finish > start) {
        char c = buf[start++];

        // [2] Char ::= #x0009 | #x000A | #x000D
        //            | [#x0020-#xD7FF]
        //            | [#xE000-#xFFFD]
        // plus surrogate _pairs_ representing [#x10000-#x10ffff]

        // the previous call handed out a high surrogate, so this
        // character has to be the matching low surrogate
        if (returnedFirstHalf) {
            if (c >= 0xdc00 && c <= 0xdfff) {
                returnedFirstHalf = false;
                return c;
            } else
                fatal("P-070", new Object[]{Integer.toHexString(c)});
        }

        // fast path: ordinary legal characters other than line ends
        if ((c >= 0x0020 && c <= 0xD7FF)
                || c == 0x0009
                // no surrogates!
                || (c >= 0xE000 && c <= 0xFFFD))
            return c;

        //
        // CRLF and CR are both line ends; map both to LF, and
        // keep line count correct.
        //
        else if (c == '\r' && !isInternal()) {
            // flag tells the nested call not to count the LF of a CRLF
            // NOTE(review): if the nested getc() throws (e.g. CR is the
            // last character of the input) the flag is left set -- confirm
            // whether callers rely on that.
            maybeInCRLF = true;
            c = getc();
            if (c != '\n')
                ungetc();
            maybeInCRLF = false;

            lineNumber++;
            return '\n';

        } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
            if (!isInternal() && !maybeInCRLF)
                lineNumber++;
            return c;
        }

        // surrogates...
        // a high surrogate is returned now; the flag makes the next call
        // insist on a low surrogate
        if (c >= 0xd800 && c < 0xdc00) {
            returnedFirstHalf = true;
            return c;
        }

        // anything else is not a legal XML character; fatal() throws
        fatal("P-071", new Object[]{Integer.toHexString(c)});
    }
    throw new EndOfInputException();
}
|
||||
|
||||
|
||||
/**
|
||||
* lookahead one character
|
||||
*/
|
||||
public boolean peekc(char c) throws IOException, SAXException {
|
||||
|
||||
if (finish <= start)
|
||||
fillbuf();
|
||||
if (finish > start) {
|
||||
if (buf[start] == c) {
|
||||
start++;
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* two character pushback is guaranteed
|
||||
*/
|
||||
public void ungetc() {
|
||||
|
||||
if (start == 0)
|
||||
throw new InternalError("ungetc");
|
||||
start--;
|
||||
|
||||
if (buf[start] == '\n' || buf[start] == '\r') {
|
||||
if (!isInternal())
|
||||
lineNumber--;
|
||||
} else if (returnedFirstHalf)
|
||||
returnedFirstHalf = false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* optional grammatical whitespace (discarded)
|
||||
*/
|
||||
public boolean maybeWhitespace()
|
||||
throws IOException, SAXException {
|
||||
|
||||
char c;
|
||||
boolean isSpace = false;
|
||||
boolean sawCR = false;
|
||||
|
||||
// [3] S ::= #20 | #09 | #0D | #0A
|
||||
for (; ;) {
|
||||
if (finish <= start)
|
||||
fillbuf();
|
||||
if (finish <= start)
|
||||
return isSpace;
|
||||
|
||||
c = buf[start++];
|
||||
if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
|
||||
isSpace = true;
|
||||
|
||||
//
|
||||
// CR, LF are line endings ... CLRF is one, not two!
|
||||
//
|
||||
if ((c == '\n' || c == '\r') && !isInternal()) {
|
||||
if (!(c == '\n' && sawCR)) {
|
||||
lineNumber++;
|
||||
sawCR = false;
|
||||
}
|
||||
if (c == '\r')
|
||||
sawCR = true;
|
||||
}
|
||||
} else {
|
||||
start--;
|
||||
return isSpace;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Delivers normal character content; whitespace in markup may be
 * handled specially if the parser uses the content model.
 * <p>
 * Content terminates with the markup delimiter characters, namely
 * ampersand (&amp;) and left angle bracket (&lt;), or at the end of
 * the input.
 * <p>
 * The handler's characters() method is called on all the content
 * found, directly out of the read buffer; for external entities a CR
 * or CRLF is reported as a single newline.
 *
 * @param docHandler receives the character data
 * @return true if characters() was invoked at least once
 * @throws SAXException for "]]>" in content (P-072), an unpaired
 *         surrogate at end of input (P-081), or an illegal character
 *         (P-071)
 */
public boolean parsedContent(DTDEventListener docHandler
                             /*ElementValidator validator*/)
        throws IOException, SAXException {

    // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

    int first;        // first char to return
    int last;        // last char to return
    boolean sawContent;    // sent any chars?
    char c;

    // deliver right out of the buffer, until delimiter, EOF,
    // or error, refilling as we go
    for (first = last = start, sawContent = false; ; last++) {

        // buffer empty?
        if (last >= finish) {
            // flush what has been scanned so far before refilling
            if (last > first) {
                // validator.text ();
                docHandler.characters(buf, first, last - first);
                sawContent = true;
                start = last;
            }
            if (isEOF())    // calls fillbuf
                return sawContent;
            // fillbuf may have moved the data; restart from the new position
            first = start;
            last = first - 1;    // incremented in loop
            continue;
        }

        c = buf[last];

        //
        // pass most chars through ASAP; this inlines the code of
        // [2] !XmlChars.isChar(c) leaving only characters needing
        // special treatment ... line ends, surrogates, and:
        //    0x0026 == '&'
        //    0x003C == '<'
        //    0x005D == ']'
        // Comparisons ordered for speed on 'typical' text
        //
        if ((c > 0x005D && c <= 0xD7FF)    // a-z and more
                || (c < 0x0026 && c >= 0x0020)    // space & punct
                || (c > 0x003C && c < 0x005D)    // A-Z & punct
                || (c > 0x0026 && c < 0x003C)    // 0-9 & punct
                || c == 0x0009
                || (c >= 0xE000 && c <= 0xFFFD)
        )
            continue;

        // terminate on markup delimiters
        if (c == '<' || c == '&')
            break;

        // count lines
        if (c == '\n') {
            if (!isInternal())
                lineNumber++;
            continue;
        }

        // External entities get CR, CRLF --> LF mapping
        // Internal ones got it already, and we can't repeat
        // else we break char ref handling!!
        if (c == '\r') {
            if (isInternal())
                continue;

            // report the text before the CR, then a newline in its place
            docHandler.characters(buf, first, last - first);
            docHandler.characters(newline, 0, 1);
            sawContent = true;
            lineNumber++;
            // swallow the LF of a CRLF pair when it is already buffered
            if (finish > (last + 1)) {
                if (buf[last + 1] == '\n')
                    last++;
            } else {    // CR at end of buffer
                // XXX case not yet handled:  CRLF here will look like two lines
            }
            first = start = last + 1;
            continue;
        }

        // ']]>' is a WF error -- must fail if we see it
        if (c == ']') {
            switch (finish - last) {
                // for suspicious end-of-buffer cases, get more data
                // into the buffer to rule out this sequence.
                case 2:
                    if (buf[last + 1] != ']')
                        continue;
                    // FALLTHROUGH

                case 1:
                    // nothing more can be read, so "]]>" cannot follow
                    if (reader == null || isClosed)
                        continue;
                    if (last == first)
                        throw new InternalError("fillbuf");
                    last--;
                    if (last > first) {
                        // validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        start = last;
                    }
                    fillbuf();
                    first = last = start;
                    continue;

                    // otherwise any "]]>" would be buffered, and we can
                    // see right away if that's what we have
                default:
                    if (buf[last + 1] == ']' && buf[last + 2] == '>')
                        fatal("P-072", null);
                    continue;
            }
        }

        // correctly paired surrogates are OK
        if (c >= 0xd800 && c <= 0xdfff) {
            // the second half is not buffered yet: flush and refill
            if ((last + 1) >= finish) {
                if (last > first) {
                    // validator.text ();
                    docHandler.characters(buf, first, last - first);
                    sawContent = true;
                    start = last + 1;
                }
                if (isEOF()) {    // calls fillbuf
                    fatal("P-081",
                            new Object[]{Integer.toHexString(c)});
                }
                first = start;
                last = first;
                continue;
            }
            if (checkSurrogatePair(last))
                last++;
            else {
                last--;
                // also terminate on surrogate pair oddities
                break;
            }
            continue;
        }

        // not a legal XML character; fatal() throws
        fatal("P-071", new Object[]{Integer.toHexString(c)});
    }

    // stopped at a delimiter: report whatever precedes it
    if (last == first)
        return sawContent;
    // validator.text ();
    docHandler.characters(buf, first, last - first);
    start = last;
    return true;
}
|
||||
|
||||
|
||||
/**
 * CDATA -- character data, terminated by "]]>" and optionally
 * including unescaped markup delimiters (ampersand and left angle
 * bracket).  This should otherwise be exactly like character data,
 * modulo differences in error report details.
 * <p>
 * The handler's startCDATA() and endCDATA() methods bracket the
 * section, and its characters() or ignorableWhitespace() methods are
 * invoked on all the character data found.
 *
 * @param docHandler gets callbacks for character data
 * @param ignorableWhitespace if true, runs consisting only of
 *        whitespace are reported using docHandler.ignorableWhitespace()
 *        rather than characters()
 * @param whitespaceInvalidMessage if non-null, the ID of a message
 *        reported through the error handler each time such whitespace
 *        is reported as ignorable
 * @return false if the input does not continue with "![CDATA[" (in
 *         which case nothing is consumed by the comparison); true once
 *         the closing "]]>" has been consumed
 * @throws SAXException for an illegal character (P-071) or when the
 *         input ends before "]]>" is seen (P-073)
 */
public boolean unparsedContent(DTDEventListener docHandler,
                               /*ElementValidator validator,*/
                               boolean ignorableWhitespace,
                               String whitespaceInvalidMessage)
        throws IOException, SAXException {

    // [18] CDSect ::= CDStart CData CDEnd
    // [19] CDStart ::= '<![CDATA['
    // [20] CData ::= (Char* - (Char* ']]>' Char*))
    // [21] CDEnd ::= ']]>'

    // caller peeked the leading '<' ...
    if (!peek("![CDATA[", null))
        return false;
    docHandler.startCDATA();

    // only a literal ']]>' stops this ...
    int last;

    // each pass scans what is buffered, reports it, then refills
    for (; ;) {        // until ']]>' seen
        boolean done = false;
        char c;

        // don't report ignorable whitespace as "text" for
        // validation purposes.
        boolean white = ignorableWhitespace;

        for (last = start; last < finish; last++) {
            c = buf[last];

            //
            // Reject illegal characters.
            //
            if (!XmlChars.isChar(c)) {
                white = false;
                if (c >= 0xd800 && c <= 0xdfff) {
                    if (checkSurrogatePair(last)) {
                        last++;
                        continue;
                    } else {
                        // second half not buffered yet: stop scanning here
                        last--;
                        break;
                    }
                }
                fatal("P-071", new Object[]
                        {Integer.toHexString(buf[last])});
            }
            if (c == '\n') {
                if (!isInternal())
                    lineNumber++;
                continue;
            }
            if (c == '\r') {
                // As above, we can't repeat CR/CRLF --> LF mapping
                if (isInternal())
                    continue;

                // report the text before the CR, then a newline in its place
                if (white) {
                    if (whitespaceInvalidMessage != null)
                        errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                                whitespaceInvalidMessage), null));
                    docHandler.ignorableWhitespace(buf, start,
                            last - start);
                    docHandler.ignorableWhitespace(newline, 0, 1);
                } else {
                    // validator.text ();
                    docHandler.characters(buf, start, last - start);
                    docHandler.characters(newline, 0, 1);
                }
                lineNumber++;
                // swallow the LF of a CRLF pair when it is already buffered
                if (finish > (last + 1)) {
                    if (buf[last + 1] == '\n')
                        last++;
                } else {    // CR at end of buffer
                    // XXX case not yet handled ... as above
                }
                start = last + 1;
                continue;
            }
            if (c != ']') {
                if (c != ' ' && c != '\t')
                    white = false;
                continue;
            }

            // at a ']': the terminator can only be checked when the two
            // following characters are buffered
            if ((last + 2) < finish) {
                if (buf[last + 1] == ']' && buf[last + 2] == '>') {
                    done = true;
                    break;
                }
                white = false;
                continue;
            } else {
                //last--;
                break;
            }
        }

        // report everything scanned in this pass
        if (white) {
            if (whitespaceInvalidMessage != null)
                errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                        whitespaceInvalidMessage), null));
            docHandler.ignorableWhitespace(buf, start, last - start);
        } else {
            // validator.text ();
            docHandler.characters(buf, start, last - start);
        }
        if (done) {
            // step over the three characters of "]]>"
            start = last + 3;
            break;
        }
        start = last;
        if (isEOF())
            fatal("P-073", null);
    }
    docHandler.endCDATA();
    return true;
}
|
||||
|
||||
// return false to backstep at end of buffer)
|
||||
private boolean checkSurrogatePair(int offset)
|
||||
throws SAXException {
|
||||
|
||||
if ((offset + 1) >= finish)
|
||||
return false;
|
||||
|
||||
char c1 = buf[offset++];
|
||||
char c2 = buf[offset];
|
||||
|
||||
if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff))
|
||||
return true;
|
||||
fatal("P-074", new Object[]{
|
||||
Integer.toHexString(c1 & 0x0ffff),
|
||||
Integer.toHexString(c2 & 0x0ffff)
|
||||
});
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Consumes whitespace in markup and flags it to the application as
 * discardable.
 * <p>
 * The handler's ignorableWhitespace() method is called on all the
 * whitespace found; a CR (together with a directly following LF when
 * that LF is already buffered) is reported as a single newline.
 *
 * @param handler receives the whitespace
 * @return true if at least one whitespace character was consumed
 */
public boolean ignorableWhitespace(DTDEventListener handler)
        throws IOException, SAXException {

    char c;
    boolean isSpace = false;
    int first;        // start of the whitespace run not yet reported

    // [3] S ::= #20 | #09 | #0D | #0A
    for (first = start; ;) {
        if (finish <= start) {
            // report the pending run before the buffer is refilled
            if (isSpace)
                handler.ignorableWhitespace(buf, first, start - first);
            fillbuf();
            first = start;
        }
        if (finish <= start)
            return isSpace;

        c = buf[start++];
        switch (c) {
            case '\n':
                if (!isInternal())
                    lineNumber++;
// XXX handles Macintosh line endings wrong
                // fallthrough
            case 0x09:
            case 0x20:
                isSpace = true;
                continue;

            case '\r':
                isSpace = true;
                if (!isInternal())
                    lineNumber++;
                // report the run before the CR, then a newline in its place
                handler.ignorableWhitespace(buf, first,
                        (start - 1) - first);
                handler.ignorableWhitespace(newline, 0, 1);
                // swallow the LF of a CRLF pair when it is already buffered
                if (start < finish && buf[start] == '\n')
                    ++start;
                first = start;
                continue;

            default:
                // not whitespace: push it back and report what was seen
                ungetc();
                if (isSpace)
                    handler.ignorableWhitespace(buf, first, start - first);
                return isSpace;
        }
    }
}
|
||||
|
||||
/**
|
||||
* returns false iff 'next' string isn't as provided,
|
||||
* else skips that text and returns true.
|
||||
* <p/>
|
||||
* <P> NOTE: two alternative string representations are
|
||||
* both passed in, since one is faster.
|
||||
*/
|
||||
public boolean peek(String next, char chars [])
|
||||
throws IOException, SAXException {
|
||||
|
||||
int len;
|
||||
int i;
|
||||
|
||||
if (chars != null)
|
||||
len = chars.length;
|
||||
else
|
||||
len = next.length();
|
||||
|
||||
// buffer should hold the whole thing ... give it a
|
||||
// chance for the end-of-buffer case and cope with EOF
|
||||
// by letting fillbuf compact and fill
|
||||
if (finish <= start || (finish - start) < len)
|
||||
fillbuf();
|
||||
|
||||
// can't peek past EOF
|
||||
if (finish <= start)
|
||||
return false;
|
||||
|
||||
// compare the string; consume iff it matches
|
||||
if (chars != null) {
|
||||
for (i = 0; i < len && (start + i) < finish; i++) {
|
||||
if (buf[start + i] != chars[i])
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < len && (start + i) < finish; i++) {
|
||||
if (buf[start + i] != next.charAt(i))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// if the first fillbuf didn't get enough data, give
|
||||
// fillbuf another chance to read
|
||||
if (i < len) {
|
||||
if (reader == null || isClosed)
|
||||
return false;
|
||||
|
||||
//
|
||||
// This diagnostic "knows" that the only way big strings would
|
||||
// fail to be peeked is where it's a symbol ... e.g. for an
|
||||
// </EndTag> construct. That knowledge could also be applied
|
||||
// to get rid of the symbol length constraint, since having
|
||||
// the wrong symbol is a fatal error anyway ...
|
||||
//
|
||||
if (len > buf.length)
|
||||
fatal("P-077", new Object[]{new Integer(buf.length)});
|
||||
|
||||
fillbuf();
|
||||
return peek(next, chars);
|
||||
}
|
||||
|
||||
start += len;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Support for reporting the internal DTD subset, so <!DOCTYPE...>
|
||||
// declarations can be recreated. This is collected as a single
|
||||
// string; such subsets are normally small, and many applications
|
||||
// don't even care about this.
|
||||
//
|
||||
public void startRemembering() {
|
||||
|
||||
if (startRemember != 0)
|
||||
throw new InternalError();
|
||||
startRemember = start;
|
||||
}
|
||||
|
||||
public String rememberText() {
|
||||
|
||||
String retval;
|
||||
|
||||
// If the internal subset crossed a buffer boundary, we
|
||||
// created a temporary buffer.
|
||||
if (rememberedText != null) {
|
||||
rememberedText.append(buf, startRemember,
|
||||
start - startRemember);
|
||||
retval = rememberedText.toString();
|
||||
} else
|
||||
retval = new String(buf, startRemember,
|
||||
start - startRemember);
|
||||
|
||||
startRemember = 0;
|
||||
rememberedText = null;
|
||||
return retval;
|
||||
}
|
||||
|
||||
private InputEntity getTopEntity() {
|
||||
|
||||
InputEntity current = this;
|
||||
|
||||
// don't report locations within internal entities!
|
||||
|
||||
while (current != null && current.input == null)
|
||||
current = current.next;
|
||||
return current == null ? this : current;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the public ID of this input source, if known
|
||||
*/
|
||||
public String getPublicId() {
|
||||
|
||||
InputEntity where = getTopEntity();
|
||||
if (where == this)
|
||||
return input.getPublicId();
|
||||
return where.getPublicId();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the system ID of this input source, if known
|
||||
*/
|
||||
public String getSystemId() {
|
||||
|
||||
InputEntity where = getTopEntity();
|
||||
if (where == this)
|
||||
return input.getSystemId();
|
||||
return where.getSystemId();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current line number in this input source
|
||||
*/
|
||||
public int getLineNumber() {
|
||||
|
||||
InputEntity where = getTopEntity();
|
||||
if (where == this)
|
||||
return lineNumber;
|
||||
return where.getLineNumber();
|
||||
}
|
||||
|
||||
/**
|
||||
* returns -1; maintaining column numbers hurts performance
|
||||
*/
|
||||
public int getColumnNumber() {
|
||||
|
||||
return -1; // not maintained (speed)
|
||||
}
|
||||
|
||||
|
||||
//
// Refills the read buffer from the reader: the unread characters are
// moved to the front of the buffer and the freed space is filled with
// new data.  Does nothing for fixed (internal) buffers and for closed
// streams.  When the reader reports end of input, the entity is closed.
//
// n.b. for non-EOF end-of-buffer cases, reader should return
// at least a handful of bytes so various lookaheads behave.
//
// two character pushback exists except at first; characters
// represented by surrogate pairs can't be pushed back (they'd
// only be in character data anyway).
//
// DTD exception thrown on char conversion problems; line number
// will be low, as a rule.
//
private void fillbuf() throws IOException, SAXException {

    // don't touch fixed buffers, that'll usually
    // change entity values (and isn't needed anyway)
    // likewise, ignore closed streams
    if (reader == null || isClosed)
        return;

    // if remembering DTD text, copy!
    // (the buffer contents are about to be moved, so what has been
    // read since the remembered position is saved off first)
    if (startRemember != 0) {
        if (rememberedText == null)
            rememberedText = new StringBuffer(buf.length);
        rememberedText.append(buf, startRemember,
                start - startRemember);
    }

    // when data has been read before, keep the character just before
    // the read position so it can still be pushed back
    boolean extra = (finish > 0) && (start > 0);
    int len;

    if (extra)        // extra pushback
        start--;
    len = finish - start;

    // move the retained characters to the front of the buffer
    System.arraycopy(buf, start, buf, 0, len);
    start = 0;
    finish = len;

    try {
        // fill whatever space is left after the retained characters
        len = buf.length - len;
        len = reader.read(buf, finish, len);
    } catch (UnsupportedEncodingException e) {
        fatal("P-075", new Object[]{e.getMessage()});
    } catch (CharConversionException e) {
        fatal("P-076", new Object[]{e.getMessage()});
    }
    // a negative count from the reader means end of input
    if (len >= 0)
        finish += len;
    else
        close();
    if (extra)        // extra pushback
        start++;

    // remembered text now continues right after the pushback character
    if (startRemember != 0)
        // assert extra == true
        startRemember = 1;
}
|
||||
|
||||
public void close() {
|
||||
|
||||
try {
|
||||
if (reader != null && !isClosed)
|
||||
reader.close();
|
||||
isClosed = true;
|
||||
} catch (IOException e) {
|
||||
/* NOTHING */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void fatal(String messageId, Object params [])
|
||||
throws SAXException {
|
||||
|
||||
SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null);
|
||||
|
||||
// not continuable ... e.g. WF errors
|
||||
close();
|
||||
errHandler.fatalError(x);
|
||||
throw x;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
|
||||
final class InternalEntity extends EntityDecl {
|
||||
InternalEntity(String name, char value []) {
|
||||
this.name = name;
|
||||
this.buf = value;
|
||||
}
|
||||
|
||||
char buf [];
|
||||
}
|
||||
516
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/MessageCatalog.java
Normal file
516
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/MessageCatalog.java
Normal file
@@ -0,0 +1,516 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.text.FieldPosition;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.Hashtable;
|
||||
import java.util.Locale;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.ResourceBundle;
|
||||
|
||||
|
||||
/**
|
||||
* This class provides support for multi-language string lookup, as needed
|
||||
* to localize messages from applications supporting multiple languages
|
||||
* at the same time. One class of such applications is network services,
|
||||
* such as HTTP servers, which talk to clients who may not be from the
|
||||
* same locale as the server. This class supports a form of negotiation
|
||||
* for the language used in presenting a message from some package, where
|
||||
* both user (client) preferences and application (server) support are
|
||||
* accounted for when choosing locales and formatting messages.
|
||||
* <p/>
|
||||
* <P> Each package should have a singleton package-private message catalog
|
||||
* class. This ensures that the correct class loader will always be used to
|
||||
* access message resources, and minimizes use of memory: <PRE>
|
||||
* package <em>some.package</em>;
|
||||
* <p/>
|
||||
* // "foo" might be public
|
||||
* class foo {
|
||||
* ...
|
||||
* // package private
|
||||
* static final Catalog messages = new Catalog ();
|
||||
* static final class Catalog extends MessageCatalog {
|
||||
* Catalog () { super (Catalog.class); }
|
||||
* }
|
||||
* ...
|
||||
* }
|
||||
* </PRE>
|
||||
* <p/>
|
||||
* <P> Messages for a known client could be generated using code
|
||||
* something like this: <PRE>
|
||||
* String clientLanguages [];
|
||||
* Locale clientLocale;
|
||||
* String clientMessage;
|
||||
* <p/>
|
||||
* // client languages will probably be provided by client,
|
||||
* // e.g. by an HTTP/1.1 "Accept-Language" header.
|
||||
* clientLanguages = new String [] { "en-ca", "fr-ca", "ja", "zh" };
|
||||
* clientLocale = foo.messages.chooseLocale (clientLanguages);
|
||||
* clientMessage = foo.messages.getMessage (clientLocale,
|
||||
* "fileCount",
|
||||
* new Object [] { new Integer (numberOfFiles) }
|
||||
* );
|
||||
* </PRE>
|
||||
* <p/>
|
||||
* <P> At this time, this class does not include functionality permitting
|
||||
* messages to be passed around and localized after-the-fact. The consequence
|
||||
* of this is that the locale for messages must be passed down through layers
|
||||
* which have no normal reason to support such passdown, or else the system
|
||||
* default locale must be used instead of the one the client needs.
|
||||
* <p/>
|
||||
 * <P> <hr> The following guidelines should be used when constructing
|
||||
* multi-language applications: <OL>
|
||||
* <p/>
|
||||
* <LI> Always use <a href=#chooseLocale>chooseLocale</a> to select the
|
||||
* locale you pass to your <code>getMessage</code> call. This lets your
|
||||
* applications use IETF standard locale names, and avoids needless
|
||||
* use of system defaults.
|
||||
* <p/>
|
||||
* <LI> The localized messages for a given package should always go in
|
||||
* a separate <em>resources</em> sub-package. There are security
|
||||
* implications; see below.
|
||||
* <p/>
|
||||
* <LI> Make sure that a language name is included in each bundle name,
|
||||
* so that the developer's locale will not be inadvertently used. That
|
||||
* is, don't create defaults like <em>resources/Messages.properties</em>
|
||||
* or <em>resources/Messages.class</em>, since ResourceBundle will choose
|
||||
* such defaults rather than giving software a chance to choose a more
|
||||
* appropriate language for its messages. Your message bundles should
|
||||
* have names like <em>Messages_en.properties</em> (for the "en", or
|
||||
* English, language) or <em>Messages_ja.class</em> ("ja" indicates the
|
||||
* Japanese language).
|
||||
* <p/>
|
||||
* <LI> Only use property files for messages in languages which can
|
||||
* be limited to the ISO Latin/1 (8859-1) characters supported by the
|
||||
* property file format. (This is mostly Western European languages.)
|
||||
* Otherwise, subclass ResourceBundle to provide your messages; it is
|
||||
* simplest to subclass <code>java.util.ListResourceBundle</code>.
|
||||
* <p/>
|
||||
* <LI> Never use another package's message catalog or resource bundles.
|
||||
* It should not be possible for a change internal to one package (such
|
||||
* as eliminating or improving messages) to break another package.
|
||||
* <p/>
|
||||
* </OL>
|
||||
* <p/>
|
||||
* <P> The "resources" sub-package can be treated separately from the
|
||||
* package with which it is associated. That main package may be sealed
|
||||
* and possibly signed, preventing other software from adding classes to
|
||||
* the package which would be able to access methods and data which are
|
||||
* not designed to be publicly accessible. On the other hand, resources
|
||||
* such as localized messages are often provided after initial product
|
||||
* shipment, without a full release cycle for the product. Such files
|
||||
* (text and class files) need to be added to some package. Since they
|
||||
* should not be added to the main package, the "resources" subpackage is
|
||||
* used without risking the security or integrity of that main package
|
||||
* as distributed in its JAR file.
|
||||
*
|
||||
* @author David Brownell
|
||||
* @version 1.1, 00/08/05
|
||||
* @see java.util.Locale
|
||||
* @see java.util.ListResourceBundle
|
||||
* @see java.text.MessageFormat
|
||||
*/
|
||||
// leave this as "abstract" -- each package needs its own subclass,
|
||||
// else it's not always going to be using the right class loader.
|
||||
abstract public class MessageCatalog {
|
||||
private String bundleName;
|
||||
|
||||
/**
 * Creates a message catalog for use by classes in the same package as
 * the given class.  The catalog uses the resource bundles named
 * <em>Messages</em> in the <em>resources</em> sub-package of that
 * class's package.
 *
 * @param packageMember Class whose package has localized messages
 */
protected MessageCatalog(Class packageMember) {
    this(packageMember, "Messages");
}
|
||||
|
||||
/**
|
||||
* Create a message catalog for use by classes in the same package
|
||||
* as the specified class. This uses the specified resource
|
||||
* bundle name in the <em>resources</em> sub-package of class passed
|
||||
* as a parameter; for example, <em>resources.Messages</em>.
|
||||
*
|
||||
* @param packageMember Class whose package has localized messages
|
||||
* @param bundle Name of a group of resource bundles
|
||||
*/
|
||||
private MessageCatalog(Class packageMember, String bundle) {
|
||||
int index;
|
||||
|
||||
bundleName = packageMember.getName();
|
||||
index = bundleName.lastIndexOf('.');
|
||||
if (index == -1) // "ClassName"
|
||||
bundleName = "";
|
||||
else // "some.package.ClassName"
|
||||
bundleName = bundleName.substring(0, index) + ".";
|
||||
bundleName = bundleName + "resources." + bundle;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get a message localized to the specified locale, using the message ID
|
||||
* and package name if no message is available. The locale is normally
|
||||
* that of the client of a service, chosen with knowledge that both the
|
||||
* client and this server support that locale. There are two error
|
||||
* cases: first, when the specified locale is unsupported or null, the
|
||||
* default locale is used if possible; second, when no bundle supports
|
||||
* that locale, the message ID and package name are used.
|
||||
*
|
||||
* @param locale The locale of the message to use. If this is null,
|
||||
* the default locale will be used.
|
||||
* @param messageId The ID of the message to use.
|
||||
* @return The message, localized as described above.
|
||||
*/
|
||||
public String getMessage(Locale locale,
|
||||
String messageId) {
|
||||
ResourceBundle bundle;
|
||||
|
||||
// cope with unsupported locale...
|
||||
if (locale == null)
|
||||
locale = Locale.getDefault();
|
||||
|
||||
try {
|
||||
bundle = ResourceBundle.getBundle(bundleName, locale);
|
||||
} catch (MissingResourceException e) {
|
||||
bundle = ResourceBundle.getBundle(bundleName, Locale.ENGLISH);
|
||||
}
|
||||
return bundle.getString(messageId);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Format a message localized to the specified locale, using the message
|
||||
* ID with its package name if none is available. The locale is normally
|
||||
* the client of a service, chosen with knowledge that both the client
|
||||
* server support that locale. There are two error cases: first, if the
|
||||
* specified locale is unsupported or null, the default locale is used if
|
||||
* possible; second, when no bundle supports that locale, the message ID
|
||||
* and package name are used.
|
||||
*
|
||||
* @param locale The locale of the message to use. If this is null,
|
||||
* the default locale will be used.
|
||||
* @param messageId The ID of the message format to use.
|
||||
* @param parameters Used when formatting the message. Objects in
|
||||
* this list are turned to strings if they are not Strings, Numbers,
|
||||
* or Dates (that is, if MessageFormat would treat them as errors).
|
||||
* @return The message, localized as described above.
|
||||
* @see java.text.MessageFormat
|
||||
*/
|
||||
public String getMessage(Locale locale,
|
||||
String messageId,
|
||||
Object parameters []) {
|
||||
if (parameters == null)
|
||||
return getMessage(locale, messageId);
|
||||
|
||||
// since most messages won't be tested (sigh), be friendly to
|
||||
// the inevitable developer errors of passing random data types
|
||||
// to the message formatting code.
|
||||
for (int i = 0; i < parameters.length; i++) {
|
||||
if (!(parameters[i] instanceof String)
|
||||
&& !(parameters[i] instanceof Number)
|
||||
&& !(parameters[i] instanceof java.util.Date)) {
|
||||
if (parameters[i] == null)
|
||||
parameters[i] = "(null)";
|
||||
else
|
||||
parameters[i] = parameters[i].toString();
|
||||
}
|
||||
}
|
||||
|
||||
// similarly, cope with unsupported locale...
|
||||
if (locale == null)
|
||||
locale = Locale.getDefault();
|
||||
|
||||
// get the appropriately localized MessageFormat object
|
||||
ResourceBundle bundle;
|
||||
MessageFormat format;
|
||||
|
||||
try {
|
||||
bundle = ResourceBundle.getBundle(bundleName, locale);
|
||||
} catch (MissingResourceException e) {
|
||||
bundle = ResourceBundle.getBundle(bundleName, Locale.ENGLISH);
|
||||
/*String retval;
|
||||
|
||||
retval = packagePrefix (messageId);
|
||||
for (int i = 0; i < parameters.length; i++) {
|
||||
retval += ' ';
|
||||
retval += parameters [i];
|
||||
}
|
||||
return retval;*/
|
||||
}
|
||||
format = new MessageFormat(bundle.getString(messageId));
|
||||
format.setLocale(locale);
|
||||
|
||||
// return the formatted message
|
||||
StringBuffer result = new StringBuffer();
|
||||
|
||||
result = format.format(parameters, result, new FieldPosition(0));
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Chooses a client locale to use, using the first language specified in
|
||||
* the list that is supported by this catalog. If none of the specified
|
||||
* languages is supported, a null value is returned. Such a list of
|
||||
* languages might be provided in an HTTP/1.1 "Accept-Language" header
|
||||
* field, or through some other content negotiation mechanism.
|
||||
* <p/>
|
||||
* <P> The language specifiers recognized are RFC 1766 style ("fr" for
|
||||
* all French, "fr-ca" for Canadian French), although only the strict
|
||||
* ISO subset (two letter language and country specifiers) is currently
|
||||
* supported. Java-style locale strings ("fr_CA") are also supported.
|
||||
*
|
||||
* @param languages Array of language specifiers, ordered with the most
|
||||
* preferable one at the front. For example, "en-ca" then "fr-ca",
|
||||
* followed by "zh_CN".
|
||||
* @return The most preferable supported locale, or null.
|
||||
* @see java.util.Locale
|
||||
*/
|
||||
public Locale chooseLocale(String languages []) {
|
||||
if ((languages = canonicalize(languages)) != null) {
|
||||
for (int i = 0; i < languages.length; i++)
|
||||
if (isLocaleSupported(languages[i]))
|
||||
return getLocale(languages[i]);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Canonicalizes the RFC 1766 style language strings ("en-in") to
|
||||
// match standard Java usage ("en_IN"), removing strings that don't
|
||||
// use two character ISO language and country codes. Avoids all
|
||||
// memory allocations possible, so that if the strings passed in are
|
||||
// just lowercase ISO codes (a common case) the input is returned.
|
||||
//
|
||||
private String[] canonicalize(String languages []) {
|
||||
boolean didClone = false;
|
||||
int trimCount = 0;
|
||||
|
||||
if (languages == null)
|
||||
return languages;
|
||||
|
||||
for (int i = 0; i < languages.length; i++) {
|
||||
String lang = languages[i];
|
||||
int len = lang.length();
|
||||
|
||||
// no RFC1766 extensions allowed; "zh" and "zh-tw" (etc) are OK
|
||||
// as are regular locale names with no variant ("de_CH").
|
||||
if (!(len == 2 || len == 5)) {
|
||||
if (!didClone) {
|
||||
languages = (String[]) languages.clone();
|
||||
didClone = true;
|
||||
}
|
||||
languages[i] = null;
|
||||
trimCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// language code ... if already lowercase, we change nothing
|
||||
if (len == 2) {
|
||||
lang = lang.toLowerCase();
|
||||
if (lang != languages[i]) {
|
||||
if (!didClone) {
|
||||
languages = (String[]) languages.clone();
|
||||
didClone = true;
|
||||
}
|
||||
languages[i] = lang;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// language_country ... fixup case, force "_"
|
||||
char buf [] = new char[5];
|
||||
|
||||
buf[0] = Character.toLowerCase(lang.charAt(0));
|
||||
buf[1] = Character.toLowerCase(lang.charAt(1));
|
||||
buf[2] = '_';
|
||||
buf[3] = Character.toUpperCase(lang.charAt(3));
|
||||
buf[4] = Character.toUpperCase(lang.charAt(4));
|
||||
if (!didClone) {
|
||||
languages = (String[]) languages.clone();
|
||||
didClone = true;
|
||||
}
|
||||
languages[i] = new String(buf);
|
||||
}
|
||||
|
||||
// purge any shadows of deleted RFC1766 extended language codes
|
||||
if (trimCount != 0) {
|
||||
String temp [] = new String[languages.length - trimCount];
|
||||
int i;
|
||||
|
||||
for (i = 0, trimCount = 0; i < temp.length; i++) {
|
||||
while (languages[i + trimCount] == null)
|
||||
trimCount++;
|
||||
temp[i] = languages[i + trimCount];
|
||||
}
|
||||
languages = temp;
|
||||
}
|
||||
return languages;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Returns a locale object supporting the specified locale, using
|
||||
// a small cache to speed up some common languages and reduce the
|
||||
// needless allocation of memory.
|
||||
//
|
||||
private Locale getLocale(String localeName) {
|
||||
String language, country;
|
||||
int index;
|
||||
|
||||
index = localeName.indexOf('_');
|
||||
if (index == -1) {
|
||||
//
|
||||
// Special case the builtin JDK languages
|
||||
//
|
||||
if (localeName.equals("de"))
|
||||
return Locale.GERMAN;
|
||||
if (localeName.equals("en"))
|
||||
return Locale.ENGLISH;
|
||||
if (localeName.equals("fr"))
|
||||
return Locale.FRENCH;
|
||||
if (localeName.equals("it"))
|
||||
return Locale.ITALIAN;
|
||||
if (localeName.equals("ja"))
|
||||
return Locale.JAPANESE;
|
||||
if (localeName.equals("ko"))
|
||||
return Locale.KOREAN;
|
||||
if (localeName.equals("zh"))
|
||||
return Locale.CHINESE;
|
||||
|
||||
language = localeName;
|
||||
country = "";
|
||||
} else {
|
||||
if (localeName.equals("zh_CN"))
|
||||
return Locale.SIMPLIFIED_CHINESE;
|
||||
if (localeName.equals("zh_TW"))
|
||||
return Locale.TRADITIONAL_CHINESE;
|
||||
|
||||
//
|
||||
// JDK also has constants for countries: en_GB, en_US, en_CA,
|
||||
// fr_FR, fr_CA, de_DE, ja_JP, ko_KR. We don't use those.
|
||||
//
|
||||
language = localeName.substring(0, index);
|
||||
country = localeName.substring(index + 1);
|
||||
}
|
||||
|
||||
return new Locale(language, country);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// cache for isLanguageSupported(), below ... key is a language
|
||||
// or locale name, value is a Boolean
|
||||
//
|
||||
private Hashtable cache = new Hashtable(5);
|
||||
|
||||
|
||||
/**
|
||||
* Returns true iff the specified locale has explicit language support.
|
||||
* For example, the traditional Chinese locale "zh_TW" has such support
|
||||
* if there are message bundles suffixed with either "zh_TW" or "zh".
|
||||
* <p/>
|
||||
* <P> This method is used to bypass part of the search path mechanism
|
||||
* of the <code>ResourceBundle</code> class, specifically the parts which
|
||||
* force use of default locales and bundles. Such bypassing is required
|
||||
* in order to enable use of a client's preferred languages. Following
|
||||
* the above example, if a client prefers "zh_TW" but can also accept
|
||||
* "ja", this method would be used to detect that there are no "zh_TW"
|
||||
* resource bundles and hence that "ja" messages should be used. This
|
||||
* bypasses the ResourceBundle mechanism which will return messages in
|
||||
* some other locale (picking some hard-to-anticipate default) instead
|
||||
* of reporting an error and letting the client choose another locale.
|
||||
*
|
||||
* @param localeName A standard Java locale name, using two character
|
||||
* language codes optionally suffixed by country codes.
|
||||
* @return True iff the language of that locale is supported.
|
||||
* @see java.util.Locale
|
||||
*/
|
||||
public boolean isLocaleSupported(String localeName) {
|
||||
//
|
||||
// Use previous results if possible. We expect that the codebase
|
||||
// is immutable, so we never worry about changing the cache.
|
||||
//
|
||||
Boolean value = (Boolean) cache.get(localeName);
|
||||
|
||||
if (value != null)
|
||||
return value.booleanValue();
|
||||
|
||||
//
|
||||
// Try "language_country_variant", then "language_country",
|
||||
// then finally "language" ... assuming the longest locale name
|
||||
// is passed. If not, we'll try fewer options.
|
||||
//
|
||||
ClassLoader loader = null;
|
||||
|
||||
for (; ;) {
|
||||
String name = bundleName + "_" + localeName;
|
||||
|
||||
// look up classes ...
|
||||
try {
|
||||
Class.forName(name);
|
||||
cache.put(localeName, Boolean.TRUE);
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
// ... then property files (only for ISO Latin/1 messages)
|
||||
InputStream in;
|
||||
|
||||
if (loader == null)
|
||||
loader = getClass().getClassLoader();
|
||||
|
||||
name = name.replace('.', '/');
|
||||
name = name + ".properties";
|
||||
if (loader == null)
|
||||
in = ClassLoader.getSystemResourceAsStream(name);
|
||||
else
|
||||
in = loader.getResourceAsStream(name);
|
||||
if (in != null) {
|
||||
cache.put(localeName, Boolean.TRUE);
|
||||
return true;
|
||||
}
|
||||
|
||||
int index = localeName.indexOf('_');
|
||||
|
||||
if (index > 0)
|
||||
localeName = localeName.substring(0, index);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
//
|
||||
// If we got this far, we failed. Remember for later.
|
||||
//
|
||||
cache.put(localeName, Boolean.FALSE);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
448
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/Resolver.java
Normal file
448
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/Resolver.java
Normal file
@@ -0,0 +1,448 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
import org.xml.sax.EntityResolver;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.Hashtable;
|
||||
|
||||
/**
|
||||
* This entity resolver class provides a number of utilities which can help
|
||||
* management of external parsed entities in XML.  These are commonly used
|
||||
* to hold markup declarations that are to be used as part of a Document
|
||||
* Type Declaration (DTD), or to hold text marked up with XML.
|
||||
* <p/>
|
||||
* <P> Features include: <UL>
|
||||
* <p/>
|
||||
* <LI> Static factory methods are provided for constructing SAX InputSource
|
||||
* objects from Files, URLs, or MIME objects. This eliminates a class of
|
||||
* error-prone coding in applications.
|
||||
* <p/>
|
||||
* <LI> Character encodings for XML documents are correctly supported: <UL>
|
||||
* <p/>
|
||||
* <LI> The encodings defined in the RFCs for MIME content types
|
||||
* (2046 for general MIME, and 2376 for XML in particular), are
|
||||
* supported, handling <em>charset=...</em> attributes and accepting
|
||||
* content types which are known to be safe for use with XML;
|
||||
* <p/>
|
||||
* <LI> The character encoding autodetection algorithm identified
|
||||
* in the XML specification is used, and leverages all of
|
||||
* the JDK 1.1 (and later) character encoding support.
|
||||
* <p/>
|
||||
* <LI> The use of MIME typing may optionally be disabled, forcing the
|
||||
* use of autodetection, to support web servers which don't correctly
|
||||
* report MIME types for XML. For example, they may report text that
|
||||
* is encoded in EUC-JP as being US-ASCII text, leading to fatal
|
||||
* errors during parsing.
|
||||
* <p/>
|
||||
* <LI> The InputSource objects returned by this class always
|
||||
* have a <code>java.io.Reader</code> available as the "character
|
||||
* stream" property.
|
||||
* <p/>
|
||||
* </UL>
|
||||
* <p/>
|
||||
* <LI> Catalog entries can map public identifiers to Java resources or
|
||||
* to local URLs. These are used to reduce network dependencies and loads,
|
||||
* and will often be used for external DTD components. For example, packages
|
||||
* shipping DTD files as resources in JAR files can eliminate network traffic
|
||||
* when accessing them, and sites may provide local caches of common DTDs.
|
||||
* Note that no particular catalog syntax is supported by this class, only
|
||||
* the notion of a set of entries.
|
||||
* <p/>
|
||||
* </UL>
|
||||
* <p/>
|
||||
* <P> Subclasses can perform tasks such as supporting new URI schemes for
|
||||
* URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
|
||||
* MIME entities which are part of a <em>multipart/related</em> group
|
||||
* (see RFC 2387). They may also be used to support particular catalog
|
||||
* syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
|
||||
* SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
|
||||
* Public Identifiers" (FPIs).
|
||||
*
|
||||
* @author David Brownell
|
||||
* @author Janet Koenig
|
||||
* @version 1.3 00/02/24
|
||||
*/
|
||||
public class Resolver implements EntityResolver {
|
||||
private boolean ignoringMIME;
|
||||
|
||||
// table mapping public IDs to (local) URIs
|
||||
private Hashtable id2uri;
|
||||
|
||||
// tables mapping public IDs to resources and classloaders
|
||||
private Hashtable id2resource;
|
||||
private Hashtable id2loader;
|
||||
|
||||
//
|
||||
// table of MIME content types (less attributes!) known
|
||||
// to be mostly "OK" to use with XML MIME entities. the
|
||||
// idea is to rule out obvious braindamage ("image/jpg")
|
||||
// not the subtle stuff ("text/html") that might actually
|
||||
// be (or become) safe.
|
||||
//
|
||||
private static final String types [] = {
|
||||
"application/xml",
|
||||
"text/xml",
|
||||
"text/plain",
|
||||
"text/html", // commonly mis-inferred
|
||||
"application/x-netcdf", // this is often illegal XML
|
||||
"content/unknown"
|
||||
};
|
||||
|
||||
/**
|
||||
* Constructs a resolver.
|
||||
*/
|
||||
public Resolver() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an input source, using the MIME type information and URL
|
||||
* scheme to statically determine the correct character encoding if
|
||||
* possible and otherwise autodetecting it. MIME carefully specifies
|
||||
* the character encoding defaults, and how attributes of the content
|
||||
* type can change it. XML further specifies two mandatory encodings
|
||||
* (UTF-8 and UTF-16), and includes an XML declaration which can be
|
||||
* used to internally label most documents encoded using US-ASCII
|
||||
* supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
|
||||
* more).
|
||||
* <p/>
|
||||
* <P> This method can be used to access XML documents which do not
|
||||
* have URIs (such as servlet input streams, or most JavaMail message
|
||||
* entities) and to support access methods such as HTTP POST or PUT.
|
||||
* (URLs normally return content using the GET method.)
|
||||
* <p/>
|
||||
* <P> <em> The caller should set the system ID in order for relative URIs
|
||||
* found in this document to be interpreted correctly.</em> In some cases,
|
||||
* a custom resolver will need to be used; for example, documents
|
||||
* may be grouped in a single MIME "multipart/related" bundle, and
|
||||
* relative URLs would refer to other documents in that bundle.
|
||||
*
|
||||
* @param contentType The MIME content type for the source for which
|
||||
* an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
|
||||
* @param stream The input byte stream for the input source.
|
||||
* @param checkType If true, this verifies that the content type is known
|
||||
* to support XML documents, such as <em>application/xml</em>.
|
||||
* @param scheme Unless this is "file", unspecified MIME types
|
||||
* default to US-ASCII. Files are always autodetected since most
|
||||
* file systems discard character encoding information.
|
||||
*/
|
||||
public static InputSource createInputSource(String contentType,
|
||||
InputStream stream,
|
||||
boolean checkType,
|
||||
String scheme) throws IOException {
|
||||
InputSource retval;
|
||||
String charset = null;
|
||||
|
||||
if (contentType != null) {
|
||||
int index;
|
||||
|
||||
contentType = contentType.toLowerCase();
|
||||
index = contentType.indexOf(';');
|
||||
if (index != -1) {
|
||||
String attributes;
|
||||
|
||||
attributes = contentType.substring(index + 1);
|
||||
contentType = contentType.substring(0, index);
|
||||
|
||||
// use "charset=..." if it's available
|
||||
index = attributes.indexOf("charset");
|
||||
if (index != -1) {
|
||||
attributes = attributes.substring(index + 7);
|
||||
// strip out subsequent attributes
|
||||
if ((index = attributes.indexOf(';')) != -1)
|
||||
attributes = attributes.substring(0, index);
|
||||
// find start of value
|
||||
if ((index = attributes.indexOf('=')) != -1) {
|
||||
attributes = attributes.substring(index + 1);
|
||||
// strip out rfc822 comments
|
||||
if ((index = attributes.indexOf('(')) != -1)
|
||||
attributes = attributes.substring(0, index);
|
||||
// double quotes are optional
|
||||
if ((index = attributes.indexOf('"')) != -1) {
|
||||
attributes = attributes.substring(index + 1);
|
||||
attributes = attributes.substring(0,
|
||||
attributes.indexOf('"'));
|
||||
}
|
||||
charset = attributes.trim();
|
||||
// XXX "\;", "\)" etc were mishandled above
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Check MIME type.
|
||||
//
|
||||
if (checkType) {
|
||||
boolean isOK = false;
|
||||
for (int i = 0; i < types.length; i++)
|
||||
if (types[i].equals(contentType)) {
|
||||
isOK = true;
|
||||
break;
|
||||
}
|
||||
if (!isOK)
|
||||
throw new IOException("Not XML: " + contentType);
|
||||
}
|
||||
|
||||
//
|
||||
// "text/*" MIME types have hard-wired character set
|
||||
// defaults, as specified in the RFCs. For XML, we
|
||||
// ignore the system "file.encoding" property since
|
||||
// autodetection is more correct.
|
||||
//
|
||||
if (charset == null) {
|
||||
contentType = contentType.trim();
|
||||
if (contentType.startsWith("text/")) {
|
||||
if (!"file".equalsIgnoreCase(scheme))
|
||||
charset = "US-ASCII";
|
||||
}
|
||||
// "application/*" has no default
|
||||
}
|
||||
}
|
||||
|
||||
retval = new InputSource(XmlReader.createReader(stream, charset));
|
||||
retval.setByteStream(stream);
|
||||
retval.setEncoding(charset);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates an input source from a given URI.
|
||||
*
|
||||
* @param uri the URI (system ID) for the entity
|
||||
* @param checkType if true, the MIME content type for the entity
|
||||
* is checked for document type and character set encoding.
|
||||
*/
|
||||
static public InputSource createInputSource(URL uri, boolean checkType)
|
||||
throws IOException {
|
||||
|
||||
URLConnection conn = uri.openConnection();
|
||||
InputSource retval;
|
||||
|
||||
if (checkType) {
|
||||
String contentType = conn.getContentType();
|
||||
retval = createInputSource(contentType, conn.getInputStream(),
|
||||
false, uri.getProtocol());
|
||||
} else {
|
||||
retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
|
||||
}
|
||||
retval.setSystemId(conn.getURL().toString());
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates an input source from a given file, autodetecting
|
||||
* the character encoding.
|
||||
*/
|
||||
static public InputSource createInputSource(File file)
|
||||
throws IOException {
|
||||
InputSource retval;
|
||||
String path;
|
||||
|
||||
retval = new InputSource(XmlReader.createReader(new FileInputStream(file)));
|
||||
|
||||
// On JDK 1.2 and later, simplify this:
|
||||
// "path = file.toURL ().toString ()".
|
||||
path = file.getAbsolutePath();
|
||||
if (File.separatorChar != '/')
|
||||
path = path.replace(File.separatorChar, '/');
|
||||
if (!path.startsWith("/"))
|
||||
path = "/" + path;
|
||||
if (!path.endsWith("/") && file.isDirectory())
|
||||
path = path + "/";
|
||||
|
||||
retval.setSystemId("file:" + path);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* <b>SAX:</b>
|
||||
* Resolve the given entity into an input source. If the name can't
|
||||
* be mapped to a preferred form of the entity, the URI is used. To
|
||||
* resolve the entity, first a local catalog mapping names to URIs is
|
||||
* consulted. If no mapping is found there, a catalog mapping names
|
||||
* to java resources is consulted. Finally, if neither mapping found
|
||||
* a copy of the entity, the specified URI is used.
|
||||
* <p/>
|
||||
* <P> When a URI is used, <a href="#createInputSource">
|
||||
* createInputSource</a> is used to correctly deduce the character
|
||||
* encoding used by this entity. No MIME type checking is done.
|
||||
*
|
||||
* @param name Used to find alternate copies of the entity, when
|
||||
* this value is non-null; this is the XML "public ID".
|
||||
* @param uri Used when no alternate copy of the entity is found;
|
||||
* this is the XML "system ID", normally a URI.
|
||||
*/
|
||||
public InputSource resolveEntity(String name, String uri)
|
||||
throws IOException {
|
||||
InputSource retval;
|
||||
String mappedURI = name2uri(name);
|
||||
InputStream stream;
|
||||
|
||||
// prefer explicit URI mappings, then bundled resources...
|
||||
if (mappedURI == null && (stream = mapResource(name)) != null) {
|
||||
uri = "java:resource:" + (String) id2resource.get(name);
|
||||
retval = new InputSource(XmlReader.createReader(stream));
|
||||
|
||||
// ...and treat all URIs the same (as URLs for now).
|
||||
} else {
|
||||
URL url;
|
||||
URLConnection conn;
|
||||
|
||||
if (mappedURI != null)
|
||||
uri = mappedURI;
|
||||
else if (uri == null)
|
||||
return null;
|
||||
|
||||
url = new URL(uri);
|
||||
conn = url.openConnection();
|
||||
uri = conn.getURL().toString();
|
||||
// System.out.println ("++ URI: " + url);
|
||||
if (ignoringMIME)
|
||||
retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
|
||||
else {
|
||||
String contentType = conn.getContentType();
|
||||
retval = createInputSource(contentType,
|
||||
conn.getInputStream(),
|
||||
false, url.getProtocol());
|
||||
}
|
||||
}
|
||||
retval.setSystemId(uri);
|
||||
retval.setPublicId(name);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if this resolver is ignoring MIME types in the documents
|
||||
* it returns, to work around bugs in how servers have reported the
|
||||
* documents' MIME types.
|
||||
*/
|
||||
public boolean isIgnoringMIME() {
|
||||
return ignoringMIME;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells the resolver whether to ignore MIME types in the documents it
|
||||
* retrieves. Many web servers incorrectly assign text documents a
|
||||
* default character encoding, even when that is incorrect. For example,
|
||||
* all HTTP text documents default to use ISO-8859-1 (used for Western
|
||||
* European languages), and other MIME sources default text documents
|
||||
* to use US-ASCII (a seven bit encoding). For XML documents which
|
||||
* include text encoding declarations (as most should do), these server
|
||||
* bugs can be worked around by ignoring the MIME type entirely.
|
||||
*/
|
||||
public void setIgnoringMIME(boolean value) {
|
||||
ignoringMIME = value;
|
||||
}
|
||||
|
||||
|
||||
// maps the public ID to an alternate URI, if one is registered
|
||||
private String name2uri(String publicId) {
|
||||
if (publicId == null || id2uri == null)
|
||||
return null;
|
||||
return (String) id2uri.get(publicId);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Registers the given public ID as corresponding to a particular
|
||||
* URI, typically a local copy. This URI will be used in preference
|
||||
* to ones provided as system IDs in XML entity declarations. This
|
||||
* mechanism would most typically be used for Document Type Definitions
|
||||
* (DTDs), where the public IDs are formally managed and versioned.
|
||||
*
|
||||
* @param publicId The managed public ID being mapped
|
||||
* @param uri The URI of the preferred copy of that entity
|
||||
*/
|
||||
public void registerCatalogEntry(String publicId,
|
||||
String uri) {
|
||||
if (id2uri == null)
|
||||
id2uri = new Hashtable(17);
|
||||
id2uri.put(publicId, uri);
|
||||
}
|
||||
|
||||
|
||||
// return the resource as a stream
|
||||
private InputStream mapResource(String publicId) {
|
||||
// System.out.println ("++ PUBLIC: " + publicId);
|
||||
if (publicId == null || id2resource == null)
|
||||
return null;
|
||||
|
||||
String resourceName = (String) id2resource.get(publicId);
|
||||
ClassLoader loader = null;
|
||||
|
||||
if (resourceName == null)
|
||||
return null;
|
||||
// System.out.println ("++ Resource: " + resourceName);
|
||||
|
||||
if (id2loader != null)
|
||||
loader = (ClassLoader) id2loader.get(publicId);
|
||||
// System.out.println ("++ Loader: " + loader);
|
||||
if (loader == null)
|
||||
return ClassLoader.getSystemResourceAsStream(resourceName);
|
||||
return loader.getResourceAsStream(resourceName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a given public ID as corresponding to a particular Java
|
||||
* resource in a given class loader, typically distributed with a
|
||||
* software package. This resource will be preferred over system IDs
|
||||
* included in XML documents. This mechanism should most typically be
|
||||
* used for Document Type Definitions (DTDs), where the public IDs are
|
||||
* formally managed and versioned.
|
||||
* <p/>
|
||||
* <P> If a mapping to a URI has been provided, that mapping takes
|
||||
* precedence over this one.
|
||||
*
|
||||
* @param publicId The managed public ID being mapped
|
||||
* @param resourceName The name of the Java resource
|
||||
* @param loader The class loader holding the resource, or null if
|
||||
* it is a system resource.
|
||||
*/
|
||||
public void registerCatalogEntry(String publicId,
|
||||
String resourceName,
|
||||
ClassLoader loader) {
|
||||
if (id2resource == null)
|
||||
id2resource = new Hashtable(17);
|
||||
id2resource.put(publicId, resourceName);
|
||||
|
||||
if (loader != null) {
|
||||
if (id2loader == null)
|
||||
id2loader = new Hashtable(17);
|
||||
id2loader.put(publicId, loader);
|
||||
}
|
||||
}
|
||||
}
|
||||
285
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/SimpleHashtable.java
Normal file
285
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/SimpleHashtable.java
Normal file
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
import java.util.Enumeration;
|
||||
|
||||
|
||||
// This could be replaced by Collections class unless we want
|
||||
// to be able to run on JDK 1.1
|
||||
|
||||
|
||||
/**
 * A special-purpose hashtable. It works like a normal
 * <code>java.util.Hashtable</code> except that: <OL>
 *
 * <LI> Keys given to <code>get</code> and <code>put</code> are strings
 * which are known to be interned, so that "==" is used instead of
 * "String.equals". (Interning could be document-relative instead of
 * global.) Use <code>getNonInterned</code> when the lookup key may not
 * be interned.
 *
 * <LI> It's not synchronized, since it's to be used only by
 * one thread at a time.
 *
 * <LI> The <code>keys()</code> enumerator allocates no memory: the table
 * is its own enumeration and keeps a single cursor, so only one
 * enumeration can be in progress at a time, and live updates to the
 * data while enumerating are disallowed.
 *
 * <LI> It's got fewer bells and whistles: fixed threshold and
 * load factor, no JDK 1.2 collection support, only keys can be
 * enumerated, things can't be removed, simpler inheritance; more.
 *
 * </OL>
 *
 * <P> The overall result is that it's less expensive to use these in
 * performance-critical locations, in terms both of CPU and memory,
 * than <code>java.util.Hashtable</code> instances. In this package
 * it makes a significant difference when normalizing attributes,
 * which is done for each start-element construct.
 *
 * @version $Revision: 1.2 $
 */
final class SimpleHashtable implements Enumeration {
    // Bucket array; each slot heads a singly linked collision chain.
    private Entry table[];

    // Enumeration cursor: the entry whose key is returned next, and the
    // index of the first bucket that has not been visited yet.
    private Entry current = null;
    private int currentBucket = 0;

    // Number of entries stored, and the count at which the table grows.
    private int count;
    private int threshold;

    private static final float loadFactor = 0.75f;


    /**
     * Constructs a new, empty hashtable with the specified initial
     * capacity. A capacity of zero is silently raised to one.
     *
     * @param initialCapacity the initial capacity of the hashtable.
     * @throws IllegalArgumentException if the capacity is negative.
     */
    public SimpleHashtable(int initialCapacity) {
        if (initialCapacity < 0)
            throw new IllegalArgumentException("Illegal Capacity: " +
                    initialCapacity);
        if (initialCapacity == 0)
            initialCapacity = 1;
        table = new Entry[initialCapacity];
        threshold = (int) (initialCapacity * loadFactor);
    }

    /**
     * Constructs a new, empty hashtable with a default capacity.
     */
    public SimpleHashtable() {
        this(11);
    }

    /**
     * Removes every entry and resets the enumeration cursor. The bucket
     * array keeps its current length.
     */
    public void clear() {
        count = 0;
        currentBucket = 0;
        current = null;
        for (int i = 0; i < table.length; i++)
            table[i] = null;
    }

    /**
     * Returns the number of keys in this hashtable.
     *
     * @return the number of keys in this hashtable.
     */
    public int size() {
        return count;
    }

    /**
     * Returns an enumeration of the keys in this hashtable. The returned
     * object is this table itself with its cursor rewound, so starting a
     * new enumeration abandons any enumeration already in progress.
     *
     * @return an enumeration of the keys in this hashtable.
     * @see Enumeration
     */
    public Enumeration keys() {
        currentBucket = 0;
        current = null;
        return this;
    }

    /**
     * Used to view this as an enumeration; returns true if there
     * are more keys to be enumerated. When the cursor has run off the
     * end of a collision chain, this moves it to the head of the next
     * non-empty bucket.
     */
    public boolean hasMoreElements() {
        if (current != null)
            return true;
        while (currentBucket < table.length) {
            current = table[currentBucket++];
            if (current != null)
                return true;
        }
        return false;
    }

    /**
     * Used to view this as an enumeration; returns the next key
     * in the enumeration.
     *
     * @throws IllegalStateException if no keys are left to enumerate.
     */
    public Object nextElement() {
        // Advance the cursor here rather than relying on the caller having
        // just invoked hasMoreElements(); otherwise the cursor stays null at
        // the start of the enumeration and at the end of every collision
        // chain, and keys that are still pending would be reported as
        // missing.
        if (!hasMoreElements())
            throw new IllegalStateException();

        Object retval = current.key;
        current = current.next;
        return retval;
    }


    /**
     * Returns the value to which the specified key is mapped in this
     * hashtable, or null if there is none. The key is matched by
     * identity ("=="), so it must be the same (interned) string
     * instance that was used to store the value.
     */
    public Object get(String key) {
        Entry tab[] = table;
        int hash = key.hashCode();
        int index = (hash & 0x7FFFFFFF) % tab.length;
        for (Entry e = tab[index]; e != null; e = e.next) {
            if ((e.hash == hash) && (e.key == key))
                return e.value;
        }
        return null;
    }

    /**
     * Returns the value to which the specified key is mapped in this
     * hashtable ... the key isn't necessarily interned, though, so it is
     * matched with <code>equals</code> instead of identity.
     */
    public Object getNonInterned(String key) {
        Entry tab[] = table;
        int hash = key.hashCode();
        int index = (hash & 0x7FFFFFFF) % tab.length;
        for (Entry e = tab[index]; e != null; e = e.next) {
            if ((e.hash == hash) && e.key.equals(key))
                return e.value;
        }
        return null;
    }

    /**
     * Increases the capacity of and internally reorganizes this
     * hashtable, in order to accommodate and access its entries more
     * efficiently. This method is called automatically by
     * <code>put</code> when the number of keys in the hashtable has
     * reached the threshold derived from its capacity and load factor.
     */
    private void rehash() {
        int oldCapacity = table.length;
        Entry oldMap[] = table;

        int newCapacity = oldCapacity * 2 + 1;
        Entry newMap[] = new Entry[newCapacity];

        threshold = (int) (newCapacity * loadFactor);
        table = newMap;

        // Relink every existing entry into the bucket it hashes to in the
        // larger array; entries are reused, not copied.
        for (int i = oldCapacity; i-- > 0;) {
            for (Entry old = oldMap[i]; old != null;) {
                Entry e = old;
                old = old.next;

                int index = (e.hash & 0x7FFFFFFF) % newCapacity;
                e.next = newMap[index];
                newMap[index] = e;
            }
        }
    }

    /**
     * Maps the specified <code>key</code> to the specified
     * <code>value</code> in this hashtable. Neither the key nor the
     * value can be <code>null</code>.
     *
     * <P>An existing mapping is only recognized when the key is the very
     * same object ("==") as the one stored, so keys are expected to be
     * interned. The value can be retrieved by calling the
     * <code>get</code> method with that same key.
     *
     * @return the value previously mapped to the key, or null if the key
     *         was not present.
     * @throws NullPointerException if the key or the value is null.
     */
    public Object put(Object key, Object value) {
        // Make sure the value is not null
        if (value == null) {
            throw new NullPointerException();
        }

        // Replace the value in place if the key is already in the hashtable.
        Entry tab[] = table;
        int hash = key.hashCode();
        int index = (hash & 0x7FFFFFFF) % tab.length;
        for (Entry e = tab[index]; e != null; e = e.next) {
            if ((e.hash == hash) && (e.key == key)) {
                Object old = e.value;
                e.value = value;
                return old;
            }
        }

        if (count >= threshold) {
            // Grow the table, then recompute the bucket for the new array.
            rehash();

            tab = table;
            index = (hash & 0x7FFFFFFF) % tab.length;
        }

        // Creates the new entry at the head of its collision chain.
        Entry e = new Entry(hash, key, value, tab[index]);
        tab[index] = e;
        count++;
        return null;
    }


    /**
     * Hashtable collision list.
     */
    private static class Entry {
        int hash;
        Object key;
        Object value;
        Entry next;

        protected Entry(int hash, Object key, Object value, Entry next) {
            this.hash = hash;
            this.key = key;
            this.value = value;
            this.next = next;
        }
    }
}
|
||||
387
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/XmlChars.java
Normal file
387
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/XmlChars.java
Normal file
@@ -0,0 +1,387 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
|
||||
/**
 * Predicates that decide whether a character may appear in a given
 * role in an XML document. They are used both when parsing and when
 * creating such documents.
 *
 * @author David Brownell
 * @version 1.1, 00/08/05
 */
public class XmlChars {
    // static predicates only; can't construct instances
    private XmlChars() {
    }

    /**
     * Returns true if the argument, a UCS-4 character code, is valid in
     * XML documents. Unicode characters fit into the low sixteen
     * bits of a UCS-4 character, and pairs of Unicode <em>surrogate
     * characters</em> can be combined to encode UCS-4 characters in
     * documents containing only Unicode. (The <code>char</code> datatype
     * in the Java Programming Language represents Unicode characters,
     * including unpaired surrogates.)
     *
     * <P> In XML, UCS-4 characters can also be encoded by the use of
     * <em>character references</em> such as <b>&amp;#x12345678;</b>, which
     * happens to refer to a character that is disallowed in XML documents.
     * UCS-4 characters allowed in XML documents can be expressed with
     * one or two Unicode characters.
     *
     * @param ucs4char The 32-bit UCS-4 character being tested.
     */
    public static boolean isChar(int ucs4char) {
        // [2] Char ::= #x0009 | #x000A | #x000D
        //          | [#x0020-#xD7FF]
        //          ... surrogates excluded!
        //          | [#xE000-#xFFFD]
        //          | [#x10000-#x10ffff]
        if (ucs4char < 0x0020) {
            // below the space character only tab, LF and CR are legal
            return ucs4char == 0x0009
                    || ucs4char == 0x000A
                    || ucs4char == 0x000D;
        }
        if (ucs4char <= 0xD7FF) {
            return true;
        }
        if (ucs4char < 0xE000) {
            // the surrogate block
            return false;
        }
        if (ucs4char <= 0xFFFD) {
            return true;
        }
        return ucs4char >= 0x10000 && ucs4char <= 0x10ffff;
    }

    /**
     * Returns true if the character is allowed to be a non-initial
     * character in names according to the XML recommendation.
     *
     * @see #isNCNameChar(char)
     * @see #isLetter(char)
     */
    public static boolean isNameChar(char c) {
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
        //                  | CombiningChar | Extender
        if (isLetter2(c)) {
            return true;
        }
        switch (c) {
            case '.':
            case '-':
            case '_':
            case ':':
                return true;
            default:
                return isExtender(c);
        }
    }

    /**
     * Returns true if the character is allowed to be a non-initial
     * character in unscoped names according to the rules of the XML
     * Namespaces proposed recommendation. Except for precluding
     * the colon (used to separate names from their scopes) these
     * characters are just as allowed by the XML recommendation.
     *
     * @see #isNameChar(char)
     * @see #isLetter(char)
     */
    public static boolean isNCNameChar(char c) {
        // [NC 5] NCNameChar ::= Letter | Digit | '.' | '-' | '_'
        //                       | CombiningChar | Extender
        if (c == ':') {
            return false;
        }
        return isNameChar(c);
    }

    /**
     * Returns true if the character is allowed where XML supports
     * whitespace characters, false otherwise.
     */
    public static boolean isSpace(char c) {
        switch (c) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                return true;
            default:
                return false;
        }
    }


    /*
     * NOTE: java.lang.Character.getType() values are:
     *
     *  UNASSIGNED = 0,
     *
     *  UPPERCASE_LETTER = 1 (Lu),        LOWERCASE_LETTER = 2 (Ll),
     *  TITLECASE_LETTER = 3 (Lt),        MODIFIER_LETTER = 4 (Lm),
     *  OTHER_LETTER = 5 (Lo),            NON_SPACING_MARK = 6 (Mn),
     *  ENCLOSING_MARK = 7 (Me),          COMBINING_SPACING_MARK = 8 (Mc),
     *  DECIMAL_DIGIT_NUMBER = 9 (Nd),    LETTER_NUMBER = 10 (Nl),
     *  OTHER_NUMBER = 11 (No),           SPACE_SEPARATOR = 12 (Zs),
     *  LINE_SEPARATOR = 13 (Zl),         PARAGRAPH_SEPARATOR = 14 (Zp),
     *  CONTROL = 15 (Cc),                FORMAT = 16 (Cf),
     *  (17 reserved for proposed Ci category)
     *  PRIVATE_USE = 18 (Co),            SURROGATE = 19 (Cs),
     *  DASH_PUNCTUATION = 20 (Pd),       START_PUNCTUATION = 21 (Ps),
     *  END_PUNCTUATION = 22 (Pe),        CONNECTOR_PUNCTUATION = 23 (Pc),
     *  OTHER_PUNCTUATION = 24 (Po),      MATH_SYMBOL = 25 (Sm),
     *  CURRENCY_SYMBOL = 26 (Sc),        MODIFIER_SYMBOL = 27 (Sk),
     *  OTHER_SYMBOL = 28 (So)
     */

    /**
     * Returns true if the character is an XML "letter". XML Names must
     * start with Letters or a few other characters, but other characters
     * in names must only satisfy the <em>isNameChar</em> predicate.
     *
     * @see #isNameChar(char)
     * @see #isNCNameChar(char)
     */
    public static boolean isLetter(char c) {
        // [84] Letter ::= BaseChar | Ideographic
        // [85] BaseChar ::= ... too much to repeat
        // [86] Ideographic ::= ... too much to repeat

        // Fast path for the typical (ASCII) case.
        if ('a' <= c && c <= 'z') {
            return true;
        }
        if (c == '/') {
            return false;
        }
        if ('A' <= c && c <= 'Z') {
            return true;
        }

        // The tables are too unwieldy to reproduce in code, so the
        // footnotes of the spec drive this test instead: app. B says
        // these categories are "name start" characters.
        final int type = Character.getType(c);
        final boolean nameStartCategory =
                type == Character.LOWERCASE_LETTER          // Ll
                || type == Character.UPPERCASE_LETTER       // Lu
                || type == Character.OTHER_LETTER           // Lo
                || type == Character.TITLECASE_LETTER       // Lt
                || type == Character.LETTER_NUMBER;         // Nl

        if (nameStartCategory) {
            return passesNameExceptions(c);
        }

        // a few characters outside those categories are "alphabetic" too
        return between(c, 0x02bb, 0x02c1)
                || c == 0x0559
                || c == 0x06e5
                || c == 0x06e6;
    }

    // true when lo <= c <= hi
    private static boolean between(char c, int lo, int hi) {
        return c >= lo && c <= hi;
    }

    // Exceptions applied to characters whose category otherwise qualifies:
    // compatibility characters are rejected and, per "5.14 of Unicode",
    // so are some combiners.
    private static boolean passesNameExceptions(char c) {
        if (isCompatibilityChar(c)) {
            return false;
        }
        return !between(c, 0x20dd, 0x20e0);
    }

    //
    // XML 1.0 discourages "compatibility" characters in names; these
    // were defined to permit passing through some information stored in
    // older non-Unicode character sets.  These always have alternative
    // representations in Unicode, e.g. using combining chars.
    //
    private static boolean isCompatibilityChar(char c) {
        // The numerous comparisons here seem unavoidable, but dispatching
        // on the high byte reduces the number actually executed.
        final int page = c >>> 8;

        // 0xf9 .. 0xff: the whole "compatibility" area is for that purpose!
        if (page >= 0xf9) {
            return true;
        }

        switch (page) {
            case 0x00:
                // ISO Latin/1 has a few compatibility characters
                return c == 0x00aa || c == 0x00b5 || c == 0x00ba;

            case 0x01:
                // as do Latin Extended A and (parts of) B
                return between(c, 0x0132, 0x0133)
                        || between(c, 0x013f, 0x0140)
                        || c == 0x0149
                        || c == 0x017f
                        || between(c, 0x01c4, 0x01cc)
                        || between(c, 0x01f1, 0x01f3);

            case 0x02:
                // some spacing modifiers
                return between(c, 0x02b0, 0x02b8)
                        || between(c, 0x02e0, 0x02e4);

            case 0x03:
                return c == 0x037a;                 // Greek

            case 0x05:
                return c == 0x0587;                 // Armenian

            case 0x0e:
                return between(c, 0x0edc, 0x0edd);  // Laotian

            case 0x11:
                // big chunks of Hangul Jamo are all "compatibility"
                return c == 0x1101
                        || c == 0x1104
                        || c == 0x1108
                        || c == 0x110a
                        || c == 0x110d
                        || between(c, 0x1113, 0x113b)
                        || c == 0x113d
                        || c == 0x113f
                        || between(c, 0x1141, 0x114b)
                        || c == 0x114d
                        || c == 0x114f
                        || between(c, 0x1151, 0x1153)
                        || between(c, 0x1156, 0x1158)
                        || c == 0x1162
                        || c == 0x1164
                        || c == 0x1166
                        || c == 0x1168
                        || between(c, 0x116a, 0x116c)
                        || between(c, 0x116f, 0x1171)
                        || c == 0x1174
                        || between(c, 0x1176, 0x119d)
                        || between(c, 0x119f, 0x11a2)
                        || between(c, 0x11a9, 0x11aa)
                        || between(c, 0x11ac, 0x11ad)
                        || between(c, 0x11b0, 0x11b6)
                        || c == 0x11b9
                        || c == 0x11bb
                        || between(c, 0x11c3, 0x11ea)
                        || between(c, 0x11ec, 0x11ef)
                        || between(c, 0x11f1, 0x11f8);

            case 0x20:
                return c == 0x207f;                 // superscript

            case 0x21:
                // various letterlike symbols
                return c == 0x2102
                        || c == 0x2107
                        || between(c, 0x210a, 0x2113)
                        || c == 0x2115
                        || between(c, 0x2118, 0x211d)
                        || c == 0x2124
                        || c == 0x2128
                        || between(c, 0x212c, 0x212d)
                        || between(c, 0x212f, 0x2138)
                        // most Roman numerals (less 1K, 5K, 10K)
                        || between(c, 0x2160, 0x217f);

            case 0x30:
                // some Hiragana
                return between(c, 0x309b, 0x309c);

            case 0x31:
                // all Hangul Compatibility Jamo
                return between(c, 0x3131, 0x318e);

            default:
                // most of Unicode isn't flagged as being for compatibility
                return false;
        }
    }

    // guts of isNameChar/isNCNameChar
    private static boolean isLetter2(char c) {
        // [84] Letter ::= BaseChar | Ideographic
        // [85] BaseChar ::= ... too much to repeat
        // [86] Ideographic ::= ... too much to repeat
        // [87] CombiningChar ::= ... too much to repeat

        // Fast path for the typical (ASCII) case.
        if ('a' <= c && c <= 'z') {
            return true;
        }
        if (c == '>') {
            return false;
        }
        if ('A' <= c && c <= 'Z') {
            return true;
        }

        // As in isLetter, the spec's footnotes drive the test.
        final int type = Character.getType(c);
        final boolean nameCategory =
                // app. B footnote says these are "name start" chars ...
                type == Character.LOWERCASE_LETTER          // Ll
                || type == Character.UPPERCASE_LETTER       // Lu
                || type == Character.OTHER_LETTER           // Lo
                || type == Character.TITLECASE_LETTER       // Lt
                || type == Character.LETTER_NUMBER          // Nl
                // ... and these are name characters "other than
                // name start characters"
                || type == Character.COMBINING_SPACING_MARK // Mc
                || type == Character.ENCLOSING_MARK         // Me
                || type == Character.NON_SPACING_MARK       // Mn
                || type == Character.MODIFIER_LETTER        // Lm
                || type == Character.DECIMAL_DIGIT_NUMBER;  // Nd

        if (nameCategory) {
            return passesNameExceptions(c);
        }

        // added a character ...
        return c == 0x0387;
    }

    private static boolean isDigit(char c) {
        // [88] Digit ::= ...

        // java.lang.Character.isDigit is correct from the XML point
        // of view except that it allows "fullwidth" digits.
        if (!Character.isDigit(c)) {
            return false;
        }
        return !between(c, 0xff10, 0xff19);
    }

    private static boolean isExtender(char c) {
        // [89] Extender ::= ...
        switch (c) {
            case 0x00b7:
            case 0x02d0:
            case 0x02d1:
            case 0x0387:
            case 0x0640:
            case 0x0e46:
            case 0x0ec6:
            case 0x3005:
                return true;
            default:
                return between(c, 0x3031, 0x3035)
                        || between(c, 0x309d, 0x309e)
                        || between(c, 0x30fc, 0x30fe);
        }
    }
}
|
||||
147
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/XmlNames.java
Normal file
147
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/XmlNames.java
Normal file
@@ -0,0 +1,147 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
|
||||
/**
|
||||
* This class contains static methods used to determine whether identifiers
|
||||
* may appear in certain roles in XML documents. Such methods are used
|
||||
* both to parse and to create such documents.
|
||||
*
|
||||
* @author David Brownell
|
||||
* @version 1.1, 00/08/05
|
||||
*/
|
||||
public class XmlNames {
|
||||
private XmlNames() {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if the value is a legal XML name.
|
||||
*
|
||||
* @param value the string being tested
|
||||
*/
|
||||
public static boolean isName(String value) {
|
||||
if (value == null)
|
||||
return false;
|
||||
|
||||
char c = value.charAt(0);
|
||||
if (!XmlChars.isLetter(c) && c != '_' && c != ':')
|
||||
return false;
|
||||
for (int i = 1; i < value.length(); i++)
|
||||
if (!XmlChars.isNameChar(value.charAt(i)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the value is a legal "unqualified" XML name, as
|
||||
* defined in the XML Namespaces proposed recommendation.
|
||||
* These are normal XML names, except that they may not contain
|
||||
* a "colon" character.
|
||||
*
|
||||
* @param value the string being tested
|
||||
*/
|
||||
public static boolean isUnqualifiedName(String value) {
|
||||
if (value == null || value.length() == 0)
|
||||
return false;
|
||||
|
||||
char c = value.charAt(0);
|
||||
if (!XmlChars.isLetter(c) && c != '_')
|
||||
return false;
|
||||
for (int i = 1; i < value.length(); i++)
|
||||
if (!XmlChars.isNCNameChar(value.charAt(i)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the value is a legal "qualified" XML name, as defined
|
||||
* in the XML Namespaces proposed recommendation. Qualified names are
|
||||
* composed of an optional prefix (an unqualified name), followed by a
|
||||
* colon, and a required "local part" (an unqualified name). Prefixes are
|
||||
* declared, and correspond to particular URIs which scope the "local
|
||||
* part" of the name. (This method cannot check whether the prefix of a
|
||||
* name has been declared.)
|
||||
*
|
||||
* @param value the string being tested
|
||||
*/
|
||||
public static boolean isQualifiedName(String value) {
|
||||
if (value == null)
|
||||
return false;
|
||||
|
||||
// [6] QName ::= (Prefix ':')? LocalPart
|
||||
// [7] Prefix ::= NCName
|
||||
// [8] LocalPart ::= NCName
|
||||
|
||||
int first = value.indexOf(':');
|
||||
|
||||
// no Prefix, only check LocalPart
|
||||
if (first <= 0)
|
||||
return isUnqualifiedName(value);
|
||||
|
||||
// Prefix exists, check everything
|
||||
|
||||
int last = value.lastIndexOf(':');
|
||||
if (last != first)
|
||||
return false;
|
||||
|
||||
return isUnqualifiedName(value.substring(0, first))
|
||||
&& isUnqualifiedName(value.substring(first + 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns true if the identifier is a "name token"
|
||||
* as defined in the XML specification. Like names, these
|
||||
* may only contain "name characters"; however, they do not need
|
||||
* to have letters as their initial characters. Attribute values
|
||||
* defined to be of type NMTOKEN(S) must satisfy this predicate.
|
||||
*
|
||||
* @param token the string being tested
|
||||
*/
|
||||
public static boolean isNmtoken(String token) {
|
||||
int length = token.length();
|
||||
|
||||
for (int i = 0; i < length; i++)
|
||||
if (!XmlChars.isNameChar(token.charAt(i)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method returns true if the identifier is a "name token" as
|
||||
* defined by the XML Namespaces proposed recommendation.
|
||||
* These are like XML "name tokens" but they may not contain the
|
||||
* "colon" character.
|
||||
*
|
||||
* @param token the string being tested
|
||||
* @see #isNmtoken
|
||||
*/
|
||||
public static boolean isNCNmtoken(String token) {
|
||||
return isNmtoken(token) && token.indexOf(':') < 0;
|
||||
}
|
||||
}
|
||||
784
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/XmlReader.java
Normal file
784
jdkSrc/jdk8/com/sun/xml/internal/dtdparser/XmlReader.java
Normal file
@@ -0,0 +1,784 @@
|
||||
/*
|
||||
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.xml.internal.dtdparser;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.CharConversionException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.io.Reader;
|
||||
import java.util.Hashtable;
|
||||
|
||||
|
||||
// NOTE: Add I18N support to this class when JDK gets the ability to
|
||||
// defer selection of locale for exception messages ... use the same
|
||||
// technique for both.
|
||||
|
||||
|
||||
/**
|
||||
* This handles several XML-related tasks that normal java.io Readers
|
||||
* don't support, including use of IETF standard encoding names and
|
||||
* automatic detection of most XML encodings. The former is needed
|
||||
* for interoperability; the latter is needed to conform with the XML
|
||||
* spec. This class also optimizes reading some common encodings by
|
||||
* providing low-overhead unsynchronized Reader support.
|
||||
* <p/>
|
||||
* <P> Note that the autodetection facility should be used only on
|
||||
* data streams which have an unknown character encoding. For example,
|
||||
* it should never be used on MIME text/xml entities.
|
||||
* <p/>
|
||||
* <P> Note that XML processors are only required to support UTF-8 and
|
||||
* UTF-16 character encodings. Autodetection permits the underlying Java
|
||||
* implementation to provide support for many other encodings, such as
|
||||
* US-ASCII, ISO-8859-5, Shift_JIS, EUC-JP, and ISO-2022-JP.
|
||||
*
|
||||
* @author David Brownell
|
||||
* @author Janet Koenig
|
||||
* @version 1.3 00/02/24
|
||||
*/
|
||||
// package private
|
||||
final class XmlReader extends Reader {
    /**
     * Size of the pushback buffer, and therefore the largest number of
     * leading bytes that may be examined (and pushed back) while working
     * out the document encoding.
     */
    private static final int MAXPUSHBACK = 512;

    /** Reader that performs the actual decoding; null once closed. */
    private Reader in;

    /** Encoding name passed to the most recent {@link #setEncoding} call. */
    private String assignedEncoding;

    /** Set by {@link #close()}, which is also invoked when EOF is seen. */
    private boolean closed;

    //
    // This class always delegates I/O to a reader, which gets
    // its data from the very beginning of the XML text.  It needs
    // to use a pushback stream since (a) autodetection can read
    // partial UTF-8 characters which need to be fully processed,
    // (b) the "Unicode" readers swallow characters that they think
    // are byte order marks, so tests fail if they don't see the
    // real byte order mark.
    //
    // It's got to do this efficiently:  character I/O is solidly on the
    // critical path.  (So keep buffer length over 2 Kbytes to avoid
    // excess buffering.  Many URL handlers stuff a BufferedInputStream
    // between here and the real data source, and larger buffers keep
    // that from slowing you down.)
    //

    /**
     * Constructs the reader from an input stream, autodetecting
     * the encoding to use according to the heuristic specified
     * in the XML 1.0 recommendation.
     *
     * @param in the input stream from which the reader is constructed
     * @return a reader decoding {@code in} with the detected encoding
     * @throws IOException on error, such as unrecognized encoding
     */
    public static Reader createReader(InputStream in) throws IOException {
        return new XmlReader(in);
    }

    /**
     * Creates a reader supporting the given encoding, mapping
     * from standard encoding names to ones that are understood by
     * Java where necessary.
     *
     * @param in       the input stream from which the reader is constructed
     * @param encoding the IETF standard name of the encoding to use;
     *                 if null, autodetection is used.
     * @return a reader decoding {@code in} with the requested encoding
     * @throws IOException on error, including unrecognized encoding
     */
    public static Reader createReader(InputStream in, String encoding)
            throws IOException {
        if (encoding == null) {
            return new XmlReader(in);
        }
        if ("UTF-8".equalsIgnoreCase(encoding)
                || "UTF8".equalsIgnoreCase(encoding)) {
            return new Utf8Reader(in);
        }
        if ("US-ASCII".equalsIgnoreCase(encoding)
                || "ASCII".equalsIgnoreCase(encoding)) {
            return new AsciiReader(in);
        }
        if ("ISO-8859-1".equalsIgnoreCase(encoding)
            // plus numerous aliases ...
                ) {
            return new Iso8859_1Reader(in);
        }

        //
        // What we really want is an administerable resource mapping
        // encoding names/aliases to classnames.  For example a property
        // file resource, "readers/mapping.props", holding such a mapping
        // and a set of readers in that (sub)package... defaulting to this
        // call only if no better choice is available.
        //
        return new InputStreamReader(in, std2java(encoding));
    }

    //
    // JDK doesn't know all of the standard encoding names, and
    // in particular none of the EBCDIC ones IANA defines (and
    // which IBM encourages).
    //
    // Keys are upper-case standard names; values are the names handed
    // to InputStreamReader.
    //
    private static final Hashtable<String, String> charsets =
            new Hashtable<String, String>(31);

    static {
        charsets.put("UTF-16", "Unicode");
        charsets.put("ISO-10646-UCS-2", "Unicode");

        // NOTE: no support for ISO-10646-UCS-4 yet.

        charsets.put("EBCDIC-CP-US", "cp037");
        charsets.put("EBCDIC-CP-CA", "cp037");
        charsets.put("EBCDIC-CP-NL", "cp037");
        charsets.put("EBCDIC-CP-WT", "cp037");

        charsets.put("EBCDIC-CP-DK", "cp277");
        charsets.put("EBCDIC-CP-NO", "cp277");
        charsets.put("EBCDIC-CP-FI", "cp278");
        charsets.put("EBCDIC-CP-SE", "cp278");

        charsets.put("EBCDIC-CP-IT", "cp280");
        charsets.put("EBCDIC-CP-ES", "cp284");
        charsets.put("EBCDIC-CP-GB", "cp285");
        charsets.put("EBCDIC-CP-FR", "cp297");

        charsets.put("EBCDIC-CP-AR1", "cp420");
        charsets.put("EBCDIC-CP-HE", "cp424");
        charsets.put("EBCDIC-CP-BE", "cp500");
        charsets.put("EBCDIC-CP-CH", "cp500");

        charsets.put("EBCDIC-CP-ROECE", "cp870");
        charsets.put("EBCDIC-CP-YU", "cp870");
        charsets.put("EBCDIC-CP-IS", "cp871");
        charsets.put("EBCDIC-CP-AR2", "cp918");

        // IANA also defines two that JDK 1.2 doesn't handle:
        //  EBCDIC-CP-GR        --> CP423
        //  EBCDIC-CP-TR        --> CP905
    }

    /**
     * Maps a standard encoding name onto the name stored for it in
     * {@link #charsets}, for some cases required by the XML spec.
     * Names without an entry are returned unchanged.
     *
     * @param encoding the encoding name to translate; must not be null
     * @return the mapped name, or {@code encoding} itself if unmapped
     */
    private static String std2java(String encoding) {
        // The table keys are plain ASCII, so fold case without consulting
        // the default locale (a Turkish locale would turn 'i' into U+0130
        // and miss every key containing an 'I').
        String mapped = charsets.get(encoding.toUpperCase(java.util.Locale.ROOT));

        return mapped != null ? mapped : encoding;
    }

    /**
     * Returns the name of the encoding in use: the name found in the
     * document's encoding declaration when one was honored, otherwise
     * the name chosen by autodetection.
     *
     * @return the encoding name most recently assigned to this reader
     */
    public String getEncoding() {
        return assignedEncoding;
    }

    /**
     * Peeks at the first four bytes of the stream to choose a decoder,
     * then pushes them back so that the decoder sees the whole document.
     *
     * @param stream the raw document bytes
     * @throws IOException if the stream cannot be read or the selected
     *                     encoding is not supported
     */
    private XmlReader(InputStream stream) throws IOException {
        super(stream);

        final PushbackInputStream pb = (stream instanceof PushbackInputStream)
                ? (PushbackInputStream) stream
                : new PushbackInputStream(stream, MAXPUSHBACK);

        //
        // See if we can figure out the character encoding used
        // in this file by peeking at the first few bytes.  A single
        // read() call is allowed to return fewer bytes than requested,
        // so keep reading until all four arrive or the stream ends.
        //
        final byte[] buf = new byte[4];
        int len = 0;

        while (len < buf.length) {
            final int n = pb.read(buf, len, buf.length - len);
            if (n <= 0) {
                break;
            }
            len += n;
        }
        if (len > 0) {
            pb.unread(buf, 0, len);
        }

        if (len == 4) {
            final int b0 = buf[0] & 0x0ff;
            final int b1 = buf[1] & 0x0ff;
            final int b2 = buf[2] & 0x0ff;
            final int b3 = buf[3] & 0x0ff;

            switch (b0) {
                case 0x00:
                    // 00 3c 00 3f == illegal UTF-16 big-endian
                    if (b1 == 0x3c && b2 == 0x00 && b3 == 0x3f) {
                        setEncoding(pb, "UnicodeBig");
                        return;
                    }
                    // else it's probably UCS-4
                    break;

                case '<':      // 0x3c: the most common cases!
                    if (b1 == 0x00) {
                        // 3c 00 3f 00 == illegal UTF-16 little endian
                        if (b2 == 0x3f && b3 == 0x00) {
                            setEncoding(pb, "UnicodeLittle");
                            return;
                        }
                        // else probably UCS-4
                    } else if (b1 == '?') {
                        // 3c 3f 78 6d == ASCII and supersets '<?xm'
                        if (b2 == 'x' && b3 == 'm') {
                            //
                            // One of several encodings could be used:
                            // Shift-JIS, ASCII, UTF-8, ISO-8859-*, etc
                            //
                            useEncodingDecl(pb, "UTF8");
                            return;
                        }
                    }
                    // Otherwise the first character is '<' but this is
                    // XML without an XML directive, such as "<hello>",
                    // "<!-- ...", and so on.
                    break;

                case 0x4c:
                    // 4c 6f a7 94 ... some EBCDIC code page
                    if (b1 == 0x6f && b2 == 0x0a7 && b3 == 0x094) {
                        useEncodingDecl(pb, "CP037");
                        return;
                    }
                    // whoops, treat as UTF-8
                    break;

                case 0xfe:
                    // fe ff == UTF-16 big-endian byte order mark
                    if (b1 == 0xff) {
                        setEncoding(pb, "UTF-16");
                        return;
                    }
                    break;

                case 0xff:
                    // ff fe == UTF-16 little-endian byte order mark
                    if (b1 == 0xfe) {
                        setEncoding(pb, "UTF-16");
                        return;
                    }
                    break;

                default:
                    // no XML declaration
                    break;
            }
        }

        //
        // If all else fails, assume XML without a declaration, and
        // using UTF-8 encoding.
        //
        setEncoding(pb, "UTF-8");
    }

    /**
     * Reports whether {@code pattern} occurs anywhere within the first
     * {@code limit} bytes of {@code data}.
     */
    private static boolean containsSequence(byte[] data, int limit, byte[] pattern) {
        search:
        for (int from = 0; from + pattern.length <= limit; from++) {
            for (int k = 0; k < pattern.length; k++) {
                if (data[from + k] != pattern[k]) {
                    continue search;
                }
            }
            return true;
        }
        return false;
    }

    /*
     * Read the encoding decl on the stream, knowing that it should
     * be readable using the specified encoding (basically, ASCII or
     * EBCDIC).  The body of the document may use a wider range of
     * characters than the XML/Text decl itself, so we switch to use
     * the specified encoding as soon as we can.  (ASCII is a subset
     * of UTF-8, ISO-8859-*, ISO-2022-JP, EUC-JP, and more; EBCDIC
     * has a variety of "code pages" that have these characters as
     * a common subset.)
     *
     * Whatever is read here is pushed back onto "pb" before the final
     * reader is created, so that reader starts at the first byte of
     * the document.  If no usable encoding name is found, UTF-8 is used.
     */
    private void useEncodingDecl(PushbackInputStream pb, String encoding)
            throws IOException {
        final byte[] buffer = new byte[MAXPUSHBACK];
        final byte[] declEnd = "?>".getBytes(encoding);
        int len = 0;

        //
        // Buffer up a bunch of input.  One read() may deliver only part
        // of the declaration, so keep going until its closing "?>" has
        // arrived, the buffer is full, or the stream ends.  Stopping at
        // "?>" avoids waiting for data the declaration does not need.
        //
        while (len < buffer.length && !containsSequence(buffer, len, declEnd)) {
            final int n = pb.read(buffer, len, buffer.length - len);
            if (n <= 0) {
                break;
            }
            len += n;
        }
        if (len > 0) {
            pb.unread(buffer, 0, len);
        }

        //
        // Set up to read it in the specified encoding ... we can skip
        // the first four bytes since we know that "<?xm" was read to
        // determine what encoding to use!  The third constructor argument
        // is a byte count, so it must exclude those four skipped bytes.
        //
        final Reader r = new InputStreamReader(
                new ByteArrayInputStream(buffer, 4, Math.max(0, len - 4)),
                encoding);
        int c;

        //
        // Next must be "l" (and whitespace) else we conclude
        // error and choose UTF-8.
        //
        if ((c = r.read()) != 'l') {
            setEncoding(pb, "UTF-8");
            return;
        }

        //
        // Then, we'll skip any
        //  S version="..."     [or single quotes]
        // bit and get any subsequent
        //  S encoding="..."    [or single quotes]
        //
        // We put an arbitrary size limit on how far we read; lots
        // of space will break this algorithm.
        //
        final StringBuilder buf = new StringBuilder();
        StringBuilder keyBuf = null;    // non-null while a key is being collected
        String key = null;
        boolean sawEq = false;
        char quoteChar = 0;
        boolean sawQuestion = false;

        XmlDecl:
        for (int i = 0; i < MAXPUSHBACK - 5; ++i) {
            if ((c = r.read()) == -1) {
                break;
            }

            // ignore whitespace before/between "key = 'value'"
            if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                continue;
            }

            // ... but require at least a little!
            if (i == 0) {
                break;
            }

            // terminate the loop ASAP
            if (c == '?') {
                sawQuestion = true;
            } else if (sawQuestion) {
                if (c == '>') {
                    break;
                }
                sawQuestion = false;
            }

            // did we get the "key =" bit yet?
            if (key == null || !sawEq) {
                if (keyBuf == null) {
                    if (Character.isWhitespace((char) c)) {
                        continue;
                    }
                    // the key shares the value buffer; it is copied out
                    // into "key" before the buffer is reused
                    keyBuf = buf;
                    buf.setLength(0);
                    buf.append((char) c);
                    sawEq = false;
                } else if (Character.isWhitespace((char) c)) {
                    key = keyBuf.toString();
                } else if (c == '=') {
                    if (key == null) {
                        key = keyBuf.toString();
                    }
                    sawEq = true;
                    keyBuf = null;
                    quoteChar = 0;
                } else {
                    keyBuf.append((char) c);
                }
                continue;
            }

            // space before quoted value
            if (Character.isWhitespace((char) c)) {
                continue;
            }
            if (c == '"' || c == '\'') {
                if (quoteChar == 0) {
                    // opening quote: start collecting the value
                    quoteChar = (char) c;
                    buf.setLength(0);
                    continue;
                } else if (c == quoteChar) {
                    // closing quote
                    if ("encoding".equals(key)) {
                        final String declared = buf.toString();

                        // [81] Encname ::= [A-Za-z] ([A-Za-z0-9._]|'-')*
                        for (int k = 0; k < declared.length(); k++) {
                            final char ch = declared.charAt(k);

                            if ((ch >= 'A' && ch <= 'Z')
                                    || (ch >= 'a' && ch <= 'z')) {
                                continue;
                            }
                            if (k > 0 && (ch == '-'
                                    || (ch >= '0' && ch <= '9')
                                    || ch == '.' || ch == '_')) {
                                continue;
                            }
                            // map illegal names to UTF-8 default
                            break XmlDecl;
                        }

                        setEncoding(pb, declared);
                        return;
                    }

                    // some other pseudo-attribute; look for the next key
                    key = null;
                    continue;
                }
            }
            buf.append((char) c);
        }

        setEncoding(pb, "UTF-8");
    }

    /**
     * Records the encoding name and creates the delegate reader for it.
     *
     * @param stream   the stream the delegate reads from
     * @param encoding the encoding name to record and decode with
     * @throws IOException if no reader can be created for the encoding
     */
    private void setEncoding(InputStream stream, String encoding)
            throws IOException {
        assignedEncoding = encoding;
        in = createReader(stream, encoding);
    }

    /**
     * Reads characters into part of an array.  The reader closes itself
     * when the end of the stream is reached.
     *
     * @return the number of characters read into the buffer, or -1 on
     *         EOF or when the reader is already closed
     */
    @Override
    public int read(char[] buf, int off, int len) throws IOException {
        if (closed) {
            return -1;        // throw new IOException ("closed");
        }

        final int val = in.read(buf, off, len);

        if (val == -1) {
            close();
        }
        return val;
    }

    /**
     * Reads a single character.  The reader closes itself when the end
     * of the stream is reached.
     *
     * @return the character read, or -1 on EOF or when the reader is
     *         already closed
     */
    @Override
    public int read() throws IOException {
        // This reader closes itself on EOF, so "closed" is the normal
        // state after the last character; report EOF again rather than
        // failing, exactly as the array-based read does.
        if (closed) {
            return -1;
        }

        final int val = in.read();

        if (val == -1) {
            close();
        }
        return val;
    }

    /**
     * Returns true iff the reader supports mark/reset.
     */
    @Override
    public boolean markSupported() {
        return in == null ? false : in.markSupported();
    }

    /**
     * Sets a mark allowing a limited number of characters to
     * be "peeked", by reading and then resetting.  Does nothing once
     * the reader is closed.
     *
     * @param value how many characters may be "peeked".
     */
    @Override
    public void mark(int value) throws IOException {
        if (in != null) {
            in.mark(value);
        }
    }

    /**
     * Resets the current position to the last marked position.  Does
     * nothing once the reader is closed.
     */
    @Override
    public void reset() throws IOException {
        if (in != null) {
            in.reset();
        }
    }

    /**
     * Skips a specified number of characters.
     *
     * @return the number of characters skipped; 0 once closed
     */
    @Override
    public long skip(long value) throws IOException {
        return in == null ? 0 : in.skip(value);
    }

    /**
     * Returns true iff input characters are known to be ready.
     */
    @Override
    public boolean ready() throws IOException {
        return in == null ? false : in.ready();
    }

    /**
     * Closes the reader and its delegate.  Closing an already closed
     * reader has no effect.
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }
        in.close();
        in = null;
        closed = true;
    }

    //
    // Delegating to a converter module will always be slower than
    // direct conversion.  Use a similar approach for any other
    // readers that need to be particularly fast; only block I/O
    // speed matters to this package.  For UTF-16, separate readers
    // for big and little endian streams make a difference, too;
    // fewer conditionals in the critical path!
    //
    static abstract class BaseReader extends Reader {
        /** Source of raw bytes; null once this reader is closed. */
        protected InputStream instream;

        /** Raw bytes read from {@link #instream}; null once closed. */
        protected byte[] buffer;

        /** Unconsumed bytes occupy {@code buffer[start .. finish - 1]}. */
        protected int start, finish;

        BaseReader(InputStream stream) {
            super(stream);

            instream = stream;
            buffer = new byte[8192];
        }

        /**
         * Returns true when the reader is closed, when buffered bytes
         * remain, or when the underlying stream reports available bytes.
         */
        @Override
        public boolean ready() throws IOException {
            return instream == null
                    || (finish - start) > 0
                    || instream.available() != 0;
        }

        // caller shouldn't read again
        @Override
        public void close() throws IOException {
            if (instream != null) {
                instream.close();
                start = finish = 0;
                buffer = null;
                instream = null;
            }
        }
    }

    //
    // We want this reader, to make the default encoding be as fast
    // as we can make it.  JDK's "UTF8" (not "UTF-8" till JDK 1.2)
    // InputStreamReader works, but 20+% slower speed isn't OK for
    // the default/primary encoding.
    //
    static final class Utf8Reader extends BaseReader {
        // Low (second) half of a UTF-16 surrogate pair that did not fit
        // into the caller's array on the previous read; 0 when none.
        private char nextChar;

        Utf8Reader(InputStream stream) {
            super(stream);
        }

        /**
         * Decodes UTF-8 bytes into UTF-16 characters.
         *
         * @return the number of characters stored, 0 if {@code len} is
         *         not positive, or -1 at end of stream
         * @throws CharConversionException if the input is not UTF-8 that
         *         this reader can convert
         */
        @Override
        public int read(char[] buf, int offset, int len) throws IOException {
            if (len <= 0) {
                return 0;
            }

            int i = 0;
            boolean eof = false;

            // Consume remaining half of any surrogate pair immediately
            if (nextChar != 0) {
                buf[offset + i++] = nextChar;
                nextChar = 0;
            }

            while (i < len) {
                // stop or read data if needed
                if (finish <= start) {
                    if (instream == null) {
                        eof = true;
                        break;
                    }
                    start = 0;
                    finish = instream.read(buffer, 0, buffer.length);
                    if (finish <= 0) {
                        this.close();
                        eof = true;
                        break;
                    }
                }

                //
                // RFC 2279 describes UTF-8; there are six encodings.
                // Each encoding takes a fixed number of bytes
                // (1-6 bytes) and is flagged by a bit pattern in the
                // first byte.  The five and six byte-per-character
                // encodings address characters which are disallowed
                // in XML documents, as do some four byte ones.
                //

                //
                // Single byte == ASCII.  Common; optimize.
                //
                int c = buffer[start] & 0x0ff;
                if ((c & 0x80) == 0x00) {
                    // 0x0000 <= c <= 0x007f
                    start++;
                    buf[offset + i++] = (char) c;
                    continue;
                }

                //
                // Multibyte chars -- the first byte says how many bytes
                // the character occupies.
                //
                final int need;

                if ((c & 0x0E0) == 0x0C0) {
                    need = 2;
                } else if ((c & 0x0F0) == 0x0E0) {
                    need = 3;
                } else if ((c & 0x0F8) == 0x0F0) {
                    need = 4;
                } else {
                    // 5 and 6 byte versions are XML WF errors, but
                    // typically come from mislabeled encodings
                    throw new CharConversionException("Unconvertible UTF-8 character"
                            + " beginning with 0x"
                            + Integer.toHexString(c));
                }

                //
                // if the buffer held only a partial character,
                // compact it and try to read the rest of the
                // character.  worst case involves three
                // single-byte reads -- quite rare.  This is checked
                // before decoding so that bytes beyond "finish", which
                // are left over from earlier reads, are never examined.
                //
                if (finish - start < need) {
                    System.arraycopy(buffer, start,
                            buffer, 0, finish - start);
                    finish -= start;
                    start = 0;

                    final int more = instream.read(buffer, finish,
                            buffer.length - finish);
                    if (more < 0) {
                        this.close();
                        throw new CharConversionException("Partial UTF-8 char");
                    }
                    finish += more;
                    continue;
                }

                char low = 0;    // low surrogate, for four byte encodings

                switch (need) {
                    case 2:
                        // 0x0080 <= c <= 0x07ff
                        c = ((buffer[start] & 0x1f) << 6)
                                + (buffer[start + 1] & 0x3f);
                        break;

                    case 3:
                        // 0x0800 <= c <= 0xffff
                        c = ((buffer[start] & 0x0f) << 12)
                                + ((buffer[start + 1] & 0x3f) << 6)
                                + (buffer[start + 2] & 0x3f);
                        break;

                    default:
                        // 0x0001 0000  <= c  <= 0x001f ffff
                        c = ((buffer[start] & 0x07) << 18)
                                + ((buffer[start + 1] & 0x3f) << 12)
                                + ((buffer[start + 2] & 0x3f) << 6)
                                + (buffer[start + 3] & 0x3f);

                        // Unicode supports c <= 0x0010 ffff ...
                        if (c > 0x0010ffff) {
                            throw new CharConversionException("UTF-8 encoding of character 0x00"
                                    + Integer.toHexString(c)
                                    + " can't be converted to Unicode.");
                        }

                        // Convert UCS-4 char to surrogate pair (UTF-16)
                        c -= 0x10000;
                        low = (char) (0xDC00 + (c & 0x03ff));
                        c = 0xD800 + (c >> 10);
                        break;
                }

                //
                // check the format of the non-initial bytes
                //
                final int end = start + need;

                for (start++; start < end; start++) {
                    if ((buffer[start] & 0xC0) != 0x80) {
                        this.close();
                        throw new CharConversionException("Malformed UTF-8 char -- "
                                + "is an XML encoding declaration missing?");
                    }
                }

                //
                // If this needed a surrogate pair, consume ASAP;
                // otherwise hold the second half for the next call.
                //
                buf[offset + i++] = (char) c;
                if (low != 0) {
                    if (i < len) {
                        buf[offset + i++] = low;
                    } else {
                        nextChar = low;
                    }
                }
            }
            if (i > 0) {
                return i;
            }
            return eof ? -1 : 0;
        }
    }

    //
    // We want ASCII and ISO-8859 Readers since they're the most common
    // encodings in the US and Europe, and we don't want performance
    // regressions for them.  They're also easy to implement efficiently,
    // since they're bitmask subsets of UNICODE.
    //
    // XXX haven't benchmarked these readers vs what we get out of JDK.
    //
    static final class AsciiReader extends BaseReader {
        AsciiReader(InputStream in) {
            super(in);
        }

        /**
         * Converts seven-bit ASCII bytes to characters.
         *
         * @return the number of characters stored, 0 if {@code len} is
         *         not positive, or -1 at end of stream or once closed
         * @throws CharConversionException if a byte has its high bit set
         */
        @Override
        public int read(char[] buf, int offset, int len) throws IOException {
            if (instream == null) {
                return -1;
            }
            // asking for nothing is not the same as end of stream
            if (len <= 0) {
                return 0;
            }

            int i;

            for (i = 0; i < len; i++) {
                if (start >= finish) {
                    start = 0;
                    finish = instream.read(buffer, 0, buffer.length);
                    if (finish <= 0) {
                        this.close();
                        break;
                    }
                }

                final int c = buffer[start++];

                if ((c & 0x80) != 0) {
                    throw new CharConversionException("Illegal ASCII character, 0x"
                            + Integer.toHexString(c & 0xff));
                }
                buf[offset + i] = (char) c;
            }
            if (i == 0 && finish <= 0) {
                return -1;
            }
            return i;
        }
    }

    static final class Iso8859_1Reader extends BaseReader {
        Iso8859_1Reader(InputStream in) {
            super(in);
        }

        /**
         * Converts ISO-8859-1 bytes to characters; each byte becomes the
         * character with the same unsigned value.
         *
         * @return the number of characters stored, 0 if {@code len} is
         *         not positive, or -1 at end of stream or once closed
         */
        @Override
        public int read(char[] buf, int offset, int len) throws IOException {
            if (instream == null) {
                return -1;
            }
            // asking for nothing is not the same as end of stream
            if (len <= 0) {
                return 0;
            }

            int i;

            for (i = 0; i < len; i++) {
                if (start >= finish) {
                    start = 0;
                    finish = instream.read(buffer, 0, buffer.length);
                    if (finish <= 0) {
                        this.close();
                        break;
                    }
                }
                buf[offset + i] = (char) (0x0ff & buffer[start++]);
            }
            if (i == 0 && finish <= 0) {
                return -1;
            }
            return i;
        }
    }
}
|
||||
Reference in New Issue
Block a user