2351 lines
77 KiB
Java
2351 lines
77 KiB
Java
/*
|
|
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
* published by the Free Software Foundation. Oracle designates this
|
|
* particular file as subject to the "Classpath" exception as provided
|
|
* by Oracle in the LICENSE file that accompanied this code.
|
|
*
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
* accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU General Public License version
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
* questions.
|
|
*/
|
|
|
|
package com.sun.xml.internal.dtdparser;
|
|
|
|
import org.xml.sax.EntityResolver;
|
|
import org.xml.sax.InputSource;
|
|
import org.xml.sax.Locator;
|
|
import org.xml.sax.SAXException;
|
|
import org.xml.sax.SAXParseException;
|
|
|
|
import java.io.IOException;
|
|
import java.util.ArrayList;
|
|
import java.util.Enumeration;
|
|
import java.util.Hashtable;
|
|
import java.util.Locale;
|
|
import java.util.Set;
|
|
import java.util.Vector;
|
|
|
|
/**
|
|
* This implements parsing of XML 1.0 DTDs.
|
|
* <p/>
|
|
* This conforms to the portion of the XML 1.0 specification related
|
|
* to the external DTD subset.
|
|
* <p/>
|
|
* For multi-language applications (such as web servers using XML
|
|
* processing to create dynamic content), a method supports choosing
|
|
* a locale for parser diagnostics which is both understood by the
|
|
* message recipient and supported by the parser.
|
|
* <p/>
|
|
* This parser produces a stream of parse events. It supports some
|
|
* features (exposing comments, CDATA sections, and entity references)
|
|
* which are not required to be reported by conformant XML processors.
|
|
*
|
|
* @author David Brownell
|
|
* @author Janet Koenig
|
|
* @author Kohsuke KAWAGUCHI
|
|
* @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $
|
|
*/
|
|
public class DTDParser {
|
|
public final static String TYPE_CDATA = "CDATA";
|
|
public final static String TYPE_ID = "ID";
|
|
public final static String TYPE_IDREF = "IDREF";
|
|
public final static String TYPE_IDREFS = "IDREFS";
|
|
public final static String TYPE_ENTITY = "ENTITY";
|
|
public final static String TYPE_ENTITIES = "ENTITIES";
|
|
public final static String TYPE_NMTOKEN = "NMTOKEN";
|
|
public final static String TYPE_NMTOKENS = "NMTOKENS";
|
|
public final static String TYPE_NOTATION = "NOTATION";
|
|
public final static String TYPE_ENUMERATION = "ENUMERATION";
|
|
|
|
|
|
// stack of input entities being merged
|
|
private InputEntity in;
|
|
|
|
// temporaries reused during parsing
|
|
private StringBuffer strTmp;
|
|
private char nameTmp [];
|
|
private NameCache nameCache;
|
|
private char charTmp [] = new char[2];
|
|
|
|
// temporary DTD parsing state
|
|
private boolean doLexicalPE;
|
|
|
|
// DTD state, used during parsing
|
|
// private SimpleHashtable elements = new SimpleHashtable (47);
|
|
protected final Set declaredElements = new java.util.HashSet();
|
|
private SimpleHashtable params = new SimpleHashtable(7);
|
|
|
|
// exposed to package-private subclass
|
|
Hashtable notations = new Hashtable(7);
|
|
SimpleHashtable entities = new SimpleHashtable(17);
|
|
|
|
private SimpleHashtable ids = new SimpleHashtable();
|
|
|
|
// listeners for DTD parsing events
|
|
private DTDEventListener dtdHandler;
|
|
|
|
private EntityResolver resolver;
|
|
private Locale locale;
|
|
|
|
// string constants -- use these copies so "==" works
|
|
// package private
|
|
static final String strANY = "ANY";
|
|
static final String strEMPTY = "EMPTY";
|
|
|
|
/**
|
|
* Used by applications to request locale for diagnostics.
|
|
*
|
|
* @param l The locale to use, or null to use system defaults
|
|
* (which may include only message IDs).
|
|
*/
|
|
public void setLocale(Locale l) throws SAXException {
|
|
|
|
if (l != null && !messages.isLocaleSupported(l.toString())) {
|
|
throw new SAXException(messages.getMessage(locale,
|
|
"P-078", new Object[]{l}));
|
|
}
|
|
locale = l;
|
|
}
|
|
|
|
/**
|
|
* Returns the diagnostic locale.
|
|
*/
|
|
public Locale getLocale() {
|
|
return locale;
|
|
}
|
|
|
|
/**
|
|
* Chooses a client locale to use for diagnostics, using the first
|
|
* language specified in the list that is supported by this parser.
|
|
* That locale is then set using <a href="#setLocale(java.util.Locale)">
|
|
* setLocale()</a>. Such a list could be provided by a variety of user
|
|
* preference mechanisms, including the HTTP <em>Accept-Language</em>
|
|
* header field.
|
|
*
|
|
* @param languages Array of language specifiers, ordered with the most
|
|
* preferable one at the front. For example, "en-ca" then "fr-ca",
|
|
* followed by "zh_CN". Both RFC 1766 and Java styles are supported.
|
|
* @return The chosen locale, or null.
|
|
* @see MessageCatalog
|
|
*/
|
|
public Locale chooseLocale(String languages [])
|
|
throws SAXException {
|
|
|
|
Locale l = messages.chooseLocale(languages);
|
|
|
|
if (l != null) {
|
|
setLocale(l);
|
|
}
|
|
return l;
|
|
}
|
|
|
|
/**
|
|
* Lets applications control entity resolution.
|
|
*/
|
|
public void setEntityResolver(EntityResolver r) {
|
|
|
|
resolver = r;
|
|
}
|
|
|
|
/**
|
|
* Returns the object used to resolve entities
|
|
*/
|
|
public EntityResolver getEntityResolver() {
|
|
|
|
return resolver;
|
|
}
|
|
|
|
/**
|
|
* Used by applications to set handling of DTD parsing events.
|
|
*/
|
|
public void setDtdHandler(DTDEventListener handler) {
|
|
dtdHandler = handler;
|
|
if (handler != null)
|
|
handler.setDocumentLocator(new Locator() {
|
|
public String getPublicId() {
|
|
return DTDParser.this.getPublicId();
|
|
}
|
|
|
|
public String getSystemId() {
|
|
return DTDParser.this.getSystemId();
|
|
}
|
|
|
|
public int getLineNumber() {
|
|
return DTDParser.this.getLineNumber();
|
|
}
|
|
|
|
public int getColumnNumber() {
|
|
return DTDParser.this.getColumnNumber();
|
|
}
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Returns the handler used to for DTD parsing events.
|
|
*/
|
|
public DTDEventListener getDtdHandler() {
|
|
return dtdHandler;
|
|
}
|
|
|
|
/**
|
|
* Parse a DTD.
|
|
*/
|
|
public void parse(InputSource in)
|
|
throws IOException, SAXException {
|
|
init();
|
|
parseInternal(in);
|
|
}
|
|
|
|
/**
|
|
* Parse a DTD.
|
|
*/
|
|
public void parse(String uri)
|
|
throws IOException, SAXException {
|
|
InputSource in;
|
|
|
|
init();
|
|
// System.out.println ("parse (\"" + uri + "\")");
|
|
in = resolver.resolveEntity(null, uri);
|
|
|
|
// If custom resolver punts resolution to parser, handle it ...
|
|
if (in == null) {
|
|
in = Resolver.createInputSource(new java.net.URL(uri), false);
|
|
|
|
// ... or if custom resolver doesn't correctly construct the
|
|
// input entity, patch it up enough so relative URIs work, and
|
|
// issue a warning to minimize later confusion.
|
|
} else if (in.getSystemId() == null) {
|
|
warning("P-065", null);
|
|
in.setSystemId(uri);
|
|
}
|
|
|
|
parseInternal(in);
|
|
}
|
|
|
|
// makes sure the parser is reset to "before a document"
|
|
private void init() {
|
|
in = null;
|
|
|
|
// alloc temporary data used in parsing
|
|
strTmp = new StringBuffer();
|
|
nameTmp = new char[20];
|
|
nameCache = new NameCache();
|
|
|
|
// reset doc info
|
|
// isInAttribute = false;
|
|
|
|
doLexicalPE = false;
|
|
|
|
entities.clear();
|
|
notations.clear();
|
|
params.clear();
|
|
// elements.clear ();
|
|
declaredElements.clear();
|
|
|
|
// initialize predefined references ... re-interpreted later
|
|
builtin("amp", "&");
|
|
builtin("lt", "<");
|
|
builtin("gt", ">");
|
|
builtin("quot", "\"");
|
|
builtin("apos", "'");
|
|
|
|
if (locale == null)
|
|
locale = Locale.getDefault();
|
|
if (resolver == null)
|
|
resolver = new Resolver();
|
|
if (dtdHandler == null)
|
|
dtdHandler = new DTDHandlerBase();
|
|
}
|
|
|
|
private void builtin(String entityName, String entityValue) {
|
|
InternalEntity entity;
|
|
entity = new InternalEntity(entityName, entityValue.toCharArray());
|
|
entities.put(entityName, entity);
|
|
}
|
|
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
//
|
|
// parsing is by recursive descent, code roughly
|
|
// following the BNF rules except tweaked for simple
|
|
// lookahead. rules are more or less in numeric order,
|
|
// except where code sharing suggests other structures.
|
|
//
|
|
// a classic benefit of recursive descent parsers: it's
|
|
// relatively easy to get diagnostics that make sense.
|
|
//
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
|
|
private void parseInternal(InputSource input)
|
|
throws IOException, SAXException {
|
|
|
|
if (input == null)
|
|
fatal("P-000");
|
|
|
|
try {
|
|
in = InputEntity.getInputEntity(dtdHandler, locale);
|
|
in.init(input, null, null, false);
|
|
|
|
dtdHandler.startDTD(in);
|
|
|
|
// [30] extSubset ::= TextDecl? extSubsetDecl
|
|
// [31] extSubsetDecl ::= ( markupdecl | conditionalSect
|
|
// | PEReference | S )*
|
|
// ... same as [79] extPE, which is where the code is
|
|
|
|
ExternalEntity externalSubset = new ExternalEntity(in);
|
|
externalParameterEntity(externalSubset);
|
|
|
|
if (!in.isEOF()) {
|
|
fatal("P-001", new Object[]
|
|
{Integer.toHexString(((int) getc()))});
|
|
}
|
|
afterRoot();
|
|
dtdHandler.endDTD();
|
|
|
|
} catch (EndOfInputException e) {
|
|
if (!in.isDocument()) {
|
|
String name = in.getName();
|
|
do { // force a relevant URI and line number
|
|
in = in.pop();
|
|
} while (in.isInternal());
|
|
fatal("P-002", new Object[]{name});
|
|
} else {
|
|
fatal("P-003", null);
|
|
}
|
|
} catch (RuntimeException e) {
|
|
// Don't discard location that triggered the exception
|
|
// ## Should properly wrap exception
|
|
System.err.print("Internal DTD parser error: "); // ##
|
|
e.printStackTrace();
|
|
throw new SAXParseException(e.getMessage() != null
|
|
? e.getMessage() : e.getClass().getName(),
|
|
getPublicId(), getSystemId(),
|
|
getLineNumber(), getColumnNumber());
|
|
|
|
} finally {
|
|
// recycle temporary data used during parsing
|
|
strTmp = null;
|
|
nameTmp = null;
|
|
nameCache = null;
|
|
|
|
// ditto input sources etc
|
|
if (in != null) {
|
|
in.close();
|
|
in = null;
|
|
}
|
|
|
|
// get rid of all DTD info ... some of it would be
|
|
// useful for editors etc, investigate later.
|
|
|
|
params.clear();
|
|
entities.clear();
|
|
notations.clear();
|
|
declaredElements.clear();
|
|
// elements.clear();
|
|
ids.clear();
|
|
}
|
|
}
|
|
|
|
void afterRoot() throws SAXException {
|
|
// Make sure all IDREFs match declared ID attributes. We scan
|
|
// after the document element is parsed, since XML allows forward
|
|
// references, and only now can we know if they're all resolved.
|
|
|
|
for (Enumeration e = ids.keys();
|
|
e.hasMoreElements();
|
|
) {
|
|
String id = (String) e.nextElement();
|
|
Boolean value = (Boolean) ids.get(id);
|
|
if (Boolean.FALSE == value)
|
|
error("V-024", new Object[]{id});
|
|
}
|
|
}
|
|
|
|
|
|
// role is for diagnostics
|
|
private void whitespace(String roleId)
|
|
throws IOException, SAXException {
|
|
|
|
// [3] S ::= (#x20 | #x9 | #xd | #xa)+
|
|
if (!maybeWhitespace()) {
|
|
fatal("P-004", new Object[]
|
|
{messages.getMessage(locale, roleId)});
|
|
}
|
|
}
|
|
|
|
// S?
|
|
private boolean maybeWhitespace()
|
|
throws IOException, SAXException {
|
|
|
|
if (!doLexicalPE)
|
|
return in.maybeWhitespace();
|
|
|
|
// see getc() for the PE logic -- this lets us splice
|
|
// expansions of PEs in "anywhere". getc() has smarts,
|
|
// so for external PEs we don't bypass it.
|
|
|
|
// XXX we can marginally speed PE handling, and certainly
|
|
// be cleaner (hence potentially more correct), by using
|
|
// the observations that expanded PEs only start and stop
|
|
// where whitespace is allowed. getc wouldn't need any
|
|
// "lexical" PE expansion logic, and no other method needs
|
|
// to handle termination of PEs. (parsing of literals would
|
|
// still need to pop entities, but not parsing of references
|
|
// in content.)
|
|
|
|
char c = getc();
|
|
boolean saw = false;
|
|
|
|
while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
|
|
saw = true;
|
|
|
|
// this gracefully ends things when we stop playing
|
|
// with internal parameters. caller should have a
|
|
// grammar rule allowing whitespace at end of entity.
|
|
if (in.isEOF() && !in.isInternal())
|
|
return saw;
|
|
c = getc();
|
|
}
|
|
ungetc();
|
|
return saw;
|
|
}
|
|
|
|
private String maybeGetName()
|
|
throws IOException, SAXException {
|
|
|
|
NameCacheEntry entry = maybeGetNameCacheEntry();
|
|
return (entry == null) ? null : entry.name;
|
|
}
|
|
|
|
private NameCacheEntry maybeGetNameCacheEntry()
|
|
throws IOException, SAXException {
|
|
|
|
// [5] Name ::= (Letter|'_'|':') (Namechar)*
|
|
char c = getc();
|
|
|
|
if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
|
|
ungetc();
|
|
return null;
|
|
}
|
|
return nameCharString(c);
|
|
}
|
|
|
|
// Used when parsing enumerations
|
|
private String getNmtoken()
|
|
throws IOException, SAXException {
|
|
|
|
// [7] Nmtoken ::= (Namechar)+
|
|
char c = getc();
|
|
if (!XmlChars.isNameChar(c))
|
|
fatal("P-006", new Object[]{new Character(c)});
|
|
return nameCharString(c).name;
|
|
}
|
|
|
|
// n.b. this gets used when parsing attribute values (for
|
|
// internal references) so we can't use strTmp; it's also
|
|
// a hotspot for CPU and memory in the parser (called at least
|
|
// once for each element) so this has been optimized a bit.
|
|
|
|
private NameCacheEntry nameCharString(char c)
|
|
throws IOException, SAXException {
|
|
|
|
int i = 1;
|
|
|
|
nameTmp[0] = c;
|
|
for (; ;) {
|
|
if ((c = in.getNameChar()) == 0)
|
|
break;
|
|
if (i >= nameTmp.length) {
|
|
char tmp [] = new char[nameTmp.length + 10];
|
|
System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
|
|
nameTmp = tmp;
|
|
}
|
|
nameTmp[i++] = c;
|
|
}
|
|
return nameCache.lookupEntry(nameTmp, i);
|
|
}
|
|
|
|
//
|
|
// much similarity between parsing entity values in DTD
|
|
// and attribute values (in DTD or content) ... both follow
|
|
// literal parsing rules, newline canonicalization, etc
|
|
//
|
|
// leaves value in 'strTmp' ... either a "replacement text" (4.5),
|
|
// or else partially normalized attribute value (the first bit
|
|
// of 3.3.3's spec, without the "if not CDATA" bits).
|
|
//
|
|
private void parseLiteral(boolean isEntityValue)
|
|
throws IOException, SAXException {
|
|
|
|
// [9] EntityValue ::=
|
|
// '"' ([^"&%] | Reference | PEReference)* '"'
|
|
// | "'" ([^'&%] | Reference | PEReference)* "'"
|
|
// [10] AttValue ::=
|
|
// '"' ([^"&] | Reference )* '"'
|
|
// | "'" ([^'&] | Reference )* "'"
|
|
char quote = getc();
|
|
char c;
|
|
InputEntity source = in;
|
|
|
|
if (quote != '\'' && quote != '"') {
|
|
fatal("P-007");
|
|
}
|
|
|
|
// don't report entity expansions within attributes,
|
|
// they're reported "fully expanded" via SAX
|
|
// isInAttribute = !isEntityValue;
|
|
|
|
// get value into strTmp
|
|
strTmp = new StringBuffer();
|
|
|
|
// scan, allowing entity push/pop wherever ...
|
|
// expanded entities can't terminate the literal!
|
|
for (; ;) {
|
|
if (in != source && in.isEOF()) {
|
|
// we don't report end of parsed entities
|
|
// within attributes (no SAX hooks)
|
|
in = in.pop();
|
|
continue;
|
|
}
|
|
if ((c = getc()) == quote && in == source) {
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Basically the "reference in attribute value"
|
|
// row of the chart in section 4.4 of the spec
|
|
//
|
|
if (c == '&') {
|
|
String entityName = maybeGetName();
|
|
|
|
if (entityName != null) {
|
|
nextChar(';', "F-020", entityName);
|
|
|
|
// 4.4 says: bypass these here ... we'll catch
|
|
// forbidden refs to unparsed entities on use
|
|
if (isEntityValue) {
|
|
strTmp.append('&');
|
|
strTmp.append(entityName);
|
|
strTmp.append(';');
|
|
continue;
|
|
}
|
|
expandEntityInLiteral(entityName, entities, isEntityValue);
|
|
|
|
|
|
// character references are always included immediately
|
|
} else if ((c = getc()) == '#') {
|
|
int tmp = parseCharNumber();
|
|
|
|
if (tmp > 0xffff) {
|
|
tmp = surrogatesToCharTmp(tmp);
|
|
strTmp.append(charTmp[0]);
|
|
if (tmp == 2)
|
|
strTmp.append(charTmp[1]);
|
|
} else
|
|
strTmp.append((char) tmp);
|
|
} else
|
|
fatal("P-009");
|
|
continue;
|
|
|
|
}
|
|
|
|
// expand parameter entities only within entity value literals
|
|
if (c == '%' && isEntityValue) {
|
|
String entityName = maybeGetName();
|
|
|
|
if (entityName != null) {
|
|
nextChar(';', "F-021", entityName);
|
|
expandEntityInLiteral(entityName, params, isEntityValue);
|
|
continue;
|
|
} else
|
|
fatal("P-011");
|
|
}
|
|
|
|
// For attribute values ...
|
|
if (!isEntityValue) {
|
|
// 3.3.3 says whitespace normalizes to space...
|
|
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
|
|
strTmp.append(' ');
|
|
continue;
|
|
}
|
|
|
|
// "<" not legal in parsed literals ...
|
|
if (c == '<')
|
|
fatal("P-012");
|
|
}
|
|
|
|
strTmp.append(c);
|
|
}
|
|
// isInAttribute = false;
|
|
}
|
|
|
|
// does a SINGLE expansion of the entity (often reparsed later)
|
|
private void expandEntityInLiteral(String name, SimpleHashtable table,
|
|
boolean isEntityValue)
|
|
throws IOException, SAXException {
|
|
|
|
Object entity = table.get(name);
|
|
|
|
if (entity instanceof InternalEntity) {
|
|
InternalEntity value = (InternalEntity) entity;
|
|
pushReader(value.buf, name, !value.isPE);
|
|
|
|
} else if (entity instanceof ExternalEntity) {
|
|
if (!isEntityValue) // must be a PE ...
|
|
fatal("P-013", new Object[]{name});
|
|
// XXX if this returns false ...
|
|
pushReader((ExternalEntity) entity);
|
|
|
|
} else if (entity == null) {
|
|
//
|
|
// Note: much confusion about whether spec requires such
|
|
// errors to be fatal in many cases, but none about whether
|
|
// it allows "normal" errors to be unrecoverable!
|
|
//
|
|
fatal((table == params) ? "V-022" : "P-014",
|
|
new Object[]{name});
|
|
}
|
|
}
|
|
|
|
// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
|
|
// for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
|
|
|
|
// NOTE: XML spec should explicitly say that PE ref syntax is
|
|
// ignored in PIs, comments, SystemLiterals, and Pubid Literal
|
|
// values ... can't process the XML spec's own DTD without doing
|
|
// that for comments.
|
|
|
|
private String getQuotedString(String type, String extra)
|
|
throws IOException, SAXException {
|
|
|
|
// use in.getc to bypass PE processing
|
|
char quote = in.getc();
|
|
|
|
if (quote != '\'' && quote != '"')
|
|
fatal("P-015", new Object[]{
|
|
messages.getMessage(locale, type, new Object[]{extra})
|
|
});
|
|
|
|
char c;
|
|
|
|
strTmp = new StringBuffer();
|
|
while ((c = in.getc()) != quote)
|
|
strTmp.append((char) c);
|
|
return strTmp.toString();
|
|
}
|
|
|
|
|
|
private String parsePublicId() throws IOException, SAXException {
|
|
|
|
// [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
|
|
// [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
|
|
String retval = getQuotedString("F-033", null);
|
|
for (int i = 0; i < retval.length(); i++) {
|
|
char c = retval.charAt(i);
|
|
if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
|
|
&& !(c >= 'A' && c <= 'Z')
|
|
&& !(c >= 'a' && c <= 'z'))
|
|
fatal("P-016", new Object[]{new Character(c)});
|
|
}
|
|
strTmp = new StringBuffer();
|
|
strTmp.append(retval);
|
|
return normalize(false);
|
|
}
|
|
|
|
// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
|
|
// handled by: InputEntity.parsedContent()
|
|
|
|
private boolean maybeComment(boolean skipStart)
|
|
throws IOException, SAXException {
|
|
|
|
// [15] Comment ::= '<!--'
|
|
// ( (Char - '-') | ('-' (Char - '-'))*
|
|
// '-->'
|
|
if (!in.peek(skipStart ? "!--" : "<!--", null))
|
|
return false;
|
|
|
|
boolean savedLexicalPE = doLexicalPE;
|
|
boolean saveCommentText;
|
|
|
|
doLexicalPE = false;
|
|
saveCommentText = false;
|
|
if (saveCommentText)
|
|
strTmp = new StringBuffer();
|
|
|
|
oneComment:
|
|
for (; ;) {
|
|
try {
|
|
// bypass PE expansion, but permit PEs
|
|
// to complete ... valid docs won't care.
|
|
for (; ;) {
|
|
int c = getc();
|
|
if (c == '-') {
|
|
c = getc();
|
|
if (c != '-') {
|
|
if (saveCommentText)
|
|
strTmp.append('-');
|
|
ungetc();
|
|
continue;
|
|
}
|
|
nextChar('>', "F-022", null);
|
|
break oneComment;
|
|
}
|
|
if (saveCommentText)
|
|
strTmp.append((char) c);
|
|
}
|
|
} catch (EndOfInputException e) {
|
|
//
|
|
// This is fatal EXCEPT when we're processing a PE...
|
|
// in which case a validating processor reports an error.
|
|
// External PEs are easy to detect; internal ones we
|
|
// infer by being an internal entity outside an element.
|
|
//
|
|
if (in.isInternal()) {
|
|
error("V-021", null);
|
|
}
|
|
fatal("P-017");
|
|
}
|
|
}
|
|
doLexicalPE = savedLexicalPE;
|
|
if (saveCommentText)
|
|
dtdHandler.comment(strTmp.toString());
|
|
return true;
|
|
}
|
|
|
|
private boolean maybePI(boolean skipStart)
|
|
throws IOException, SAXException {
|
|
|
|
// [16] PI ::= '<?' PITarget
|
|
// (S (Char* - (Char* '?>' Char*)))?
|
|
// '?>'
|
|
// [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
|
|
boolean savedLexicalPE = doLexicalPE;
|
|
|
|
if (!in.peek(skipStart ? "?" : "<?", null))
|
|
return false;
|
|
doLexicalPE = false;
|
|
|
|
String target = maybeGetName();
|
|
|
|
if (target == null) {
|
|
fatal("P-018");
|
|
}
|
|
if ("xml".equals(target)) {
|
|
fatal("P-019");
|
|
}
|
|
if ("xml".equalsIgnoreCase(target)) {
|
|
fatal("P-020", new Object[]{target});
|
|
}
|
|
|
|
if (maybeWhitespace()) {
|
|
strTmp = new StringBuffer();
|
|
try {
|
|
for (; ;) {
|
|
// use in.getc to bypass PE processing
|
|
char c = in.getc();
|
|
//Reached the end of PI.
|
|
if (c == '?' && in.peekc('>'))
|
|
break;
|
|
strTmp.append(c);
|
|
}
|
|
} catch (EndOfInputException e) {
|
|
fatal("P-021");
|
|
}
|
|
dtdHandler.processingInstruction(target, strTmp.toString());
|
|
} else {
|
|
if (!in.peek("?>", null)) {
|
|
fatal("P-022");
|
|
}
|
|
dtdHandler.processingInstruction(target, "");
|
|
}
|
|
|
|
doLexicalPE = savedLexicalPE;
|
|
return true;
|
|
}
|
|
|
|
// [18] CDSect ::= CDStart CData CDEnd
|
|
// [19] CDStart ::= '<![CDATA['
|
|
// [20] CData ::= (Char* - (Char* ']]>' Char*))
|
|
// [21] CDEnd ::= ']]>'
|
|
//
|
|
// ... handled by InputEntity.unparsedContent()
|
|
|
|
// collapsing several rules together ...
|
|
// simpler than attribute literals -- no reference parsing!
|
|
private String maybeReadAttribute(String name, boolean must)
|
|
throws IOException, SAXException {
|
|
|
|
// [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
|
|
// [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
|
|
// [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\"
|
|
if (!maybeWhitespace()) {
|
|
if (!must) {
|
|
return null;
|
|
}
|
|
fatal("P-024", new Object[]{name});
|
|
// NOTREACHED
|
|
}
|
|
|
|
if (!peek(name)) {
|
|
if (must) {
|
|
fatal("P-024", new Object[]{name});
|
|
} else {
|
|
// To ensure that the whitespace is there so that when we
|
|
// check for the next attribute we assure that the
|
|
// whitespace still exists.
|
|
ungetc();
|
|
return null;
|
|
}
|
|
}
|
|
|
|
// [25] Eq ::= S? '=' S?
|
|
maybeWhitespace();
|
|
nextChar('=', "F-023", null);
|
|
maybeWhitespace();
|
|
|
|
return getQuotedString("F-035", name);
|
|
}
|
|
|
|
private void readVersion(boolean must, String versionNum)
|
|
throws IOException, SAXException {
|
|
|
|
String value = maybeReadAttribute("version", must);
|
|
|
|
// [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
|
|
|
|
if (must && value == null)
|
|
fatal("P-025", new Object[]{versionNum});
|
|
if (value != null) {
|
|
int length = value.length();
|
|
for (int i = 0; i < length; i++) {
|
|
char c = value.charAt(i);
|
|
if (!((c >= '0' && c <= '9')
|
|
|| c == '_' || c == '.'
|
|
|| (c >= 'a' && c <= 'z')
|
|
|| (c >= 'A' && c <= 'Z')
|
|
|| c == ':' || c == '-')
|
|
)
|
|
fatal("P-026", new Object[]{value});
|
|
}
|
|
}
|
|
if (value != null && !value.equals(versionNum))
|
|
error("P-027", new Object[]{versionNum, value});
|
|
}
|
|
|
|
// common code used by most markup declarations
|
|
// ... S (Q)Name ...
|
|
private String getMarkupDeclname(String roleId, boolean qname)
|
|
throws IOException, SAXException {
|
|
|
|
String name;
|
|
|
|
whitespace(roleId);
|
|
name = maybeGetName();
|
|
if (name == null)
|
|
fatal("P-005", new Object[]
|
|
{messages.getMessage(locale, roleId)});
|
|
return name;
|
|
}
|
|
|
|
private boolean maybeMarkupDecl()
|
|
throws IOException, SAXException {
|
|
|
|
// [29] markupdecl ::= elementdecl | Attlistdecl
|
|
// | EntityDecl | NotationDecl | PI | Comment
|
|
return maybeElementDecl()
|
|
|| maybeAttlistDecl()
|
|
|| maybeEntityDecl()
|
|
|| maybeNotationDecl()
|
|
|| maybePI(false)
|
|
|| maybeComment(false);
|
|
}
|
|
|
|
private static final String XmlLang = "xml:lang";
|
|
|
|
private boolean isXmlLang(String value) {
|
|
|
|
// [33] LanguageId ::= Langcode ('-' Subcode)*
|
|
// [34] Langcode ::= ISO639Code | IanaCode | UserCode
|
|
// [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
|
|
// [36] IanaCode ::= [iI] '-' SubCode
|
|
// [37] UserCode ::= [xX] '-' SubCode
|
|
// [38] SubCode ::= [a-zA-Z]+
|
|
|
|
// the ISO and IANA codes (and subcodes) are registered,
|
|
// but that's neither a WF nor a validity constraint.
|
|
|
|
int nextSuffix;
|
|
char c;
|
|
|
|
if (value.length() < 2)
|
|
return false;
|
|
c = value.charAt(1);
|
|
if (c == '-') { // IANA, or user, code
|
|
c = value.charAt(0);
|
|
if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
|
|
return false;
|
|
nextSuffix = 1;
|
|
} else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
|
|
// 2 letter ISO code, or error
|
|
c = value.charAt(0);
|
|
if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
|
|
return false;
|
|
nextSuffix = 2;
|
|
} else
|
|
return false;
|
|
|
|
// here "suffix" ::= '-' [a-zA-Z]+ suffix*
|
|
while (nextSuffix < value.length()) {
|
|
c = value.charAt(nextSuffix);
|
|
if (c != '-')
|
|
break;
|
|
while (++nextSuffix < value.length()) {
|
|
c = value.charAt(nextSuffix);
|
|
if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
|
|
break;
|
|
}
|
|
}
|
|
return value.length() == nextSuffix && c != '-';
|
|
}
|
|
|
|
|
|
//
|
|
// CHAPTER 3: Logical Structures
|
|
//
|
|
|
|
/**
|
|
* To validate, subclassers should at this time make sure that
|
|
* values are of the declared types:<UL>
|
|
* <LI> ID and IDREF(S) values are Names
|
|
* <LI> NMTOKEN(S) are Nmtokens
|
|
* <LI> ENUMERATION values match one of the tokens
|
|
* <LI> NOTATION values match a notation name
|
|
* <LI> ENTITIY(IES) values match an unparsed external entity
|
|
* </UL>
|
|
* <p/>
|
|
* <P> Separately, make sure IDREF values match some ID
|
|
* provided in the document (in the afterRoot method).
|
|
*/
|
|
/* void validateAttributeSyntax (Attribute attr, String value)
|
|
throws DTDParseException {
|
|
// ID, IDREF(S) ... values are Names
|
|
if (Attribute.ID == attr.type()) {
|
|
if (!XmlNames.isName (value))
|
|
error ("V-025", new Object [] { value });
|
|
|
|
Boolean b = (Boolean) ids.getNonInterned (value);
|
|
if (b == null || b.equals (Boolean.FALSE))
|
|
ids.put (value.intern (), Boolean.TRUE);
|
|
else
|
|
error ("V-026", new Object [] { value });
|
|
|
|
} else if (Attribute.IDREF == attr.type()) {
|
|
if (!XmlNames.isName (value))
|
|
error ("V-027", new Object [] { value });
|
|
|
|
Boolean b = (Boolean) ids.getNonInterned (value);
|
|
if (b == null)
|
|
ids.put (value.intern (), Boolean.FALSE);
|
|
|
|
} else if (Attribute.IDREFS == attr.type()) {
|
|
StringTokenizer tokenizer = new StringTokenizer (value);
|
|
Boolean b;
|
|
boolean sawValue = false;
|
|
|
|
while (tokenizer.hasMoreTokens ()) {
|
|
value = tokenizer.nextToken ();
|
|
if (!XmlNames.isName (value))
|
|
error ("V-027", new Object [] { value });
|
|
b = (Boolean) ids.getNonInterned (value);
|
|
if (b == null)
|
|
ids.put (value.intern (), Boolean.FALSE);
|
|
sawValue = true;
|
|
}
|
|
if (!sawValue)
|
|
error ("V-039", null);
|
|
|
|
|
|
// NMTOKEN(S) ... values are Nmtoken(s)
|
|
} else if (Attribute.NMTOKEN == attr.type()) {
|
|
if (!XmlNames.isNmtoken (value))
|
|
error ("V-028", new Object [] { value });
|
|
|
|
} else if (Attribute.NMTOKENS == attr.type()) {
|
|
StringTokenizer tokenizer = new StringTokenizer (value);
|
|
boolean sawValue = false;
|
|
|
|
while (tokenizer.hasMoreTokens ()) {
|
|
value = tokenizer.nextToken ();
|
|
if (!XmlNames.isNmtoken (value))
|
|
error ("V-028", new Object [] { value });
|
|
sawValue = true;
|
|
}
|
|
if (!sawValue)
|
|
error ("V-032", null);
|
|
|
|
// ENUMERATION ... values match one of the tokens
|
|
} else if (Attribute.ENUMERATION == attr.type()) {
|
|
for (int i = 0; i < attr.values().length; i++)
|
|
if (value.equals (attr.values()[i]))
|
|
return;
|
|
error ("V-029", new Object [] { value });
|
|
|
|
// NOTATION values match a notation name
|
|
} else if (Attribute.NOTATION == attr.type()) {
|
|
//
|
|
// XXX XML 1.0 spec should probably list references to
|
|
// externally defined notations in standalone docs as
|
|
// validity errors. Ditto externally defined unparsed
|
|
// entities; neither should show up in attributes, else
|
|
// one needs to read the external declarations in order
|
|
// to make sense of the document (exactly what tagging
|
|
// a doc as "standalone" intends you won't need to do).
|
|
//
|
|
for (int i = 0; i < attr.values().length; i++)
|
|
if (value.equals (attr.values()[i]))
|
|
return;
|
|
error ("V-030", new Object [] { value });
|
|
|
|
// ENTITY(IES) values match an unparsed entity(ies)
|
|
} else if (Attribute.ENTITY == attr.type()) {
|
|
// see note above re standalone
|
|
if (!isUnparsedEntity (value))
|
|
error ("V-031", new Object [] { value });
|
|
|
|
} else if (Attribute.ENTITIES == attr.type()) {
|
|
StringTokenizer tokenizer = new StringTokenizer (value);
|
|
boolean sawValue = false;
|
|
|
|
while (tokenizer.hasMoreTokens ()) {
|
|
value = tokenizer.nextToken ();
|
|
// see note above re standalone
|
|
if (!isUnparsedEntity (value))
|
|
error ("V-031", new Object [] { value });
|
|
sawValue = true;
|
|
}
|
|
if (!sawValue)
|
|
error ("V-040", null);
|
|
|
|
} else if (Attribute.CDATA != attr.type())
|
|
throw new InternalError (attr.type());
|
|
}
|
|
*/
|
|
/*
|
|
private boolean isUnparsedEntity (String name)
|
|
{
|
|
Object e = entities.getNonInterned (name);
|
|
if (e == null || !(e instanceof ExternalEntity))
|
|
return false;
|
|
return ((ExternalEntity)e).notation != null;
|
|
}
|
|
*/
|
|
private boolean maybeElementDecl()
|
|
throws IOException, SAXException {
|
|
|
|
// [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
|
|
// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
|
|
InputEntity start = peekDeclaration("!ELEMENT");
|
|
|
|
if (start == null)
|
|
return false;
|
|
|
|
// n.b. for content models where inter-element whitespace is
|
|
// ignorable, we mark that fact here.
|
|
String name = getMarkupDeclname("F-015", true);
|
|
// Element element = (Element) elements.get (name);
|
|
// boolean declEffective = false;
|
|
|
|
/*
|
|
if (element != null) {
|
|
if (element.contentModel() != null) {
|
|
error ("V-012", new Object [] { name });
|
|
} // else <!ATTLIST name ...> came first
|
|
} else {
|
|
element = new Element(name);
|
|
elements.put (element.name(), element);
|
|
declEffective = true;
|
|
}
|
|
*/
|
|
if (declaredElements.contains(name))
|
|
error("V-012", new Object[]{name});
|
|
else {
|
|
declaredElements.add(name);
|
|
// declEffective = true;
|
|
}
|
|
|
|
short modelType;
|
|
whitespace("F-000");
|
|
if (peek(strEMPTY)) {
|
|
/// // leave element.contentModel as null for this case.
|
|
dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
|
|
} else if (peek(strANY)) {
|
|
/// element.setContentModel(new StringModel(StringModelType.ANY));
|
|
dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
|
|
} else {
|
|
modelType = getMixedOrChildren(name);
|
|
}
|
|
|
|
dtdHandler.endContentModel(name, modelType);
|
|
|
|
maybeWhitespace();
|
|
char c = getc();
|
|
if (c != '>')
|
|
fatal("P-036", new Object[]{name, new Character(c)});
|
|
if (start != in)
|
|
error("V-013", null);
|
|
|
|
/// dtdHandler.elementDecl(element);
|
|
|
|
return true;
|
|
}
|
|
|
|
// We're leaving the content model as a regular expression;
|
|
// it's an efficient natural way to express such things, and
|
|
// libraries often interpret them. No whitespace in the
|
|
// model we store, though!
|
|
|
|
/**
|
|
* returns content model type.
|
|
*/
|
|
private short getMixedOrChildren(String elementName/*Element element*/)
|
|
throws IOException, SAXException {
|
|
|
|
InputEntity start;
|
|
|
|
// [47] children ::= (choice|seq) ('?'|'*'|'+')?
|
|
strTmp = new StringBuffer();
|
|
|
|
nextChar('(', "F-028", elementName);
|
|
start = in;
|
|
maybeWhitespace();
|
|
strTmp.append('(');
|
|
|
|
short modelType;
|
|
if (peek("#PCDATA")) {
|
|
strTmp.append("#PCDATA");
|
|
dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
|
|
getMixed(elementName, start);
|
|
} else {
|
|
dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
|
|
getcps(elementName, start);
|
|
}
|
|
|
|
return modelType;
|
|
}
|
|
|
|
// '(' S? already consumed
|
|
// matching ')' must be in "start" entity if validating
|
|
private void getcps(/*Element element,*/String elementName, InputEntity start)
|
|
throws IOException, SAXException {
|
|
|
|
// [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
|
|
// [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
|
|
// [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
|
|
boolean decided = false;
|
|
char type = 0;
|
|
// ContentModel retval, temp, current;
|
|
|
|
// retval = temp = current = null;
|
|
|
|
dtdHandler.startModelGroup();
|
|
|
|
do {
|
|
String tag;
|
|
|
|
tag = maybeGetName();
|
|
if (tag != null) {
|
|
strTmp.append(tag);
|
|
// temp = new ElementModel(tag);
|
|
// getFrequency((RepeatableContent)temp);
|
|
///->
|
|
dtdHandler.childElement(tag, getFrequency());
|
|
///<-
|
|
} else if (peek("(")) {
|
|
InputEntity next = in;
|
|
strTmp.append('(');
|
|
maybeWhitespace();
|
|
// temp = getcps(element, next);
|
|
// getFrequency(temp);
|
|
///->
|
|
getcps(elementName, next);
|
|
/// getFrequency(); <- this looks like a bug
|
|
///<-
|
|
} else
|
|
fatal((type == 0) ? "P-039" :
|
|
((type == ',') ? "P-037" : "P-038"),
|
|
new Object[]{new Character(getc())});
|
|
|
|
maybeWhitespace();
|
|
if (decided) {
|
|
char c = getc();
|
|
|
|
// if (current != null) {
|
|
// current.addChild(temp);
|
|
// }
|
|
if (c == type) {
|
|
strTmp.append(type);
|
|
maybeWhitespace();
|
|
reportConnector(type);
|
|
continue;
|
|
} else if (c == '\u0029') { // rparen
|
|
ungetc();
|
|
continue;
|
|
} else {
|
|
fatal((type == 0) ? "P-041" : "P-040",
|
|
new Object[]{
|
|
new Character(c),
|
|
new Character(type)
|
|
});
|
|
}
|
|
} else {
|
|
type = getc();
|
|
switch (type) {
|
|
case '|':
|
|
case ',':
|
|
reportConnector(type);
|
|
break;
|
|
default:
|
|
// retval = temp;
|
|
ungetc();
|
|
continue;
|
|
}
|
|
// retval = (ContentModel)current;
|
|
decided = true;
|
|
// current.addChild(temp);
|
|
strTmp.append(type);
|
|
}
|
|
maybeWhitespace();
|
|
} while (!peek(")"));
|
|
|
|
if (in != start)
|
|
error("V-014", new Object[]{elementName});
|
|
strTmp.append(')');
|
|
|
|
dtdHandler.endModelGroup(getFrequency());
|
|
// return retval;
|
|
}
|
|
|
|
private void reportConnector(char type) throws SAXException {
|
|
switch (type) {
|
|
case '|':
|
|
dtdHandler.connector(DTDEventListener.CHOICE); ///<-
|
|
return;
|
|
case ',':
|
|
dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
|
|
return;
|
|
default:
|
|
throw new Error(); //assertion failed.
|
|
}
|
|
}
|
|
|
|
private short getFrequency()
|
|
throws IOException, SAXException {
|
|
|
|
final char c = getc();
|
|
|
|
if (c == '?') {
|
|
strTmp.append(c);
|
|
return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
|
|
// original.setRepeat(Repeat.ZERO_OR_ONE);
|
|
} else if (c == '+') {
|
|
strTmp.append(c);
|
|
return DTDEventListener.OCCURENCE_ONE_OR_MORE;
|
|
// original.setRepeat(Repeat.ONE_OR_MORE);
|
|
} else if (c == '*') {
|
|
strTmp.append(c);
|
|
return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
|
|
// original.setRepeat(Repeat.ZERO_OR_MORE);
|
|
} else {
|
|
ungetc();
|
|
return DTDEventListener.OCCURENCE_ONCE;
|
|
}
|
|
}
|
|
|
|
// '(' S? '#PCDATA' already consumed
|
|
// matching ')' must be in "start" entity if validating
|
|
private void getMixed(String elementName, /*Element element,*/ InputEntity start)
|
|
throws IOException, SAXException {
|
|
|
|
// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
|
|
// | '(' S? '#PCDATA' S? ')'
|
|
maybeWhitespace();
|
|
if (peek("\u0029*") || peek("\u0029")) {
|
|
if (in != start)
|
|
error("V-014", new Object[]{elementName});
|
|
strTmp.append(')');
|
|
// element.setContentModel(new StringModel(StringModelType.PCDATA));
|
|
return;
|
|
}
|
|
|
|
ArrayList l = new ArrayList();
|
|
// l.add(new StringModel(StringModelType.PCDATA));
|
|
|
|
|
|
while (peek("|")) {
|
|
String name;
|
|
|
|
strTmp.append('|');
|
|
maybeWhitespace();
|
|
|
|
doLexicalPE = true;
|
|
name = maybeGetName();
|
|
if (name == null)
|
|
fatal("P-042", new Object[]
|
|
{elementName, Integer.toHexString(getc())});
|
|
if (l.contains(name)) {
|
|
error("V-015", new Object[]{name});
|
|
} else {
|
|
l.add(name);
|
|
dtdHandler.mixedElement(name);
|
|
}
|
|
strTmp.append(name);
|
|
maybeWhitespace();
|
|
}
|
|
|
|
if (!peek("\u0029*")) // right paren
|
|
fatal("P-043", new Object[]
|
|
{elementName, new Character(getc())});
|
|
if (in != start)
|
|
error("V-014", new Object[]{elementName});
|
|
strTmp.append(')');
|
|
// ChoiceModel cm = new ChoiceModel((Collection)l);
|
|
// cm.setRepeat(Repeat.ZERO_OR_MORE);
|
|
// element.setContentModel(cm);
|
|
}
|
|
|
|
private boolean maybeAttlistDecl()
|
|
throws IOException, SAXException {
|
|
|
|
// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
|
|
InputEntity start = peekDeclaration("!ATTLIST");
|
|
|
|
if (start == null)
|
|
return false;
|
|
|
|
String elementName = getMarkupDeclname("F-016", true);
|
|
// Element element = (Element) elements.get (name);
|
|
|
|
// if (element == null) {
|
|
// // not yet declared -- no problem.
|
|
// element = new Element(name);
|
|
// elements.put(name, element);
|
|
// }
|
|
|
|
while (!peek(">")) {
|
|
|
|
// [53] AttDef ::= S Name S AttType S DefaultDecl
|
|
// [54] AttType ::= StringType | TokenizedType | EnumeratedType
|
|
|
|
// look for global attribute definitions, don't expand for now...
|
|
maybeWhitespace();
|
|
char c = getc();
|
|
if (c == '%') {
|
|
String entityName = maybeGetName();
|
|
if (entityName != null) {
|
|
nextChar(';', "F-021", entityName);
|
|
whitespace("F-021");
|
|
continue;
|
|
} else
|
|
fatal("P-011");
|
|
}
|
|
|
|
ungetc();
|
|
// look for attribute name otherwise
|
|
String attName = maybeGetName();
|
|
if (attName == null) {
|
|
fatal("P-044", new Object[]{new Character(getc())});
|
|
}
|
|
whitespace("F-001");
|
|
|
|
/// Attribute a = new Attribute (name);
|
|
|
|
String typeName;
|
|
Vector values = null; // notation/enumeration values
|
|
|
|
// Note: use the type constants from Attribute
|
|
// so that "==" may be used (faster)
|
|
|
|
// [55] StringType ::= 'CDATA'
|
|
if (peek(TYPE_CDATA))
|
|
/// a.setType(Attribute.CDATA);
|
|
typeName = TYPE_CDATA;
|
|
|
|
// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
|
|
// | 'ENTITY' | 'ENTITIES'
|
|
// | 'NMTOKEN' | 'NMTOKENS'
|
|
// n.b. if "IDREFS" is there, both "ID" and "IDREF"
|
|
// match peekahead ... so this order matters!
|
|
else if (peek(TYPE_IDREFS))
|
|
typeName = TYPE_IDREFS;
|
|
else if (peek(TYPE_IDREF))
|
|
typeName = TYPE_IDREF;
|
|
else if (peek(TYPE_ID)) {
|
|
typeName = TYPE_ID;
|
|
// TODO: should implement this error check?
|
|
/// if (element.id() != null) {
|
|
/// error ("V-016", new Object [] { element.id() });
|
|
/// } else
|
|
/// element.setId(name);
|
|
} else if (peek(TYPE_ENTITY))
|
|
typeName = TYPE_ENTITY;
|
|
else if (peek(TYPE_ENTITIES))
|
|
typeName = TYPE_ENTITIES;
|
|
else if (peek(TYPE_NMTOKENS))
|
|
typeName = TYPE_NMTOKENS;
|
|
else if (peek(TYPE_NMTOKEN))
|
|
typeName = TYPE_NMTOKEN;
|
|
|
|
// [57] EnumeratedType ::= NotationType | Enumeration
|
|
// [58] NotationType ::= 'NOTATION' S '(' S? Name
|
|
// (S? '|' S? Name)* S? ')'
|
|
else if (peek(TYPE_NOTATION)) {
|
|
typeName = TYPE_NOTATION;
|
|
whitespace("F-002");
|
|
nextChar('(', "F-029", null);
|
|
maybeWhitespace();
|
|
|
|
values = new Vector();
|
|
do {
|
|
String name;
|
|
if ((name = maybeGetName()) == null)
|
|
fatal("P-068");
|
|
// permit deferred declarations
|
|
if (notations.get(name) == null)
|
|
notations.put(name, name);
|
|
values.addElement(name);
|
|
maybeWhitespace();
|
|
if (peek("|"))
|
|
maybeWhitespace();
|
|
} while (!peek(")"));
|
|
/// a.setValues(new String [v.size ()]);
|
|
/// for (int i = 0; i < v.size (); i++)
|
|
/// a.setValue(i, (String)v.elementAt(i));
|
|
|
|
// [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
|
|
} else if (peek("(")) {
|
|
/// a.setType(Attribute.ENUMERATION);
|
|
typeName = TYPE_ENUMERATION;
|
|
|
|
maybeWhitespace();
|
|
|
|
/// Vector v = new Vector ();
|
|
values = new Vector();
|
|
do {
|
|
String name = getNmtoken();
|
|
/// v.addElement (name);
|
|
values.addElement(name);
|
|
maybeWhitespace();
|
|
if (peek("|"))
|
|
maybeWhitespace();
|
|
} while (!peek(")"));
|
|
/// a.setValues(new String [v.size ()]);
|
|
/// for (int i = 0; i < v.size (); i++)
|
|
/// a.setValue(i, (String)v.elementAt(i));
|
|
} else {
|
|
fatal("P-045",
|
|
new Object[]{attName, new Character(getc())});
|
|
typeName = null;
|
|
}
|
|
|
|
short attributeUse;
|
|
String defaultValue = null;
|
|
|
|
// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
|
|
// | (('#FIXED' S)? AttValue)
|
|
whitespace("F-003");
|
|
if (peek("#REQUIRED"))
|
|
attributeUse = DTDEventListener.USE_REQUIRED;
|
|
/// a.setIsRequired(true);
|
|
else if (peek("#FIXED")) {
|
|
/// if (a.type() == Attribute.ID)
|
|
if (typeName == TYPE_ID)
|
|
error("V-017", new Object[]{attName});
|
|
/// a.setIsFixed(true);
|
|
attributeUse = DTDEventListener.USE_FIXED;
|
|
whitespace("F-004");
|
|
parseLiteral(false);
|
|
/// if (a.type() != Attribute.CDATA)
|
|
/// a.setDefaultValue(normalize(false));
|
|
/// else
|
|
/// a.setDefaultValue(strTmp.toString());
|
|
|
|
if (typeName == TYPE_CDATA)
|
|
defaultValue = normalize(false);
|
|
else
|
|
defaultValue = strTmp.toString();
|
|
|
|
// TODO: implement this check
|
|
/// if (a.type() != Attribute.CDATA)
|
|
/// validateAttributeSyntax (a, a.defaultValue());
|
|
} else if (!peek("#IMPLIED")) {
|
|
attributeUse = DTDEventListener.USE_IMPLIED;
|
|
|
|
/// if (a.type() == Attribute.ID)
|
|
if (typeName == TYPE_ID)
|
|
error("V-018", new Object[]{attName});
|
|
parseLiteral(false);
|
|
/// if (a.type() != Attribute.CDATA)
|
|
/// a.setDefaultValue(normalize(false));
|
|
/// else
|
|
/// a.setDefaultValue(strTmp.toString());
|
|
if (typeName == TYPE_CDATA)
|
|
defaultValue = normalize(false);
|
|
else
|
|
defaultValue = strTmp.toString();
|
|
|
|
// TODO: implement this check
|
|
/// if (a.type() != Attribute.CDATA)
|
|
/// validateAttributeSyntax (a, a.defaultValue());
|
|
} else {
|
|
// TODO: this looks like an fatal error.
|
|
attributeUse = DTDEventListener.USE_NORMAL;
|
|
}
|
|
|
|
if (XmlLang.equals(attName)
|
|
&& defaultValue/* a.defaultValue()*/ != null
|
|
&& !isXmlLang(defaultValue/*a.defaultValue()*/))
|
|
error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
|
|
|
|
// TODO: isn't it an error to specify the same attribute twice?
|
|
/// if (!element.attributes().contains(a)) {
|
|
/// element.addAttribute(a);
|
|
/// dtdHandler.attributeDecl(a);
|
|
/// }
|
|
|
|
String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
|
|
dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
|
|
maybeWhitespace();
|
|
}
|
|
if (start != in)
|
|
error("V-013", null);
|
|
return true;
|
|
}
|
|
|
|
// used when parsing literal attribute values,
|
|
// or public identifiers.
|
|
//
|
|
// input in strTmp
|
|
private String normalize(boolean invalidIfNeeded) {
|
|
|
|
// this can allocate an extra string...
|
|
|
|
String s = strTmp.toString();
|
|
String s2 = s.trim();
|
|
boolean didStrip = false;
|
|
|
|
if (s != s2) {
|
|
s = s2;
|
|
s2 = null;
|
|
didStrip = true;
|
|
}
|
|
strTmp = new StringBuffer();
|
|
for (int i = 0; i < s.length(); i++) {
|
|
char c = s.charAt(i);
|
|
if (!XmlChars.isSpace(c)) {
|
|
strTmp.append(c);
|
|
continue;
|
|
}
|
|
strTmp.append(' ');
|
|
while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
|
|
didStrip = true;
|
|
i--;
|
|
}
|
|
if (didStrip)
|
|
return strTmp.toString();
|
|
else
|
|
return s;
|
|
}
|
|
|
|
private boolean maybeConditionalSect()
|
|
throws IOException, SAXException {
|
|
|
|
// [61] conditionalSect ::= includeSect | ignoreSect
|
|
|
|
if (!peek("<!["))
|
|
return false;
|
|
|
|
String keyword;
|
|
InputEntity start = in;
|
|
|
|
maybeWhitespace();
|
|
|
|
if ((keyword = maybeGetName()) == null)
|
|
fatal("P-046");
|
|
maybeWhitespace();
|
|
nextChar('[', "F-030", null);
|
|
|
|
// [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
|
|
// extSubsetDecl ']]>'
|
|
if ("INCLUDE".equals(keyword)) {
|
|
for (; ;) {
|
|
while (in.isEOF() && in != start)
|
|
in = in.pop();
|
|
if (in.isEOF()) {
|
|
error("V-020", null);
|
|
}
|
|
if (peek("]]>"))
|
|
break;
|
|
|
|
doLexicalPE = false;
|
|
if (maybeWhitespace())
|
|
continue;
|
|
if (maybePEReference())
|
|
continue;
|
|
doLexicalPE = true;
|
|
if (maybeMarkupDecl() || maybeConditionalSect())
|
|
continue;
|
|
|
|
fatal("P-047");
|
|
}
|
|
|
|
// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
|
|
// ignoreSectcontents ']]>'
|
|
// [64] ignoreSectcontents ::= Ignore ('<!['
|
|
// ignoreSectcontents ']]>' Ignore)*
|
|
// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
|
|
} else if ("IGNORE".equals(keyword)) {
|
|
int nestlevel = 1;
|
|
// ignoreSectcontents
|
|
doLexicalPE = false;
|
|
while (nestlevel > 0) {
|
|
char c = getc(); // will pop input entities
|
|
if (c == '<') {
|
|
if (peek("!["))
|
|
nestlevel++;
|
|
} else if (c == ']') {
|
|
if (peek("]>"))
|
|
nestlevel--;
|
|
} else
|
|
continue;
|
|
}
|
|
} else
|
|
fatal("P-048", new Object[]{keyword});
|
|
return true;
|
|
}
|
|
|
|
|
|
//
|
|
// CHAPTER 4: Physical Structures
|
|
//
|
|
|
|
// parse decimal or hex numeric character reference
|
|
private int parseCharNumber()
|
|
throws IOException, SAXException {
|
|
|
|
char c;
|
|
int retval = 0;
|
|
|
|
// n.b. we ignore overflow ...
|
|
if (getc() != 'x') {
|
|
ungetc();
|
|
for (; ;) {
|
|
c = getc();
|
|
if (c >= '0' && c <= '9') {
|
|
retval *= 10;
|
|
retval += (c - '0');
|
|
continue;
|
|
}
|
|
if (c == ';')
|
|
return retval;
|
|
fatal("P-049");
|
|
}
|
|
} else
|
|
for (; ;) {
|
|
c = getc();
|
|
if (c >= '0' && c <= '9') {
|
|
retval <<= 4;
|
|
retval += (c - '0');
|
|
continue;
|
|
}
|
|
if (c >= 'a' && c <= 'f') {
|
|
retval <<= 4;
|
|
retval += 10 + (c - 'a');
|
|
continue;
|
|
}
|
|
if (c >= 'A' && c <= 'F') {
|
|
retval <<= 4;
|
|
retval += 10 + (c - 'A');
|
|
continue;
|
|
}
|
|
if (c == ';')
|
|
return retval;
|
|
fatal("P-050");
|
|
}
|
|
}
|
|
|
|
// parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
|
|
// though still subject to the 'Char' construct in XML
|
|
private int surrogatesToCharTmp(int ucs4)
|
|
throws SAXException {
|
|
|
|
if (ucs4 <= 0xffff) {
|
|
if (XmlChars.isChar(ucs4)) {
|
|
charTmp[0] = (char) ucs4;
|
|
return 1;
|
|
}
|
|
} else if (ucs4 <= 0x0010ffff) {
|
|
// we represent these as UNICODE surrogate pairs
|
|
ucs4 -= 0x10000;
|
|
charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
|
|
charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
|
|
return 2;
|
|
}
|
|
fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
|
|
// NOTREACHED
|
|
return -1;
|
|
}
|
|
|
|
private boolean maybePEReference()
|
|
throws IOException, SAXException {
|
|
|
|
// This is the SYNTACTIC version of this construct.
|
|
// When processing external entities, there is also
|
|
// a LEXICAL version; see getc() and doLexicalPE.
|
|
|
|
// [69] PEReference ::= '%' Name ';'
|
|
if (!in.peekc('%'))
|
|
return false;
|
|
|
|
String name = maybeGetName();
|
|
Object entity;
|
|
|
|
if (name == null)
|
|
fatal("P-011");
|
|
nextChar(';', "F-021", name);
|
|
entity = params.get(name);
|
|
|
|
if (entity instanceof InternalEntity) {
|
|
InternalEntity value = (InternalEntity) entity;
|
|
pushReader(value.buf, name, false);
|
|
|
|
} else if (entity instanceof ExternalEntity) {
|
|
pushReader((ExternalEntity) entity);
|
|
externalParameterEntity((ExternalEntity) entity);
|
|
|
|
} else if (entity == null) {
|
|
error("V-022", new Object[]{name});
|
|
}
|
|
return true;
|
|
}
|
|
|
|
private boolean maybeEntityDecl()
|
|
throws IOException, SAXException {
|
|
|
|
// [70] EntityDecl ::= GEDecl | PEDecl
|
|
// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
|
|
// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>'
|
|
// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
|
|
// [74] PEDef ::= EntityValue | ExternalID
|
|
//
|
|
InputEntity start = peekDeclaration("!ENTITY");
|
|
|
|
if (start == null)
|
|
return false;
|
|
|
|
String entityName;
|
|
SimpleHashtable defns;
|
|
ExternalEntity externalId;
|
|
boolean doStore;
|
|
|
|
// PE expansion gets selectively turned off several places:
|
|
// in ENTITY declarations (here), in comments, in PIs.
|
|
|
|
// Here, we allow PE entities to be declared, and allows
|
|
// literals to include PE refs without the added spaces
|
|
// required with their expansion in markup decls.
|
|
|
|
doLexicalPE = false;
|
|
whitespace("F-005");
|
|
if (in.peekc('%')) {
|
|
whitespace("F-006");
|
|
defns = params;
|
|
} else
|
|
defns = entities;
|
|
|
|
ungetc(); // leave some whitespace
|
|
doLexicalPE = true;
|
|
entityName = getMarkupDeclname("F-017", false);
|
|
whitespace("F-007");
|
|
externalId = maybeExternalID();
|
|
|
|
//
|
|
// first definition sticks ... e.g. internal subset PEs are used
|
|
// to override DTD defaults. It's also an "error" to incorrectly
|
|
// redefine builtin internal entities, but since reporting such
|
|
// errors is optional we only give warnings ("just in case") for
|
|
// non-parameter entities.
|
|
//
|
|
doStore = (defns.get(entityName) == null);
|
|
if (!doStore && defns == entities)
|
|
warning("P-054", new Object[]{entityName});
|
|
|
|
// internal entities
|
|
if (externalId == null) {
|
|
char value [];
|
|
InternalEntity entity;
|
|
|
|
doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd"
|
|
parseLiteral(true);
|
|
doLexicalPE = true;
|
|
if (doStore) {
|
|
value = new char[strTmp.length()];
|
|
if (value.length != 0)
|
|
strTmp.getChars(0, value.length, value, 0);
|
|
entity = new InternalEntity(entityName, value);
|
|
entity.isPE = (defns == params);
|
|
entity.isFromInternalSubset = false;
|
|
defns.put(entityName, entity);
|
|
if (defns == entities)
|
|
dtdHandler.internalGeneralEntityDecl(entityName,
|
|
new String(value));
|
|
}
|
|
|
|
// external entities (including unparsed)
|
|
} else {
|
|
// [76] NDataDecl ::= S 'NDATA' S Name
|
|
if (defns == entities && maybeWhitespace()
|
|
&& peek("NDATA")) {
|
|
externalId.notation = getMarkupDeclname("F-018", false);
|
|
|
|
// flag undeclared notation for checking after
|
|
// the DTD is fully processed
|
|
if (notations.get(externalId.notation) == null)
|
|
notations.put(externalId.notation, Boolean.TRUE);
|
|
}
|
|
externalId.name = entityName;
|
|
externalId.isPE = (defns == params);
|
|
externalId.isFromInternalSubset = false;
|
|
if (doStore) {
|
|
defns.put(entityName, externalId);
|
|
if (externalId.notation != null)
|
|
dtdHandler.unparsedEntityDecl(entityName,
|
|
externalId.publicId, externalId.systemId,
|
|
externalId.notation);
|
|
else if (defns == entities)
|
|
dtdHandler.externalGeneralEntityDecl(entityName,
|
|
externalId.publicId, externalId.systemId);
|
|
}
|
|
}
|
|
maybeWhitespace();
|
|
nextChar('>', "F-031", entityName);
|
|
if (start != in)
|
|
error("V-013", null);
|
|
return true;
|
|
}
|
|
|
|
private ExternalEntity maybeExternalID()
|
|
throws IOException, SAXException {
|
|
|
|
// [75] ExternalID ::= 'SYSTEM' S SystemLiteral
|
|
// | 'PUBLIC' S' PubidLiteral S Systemliteral
|
|
String temp = null;
|
|
ExternalEntity retval;
|
|
|
|
if (peek("PUBLIC")) {
|
|
whitespace("F-009");
|
|
temp = parsePublicId();
|
|
} else if (!peek("SYSTEM"))
|
|
return null;
|
|
|
|
retval = new ExternalEntity(in);
|
|
retval.publicId = temp;
|
|
whitespace("F-008");
|
|
retval.systemId = parseSystemId();
|
|
return retval;
|
|
}
|
|
|
|
private String parseSystemId()
|
|
throws IOException, SAXException {
|
|
|
|
String uri = getQuotedString("F-034", null);
|
|
int temp = uri.indexOf(':');
|
|
|
|
// resolve relative URIs ... must do it here since
|
|
// it's relative to the source file holding the URI!
|
|
|
|
// "new java.net.URL (URL, string)" conforms to RFC 1630,
|
|
// but we can't use that except when the URI is a URL.
|
|
// The entity resolver is allowed to handle URIs that are
|
|
// not URLs, so we pass URIs through with scheme intact
|
|
if (temp == -1 || uri.indexOf('/') < temp) {
|
|
String baseURI;
|
|
|
|
baseURI = in.getSystemId();
|
|
if (baseURI == null)
|
|
fatal("P-055", new Object[]{uri});
|
|
if (uri.length() == 0)
|
|
uri = ".";
|
|
baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
|
|
if (uri.charAt(0) != '/')
|
|
uri = baseURI + uri;
|
|
else {
|
|
// XXX slashes at the beginning of a relative URI are
|
|
// a special case we don't handle.
|
|
throw new InternalError();
|
|
}
|
|
|
|
// letting other code map any "/xxx/../" or "/./" to "/",
|
|
// since all URIs must handle it the same.
|
|
}
|
|
// check for fragment ID in URI
|
|
if (uri.indexOf('#') != -1)
|
|
error("P-056", new Object[]{uri});
|
|
return uri;
|
|
}
|
|
|
|
private void maybeTextDecl()
|
|
throws IOException, SAXException {
|
|
|
|
// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
|
|
if (peek("<?xml")) {
|
|
readVersion(false, "1.0");
|
|
readEncoding(true);
|
|
maybeWhitespace();
|
|
if (!peek("?>"))
|
|
fatal("P-057");
|
|
}
|
|
}
|
|
|
|
private void externalParameterEntity(ExternalEntity next)
|
|
throws IOException, SAXException {
|
|
|
|
//
|
|
// Reap the intended benefits of standalone declarations:
|
|
// don't deal with external parameter entities, except to
|
|
// validate the standalone declaration.
|
|
//
|
|
|
|
// n.b. "in external parameter entities" (and external
|
|
// DTD subset, same grammar) parameter references can
|
|
// occur "within" markup declarations ... expansions can
|
|
// cross syntax rules. Flagged here; affects getc().
|
|
|
|
// [79] ExtPE ::= TextDecl? extSubsetDecl
|
|
// [31] extSubsetDecl ::= ( markupdecl | conditionalSect
|
|
// | PEReference | S )*
|
|
InputEntity pe;
|
|
|
|
// XXX if this returns false ...
|
|
|
|
pe = in;
|
|
maybeTextDecl();
|
|
while (!pe.isEOF()) {
|
|
// pop internal PEs (and whitespace before/after)
|
|
if (in.isEOF()) {
|
|
in = in.pop();
|
|
continue;
|
|
}
|
|
doLexicalPE = false;
|
|
if (maybeWhitespace())
|
|
continue;
|
|
if (maybePEReference())
|
|
continue;
|
|
doLexicalPE = true;
|
|
if (maybeMarkupDecl() || maybeConditionalSect())
|
|
continue;
|
|
break;
|
|
}
|
|
// if (in != pe) throw new InternalError("who popped my PE?");
|
|
if (!pe.isEOF())
|
|
fatal("P-059", new Object[]{in.getName()});
|
|
}
|
|
|
|
private void readEncoding(boolean must)
|
|
throws IOException, SAXException {
|
|
|
|
// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
|
|
String name = maybeReadAttribute("encoding", must);
|
|
|
|
if (name == null)
|
|
return;
|
|
for (int i = 0; i < name.length(); i++) {
|
|
char c = name.charAt(i);
|
|
if ((c >= 'A' && c <= 'Z')
|
|
|| (c >= 'a' && c <= 'z'))
|
|
continue;
|
|
if (i != 0
|
|
&& ((c >= '0' && c <= '9')
|
|
|| c == '-'
|
|
|| c == '_'
|
|
|| c == '.'
|
|
))
|
|
continue;
|
|
fatal("P-060", new Object[]{new Character(c)});
|
|
}
|
|
|
|
//
|
|
// This should be the encoding in use, and it's even an error for
|
|
// it to be anything else (in certain cases that are impractical to
|
|
// to test, and may even be insufficient). So, we do the best we
|
|
// can, and warn if things look suspicious. Note that Java doesn't
|
|
// uniformly expose the encodings, and that the names it uses
|
|
// internally are nonstandard. Also, that the XML spec allows
|
|
// such "errors" not to be reported at all.
|
|
//
|
|
String currentEncoding = in.getEncoding();
|
|
|
|
if (currentEncoding != null
|
|
&& !name.equalsIgnoreCase(currentEncoding))
|
|
warning("P-061", new Object[]{name, currentEncoding});
|
|
}
|
|
|
|
private boolean maybeNotationDecl()
|
|
throws IOException, SAXException {
|
|
|
|
// [82] NotationDecl ::= '<!NOTATION' S Name S
|
|
// (ExternalID | PublicID) S? '>'
|
|
// [83] PublicID ::= 'PUBLIC' S PubidLiteral
|
|
InputEntity start = peekDeclaration("!NOTATION");
|
|
|
|
if (start == null)
|
|
return false;
|
|
|
|
String name = getMarkupDeclname("F-019", false);
|
|
ExternalEntity entity = new ExternalEntity(in);
|
|
|
|
whitespace("F-011");
|
|
if (peek("PUBLIC")) {
|
|
whitespace("F-009");
|
|
entity.publicId = parsePublicId();
|
|
if (maybeWhitespace()) {
|
|
if (!peek(">"))
|
|
entity.systemId = parseSystemId();
|
|
else
|
|
ungetc();
|
|
}
|
|
} else if (peek("SYSTEM")) {
|
|
whitespace("F-008");
|
|
entity.systemId = parseSystemId();
|
|
} else
|
|
fatal("P-062");
|
|
maybeWhitespace();
|
|
nextChar('>', "F-032", name);
|
|
if (start != in)
|
|
error("V-013", null);
|
|
if (entity.systemId != null && entity.systemId.indexOf('#') != -1)
|
|
error("P-056", new Object[]{entity.systemId});
|
|
|
|
Object value = notations.get(name);
|
|
if (value != null && value instanceof ExternalEntity)
|
|
warning("P-063", new Object[]{name});
|
|
|
|
else {
|
|
notations.put(name, entity);
|
|
dtdHandler.notationDecl(name, entity.publicId,
|
|
entity.systemId);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
//
|
|
// UTILITIES
|
|
//
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
private char getc() throws IOException, SAXException {
|
|
|
|
if (!doLexicalPE) {
|
|
char c = in.getc();
|
|
return c;
|
|
}
|
|
|
|
//
|
|
// External parameter entities get funky processing of '%param;'
|
|
// references. It's not clearly defined in the XML spec; but it
|
|
// boils down to having those refs be _lexical_ in most cases to
|
|
// include partial syntax productions. It also needs selective
|
|
// enabling; "<!ENTITY % foo ...>" must work, for example, and
|
|
// if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
|
|
// if it's expanded in a literal, else "ab cd". PEs also do
|
|
// not expand within comments or PIs, and external PEs are only
|
|
// allowed to have markup decls (and so aren't handled lexically).
|
|
//
|
|
// This PE handling should be merged into maybeWhitespace, where
|
|
// it can be dealt with more consistently.
|
|
//
|
|
// Also, there are some validity constraints in this area.
|
|
//
|
|
char c;
|
|
|
|
while (in.isEOF()) {
|
|
if (in.isInternal() || (doLexicalPE && !in.isDocument()))
|
|
in = in.pop();
|
|
else {
|
|
fatal("P-064", new Object[]{in.getName()});
|
|
}
|
|
}
|
|
if ((c = in.getc()) == '%' && doLexicalPE) {
|
|
// PE ref ::= '%' name ';'
|
|
String name = maybeGetName();
|
|
Object entity;
|
|
|
|
if (name == null)
|
|
fatal("P-011");
|
|
nextChar(';', "F-021", name);
|
|
entity = params.get(name);
|
|
|
|
// push a magic "entity" before and after the
|
|
// real one, so ungetc() behaves uniformly
|
|
pushReader(" ".toCharArray(), null, false);
|
|
if (entity instanceof InternalEntity)
|
|
pushReader(((InternalEntity) entity).buf, name, false);
|
|
else if (entity instanceof ExternalEntity)
|
|
// PEs can't be unparsed!
|
|
// XXX if this returns false ...
|
|
pushReader((ExternalEntity) entity);
|
|
else if (entity == null)
|
|
// see note in maybePEReference re making this be nonfatal.
|
|
fatal("V-022");
|
|
else
|
|
throw new InternalError();
|
|
pushReader(" ".toCharArray(), null, false);
|
|
return in.getc();
|
|
}
|
|
return c;
|
|
}
|
|
|
|
private void ungetc() {
|
|
|
|
in.ungetc();
|
|
}
|
|
|
|
private boolean peek(String s)
|
|
throws IOException, SAXException {
|
|
|
|
return in.peek(s, null);
|
|
}
|
|
|
|
// Return the entity starting the specified declaration
|
|
// (for validating declaration nesting) else null.
|
|
|
|
private InputEntity peekDeclaration(String s)
|
|
throws IOException, SAXException {
|
|
|
|
InputEntity start;
|
|
|
|
if (!in.peekc('<'))
|
|
return null;
|
|
start = in;
|
|
if (in.peek(s, null))
|
|
return start;
|
|
in.ungetc();
|
|
return null;
|
|
}
|
|
|
|
private void nextChar(char c, String location, String near)
|
|
throws IOException, SAXException {
|
|
|
|
while (in.isEOF() && !in.isDocument())
|
|
in = in.pop();
|
|
if (!in.peekc(c))
|
|
fatal("P-008", new Object[]
|
|
{new Character(c),
|
|
messages.getMessage(locale, location),
|
|
(near == null ? "" : ('"' + near + '"'))});
|
|
}
|
|
|
|
|
|
private void pushReader(char buf [], String name, boolean isGeneral)
|
|
throws SAXException {
|
|
|
|
InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
|
|
r.init(buf, name, in, !isGeneral);
|
|
in = r;
|
|
}
|
|
|
|
private boolean pushReader(ExternalEntity next)
|
|
throws IOException, SAXException {
|
|
|
|
InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
|
|
InputSource s;
|
|
try {
|
|
s = next.getInputSource(resolver);
|
|
} catch (IOException e) {
|
|
String msg =
|
|
"unable to open the external entity from :" + next.systemId;
|
|
if (next.publicId != null)
|
|
msg += " (public id:" + next.publicId + ")";
|
|
|
|
SAXParseException spe = new SAXParseException(msg,
|
|
getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
|
|
dtdHandler.fatalError(spe);
|
|
throw e;
|
|
}
|
|
|
|
r.init(s, next.name, in, next.isPE);
|
|
in = r;
|
|
return true;
|
|
}
|
|
|
|
public String getPublicId() {
|
|
|
|
return (in == null) ? null : in.getPublicId();
|
|
}
|
|
|
|
public String getSystemId() {
|
|
|
|
return (in == null) ? null : in.getSystemId();
|
|
}
|
|
|
|
public int getLineNumber() {
|
|
|
|
return (in == null) ? -1 : in.getLineNumber();
|
|
}
|
|
|
|
public int getColumnNumber() {
|
|
|
|
return (in == null) ? -1 : in.getColumnNumber();
|
|
}
|
|
|
|
// error handling convenience routines
|
|
|
|
private void warning(String messageId, Object parameters [])
|
|
throws SAXException {
|
|
|
|
SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
|
|
getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
|
|
|
|
dtdHandler.warning(e);
|
|
}
|
|
|
|
void error(String messageId, Object parameters [])
|
|
throws SAXException {
|
|
|
|
SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
|
|
getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
|
|
|
|
dtdHandler.error(e);
|
|
}
|
|
|
|
private void fatal(String messageId) throws SAXException {
|
|
|
|
fatal(messageId, null);
|
|
}
|
|
|
|
private void fatal(String messageId, Object parameters [])
|
|
throws SAXException {
|
|
|
|
SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
|
|
getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
|
|
|
|
dtdHandler.fatalError(e);
|
|
|
|
throw e;
|
|
}
|
|
|
|
//
|
|
// Map char arrays to strings ... cuts down both on memory and
|
|
// CPU usage for element/attribute/other names that are reused.
|
|
//
|
|
// Documents typically repeat names a lot, so we more or less
|
|
// intern all the strings within the document; since some strings
|
|
// are repeated in multiple documents (e.g. stylesheets) we go
|
|
// a bit further, and intern globally.
|
|
//
|
|
static class NameCache {
|
|
//
|
|
// Unless we auto-grow this, the default size should be a
|
|
// reasonable bit larger than needed for most XML files
|
|
// we've yet seen (and be prime). If it's too small, the
|
|
// penalty is just excess cache collisions.
|
|
//
|
|
NameCacheEntry hashtable [] = new NameCacheEntry[541];
|
|
|
|
//
|
|
// Usually we just want to get the 'symbol' for these chars
|
|
//
|
|
String lookup(char value [], int len) {
|
|
|
|
return lookupEntry(value, len).name;
|
|
}
|
|
|
|
//
|
|
// Sometimes we need to scan the chars in the resulting
|
|
// string, so there's an accessor which exposes them.
|
|
// (Mostly for element end tags.)
|
|
//
|
|
NameCacheEntry lookupEntry(char value [], int len) {
|
|
|
|
int index = 0;
|
|
NameCacheEntry entry;
|
|
|
|
// hashing to get index
|
|
for (int i = 0; i < len; i++)
|
|
index = index * 31 + value[i];
|
|
index &= 0x7fffffff;
|
|
index %= hashtable.length;
|
|
|
|
// return entry if one's there ...
|
|
for (entry = hashtable[index];
|
|
entry != null;
|
|
entry = entry.next) {
|
|
if (entry.matches(value, len))
|
|
return entry;
|
|
}
|
|
|
|
// else create new one
|
|
entry = new NameCacheEntry();
|
|
entry.chars = new char[len];
|
|
System.arraycopy(value, 0, entry.chars, 0, len);
|
|
entry.name = new String(entry.chars);
|
|
//
|
|
// NOTE: JDK 1.1 has a fixed size string intern table,
|
|
// with non-GC'd entries. It can panic here; that's a
|
|
// JDK problem, use 1.2 or later with many identifiers.
|
|
//
|
|
entry.name = entry.name.intern(); // "global" intern
|
|
entry.next = hashtable[index];
|
|
hashtable[index] = entry;
|
|
return entry;
|
|
}
|
|
}
|
|
|
|
static class NameCacheEntry {
|
|
|
|
String name;
|
|
char chars [];
|
|
NameCacheEntry next;
|
|
|
|
boolean matches(char value [], int len) {
|
|
|
|
if (chars.length != len)
|
|
return false;
|
|
for (int i = 0; i < len; i++)
|
|
if (value[i] != chars[i])
|
|
return false;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Message catalog for diagnostics.
|
|
//
|
|
static final Catalog messages = new Catalog();
|
|
|
|
static final class Catalog extends MessageCatalog {
|
|
|
|
Catalog() {
|
|
super(DTDParser.class);
|
|
}
|
|
}
|
|
|
|
}
|