378 lines
12 KiB
Java
378 lines
12 KiB
Java
/*
|
|
* Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
* published by the Free Software Foundation. Oracle designates this
|
|
* particular file as subject to the "Classpath" exception as provided
|
|
* by Oracle in the LICENSE file that accompanied this code.
|
|
*
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
* accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU General Public License version
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
* questions.
|
|
*/
|
|
|
|
package com.sun.xml.internal.ws.encoding;
|
|
|
|
import javax.xml.ws.WebServiceException;
|
|
|
|
/**
|
|
* This class tokenizes RFC822 and MIME headers into the basic
|
|
* symbols specified by RFC822 and MIME. <p>
|
|
*
|
|
* This class handles folded headers (ie headers with embedded
|
|
* CRLF SPACE sequences). The folds are removed in the returned
|
|
* tokens.
|
|
*
|
|
* @version 1.9, 02/03/27
|
|
* @author John Mani
|
|
*/
|
|
|
|
class HeaderTokenizer {
|
|
|
|
/**
|
|
* The Token class represents tokens returned by the
|
|
* HeaderTokenizer.
|
|
*/
|
|
static class Token {
|
|
|
|
private int type;
|
|
private String value;
|
|
|
|
/**
|
|
* Token type indicating an ATOM.
|
|
*/
|
|
public static final int ATOM = -1;
|
|
|
|
/**
|
|
* Token type indicating a quoted string. The value
|
|
* field contains the string without the quotes.
|
|
*/
|
|
public static final int QUOTEDSTRING = -2;
|
|
|
|
/**
|
|
* Token type indicating a comment. The value field
|
|
* contains the comment string without the comment
|
|
* start and end symbols.
|
|
*/
|
|
public static final int COMMENT = -3;
|
|
|
|
/**
|
|
* Token type indicating end of input.
|
|
*/
|
|
public static final int EOF = -4;
|
|
|
|
/**
|
|
* Constructor.
|
|
* @param type Token type
|
|
* @param value Token value
|
|
*/
|
|
public Token(int type, String value) {
|
|
this.type = type;
|
|
this.value = value;
|
|
}
|
|
|
|
/**
|
|
* Return the type of the token. If the token represents a
|
|
* delimiter or a control character, the type is that character
|
|
* itself, converted to an integer. Otherwise, it's value is
|
|
* one of the following:
|
|
* <ul>
|
|
* <li><code>ATOM</code> A sequence of ASCII characters
|
|
* delimited by either SPACE, CTL, "(", <"> or the
|
|
* specified SPECIALS
|
|
* <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
|
|
* within quotes
|
|
* <li><code>COMMENT</code> A sequence of ASCII characters
|
|
* within "(" and ")".
|
|
* <li><code>EOF</code> End of header
|
|
* </ul>
|
|
*/
|
|
public int getType() {
|
|
return type;
|
|
}
|
|
|
|
/**
|
|
* Returns the value of the token just read. When the current
|
|
* token is a quoted string, this field contains the body of the
|
|
* string, without the quotes. When the current token is a comment,
|
|
* this field contains the body of the comment.
|
|
*
|
|
* @return token value
|
|
*/
|
|
public String getValue() {
|
|
return value;
|
|
}
|
|
}
|
|
|
|
private String string; // the string to be tokenized
|
|
private boolean skipComments; // should comments be skipped ?
|
|
private String delimiters; // delimiter string
|
|
private int currentPos; // current parse position
|
|
private int maxPos; // string length
|
|
private int nextPos; // track start of next Token for next()
|
|
private int peekPos; // track start of next Token for peek()
|
|
|
|
/**
|
|
* RFC822 specials
|
|
*/
|
|
private final static String RFC822 = "()<>@,;:\\\"\t .[]";
|
|
|
|
/**
|
|
* MIME specials
|
|
*/
|
|
final static String MIME = "()<>@,;:\\\"\t []/?=";
|
|
|
|
// The EOF Token
|
|
private final static Token EOFToken = new Token(Token.EOF, null);
|
|
|
|
/**
|
|
* Constructor that takes a rfc822 style header.
|
|
*
|
|
* @param header The rfc822 header to be tokenized
|
|
* @param delimiters Set of delimiter characters
|
|
* to be used to delimit ATOMS. These
|
|
* are usually <code>RFC822</code> or
|
|
* <code>MIME</code>
|
|
* @param skipComments If true, comments are skipped and
|
|
* not returned as tokens
|
|
*/
|
|
HeaderTokenizer(String header, String delimiters,
|
|
boolean skipComments) {
|
|
string = (header == null) ? "" : header; // paranoia ?!
|
|
this.skipComments = skipComments;
|
|
this.delimiters = delimiters;
|
|
currentPos = nextPos = peekPos = 0;
|
|
maxPos = string.length();
|
|
}
|
|
|
|
/**
|
|
* Constructor. Comments are ignored and not returned as tokens
|
|
*
|
|
* @param header The header that is tokenized
|
|
* @param delimiters The delimiters to be used
|
|
*/
|
|
HeaderTokenizer(String header, String delimiters) {
|
|
this(header, delimiters, true);
|
|
}
|
|
|
|
/**
|
|
* Constructor. The RFC822 defined delimiters - RFC822 - are
|
|
* used to delimit ATOMS. Also comments are skipped and not
|
|
* returned as tokens
|
|
*/
|
|
HeaderTokenizer(String header) {
|
|
this(header, RFC822);
|
|
}
|
|
|
|
/**
|
|
* Parses the next token from this String. <p>
|
|
*
|
|
* Clients sit in a loop calling next() to parse successive
|
|
* tokens until an EOF Token is returned.
|
|
*
|
|
* @return the next Token
|
|
* @exception WebServiceException if the parse fails
|
|
*/
|
|
Token next() throws WebServiceException {
|
|
Token tk;
|
|
|
|
currentPos = nextPos; // setup currentPos
|
|
tk = getNext();
|
|
nextPos = peekPos = currentPos; // update currentPos and peekPos
|
|
return tk;
|
|
}
|
|
|
|
/**
|
|
* Peek at the next token, without actually removing the token
|
|
* from the parse stream. Invoking this method multiple times
|
|
* will return successive tokens, until <code>next()</code> is
|
|
* called. <p>
|
|
*
|
|
* @return the next Token
|
|
* @exception WebServiceException if the parse fails
|
|
*/
|
|
Token peek() throws WebServiceException {
|
|
Token tk;
|
|
|
|
currentPos = peekPos; // setup currentPos
|
|
tk = getNext();
|
|
peekPos = currentPos; // update peekPos
|
|
return tk;
|
|
}
|
|
|
|
/**
|
|
* Return the rest of the Header.
|
|
*
|
|
* @return String rest of header. null is returned if we are
|
|
* already at end of header
|
|
*/
|
|
String getRemainder() {
|
|
return string.substring(nextPos);
|
|
}
|
|
|
|
/*
|
|
* Return the next token starting from 'currentPos'. After the
|
|
* parse, 'currentPos' is updated to point to the start of the
|
|
* next token.
|
|
*/
|
|
private Token getNext() throws WebServiceException {
|
|
// If we're already at end of string, return EOF
|
|
if (currentPos >= maxPos)
|
|
return EOFToken;
|
|
|
|
// Skip white-space, position currentPos beyond the space
|
|
if (skipWhiteSpace() == Token.EOF)
|
|
return EOFToken;
|
|
|
|
char c;
|
|
int start;
|
|
boolean filter = false;
|
|
|
|
c = string.charAt(currentPos);
|
|
|
|
// Check or Skip comments and position currentPos
|
|
// beyond the comment
|
|
while (c == '(') {
|
|
// Parsing comment ..
|
|
int nesting;
|
|
for (start = ++currentPos, nesting = 1;
|
|
nesting > 0 && currentPos < maxPos;
|
|
currentPos++) {
|
|
c = string.charAt(currentPos);
|
|
if (c == '\\') { // Escape sequence
|
|
currentPos++; // skip the escaped character
|
|
filter = true;
|
|
} else if (c == '\r')
|
|
filter = true;
|
|
else if (c == '(')
|
|
nesting++;
|
|
else if (c == ')')
|
|
nesting--;
|
|
}
|
|
if (nesting != 0)
|
|
throw new WebServiceException("Unbalanced comments");
|
|
|
|
if (!skipComments) {
|
|
// Return the comment, if we are asked to.
|
|
// Note that the comment start & end markers are ignored.
|
|
String s;
|
|
if (filter) // need to go thru the token again.
|
|
s = filterToken(string, start, currentPos-1);
|
|
else
|
|
s = string.substring(start,currentPos-1);
|
|
|
|
return new Token(Token.COMMENT, s);
|
|
}
|
|
|
|
// Skip any whitespace after the comment.
|
|
if (skipWhiteSpace() == Token.EOF)
|
|
return EOFToken;
|
|
c = string.charAt(currentPos);
|
|
}
|
|
|
|
// Check for quoted-string and position currentPos
|
|
// beyond the terminating quote
|
|
if (c == '"') {
|
|
for (start = ++currentPos; currentPos < maxPos; currentPos++) {
|
|
c = string.charAt(currentPos);
|
|
if (c == '\\') { // Escape sequence
|
|
currentPos++;
|
|
filter = true;
|
|
} else if (c == '\r')
|
|
filter = true;
|
|
else if (c == '"') {
|
|
currentPos++;
|
|
String s;
|
|
|
|
if (filter)
|
|
s = filterToken(string, start, currentPos-1);
|
|
else
|
|
s = string.substring(start,currentPos-1);
|
|
|
|
return new Token(Token.QUOTEDSTRING, s);
|
|
}
|
|
}
|
|
throw new WebServiceException("Unbalanced quoted string");
|
|
}
|
|
|
|
// Check for SPECIAL or CTL
|
|
if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
|
|
currentPos++; // re-position currentPos
|
|
char ch[] = new char[1];
|
|
ch[0] = c;
|
|
return new Token((int)c, new String(ch));
|
|
}
|
|
|
|
// Check for ATOM
|
|
for (start = currentPos; currentPos < maxPos; currentPos++) {
|
|
c = string.charAt(currentPos);
|
|
// ATOM is delimited by either SPACE, CTL, "(", <">
|
|
// or the specified SPECIALS
|
|
if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
|
|
c == '"' || delimiters.indexOf(c) >= 0)
|
|
break;
|
|
}
|
|
return new Token(Token.ATOM, string.substring(start, currentPos));
|
|
}
|
|
|
|
// Skip SPACE, HT, CR and NL
|
|
private int skipWhiteSpace() {
|
|
char c;
|
|
for (; currentPos < maxPos; currentPos++)
|
|
if (((c = string.charAt(currentPos)) != ' ') &&
|
|
(c != '\t') && (c != '\r') && (c != '\n'))
|
|
return currentPos;
|
|
return Token.EOF;
|
|
}
|
|
|
|
/* Process escape sequences and embedded LWSPs from a comment or
|
|
* quoted string.
|
|
*/
|
|
private static String filterToken(String s, int start, int end) {
|
|
StringBuffer sb = new StringBuffer();
|
|
char c;
|
|
boolean gotEscape = false;
|
|
boolean gotCR = false;
|
|
|
|
for (int i = start; i < end; i++) {
|
|
c = s.charAt(i);
|
|
if (c == '\n' && gotCR) {
|
|
// This LF is part of an unescaped
|
|
// CRLF sequence (i.e, LWSP). Skip it.
|
|
gotCR = false;
|
|
continue;
|
|
}
|
|
|
|
gotCR = false;
|
|
if (!gotEscape) {
|
|
// Previous character was NOT '\'
|
|
if (c == '\\') // skip this character
|
|
gotEscape = true;
|
|
else if (c == '\r') // skip this character
|
|
gotCR = true;
|
|
else // append this character
|
|
sb.append(c);
|
|
} else {
|
|
// Previous character was '\'. So no need to
|
|
// bother with any special processing, just
|
|
// append this character
|
|
sb.append(c);
|
|
gotEscape = false;
|
|
}
|
|
}
|
|
return sb.toString();
|
|
}
|
|
}
|