feat(jdk8): move files to a new folder so that resources are not compiled.

2025-09-07 15:25:52 +08:00
parent 3f0047bf6f
commit 8c35cfb1c0
17415 changed files with 217 additions and 213 deletions
--- a/jdkSrc/jdk8/sun/text/normalizer/CharTrie.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/CharTrie.java
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+
+/**
+ * Trie implementation which stores data in char, 16 bits.
+ * @author synwee
+ * @see com.ibm.icu.impl.Trie
+ * @since release 2.1, Jan 01 2002
+ */
+
+ // note that i need to handle the block calculations later, since chartrie
+ // in icu4c uses the same index array.
+public class CharTrie extends Trie
+{
+    // public constructors ---------------------------------------------
+
+    /**
+    * <p>Creates a new Trie with the settings for the trie data.</p>
+    * <p>Unserialize the 32-bit-aligned input stream and use the data for the
+    * trie.</p>
+    * @param inputStream file input stream to a ICU data file, containing
+    *                    the trie
+    * @param dataManipulate object which provides methods to parse the char
+    *                        data
+    * @throws IOException thrown when data reading fails
+    * @draft 2.1
+    */
+    public CharTrie(InputStream inputStream,
+                    DataManipulate dataManipulate) throws IOException
+    {
+        super(inputStream, dataManipulate);
+
+        if (!isCharTrie()) {
+            throw new IllegalArgumentException(
+                               "Data given does not belong to a char trie.");
+        }
+        m_friendAgent_ = new FriendAgent();
+    }
+
+    /**
+     * Make a dummy CharTrie.
+     * A dummy trie is an empty runtime trie, used when a real data trie cannot
+     * be loaded.
+     *
+     * The trie always returns the initialValue,
+     * or the leadUnitValue for lead surrogate code points.
+     * The Latin-1 part is always set up to be linear.
+     *
+     * @param initialValue the initial value that is set for all code points
+     * @param leadUnitValue the value for lead surrogate code _units_ that do not
+     *                      have associated supplementary data
+     * @param dataManipulate object which provides methods to parse the char data
+     */
+    public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
+        super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
+
+        int dataLength, latin1Length, i, limit;
+        char block;
+
+        /* calculate the actual size of the dummy trie data */
+
+        /* max(Latin-1, block 0) */
+        dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
+        if(leadUnitValue!=initialValue) {
+            dataLength+=DATA_BLOCK_LENGTH;
+        }
+        m_data_=new char[dataLength];
+        m_dataLength_=dataLength;
+
+        m_initialValue_=(char)initialValue;
+
+        /* fill the index and data arrays */
+
+        /* indexes are preset to 0 (block 0) */
+
+        /* Latin-1 data */
+        for(i=0; i<latin1Length; ++i) {
+            m_data_[i]=(char)initialValue;
+        }
+
+        if(leadUnitValue!=initialValue) {
+            /* indexes for lead surrogate code units to the block after Latin-1 */
+            block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
+            i=0xd800>>INDEX_STAGE_1_SHIFT_;
+            limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
+            for(; i<limit; ++i) {
+                m_index_[i]=block;
+            }
+
+            /* data for lead surrogate code units */
+            limit=latin1Length+DATA_BLOCK_LENGTH;
+            for(i=latin1Length; i<limit; ++i) {
+                m_data_[i]=(char)leadUnitValue;
+            }
+        }
+
+        m_friendAgent_ = new FriendAgent();
+    }
+
+    /**
+     * Java friend implementation
+     */
+    public class FriendAgent
+    {
+        /**
+         * Gives out the index array of the trie
+         * @return index array of trie
+         */
+        public char[] getPrivateIndex()
+        {
+            return m_index_;
+        }
+        /**
+         * Gives out the data array of the trie
+         * @return data array of trie
+         */
+        public char[] getPrivateData()
+        {
+            return m_data_;
+        }
+        /**
+         * Gives out the data offset in the trie
+         * @return data offset in the trie
+         */
+        public int getPrivateInitialValue()
+        {
+            return m_initialValue_;
+        }
+    }
+
+    // public methods --------------------------------------------------
+
+    /**
+     * Java friend implementation
+     * To store the index and data array into the argument.
+     * @param friend java friend UCharacterProperty object to store the array
+     */
+    public void putIndexData(UCharacterProperty friend)
+    {
+        friend.setIndexData(m_friendAgent_);
+    }
+
+    /**
+    * Gets the value associated with the codepoint.
+    * If no value is associated with the codepoint, a default value will be
+    * returned.
+    * @param ch codepoint
+    * @return offset to data
+    * @draft 2.1
+    */
+    public final char getCodePointValue(int ch)
+    {
+        int offset;
+
+        // fastpath for U+0000..U+D7FF
+        if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
+            // copy of getRawOffset()
+            offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
+                    + (ch & INDEX_STAGE_3_MASK_);
+            return m_data_[offset];
+        }
+
+        // handle U+D800..U+10FFFF
+        offset = getCodePointOffset(ch);
+
+        // return -1 if there is an error, in this case we return the default
+        // value: m_initialValue_
+        return (offset >= 0) ? m_data_[offset] : m_initialValue_;
+    }
+
+    /**
+    * Gets the value to the data which this lead surrogate character points
+    * to.
+    * Returned data may contain folding offset information for the next
+    * trailing surrogate character.
+    * This method does not guarantee correct results for trail surrogates.
+    * @param ch lead surrogate character
+    * @return data value
+    * @draft 2.1
+    */
+    public final char getLeadValue(char ch)
+    {
+       return m_data_[getLeadOffset(ch)];
+    }
+
+    /**
+    * Get the value associated with a pair of surrogates.
+    * @param lead a lead surrogate
+    * @param trail a trail surrogate
+    * @draft 2.1
+    */
+    public final char getSurrogateValue(char lead, char trail)
+    {
+        int offset = getSurrogateOffset(lead, trail);
+        if (offset > 0) {
+            return m_data_[offset];
+        }
+        return m_initialValue_;
+    }
+
+    /**
+    * <p>Get a value from a folding offset (from the value of a lead surrogate)
+    * and a trail surrogate.</p>
+    * <p>If the
+    * @param leadvalue value associated with the lead surrogate which contains
+    *        the folding offset
+    * @param trail surrogate
+    * @return trie data value associated with the trail character
+    * @draft 2.1
+    */
+    public final char getTrailValue(int leadvalue, char trail)
+    {
+        if (m_dataManipulate_ == null) {
+            throw new NullPointerException(
+                             "The field DataManipulate in this Trie is null");
+        }
+        int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
+        if (offset > 0) {
+            return m_data_[getRawOffset(offset,
+                                        (char)(trail & SURROGATE_MASK_))];
+        }
+        return m_initialValue_;
+    }
+
+    // protected methods -----------------------------------------------
+
+    /**
+    * <p>Parses the input stream and stores its trie content into a index and
+    * data array</p>
+    * @param inputStream data input stream containing trie data
+    * @exception IOException thrown when data reading fails
+    */
+    protected final void unserialize(InputStream inputStream)
+                                                throws IOException
+    {
+        DataInputStream input = new DataInputStream(inputStream);
+        int indexDataLength = m_dataOffset_ + m_dataLength_;
+        m_index_ = new char[indexDataLength];
+        for (int i = 0; i < indexDataLength; i ++) {
+            m_index_[i] = input.readChar();
+        }
+        m_data_           = m_index_;
+        m_initialValue_   = m_data_[m_dataOffset_];
+    }
+
+    /**
+    * Gets the offset to the data which the surrogate pair points to.
+    * @param lead lead surrogate
+    * @param trail trailing surrogate
+    * @return offset to data
+    * @draft 2.1
+    */
+    protected final int getSurrogateOffset(char lead, char trail)
+    {
+        if (m_dataManipulate_ == null) {
+            throw new NullPointerException(
+                             "The field DataManipulate in this Trie is null");
+        }
+
+        // get fold position for the next trail surrogate
+        int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
+
+        // get the real data from the folded lead/trail units
+        if (offset > 0) {
+            return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
+        }
+
+        // return -1 if there is an error, in this case we return the default
+        // value: m_initialValue_
+        return -1;
+    }
+
+    /**
+    * Gets the value at the argument index.
+    * For use internally in TrieIterator.
+    * @param index value at index will be retrieved
+    * @return 32 bit value
+    * @see com.ibm.icu.impl.TrieIterator
+    * @draft 2.1
+    */
+    protected final int getValue(int index)
+    {
+        return m_data_[index];
+    }
+
+    /**
+    * Gets the default initial value
+    * @return 32 bit value
+    * @draft 2.1
+    */
+    protected final int getInitialValue()
+    {
+        return m_initialValue_;
+    }
+
+    // private data members --------------------------------------------
+
+    /**
+    * Default value
+    */
+    private char m_initialValue_;
+    /**
+    * Array of char data
+    */
+    private char m_data_[];
+    /**
+     * Agent for friends
+     */
+    private FriendAgent m_friendAgent_;
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/CharacterIteratorWrapper.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/CharacterIteratorWrapper.java
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.text.CharacterIterator;
+
+/**
+ * Adapts a {@link CharacterIterator} to the UCharacterIterator protocol by
+ * delegating every operation to the wrapped iterator.
+ * @author ram
+ */
+
+public class CharacterIteratorWrapper extends UCharacterIterator {
+
+    /** The wrapped iterator; replaced by its own clone in {@link #clone()}. */
+    private CharacterIterator iterator;
+
+    /**
+     * Wraps the given iterator.
+     * @param iter the iterator to delegate to
+     * @throws IllegalArgumentException if {@code iter} is null
+     */
+    public CharacterIteratorWrapper(CharacterIterator iter){
+        if (iter == null) {
+            throw new IllegalArgumentException();
+        }
+        this.iterator = iter;
+    }
+
+    /**
+     * Returns the current character of the wrapped iterator, mapping
+     * CharacterIterator.DONE to this protocol's DONE.
+     * @see UCharacterIterator#current()
+     */
+    public int current() {
+        final int value = iterator.current();
+        return (value == CharacterIterator.DONE) ? DONE : value;
+    }
+
+    /**
+     * Returns the distance between the wrapped iterator's begin and end index.
+     * @see UCharacterIterator#getLength()
+     */
+    public int getLength() {
+        final int end = iterator.getEndIndex();
+        final int begin = iterator.getBeginIndex();
+        return end - begin;
+    }
+
+    /**
+     * Returns the wrapped iterator's current index.
+     * @see UCharacterIterator#getIndex()
+     */
+    public int getIndex() {
+        return iterator.getIndex();
+    }
+
+    /**
+     * Returns the current character and then advances the wrapped iterator.
+     * @see UCharacterIterator#next()
+     */
+    public int next() {
+        final int value = iterator.current();
+        // advance unconditionally, exactly as the lookup above was unconditional
+        iterator.next();
+        return (value == CharacterIterator.DONE) ? DONE : value;
+    }
+
+    /**
+     * Moves the wrapped iterator back by one and returns the character there.
+     * @see UCharacterIterator#previous()
+     */
+    public int previous() {
+        final int value = iterator.previous();
+        return (value == CharacterIterator.DONE) ? DONE : value;
+    }
+
+    /**
+     * Positions the wrapped iterator at the given index.
+     * @see UCharacterIterator#setIndex(int)
+     */
+    public void setIndex(int index) {
+        iterator.setIndex(index);
+    }
+
+    //// for StringPrep
+    /**
+     * Copies the whole text of the wrapped iterator into {@code fillIn},
+     * starting at {@code offset}, and restores the iterator's index afterwards.
+     * @return the number of characters between begin and end index
+     * @throws IndexOutOfBoundsException if {@code offset} is negative or the
+     *         text does not fit into {@code fillIn} at that offset
+     * @see UCharacterIterator#getText(char[])
+     */
+    public int getText(char[] fillIn, int offset){
+        final int length = iterator.getEndIndex() - iterator.getBeginIndex();
+        final int savedIndex = iterator.getIndex();
+
+        final boolean fits = offset >= 0 && offset + length <= fillIn.length;
+        if (!fits) {
+            throw new IndexOutOfBoundsException(Integer.toString(length));
+        }
+
+        int writeAt = offset;
+        char ch = iterator.first();
+        while (ch != CharacterIterator.DONE) {
+            fillIn[writeAt++] = ch;
+            ch = iterator.next();
+        }
+
+        // walking the text moved the iterator; put it back where it was
+        iterator.setIndex(savedIndex);
+        return length;
+    }
+
+    /**
+     * Creates a clone of this iterator.  Clones the underlying character iterator.
+     * @return the clone, or null if cloning is not supported
+     * @see UCharacterIterator#clone()
+     */
+    public Object clone(){
+        final CharacterIteratorWrapper copy;
+        try {
+            copy = (CharacterIteratorWrapper) super.clone();
+            copy.iterator = (CharacterIterator) this.iterator.clone();
+        } catch (CloneNotSupportedException e) {
+            return null; // only invoked if bad underlying character iterator
+        }
+        return copy;
+    }
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/ICUBinary.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/ICUBinary.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2003, 2005, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.Arrays;
+
+public final class ICUBinary
+{
+    // public inner interface ------------------------------------------------
+
+    /**
+     * Special interface for data authentication
+     */
+    public static interface Authenticate
+    {
+        /**
+         * Method used in ICUBinary.readHeader() to provide data format
+         * authentication.
+         * @param version version of the current data
+         * @return true if dataformat is an acceptable version, false otherwise
+         */
+        public boolean isDataVersionAcceptable(byte version[]);
+    }
+
+    // public methods --------------------------------------------------------
+
+    /**
+    * <p>ICU data header reader method.
+    * Takes a ICU generated big-endian input stream, parse the ICU standard
+    * file header and authenticates them.</p>
+    * <p>Header format:
+    * <ul>
+    *     <li> Header size (char)
+    *     <li> Magic number 1 (byte)
+    *     <li> Magic number 2 (byte)
+    *     <li> Rest of the header size (char)
+    *     <li> Reserved word (char)
+    *     <li> Big endian indicator (byte)
+    *     <li> Character set family indicator (byte)
+    *     <li> Size of a char (byte) for c++ and c use
+    *     <li> Reserved byte (byte)
+    *     <li> Data format identifier (4 bytes), each ICU data has its own
+    *          identifier to distinguish them. [0] major [1] minor
+    *                                          [2] milli [3] micro
+    *     <li> Data version (4 bytes), the change version of the ICU data
+    *                             [0] major [1] minor [2] milli [3] micro
+    *     <li> Unicode version (4 bytes) this ICU is based on.
+    * </ul>
+    * </p>
+    * <p>
+    * Example of use:<br>
+    * <pre>
+    * try {
+    *    FileInputStream input = new FileInputStream(filename);
+    *    If (Utility.readICUDataHeader(input, dataformat, dataversion,
+    *                                  unicode) {
+    *        System.out.println("Verified file header, this is a ICU data file");
+    *    }
+    * } catch (IOException e) {
+    *    System.out.println("This is not a ICU data file");
+    * }
+    * </pre>
+    * </p>
+    * @param inputStream input stream that contains the ICU data header
+    * @param dataFormatIDExpected Data format expected. An array of 4 bytes
+    *                     information about the data format.
+    *                     E.g. data format ID 1.2.3.4. will became an array of
+    *                     {1, 2, 3, 4}
+    * @param authenticate user defined extra data authentication. This value
+    *                     can be null, if no extra authentication is needed.
+    * @exception IOException thrown if there is a read error or
+    *            when header authentication fails.
+    * @draft 2.1
+    */
+    public static final byte[] readHeader(InputStream inputStream,
+                                        byte dataFormatIDExpected[],
+                                        Authenticate authenticate)
+                                                          throws IOException
+    {
+        DataInputStream input = new DataInputStream(inputStream);
+        char headersize = input.readChar();
+        int readcount = 2;
+        //reading the header format
+        byte magic1 = input.readByte();
+        readcount ++;
+        byte magic2 = input.readByte();
+        readcount ++;
+        if (magic1 != MAGIC1 || magic2 != MAGIC2) {
+            throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
+        }
+
+        input.readChar(); // reading size
+        readcount += 2;
+        input.readChar(); // reading reserved word
+        readcount += 2;
+        byte bigendian    = input.readByte();
+        readcount ++;
+        byte charset      = input.readByte();
+        readcount ++;
+        byte charsize     = input.readByte();
+        readcount ++;
+        input.readByte(); // reading reserved byte
+        readcount ++;
+
+        byte dataFormatID[] = new byte[4];
+        input.readFully(dataFormatID);
+        readcount += 4;
+        byte dataVersion[] = new byte[4];
+        input.readFully(dataVersion);
+        readcount += 4;
+        byte unicodeVersion[] = new byte[4];
+        input.readFully(unicodeVersion);
+        readcount += 4;
+        if (headersize < readcount) {
+            throw new IOException("Internal Error: Header size error");
+        }
+        input.skipBytes(headersize - readcount);
+
+        if (bigendian != BIG_ENDIAN_ || charset != CHAR_SET_
+            || charsize != CHAR_SIZE_
+            || !Arrays.equals(dataFormatIDExpected, dataFormatID)
+            || (authenticate != null
+                && !authenticate.isDataVersionAcceptable(dataVersion))) {
+            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+        }
+        return unicodeVersion;
+    }
+
+    // private variables -------------------------------------------------
+
+    /**
+    * Magic numbers to authenticate the data file
+    */
+    private static final byte MAGIC1 = (byte)0xda;
+    private static final byte MAGIC2 = (byte)0x27;
+
+    /**
+    * File format authentication values
+    */
+    private static final byte BIG_ENDIAN_ = 1;
+    private static final byte CHAR_SET_ = 0;
+    private static final byte CHAR_SIZE_ = 2;
+
+    /**
+    * Error messages
+    */
+    private static final String MAGIC_NUMBER_AUTHENTICATION_FAILED_ =
+                       "ICU data file error: Not an ICU data file";
+    private static final String HEADER_AUTHENTICATION_FAILED_ =
+        "ICU data file error: Header authentication failed, please check if you have a valid ICU data file";
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/ICUData.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/ICUData.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.io.InputStream;
+import java.net.URL;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.MissingResourceException;
+
+/**
+ * Provides access to ICU data files as InputStreams.  When a security manager
+ * is installed, the resource lookup runs as a privileged action.
+ */
+public final class ICUData {
+
+    /*
+     * Opens the named resource relative to root. Returns null when the
+     * resource is missing and not required; throws MissingResourceException
+     * when it is missing and required.
+     */
+    private static InputStream getStream(final Class<ICUData> root, final String resourceName, boolean required) {
+        final InputStream stream;
+
+        if (System.getSecurityManager() == null) {
+            stream = root.getResourceAsStream(resourceName);
+        } else {
+            final PrivilegedAction<InputStream> lookup = new PrivilegedAction<InputStream>() {
+                    public InputStream run() {
+                        return root.getResourceAsStream(resourceName);
+                    }
+                };
+            stream = AccessController.doPrivileged(lookup);
+        }
+
+        if (stream != null || !required) {
+            return stream;
+        }
+        throw new MissingResourceException("could not locate data", root.getPackage().getName(), resourceName);
+    }
+
+    /*
+     * Convenience method that calls getStream(ICUData.class, resourceName, false),
+     * i.e. a missing resource yields null.
+     */
+    public static InputStream getStream(String resourceName) {
+        final boolean required = false;
+        return getStream(ICUData.class, resourceName, required);
+    }
+
+    /*
+     * Convenience method that calls getStream(ICUData.class, resourceName, true),
+     * i.e. a missing resource raises MissingResourceException.
+     */
+    public static InputStream getRequiredStream(String resourceName) {
+        final boolean required = true;
+        return getStream(ICUData.class, resourceName, required);
+    }
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/IntTrie.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/IntTrie.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2003, 2005, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Trie implementation which stores data in int, 32 bits.
+ * @author synwee
+ * @see com.ibm.icu.impl.Trie
+ * @since release 2.1, Jan 01 2002
+ */
+public class IntTrie extends Trie
+{
+    // public constructors ---------------------------------------------
+
+    /**
+    * <p>Creates a trie from serialized data, delegating the parsing to the
+    * Trie superclass constructor.</p>
+    * <p>Fails if the parsed data does not describe an int trie.</p>
+    * @param inputStream file input stream to a ICU data file, containing
+    *                    the trie
+    * @param datamanipulate object which provides methods to parse the char
+    *                        data
+    * @throws IOException thrown when data reading fails
+    * @draft 2.1
+    */
+    public IntTrie(InputStream inputStream, DataManipulate datamanipulate)
+                                                    throws IOException
+    {
+        super(inputStream, datamanipulate);
+        if (isIntTrie()) {
+            return;
+        }
+        throw new IllegalArgumentException("Data given does not belong to a int trie.");
+    }
+
+    // public methods --------------------------------------------------
+
+    /**
+    * Looks up the value stored for a codepoint.
+    * When the computed offset is negative, the trie's initial (default)
+    * value is returned instead of a data entry.
+    * @param ch codepoint
+    * @return data value for the codepoint, or the initial value
+    * @draft 2.1
+    */
+    public final int getCodePointValue(int ch)
+    {
+        int offset = getCodePointOffset(ch);
+        if (offset < 0) {
+            return m_initialValue_;
+        }
+        return m_data_[offset];
+    }
+
+    /**
+    * Reads the data value that a lead surrogate character maps to.
+    * The returned data may contain folding offset information for the
+    * next trailing surrogate character.
+    * Results are not guaranteed to be correct when a trail surrogate is
+    * passed in.
+    * @param ch lead surrogate character
+    * @return data value
+    * @draft 2.1
+    */
+    public final int getLeadValue(char ch)
+    {
+        return m_data_[getLeadOffset(ch)];
+    }
+
+    /**
+    * Resolves the value for a trail surrogate, given the value already
+    * looked up for its lead surrogate.
+    * @param leadvalue the value of a lead surrogate that contains the
+    *        folding offset
+    * @param trail surrogate
+    * @return trie data value associated with the trail character
+    * @draft 2.1
+    */
+    public final int getTrailValue(int leadvalue, char trail)
+    {
+        if (m_dataManipulate_ == null) {
+            throw new NullPointerException(
+                             "The field DataManipulate in this Trie is null");
+        }
+        int foldingOffset = m_dataManipulate_.getFoldingOffset(leadvalue);
+        if (foldingOffset <= 0) {
+            return m_initialValue_;
+        }
+        int index = getRawOffset(foldingOffset, (char)(trail & SURROGATE_MASK_));
+        return m_data_[index];
+    }
+
+    // protected methods -----------------------------------------------
+
+    /**
+    * <p>Reads the trie from the input stream: the superclass parses its part
+    * first, then the 32-bit data values are read into the data array.</p>
+    * @param inputStream data input stream containing trie data
+    * @exception IOException thrown when data reading fails
+    */
+    protected final void unserialize(InputStream inputStream)
+                                                    throws IOException
+    {
+        super.unserialize(inputStream);
+        DataInputStream input = new DataInputStream(inputStream);
+        m_data_ = new int[m_dataLength_];
+        for (int i = 0; i < m_data_.length; i ++) {
+            m_data_[i] = input.readInt();
+        }
+        // the first data entry is used as the initial value
+        m_initialValue_ = m_data_[0];
+    }
+
+    /**
+    * Computes the data offset that a surrogate pair points to.
+    * @param lead lead surrogate
+    * @param trail trailing surrogate
+    * @return offset to data, or -1 when the folding offset is not positive
+    * @draft 2.1
+    */
+    protected final int getSurrogateOffset(char lead, char trail)
+    {
+        if (m_dataManipulate_ == null) {
+            throw new NullPointerException(
+                             "The field DataManipulate in this Trie is null");
+        }
+        // the lead surrogate's value holds the fold position for the trail
+        int foldingOffset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
+        if (foldingOffset <= 0) {
+            // -1 marks an error; the default value m_initialValue_ applies
+            return -1;
+        }
+        // get the real data from the folded lead/trail units
+        return getRawOffset(foldingOffset, (char)(trail & SURROGATE_MASK_));
+    }
+
+    /**
+    * Returns the data entry stored at the given index.
+    * For use internally in TrieIterator
+    * @param index position in the data array to read
+    * @return 32 bit value
+    * @see com.ibm.icu.impl.TrieIterator
+    * @draft 2.1
+    */
+    protected final int getValue(int index)
+    {
+        return m_data_[index];
+    }
+
+    /**
+    * Returns the default initial value
+    * @return 32 bit value
+    * @draft 2.1
+    */
+    protected final int getInitialValue()
+    {
+        return m_initialValue_;
+    }
+
+    // package private methods -----------------------------------------
+
+    /**
+     * Internal constructor for builder use
+     * @param index the index array to be slotted into this trie
+     * @param data the data array to be slotted into this trie (not copied)
+     * @param initialvalue the initial value for this trie
+     * @param options trie options to use
+     * @param datamanipulate folding implementation
+     */
+    IntTrie(char[] index, int[] data, int initialvalue, int options,
+            DataManipulate datamanipulate)
+    {
+        super(index, options, datamanipulate);
+        m_data_ = data;
+        m_dataLength_ = data.length;
+        m_initialValue_ = initialvalue;
+    }
+
+    // private data members --------------------------------------------
+
+    /**
+    * Default value
+    */
+    private int m_initialValue_;
+    /**
+    * Array of int data
+    */
+    private int[] m_data_;
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/NormalizerBase.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/NormalizerBase.java
--- a/jdkSrc/jdk8/sun/text/normalizer/NormalizerDataReader.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/NormalizerDataReader.java
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+ * @author        Ram Viswanadha
+ */
+
+    /*
+     * Description of the format of unorm.icu version 2.1.
+     *
+     * Main change from version 1 to version 2:
+     * Use of new, common Trie instead of normalization-specific tries.
+     * Change to version 2.1: add third/auxiliary trie with associated data.
+     *
+     * For more details of how to use the data structures see the code
+     * in unorm.cpp (runtime normalization code) and
+     * in gennorm.c and gennorm/store.c (build-time data generation).
+     *
+     * For the serialized format of Trie see Trie.c/TrieHeader.
+     *
+     * - Overall partition
+     *
+     * unorm.icu customarily begins with a UDataInfo structure, see udata.h and .c.
+     * After that there are the following structures:
+     *
+     * char indexes[INDEX_TOP];                   -- INDEX_TOP=32, see enum in this file
+     *
+     * Trie normTrie;                           -- size in bytes=indexes[INDEX_TRIE_SIZE]
+     *
+     * char extraData[extraDataTop];            -- extraDataTop=indexes[INDEX_UCHAR_COUNT]
+     *                                                 extraData[0] contains the number of units for
+     *                                                 FC_NFKC_Closure (formatVersion>=2.1)
+     *
+     * char combiningTable[combiningTableTop];  -- combiningTableTop=indexes[INDEX_COMBINE_DATA_COUNT]
+     *                                                 combiningTableTop may include one 16-bit padding unit
+     *                                                 to make sure that fcdTrie is 32-bit-aligned
+     *
+     * Trie fcdTrie;                            -- size in bytes=indexes[INDEX_FCD_TRIE_SIZE]
+     *
+     * Trie auxTrie;                            -- size in bytes=indexes[INDEX_AUX_TRIE_SIZE]
+     *
+     *
+     * The indexes array contains lengths and sizes of the following arrays and structures
+     * as well as the following values:
+     *  indexes[INDEX_COMBINE_FWD_COUNT]=combineFwdTop
+     *      -- one more than the highest combining index computed for forward-only-combining characters
+     *  indexes[INDEX_COMBINE_BOTH_COUNT]=combineBothTop-combineFwdTop
+     *      -- number of combining indexes computed for both-ways-combining characters
+     *  indexes[INDEX_COMBINE_BACK_COUNT]=combineBackTop-combineBothTop
+     *      -- number of combining indexes computed for backward-only-combining characters
+     *
+     *  indexes[INDEX_MIN_NF*_NO_MAYBE] (where *={ C, D, KC, KD })
+     *      -- first code point with a quick check NF* value of NO/MAYBE
+     *
+     *
+     * - Tries
+     *
+     * The main structures are two Trie tables ("compact arrays"),
+     * each with one index array and one data array.
+     * See Trie.h and Trie.c.
+     *
+     *
+     * - Tries in unorm.icu
+     *
+     * The first trie (normTrie above)
+     * provides data for the NF* quick checks and normalization.
+     * The second trie (fcdTrie above) provides data just for FCD checks.
+     *
+     *
+     * - norm32 data words from the first trie
+     *
+     * The norm32Table contains one 32-bit word "norm32" per code point.
+     * It contains the following bit fields:
+     * 31..16   extra data index, EXTRA_SHIFT is used to shift this field down
+     *          if this index is <EXTRA_INDEX_TOP then it is an index into
+     *              extraData[] where variable-length normalization data for this
+     *              code point is found
+     *          if this index is <EXTRA_INDEX_TOP+EXTRA_SURROGATE_TOP
+     *              then this is a norm32 for a leading surrogate, and the index
+     *              value is used together with the following trailing surrogate
+     *              code unit in the second trie access
+     *          if this index is >=EXTRA_INDEX_TOP+EXTRA_SURROGATE_TOP
+     *              then this is a norm32 for a "special" character,
+     *              i.e., the character is a Hangul syllable or a Jamo
+     *              see EXTRA_HANGUL etc.
+     *          generally, instead of extracting this index from the norm32 and
+     *              comparing it with the above constants,
+     *              the normalization code compares the entire norm32 value
+     *              with MIN_SPECIAL, SURROGATES_TOP, MIN_HANGUL etc.
+     *
+     * 15..8    combining class (cc) according to UnicodeData.txt
+     *
+     *  7..6    COMBINES_ANY flags, used in composition to see if a character
+     *              combines with any following or preceding character(s)
+     *              at all
+     *     7    COMBINES_BACK
+     *     6    COMBINES_FWD
+     *
+     *  5..0    quick check flags, set for "no" or "maybe", with separate flags for
+     *              each normalization form
+     *              the higher bits are "maybe" flags; for NF*D there are no such flags
+     *              the lower bits are "no" flags for all forms, in the same order
+     *              as the "maybe" flags,
+     *              which is (MSB to LSB): NFKD NFD NFKC NFC
+     *  5..4    QC_ANY_MAYBE
+     *  3..0    QC_ANY_NO
+     *              see further related constants
+     *
+     *
+     * - Extra data per code point
+     *
+     * "Extra data" is referenced by the index in norm32.
+     * It is variable-length data. It is only present, and only those parts
+     * of it are, as needed for a given character.
+     * The norm32 extra data index is added to the beginning of extraData[]
+     * to get to a vector of 16-bit words with data at the following offsets:
+     *
+     * [-1]     Combining index for composition.
+     *              Stored only if norm32&COMBINES_ANY .
+     * [0]      Lengths of the canonical and compatibility decomposition strings.
+     *              Stored only if there are decompositions, i.e.,
+     *              if norm32&(QC_NFD|QC_NFKD)
+     *          High byte: length of NFKD, or 0 if none
+     *          Low byte: length of NFD, or 0 if none
+     *          Each length byte also has another flag:
+     *              Bit 7 of a length byte is set if there are non-zero
+     *              combining classes (cc's) associated with the respective
+     *              decomposition. If this flag is set, then the decomposition
+     *              is preceded by a 16-bit word that contains the
+     *              leading and trailing cc's.
+     *              Bits 6..0 of a length byte are the length of the
+     *              decomposition string, not counting the cc word.
+     * [1..n]   NFD
+     * [n+1..]  NFKD
+     *
+     * Each of the two decompositions consists of up to two parts:
+     * - The 16-bit words with the leading and trailing cc's.
+     *   This is only stored if bit 7 of the corresponding length byte
+     *   is set. In this case, at least one of the cc's is not zero.
+     *   High byte: leading cc==cc of the first code point in the decomposition string
+     *   Low byte: trailing cc==cc of the last code point in the decomposition string
+     * - The decomposition string in UTF-16, with length code units.
+     *
+     *
+     * - Combining indexes and combiningTable[]
+     *
+     * Combining indexes are stored at the [-1] offset of the extra data
+     * if the character combines forward or backward with any other characters.
+     * They are used for (re)composition in NF*C.
+     * Values of combining indexes are arranged according to whether a character
+     * combines forward, backward, or both ways:
+     *    forward-only < both ways < backward-only
+     *
+     * The index values for forward-only and both-ways combining characters
+     * are indexes into the combiningTable[].
+     * The index values for backward-only combining characters are simply
+     * incremented from the preceding index values to be unique.
+     *
+     * In the combiningTable[], a variable-length list
+     * of variable-length (back-index, code point) pair entries is stored
+     * for each forward-combining character.
+     *
+     * These back-indexes are the combining indexes of both-ways or backward-only
+     * combining characters that the forward-combining character combines with.
+     *
+     * Each list is sorted in ascending order of back-indexes.
+     * Each list is terminated with the last back-index having bit 15 set.
+     *
+     * Each pair (back-index, code point) takes up either 2 or 3
+     * 16-bit words.
+     * The first word of a list entry is the back-index, with its bit 15 set if
+     * this is the last pair in the list.
+     *
+     * The second word contains flags in bits 15..13 that determine
+     * if there is a third word and how the combined character is encoded:
+     * 15   set if there is a third word in this list entry
+     * 14   set if the result is a supplementary character
+     * 13   set if the result itself combines forward
+     *
+     * According to these bits 15..14 of the second word,
+     * the result character is encoded as follows:
+     * 00 or 01 The result is <=0x1fff and stored in bits 12..0 of
+     *          the second word.
+     * 10       The result is 0x2000..0xffff and stored in the third word.
+     *          Bits 12..0 of the second word are not used.
+     * 11       The result is a supplementary character.
+     *          Bits 9..0 of the leading surrogate are in bits 9..0 of
+     *          the second word.
+     *          Add 0xd800 to these bits to get the complete surrogate.
+     *          Bits 12..10 of the second word are not used.
+     *          The trailing surrogate is stored in the third word.
+     *
+     *
+     * - FCD trie
+     *
+     * The FCD trie is very simple.
+     * It is a folded trie with 16-bit data words.
+     * In each word, the high byte contains the leading cc of the character,
+     * and the low byte contains the trailing cc of the character.
+     * These cc's are the cc's of the first and last code points in the
+     * canonical decomposition of the character.
+     *
+     * Since all 16 bits are used for cc's, lead surrogates must be tested
+     * by checking the code unit instead of the trie data.
+     * This is done only if the 16-bit data word is not zero.
+     * If the code unit is a leading surrogate and the data word is not zero,
+     * then instead of cc's it contains the offset for the second trie lookup.
+     *
+     *
+     * - Auxiliary trie and data
+     *
+     *
+     * The auxiliary 16-bit trie contains data for additional properties.
+     * Bits
+     * 15..13   reserved
+     *     12   not NFC_Skippable (f) (formatVersion>=2.2)
+     *     11   flag: not a safe starter for canonical closure
+     *     10   composition exclusion
+     *  9.. 0   index into extraData[] to FC_NFKC_Closure string
+     *          (not for lead surrogate),
+     *          or lead surrogate offset (for lead surrogate, if 9..0 not zero)
+     *
+     * Conditions for "NF* Skippable" from Mark Davis' com.ibm.text.UCD.NFSkippable:
+     * (used in NormalizerTransliterator)
+     *
+     * A skippable character is
+     * a) unassigned, or ALL of the following:
+     * b) of combining class 0.
+     * c) not decomposed by this normalization form.
+     * AND if NFC or NFKC,
+     * d) can never compose with a previous character.
+     * e) can never compose with a following character.
+     * f) can never change if another character is added.
+     *    Example: a-breve might satisfy all but f, but if you
+     *    add an ogonek it changes to a-ogonek + breve
+     *
+     * a)..e) must be tested from norm32.
+     * Since f) is more complicated, the (not-)NFC_Skippable flag (f) is built
+     * into the auxiliary trie.
+     * The same bit is used for NFC and NFKC; (c) differs for them.
+     * As usual, we build the "not skippable" flags so that unassigned
+     * code points get a 0 bit.
+     * This bit is only valid after (a)..(e) test FALSE; test NFD_NO before (f) as well.
+     * Test Hangul LV syllables entirely in code.
+     *
+     *
+     * - FC_NFKC_Closure strings in extraData[]
+     *
+     * Strings are either stored as a single code unit or as the length
+     * followed by that many units.
+     *
+     */
+final class NormalizerDataReader implements ICUBinary.Authenticate {
+
+   /**
+    * <p>Protected constructor.</p>
+    * @param inputStream ICU unorm.icu file input stream
+    * @exception IOException throw if data file fails authentication
+    * @draft 2.1
+    */
+    protected NormalizerDataReader(InputStream inputStream)
+                                        throws IOException{
+        unicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this);
+        dataInputStream = new DataInputStream(inputStream);
+    }
+
+    // protected methods -------------------------------------------------
+
+    /**
+    * <p>Reads the given number of 32-bit values from the stream.</p>
+    * @param length number of int values to read
+    * @return new array holding the values in the order they were read
+    * @exception IOException thrown when data reading fails
+    */
+    protected int[] readIndexes(int length)throws IOException{
+        int[] indexes = new int[length];
+        for (int i = 0; i < length; i++) {
+            indexes[i] = dataInputStream.readInt();
+        }
+        return indexes;
+    }
+
+    /**
+    * <p>Reads unorm.icu, parsing it into blocks of data to be stored in
+    * NormalizerImpl. Each array is filled to its full length.</p>
+    * @param normBytes receives the bytes that make up the normTrie
+    * @param fcdBytes receives the bytes of the fcdTrie
+    * @param auxBytes receives the bytes of the auxTrie
+    * @param extraData receives the extra data
+    * @param combiningTable receives the combining class table
+    * @exception IOException thrown when data reading fails
+    * @draft 2.1
+    */
+    protected void read(byte[] normBytes, byte[] fcdBytes, byte[] auxBytes,
+                        char[] extraData, char[] combiningTable)
+                        throws IOException{
+        //Read the bytes that make up the normTrie
+        dataInputStream.readFully(normBytes);
+
+        //Read the extra data
+        for(int i=0;i<extraData.length;i++){
+            extraData[i]=dataInputStream.readChar();
+        }
+
+        //Read the combining class table
+        for(int i=0; i<combiningTable.length; i++){
+            combiningTable[i]=dataInputStream.readChar();
+        }
+
+        //Read the fcdTrie
+        dataInputStream.readFully(fcdBytes);
+
+        //Read the AuxTrie
+        dataInputStream.readFully(auxBytes);
+    }
+
+    // Returns a copy so that callers cannot alter the shared constant.
+    public byte[] getDataFormatVersion(){
+        return DATA_FORMAT_VERSION.clone();
+    }
+
+    // Only bytes 0, 2 and 3 are compared; byte 1 of the version is not checked.
+    // NOTE(review): presumably deliberate so other minor versions load - confirm.
+    public boolean isDataVersionAcceptable(byte[] version){
+        return version[0] == DATA_FORMAT_VERSION[0]
+               && version[2] == DATA_FORMAT_VERSION[2]
+               && version[3] == DATA_FORMAT_VERSION[3];
+    }
+
+    public byte[] getUnicodeVersion(){
+        return unicodeVersion;
+    }
+    // private data members -------------------------------------------------
+
+    /** ICU data file input stream */
+    private final DataInputStream dataInputStream;
+
+    /** Value returned by ICUBinary.readHeader for this stream. */
+    private final byte[] unicodeVersion;
+
+    /** Data format identifier handed to ICUBinary.readHeader ("Norm" in ASCII). */
+    private static final byte[] DATA_FORMAT_ID = {0x4E, 0x6F, 0x72, 0x6D};
+
+    /**
+    * File format version that this class understands.
+    * No guarantees are made if an older version is used
+    * see store.c of gennorm for more information and values
+    */
+    private static final byte[] DATA_FORMAT_VERSION = {0x2, 0x2, 0x5, 0x2};
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/NormalizerImpl.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/NormalizerImpl.java
--- a/jdkSrc/jdk8/sun/text/normalizer/RangeValueIterator.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/RangeValueIterator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+/**
+ * <p>Interface for enabling iteration over sets of &lt;int index, int value&gt;,
+ * where index is the sorted integer index in ascending order and value, its
+ * associated integer value.</p>
+ * <p>The result for each iteration is the consecutive range of
+ * &lt;int index, int value&gt; with the same value. Result is represented by
+ * &lt;start, limit, value&gt; where</p>
+ * <ul>
+ * <li> start is the starting integer of the result range
+ * <li> limit is 1 after the maximum integer that follows start, such that
+ *      all integers between start and (limit - 1), inclusive, have the same
+ *      associated integer value.
+ * <li> value is the integer value that all integers from start to (limit - 1)
+ *      share in common.
+ * </ul>
+ * <p>
+ * Hence value(start) = value(start + 1) = .... = value(start + n) = .... =
+ * value(limit - 1). However value(start -1) != value(start) and
+ * value(limit) != value(start).
+ * </p>
+ * <p>Most implementations will be created by factory methods, such as the
+ * character type iterator in UCharacter.getTypeIterator. See example below.
+ * </p>
+ * Example of use:<br>
+ * <pre>
+ * RangeValueIterator iterator = UCharacter.getTypeIterator();
+ * RangeValueIterator.Element result = new RangeValueIterator.Element();
+ * while (iterator.next(result)) {
+ *     System.out.println("Codepoint \\u" +
+ *                        Integer.toHexString(result.start) +
+ *                        " to codepoint \\u" +
+ *                        Integer.toHexString(result.limit - 1) +
+ *                        " has the character type " + result.value);
+ * }
+ * </pre>
+ * @author synwee
+ * @stable ICU 2.6
+ */
+public interface RangeValueIterator
+{
+    // public inner class ---------------------------------------------
+
+    /**
+    * Result holder passed to RangeValueIterator.next.
+    * Carries the start and limit of one continuous result range and the
+    * value that all integers in [start, limit - 1] have in common.
+    * @stable ICU 2.6
+    */
+    class Element
+    {
+        // public data member ---------------------------------------------
+
+        /**
+        * First integer of the continuous result range that shares one
+        * value
+        * @stable ICU 2.6
+        */
+        public int start;
+        /**
+        * One past the last integer of the continuous result range that
+        * shares one value
+        * @stable ICU 2.6
+        */
+        public int limit;
+        /**
+        * Common value of the continuous result range
+        * @stable ICU 2.6
+        */
+        public int value;
+
+        // public constructor --------------------------------------------
+
+        /**
+         * Explicit no-argument constructor, present so javadoc has one to show
+         * @stable ICU 2.4
+         */
+        public Element()
+        {
+        }
+    }
+
+    // public methods -------------------------------------------------
+
+    /**
+    * <p>Advances to the next maximal result range with a common value and
+    * reports whether the iteration produced one.</p>
+    * <p>When false is returned, the contents of element will not be
+    * updated.</p>
+    * @param element for storing the result range and value
+    * @return true if we are not at the end of the iteration, false otherwise.
+    * @see Element
+    * @stable ICU 2.6
+    */
+    boolean next(Element element);
+
+    /**
+    * Rewinds the iterator to the beginning of the iteration.
+    * @stable ICU 2.6
+    */
+    void reset();
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/Replaceable.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/Replaceable.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+/**
+ * <code>Replaceable</code> is an interface representing a
+ * string of characters that supports the replacement of a range of
+ * itself with a new string of characters.  It is used by APIs that
+ * change a piece of text while retaining metadata.  Metadata is data
+ * other than the Unicode characters returned by char32At().  One
+ * example of metadata is style attributes; another is an edit
+ * history, marking each character with an author and revision number.
+ *
+ * <p>An implicit aspect of the <code>Replaceable</code> API is that
+ * during a replace operation, new characters take on the metadata of
+ * the old characters.  For example, if the string "the <b>bold</b>
+ * font" has range (4, 8) replaced with "strong", then it becomes "the
+ * <b>strong</b> font".
+ *
+ * <p><code>Replaceable</code> specifies ranges using a start
+ * offset and a limit offset.  The range of characters thus specified
+ * includes the characters at offset start..limit-1.  That is, the
+ * start offset is inclusive, and the limit offset is exclusive.
+ *
+ * <p><code>Replaceable</code> also includes API to access characters
+ * in the string: <code>length()</code>, <code>charAt()</code>,
+ * <code>char32At()</code>, and <code>extractBetween()</code>.
+ *
+ * <p>For a subclass to support metadata, typical behavior of
+ * <code>replace()</code> is the following:
+ * <ul>
+ *   <li>Set the metadata of the new text to the metadata of the first
+ *   character replaced</li>
+ *   <li>If no characters are replaced, use the metadata of the
+ *   previous character</li>
+ *   <li>If there is no previous character (i.e. start == 0), use the
+ *   following character</li>
+ *   <li>If there is no following character (i.e. the replaceable was
+ *   empty), use default metadata</li>
+ *   <li>If the code point U+FFFF is seen, it should be interpreted as
+ *   a special marker having no metadata</li>
+ * </ul>
+ * If this is not the behavior, the subclass should document any differences.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+public interface Replaceable {
+    /**
+     * Returns the number of 16-bit code units in the text.
+     * @return number of 16-bit code units in text
+     * @stable ICU 2.0
+     */
+    int length();
+
+    /**
+     * Returns the 16-bit code unit at the given offset into the text.
+     * @param offset an integer between 0 and <code>length()</code>-1
+     * inclusive
+     * @return 16-bit code unit of text at given offset
+     * @stable ICU 2.0
+     */
+    char charAt(int offset);
+
+    //// for StringPrep
+    /**
+     * Copies characters from this object into the destination
+     * character array.  The first character to be copied is at index
+     * <code>srcStart</code>; the last character to be copied is at
+     * index <code>srcLimit-1</code> (thus the total number of
+     * characters to be copied is <code>srcLimit-srcStart</code>). The
+     * characters are copied into the subarray of <code>dst</code>
+     * starting at index <code>dstStart</code> and ending at index
+     * <code>dstStart + (srcLimit-srcStart) - 1</code>.
+     *
+     * @param srcStart the beginning index to copy, inclusive;
+     * <code>0 &lt;= srcStart &lt;= srcLimit</code>.
+     * @param srcLimit the ending index to copy, exclusive;
+     * <code>srcStart &lt;= srcLimit &lt;= length()</code>.
+     * @param dst the destination array.
+     * @param dstStart the start offset in the destination array.
+     * @stable ICU 2.0
+     */
+    void getChars(int srcStart, int srcLimit, char[] dst, int dstStart);
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/ReplaceableString.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/ReplaceableString.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+/**
+ * <code>ReplaceableString</code> is an adapter class that implements the
+ * <code>Replaceable</code> API around an ordinary <code>StringBuffer</code>.
+ *
+ * <p><em>Note:</em> This class does not support attributes and is not
+ * intended for general use.  Most clients will need to implement
+ * {@link Replaceable} in their text representation class.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @see Replaceable
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+public class ReplaceableString implements Replaceable {
+
+    /**
+     * Backing storage.  Either a private buffer created from a String or
+     * the caller's own buffer, depending on the constructor used.  The
+     * reference is assigned exactly once, at construction.
+     */
+    private final StringBuffer buf;
+
+    /**
+     * Construct a new object with the given initial contents.
+     * The contents are copied into a newly created
+     * <code>StringBuffer</code>.
+     * @param str initial contents
+     * @stable ICU 2.0
+     */
+    public ReplaceableString(String str) {
+        buf = new StringBuffer(str);
+    }
+
+    //// for StringPrep
+    /**
+     * Construct a new object using <code>buf</code> for internal
+     * storage.  The contents of <code>buf</code> at the time of
+     * construction are used as the initial contents.  <em>Note!
+     * Modifications to <code>buf</code> will modify this object, and
+     * vice versa.</em>
+     * @param buf object to be used as internal storage; it is stored
+     * as-is and not copied
+     * @stable ICU 2.0
+     */
+    public ReplaceableString(StringBuffer buf) {
+        this.buf = buf;
+    }
+
+    /**
+     * Return the number of characters contained in this object.
+     * <code>Replaceable</code> API.
+     * @return the current length of the backing buffer
+     * @stable ICU 2.0
+     */
+    @Override
+    public int length() {
+        return buf.length();
+    }
+
+    /**
+     * Return the character at the given position in this object.
+     * <code>Replaceable</code> API.
+     * @param offset offset into the contents, from 0 to
+     * <code>length()</code> - 1
+     * @return the character of the backing buffer at <code>offset</code>
+     * @stable ICU 2.0
+     */
+    @Override
+    public char charAt(int offset) {
+        return buf.charAt(offset);
+    }
+
+    //// for StringPrep
+    /**
+     * Copies characters from this object into the destination
+     * character array.  The first character to be copied is at index
+     * <code>srcStart</code>; the last character to be copied is at
+     * index <code>srcLimit-1</code> (thus the total number of
+     * characters to be copied is <code>srcLimit-srcStart</code>). The
+     * characters are copied into the subarray of <code>dst</code>
+     * starting at index <code>dstStart</code> and ending at index
+     * <code>dstStart + (srcLimit-srcStart) - 1</code>.
+     * The copy itself is delegated to <code>Utility.getChars</code>.
+     *
+     * @param srcStart the beginning index to copy, inclusive;
+     * <code>0 &lt;= srcStart &lt;= srcLimit</code>.
+     * @param srcLimit the ending index to copy, exclusive;
+     * <code>srcStart &lt;= srcLimit &lt;= length()</code>.
+     * @param dst the destination array.
+     * @param dstStart the start offset in the destination array.
+     * @stable ICU 2.0
+     */
+    @Override
+    public void getChars(int srcStart, int srcLimit, char[] dst, int dstStart) {
+        Utility.getChars(buf, srcStart, srcLimit, dst, dstStart);
+    }
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/ReplaceableUCharacterIterator.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/ReplaceableUCharacterIterator.java
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+/**
+ * DLF docs must define behavior when Replaceable is mutated underneath
+ * the iterator.
+ *
+ * This and ICUCharacterIterator share some code, maybe they should share
+ * an implementation, or the common state and implementation should be
+ * moved up into UCharacterIterator.
+ *
+ * What are first, last, and getBeginIndex doing here?!?!?!
+ */
+public class ReplaceableUCharacterIterator extends UCharacterIterator {
+
+    // public constructor ------------------------------------------------------
+
+    /**
+     * Public constructor.  The string is wrapped in a new
+     * <code>ReplaceableString</code> and iteration starts at index 0.
+     * @param str text which the iterator will be based on
+     * @exception IllegalArgumentException if <code>str</code> is null
+     */
+    public ReplaceableUCharacterIterator(String str){
+        if(str==null){
+            throw new IllegalArgumentException();
+        }
+        this.replaceable  = new ReplaceableString(str);
+        this.currentIndex = 0;
+    }
+
+    //// for StringPrep
+    /**
+     * Public constructor.  The buffer is wrapped, not copied, in a new
+     * <code>ReplaceableString</code>, so later changes to
+     * <code>buf</code> are visible through this iterator.  Iteration
+     * starts at index 0.
+     * @param buf buffer of text on which the iterator will be based
+     * @exception IllegalArgumentException if <code>buf</code> is null
+     */
+    public ReplaceableUCharacterIterator(StringBuffer buf){
+        if(buf==null){
+            throw new IllegalArgumentException();
+        }
+        this.replaceable  = new ReplaceableString(buf);
+        this.currentIndex = 0;
+    }
+
+    // public methods ----------------------------------------------------------
+
+    /**
+     * Creates a copy of this iterator, does not clone the underlying
+     * <code>Replaceable</code> object
+     * @return copy of this iterator, or null if
+     *         <code>super.clone()</code> throws
+     *         <code>CloneNotSupportedException</code>
+     */
+    public Object clone(){
+        try {
+          return super.clone();
+        } catch (CloneNotSupportedException e) {
+            return null; // never invoked
+        }
+    }
+
+    /**
+     * Returns the UTF16 code unit at the current index without moving
+     * the iterator.
+     * @return current UTF16 code unit, or DONE if the current index is
+     *         not before the end of the text
+     */
+    public int current(){
+        if (currentIndex < replaceable.length()) {
+            return replaceable.charAt(currentIndex);
+        }
+        return DONE;
+    }
+
+    /**
+     * Returns the length of the text
+     * @return length of the text
+     */
+    public int getLength(){
+        return replaceable.length();
+    }
+
+    /**
+     * Gets the current index in the text.
+     * @return current index in the text.
+     */
+    public int getIndex(){
+        return currentIndex;
+    }
+
+    /**
+     * Returns the UTF16 code unit at the current index and then
+     * increments the index by 1.
+     * If the current index is already at or past the end of the text,
+     * the index is left unchanged and DONE is returned.
+     * @return UTF16 code unit at the index held before the call, or DONE
+     *         if that index is off the end of the text range.
+     */
+    public int next(){
+        if (currentIndex < replaceable.length()) {
+            return replaceable.charAt(currentIndex++);
+        }
+        return DONE;
+    }
+
+
+    /**
+     * Decrements the index by 1 and returns the UTF16 code unit at the
+     * new index.
+     * If the current index is already 0 (or less), the index is left
+     * unchanged and DONE is returned.
+     * @return previous UTF16 code unit in the text, or DONE if the
+     *         iterator is at the start of the text range.
+     */
+    public int previous(){
+        if (currentIndex > 0) {
+            return replaceable.charAt(--currentIndex);
+        }
+        return DONE;
+    }
+
+    /**
+     * <p>Sets the current index to the specified index in the text.
+     * An index equal to the text length (one past the last code unit)
+     * is accepted.  This assumes the text is stored as 16-bit code
+     * units.</p>
+     * @param currentIndex the index within the text.
+     * @exception IllegalArgumentException is thrown if an invalid index
+     *            is supplied, i.e. the index is negative or greater than
+     *            the text length.
+     */
+    public void setIndex(int currentIndex) {
+        if (currentIndex < 0 || currentIndex > replaceable.length()) {
+            throw new IllegalArgumentException();
+        }
+        this.currentIndex = currentIndex;
+    }
+
+    //// for StringPrep
+    /**
+     * Copies the whole text into <code>fillIn</code>, starting at
+     * <code>offset</code>.  The current index is not changed.
+     * @param fillIn destination array
+     * @param offset index in <code>fillIn</code> at which the first
+     *        code unit is stored
+     * @return the number of code units copied, i.e. the text length
+     * @exception IndexOutOfBoundsException if <code>offset</code> is
+     *            negative or the text does not fit into
+     *            <code>fillIn</code> starting at <code>offset</code>;
+     *            the message is the text length
+     */
+    public int getText(char[] fillIn, int offset){
+        int length = replaceable.length();
+        // Compare by subtraction: offset + length could overflow int and
+        // slip past the check, whereas fillIn.length - length cannot
+        // overflow because both operands are non-negative.
+        if(offset < 0 || offset > fillIn.length - length){
+            throw new IndexOutOfBoundsException(Integer.toString(length));
+        }
+        replaceable.getChars(0,length,fillIn,offset);
+        return length;
+    }
+
+    // private data members ----------------------------------------------------
+
+    /**
+     * Replaceable object; assigned once by the constructors
+     */
+    private final Replaceable replaceable;
+    /**
+     * Current index into the text, in 16-bit code units
+     */
+    private int currentIndex;
+
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/RuleCharacterIterator.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/RuleCharacterIterator.java
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+/*
+ **********************************************************************
+ * Author: Alan Liu
+ * Created: September 23 2003
+ * Since: ICU 2.8
+ **********************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.text.ParsePosition;
+
+/**
+ * An iterator that returns 32-bit code points.  This class is deliberately
+ * <em>not</em> related to any of the JDK or ICU4J character iterator classes
+ * in order to minimize complexity.
+ * @author Alan Liu
+ * @since ICU 2.8
+ */
+public class RuleCharacterIterator {
+
+    // TODO: Ideas for later.  (Do not implement if not needed, lest the
+    // code coverage numbers go down due to unused methods.)
+    // 1. Add a copy constructor, equals() method, clone() method.
+    // 2. Rather than return DONE, throw an exception if the end
+    // is reached -- this is an alternate usage model, probably not useful.
+    // 3. Return isEscaped from next().  If this happens,
+    // don't keep an isEscaped member variable.
+
+    /**
+     * Text being iterated.
+     */
+    private final String text;
+
+    /**
+     * Position of iterator.  This is the caller's object, so the caller
+     * observes every position change made here.
+     */
+    private final ParsePosition pos;
+
+    /**
+     * Symbol table used to parse and dereference variables.  May be null.
+     */
+    private final SymbolTable sym;
+
+    /**
+     * Current variable expansion, or null if none.
+     */
+    private char[] buf;
+
+    /**
+     * Position within buf[].  Meaningless if buf == null.
+     */
+    private int bufPos;
+
+    /**
+     * Flag indicating whether the last character was parsed from an escape.
+     */
+    private boolean isEscaped;
+
+    /**
+     * Value returned when there are no more characters to iterate.
+     */
+    public static final int DONE = -1;
+
+    /**
+     * Bitmask option to enable parsing of variable names.  If (options &amp;
+     * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
+     * its value.  Variables are parsed using the SymbolTable API.
+     */
+    public static final int PARSE_VARIABLES = 1;
+
+    /**
+     * Bitmask option to enable parsing of escape sequences.  If (options &amp;
+     * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
+     * to its value.  Escapes are parsed using Utility.unescapeAt().
+     */
+    public static final int PARSE_ESCAPES   = 2;
+
+    /**
+     * Bitmask option to enable skipping of whitespace.  If (options &amp;
+     * SKIP_WHITESPACE) != 0, then whitespace characters will be silently
+     * skipped, as if they were not present in the input.  Whitespace
+     * characters are defined by UCharacterProperty.isRuleWhiteSpace().
+     */
+    public static final int SKIP_WHITESPACE = 4;
+
+    /**
+     * Constructs an iterator over the given text, starting at the given
+     * position.
+     * @param text the text to be iterated
+     * @param sym the symbol table, or null if there is none.  If sym is null,
+     * then variables will not be dereferenced, even if the PARSE_VARIABLES
+     * option is set.
+     * @param pos upon input, the index of the next character to return.  If a
+     * variable has been dereferenced, then pos will <em>not</em> increment as
+     * characters of the variable value are iterated.
+     * @exception IllegalArgumentException if text is null or the index of
+     * pos is greater than the length of text
+     */
+    public RuleCharacterIterator(String text, SymbolTable sym,
+                                 ParsePosition pos) {
+        if (text == null || pos.getIndex() > text.length()) {
+            throw new IllegalArgumentException();
+        }
+        this.text = text;
+        this.sym = sym;
+        this.pos = pos;
+        buf = null;
+    }
+
+    /**
+     * Returns true if this iterator has no more characters to return,
+     * i.e. no variable expansion is active and the position equals the
+     * length of the text.
+     */
+    public boolean atEnd() {
+        return buf == null && pos.getIndex() == text.length();
+    }
+
+    /**
+     * Returns the next character using the given options, or DONE if there
+     * are no more characters, and advance the position to the next
+     * character.
+     * @param options one or more of the following options, bitwise-OR-ed
+     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
+     * @return the current 32-bit code point, or DONE
+     * @exception IllegalArgumentException if a referenced variable is
+     * undefined or an escape sequence is invalid
+     */
+    public int next(int options) {
+        int c = DONE;
+        isEscaped = false;
+
+        for (;;) {
+            c = _current();
+            _advance(UTF16.getCharCount(c));
+
+            // Variable references are only expanded from the main text
+            // (buf == null), never from inside another expansion.
+            if (c == SymbolTable.SYMBOL_REF && buf == null &&
+                (options & PARSE_VARIABLES) != 0 && sym != null) {
+                String name = sym.parseReference(text, pos, text.length());
+                // If name == null there was an isolated SYMBOL_REF;
+                // return it.  Caller must be prepared for this.
+                if (name == null) {
+                    break;
+                }
+                bufPos = 0;
+                buf = sym.lookup(name);
+                if (buf == null) {
+                    throw new IllegalArgumentException(
+                                "Undefined variable: " + name);
+                }
+                // Handle empty variable value
+                if (buf.length == 0) {
+                    buf = null;
+                }
+                continue;
+            }
+
+            if ((options & SKIP_WHITESPACE) != 0 &&
+                UCharacterProperty.isRuleWhiteSpace(c)) {
+                continue;
+            }
+
+            if (c == '\\' && (options & PARSE_ESCAPES) != 0) {
+                // unescapeAt() reports how much it consumed through
+                // offset[0]; that many code units are then skipped.
+                int[] offset = { 0 };
+                c = Utility.unescapeAt(lookahead(), offset);
+                jumpahead(offset[0]);
+                isEscaped = true;
+                if (c < 0) {
+                    throw new IllegalArgumentException("Invalid escape");
+                }
+            }
+
+            break;
+        }
+
+        return c;
+    }
+
+    /**
+     * Returns true if the last character returned by next() was
+     * escaped.  This will only be the case if the option passed in to
+     * next() included PARSE_ESCAPES and the next character was an
+     * escape sequence.
+     */
+    public boolean isEscaped() {
+        return isEscaped;
+    }
+
+    /**
+     * Returns true if this iterator is currently within a variable expansion.
+     */
+    public boolean inVariable() {
+        return buf != null;
+    }
+
+    /**
+     * Returns an object which, when later passed to setPos(), will
+     * restore this iterator's position.  Usage idiom:
+     *
+     * RuleCharacterIterator iterator = ...;
+     * Object pos = iterator.getPos(null); // allocate position object
+     * for (;;) {
+     *   pos = iterator.getPos(pos); // reuse position object
+     *   int c = iterator.next(...);
+     *   ...
+     * }
+     * iterator.setPos(pos);
+     *
+     * @param p a position object previously returned by getPos(),
+     * or null.  If not null, it will be updated and returned.  If
+     * null, a new position object will be allocated and returned.
+     * @return a position object which may be passed to setPos(),
+     * either `p,' or if `p' == null, a newly-allocated object
+     */
+    public Object getPos(Object p) {
+        // Layout of a position object:
+        //   Object[] { buf, int[] { text index, bufPos } }
+        if (p == null) {
+            return new Object[] {buf, new int[] {pos.getIndex(), bufPos}};
+        }
+        Object[] a = (Object[]) p;
+        a[0] = buf;
+        int[] v = (int[]) a[1];
+        v[0] = pos.getIndex();
+        v[1] = bufPos;
+        return p;
+    }
+
+    /**
+     * Restores this iterator to the position it had when getPos()
+     * returned the given object.
+     * @param p a position object previously returned by getPos()
+     */
+    public void setPos(Object p) {
+        Object[] a = (Object[]) p;
+        buf = (char[]) a[0];
+        int[] v = (int[]) a[1];
+        pos.setIndex(v[0]);
+        bufPos = v[1];
+    }
+
+    /**
+     * Skips ahead past any ignored characters, as indicated by the given
+     * options.  This is useful in conjunction with the lookahead() method.
+     *
+     * Currently, this only has an effect for SKIP_WHITESPACE.
+     * @param options one or more of the following options, bitwise-OR-ed
+     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
+     */
+    public void skipIgnored(int options) {
+        if ((options & SKIP_WHITESPACE) != 0) {
+            for (;;) {
+                int a = _current();
+                if (!UCharacterProperty.isRuleWhiteSpace(a)) break;
+                _advance(UTF16.getCharCount(a));
+            }
+        }
+    }
+
+    /**
+     * Returns a string containing the remainder of the characters to be
+     * returned by this iterator, without any option processing.  If the
+     * iterator is currently within a variable expansion, this will only
+     * extend to the end of the variable expansion.  This method is provided
+     * so that iterators may interoperate with string-based APIs.  The typical
+     * sequence of calls is to call skipIgnored(), then call lookahead(), then
+     * parse the string returned by lookahead(), then call jumpahead() to
+     * resynchronize the iterator.
+     * @return a string containing the characters to be returned by future
+     * calls to next()
+     */
+    public String lookahead() {
+        if (buf != null) {
+            return new String(buf, bufPos, buf.length - bufPos);
+        } else {
+            return text.substring(pos.getIndex());
+        }
+    }
+
+    /**
+     * Advances the position by the given number of 16-bit code units.
+     * This is useful in conjunction with the lookahead() method.
+     * The request is validated before any state is changed, so a
+     * rejected call leaves the iterator where it was.
+     * @param count the number of 16-bit code units to jump over
+     * @exception IllegalArgumentException if count is negative or would
+     * move past the end of the current variable expansion or, outside
+     * an expansion, past the end of the text
+     */
+    public void jumpahead(int count) {
+        if (count < 0) {
+            throw new IllegalArgumentException();
+        }
+        if (buf != null) {
+            // Compare against the remaining room instead of computing
+            // bufPos + count first: the sum could overflow, and updating
+            // bufPos before the check would leave it out of range on
+            // failure.
+            if (count > buf.length - bufPos) {
+                throw new IllegalArgumentException();
+            }
+            bufPos += count;
+            if (bufPos == buf.length) {
+                buf = null;
+            }
+        } else {
+            int index = pos.getIndex();
+            // Same ordering here: only touch the shared ParsePosition
+            // once the new index is known to be within the text.
+            if (count > text.length() - index) {
+                throw new IllegalArgumentException();
+            }
+            pos.setIndex(index + count);
+        }
+    }
+
+    /**
+     * Returns the current 32-bit code point without parsing escapes, parsing
+     * variables, or skipping whitespace.
+     * @return the current 32-bit code point, or DONE if no variable
+     * expansion is active and the position is at the end of the text
+     */
+    private int _current() {
+        if (buf != null) {
+            return UTF16.charAt(buf, 0, buf.length, bufPos);
+        } else {
+            int i = pos.getIndex();
+            return (i < text.length()) ? UTF16.charAt(text, i) : DONE;
+        }
+    }
+
+    /**
+     * Advances the position by the given amount.  Within a variable
+     * expansion the expansion is dropped once its end is reached;
+     * otherwise the text position is clamped to the length of the text.
+     * @param count the number of 16-bit code units to advance past
+     */
+    private void _advance(int count) {
+        if (buf != null) {
+            bufPos += count;
+            if (bufPos == buf.length) {
+                buf = null;
+            }
+        } else {
+            pos.setIndex(pos.getIndex() + count);
+            if (pos.getIndex() > text.length()) {
+                pos.setIndex(text.length());
+            }
+        }
+    }
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/SymbolTable.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/SymbolTable.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.text.ParsePosition;
+
+/**
+ * An interface that defines both lookup protocol and parsing of
+ * symbolic names.
+ *
+ * <p>A symbol table maintains two kinds of mappings.  The first is
+ * between symbolic names and their values.  For example, if the
+ * variable with the name "start" is set to the value "alpha"
+ * (perhaps, though not necessarily, through an expression such as
+ * "$start=alpha"), then the call lookup("start") will return the
+ * char[] array ['a', 'l', 'p', 'h', 'a'].
+ *
+ * <p>The second kind of mapping is between character values and
+ * UnicodeMatcher objects.  This is used by RuleBasedTransliterator,
+ * which uses characters in the private use area to represent objects
+ * such as UnicodeSets.  If U+E015 is mapped to the UnicodeSet [a-z],
+ * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
+ *
+ * <p>Finally, a symbol table defines parsing behavior for symbolic
+ * names.  All symbolic names start with the SYMBOL_REF character.
+ * When a parser encounters this character, it calls parseReference()
+ * with the position immediately following the SYMBOL_REF.  The symbol
+ * table parses the name, if there is one, and returns it.
+ *
+ * @draft ICU 2.8
+ * @deprecated This is a draft API and might change in a future release of ICU.
+ */
+@Deprecated
+public interface SymbolTable {
+
+    /**
+     * Marker character that introduces a symbol reference name.
+     * @draft ICU 2.8
+     * @deprecated This is a draft API and might change in a future release of ICU.
+     */
+    @Deprecated
+    char SYMBOL_REF = '$';
+
+    /**
+     * Looks up the characters bound to the given symbolic name.
+     * A <code>null</code> result means the name is unknown; a known name
+     * may still map to an array of length zero.
+     * @param s the symbolic name to look up
+     * @return a char array holding the value of the name, or null if
+     * there is no mapping for s.
+     * @draft ICU 2.8
+     * @deprecated This is a draft API and might change in a future release of ICU.
+     */
+    @Deprecated
+    char[] lookup(String s);
+
+    /**
+     * Looks up the UnicodeMatcher bound to the given character.
+     * A <code>null</code> result means nothing is bound to it.
+     * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
+     * @return the UnicodeMatcher object the given character stands for,
+     * or null if there is no mapping for ch.
+     * @draft ICU 2.8
+     * @deprecated This is a draft API and might change in a future release of ICU.
+     */
+    @Deprecated
+    UnicodeMatcher lookupMatcher(int ch);
+
+    /**
+     * Parses a symbol reference name out of the given string, beginning
+     * at the given position.  When no valid name is found -- the
+     * character at pos cannot start a name, or pos is at or beyond
+     * text.length() -- the method returns null and pos keeps its
+     * value.  Such a result indicates an isolated SYMBOL_REF
+     * character.
+     * @param text the text to scan for the name
+     * @param pos on entry, the index of the first character to parse,
+     * i.e. the character right after the SYMBOL_REF character.  On
+     * exit, the index just past the last parsed character, or the
+     * entry value if the parse failed.
+     * @param limit the index after the last character to be parsed.
+     * @return the parsed name, or null if no valid symbolic name
+     * starts at the given position.
+     * @draft ICU 2.8
+     * @deprecated This is a draft API and might change in a future release of ICU.
+     */
+    @Deprecated
+    String parseReference(String text, ParsePosition pos, int limit);
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/Trie.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/Trie.java
@@ -0,0 +1,419 @@
+/*
+ * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+ * <p>A trie is a kind of compressed, serializable table of values
+ * associated with Unicode code points (0..0x10ffff).</p>
+ * <p>This class defines the basic structure of a trie and provides methods
+ * to <b>retrieve the offsets to the actual data</b>.</p>
+ * <p>Data will be the form of an array of basic types, char or int.</p>
+ * <p>The actual data format will have to be specified by the user in the
+ * inner static interface com.ibm.icu.impl.Trie.DataManipulate.</p>
+ * <p>This trie implementation is optimized for getting offset while walking
+ * forward through a UTF-16 string.
+ * Therefore, the simplest and fastest access macros are the
+ * fromLead() and fromOffsetTrail() methods.
+ * The fromBMP() methods are a little more complicated; they get offsets even
+ * for lead surrogate codepoints, while the fromLead() methods get special
+ * "folded" offsets for lead surrogate code units if there is relevant data
+ * associated with them.
+ * From such a folded offsets, an offset needs to be extracted to supply
+ * to the fromOffsetTrail() methods.
+ * To handle such supplementary codepoints, some offset information are kept
+ * in the data.</p>
+ * <p>Methods in com.ibm.icu.impl.Trie.DataManipulate are called to retrieve
+ * that offset from the folded value for the lead surrogate unit.</p>
+ * <p>For examples of use, see com.ibm.icu.impl.CharTrie or
+ * com.ibm.icu.impl.IntTrie.</p>
+ * @author synwee
+ * @see com.ibm.icu.impl.CharTrie
+ * @see com.ibm.icu.impl.IntTrie
+ * @since release 2.1, Jan 01 2002
+ */
+public abstract class Trie
+{
+    // nested types ----------------------------------------------------
+
+    /**
+    * Hook for the user-specific layout of trie data values.
+    * Character data in a Trie has a different, user-specified format
+    * for different purposes, so the trie relies on an implementation of
+    * this interface to get at the surrogate offset information that is
+    * encapsulated within the data.
+    */
+    public interface DataManipulate
+    {
+        /**
+        * Extracts, from the data value of a lead surrogate, the index
+        * array offset of the index entries that belong to that lead
+        * surrogate.
+        * @param value data value for a surrogate from the trie, including
+        *        the folding offset
+        * @return data offset, or 0 if there is no data for the lead surrogate
+        */
+        int getFoldingOffset(int value);
+    }
+
+    // fallback used when a constructor receives no DataManipulate
+    private static final class DefaultGetFoldingOffset implements DataManipulate {
+        @Override public int getFoldingOffset(int value) {
+            return value;
+        }
+    }
+
+    // protected constructors ------------------------------------------
+
+    /**
+    * Builds a trie by reading its header and index from a stream.
+    * @param inputStream ICU data file input stream which contains the trie
+    * @param dataManipulate extracts folding offsets from data values;
+    *                       if null, data values are used as offsets as-is
+    * @throws IOException thrown when reading from the stream fails
+    * @throws IllegalArgumentException thrown when the signature or the
+    *                       shift options in the header are not as expected
+    */
+    protected Trie(InputStream inputStream,
+                   DataManipulate dataManipulate) throws IOException
+    {
+        final DataInputStream header = new DataInputStream(inputStream);
+        // The header starts with the magic number, then the options word.
+        final int magic = header.readInt();
+        m_options_      = header.readInt();
+
+        // checkHeader() also inspects m_options_, so it has to be set by now.
+        final boolean headerOk = checkHeader(magic);
+        if (!headerOk) {
+            throw new IllegalArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file");
+        }
+
+        m_dataManipulate_ = (dataManipulate == null)
+                            ? new DefaultGetFoldingOffset()
+                            : dataManipulate;
+        final int latin1Bit = m_options_ & HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
+        m_isLatin1Linear_ = latin1Bit != 0;
+        m_dataOffset_     = header.readInt();
+        m_dataLength_     = header.readInt();
+        unserialize(inputStream);
+    }
+
+    /**
+    * Builds a trie around an index array that is already in memory.
+    * @param index array to be used for index, must not be null
+    * @param options option bits used by the trie
+    * @param dataManipulate extracts folding offsets from data values;
+    *                       if null, data values are used as offsets as-is
+    */
+    protected Trie(char[] index, int options, DataManipulate dataManipulate)
+    {
+        m_options_        = options;
+        m_dataManipulate_ = (dataManipulate == null)
+                            ? new DefaultGetFoldingOffset()
+                            : dataManipulate;
+        final int latin1Bit = options & HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
+        m_isLatin1Linear_ = latin1Bit != 0;
+        m_index_          = index;
+        // The data offset is the number of index entries; a null index
+        // fails right here with a NullPointerException.
+        m_dataOffset_     = index.length;
+    }
+
+    // protected constants and data members ----------------------------
+
+    /**
+    * Displacement, in the index array, of the entries for lead surrogate
+    * code points.
+    * (0x10000 - 0xd800) >> INDEX_STAGE_1_SHIFT_, i.e. 0x2800 >> 5
+    */
+    protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5;
+    /**
+    * Number of bits by which an input index is shifted right. 1..9
+    */
+    protected static final int INDEX_STAGE_1_SHIFT_ = 5;
+    /**
+    * Number of bits by which index array values are shifted left.
+    * A larger shift raises the data size reachable with 16-bit index
+    * values, at the cost of compactability.
+    * It requires blocks of stage 2 data to be aligned by
+    * DATA_GRANULARITY.
+    * 0..INDEX_STAGE_1_SHIFT_
+    */
+    protected static final int INDEX_STAGE_2_SHIFT_ = 2;
+    /**
+     * Length, in data values, of one stage 2 (data array) block.
+     */
+    protected static final int DATA_BLOCK_LENGTH = 1 << INDEX_STAGE_1_SHIFT_;
+    /**
+    * Mask that keeps the lower bits of an input index.
+    * Equals DATA_BLOCK_LENGTH - 1.
+    */
+    protected static final int INDEX_STAGE_3_MASK_ = DATA_BLOCK_LENGTH - 1;
+    /** How many bits of a trail surrogate take part in index table lookups. */
+    protected static final int SURROGATE_BLOCK_BITS = 10 - INDEX_STAGE_1_SHIFT_;
+    /**
+     * Number of index (stage 1) entries per lead surrogate.
+     * This is the number of index entries for 1024 trail surrogates,
+     * i.e. 0x400 >> INDEX_STAGE_1_SHIFT_
+     */
+    protected static final int SURROGATE_BLOCK_COUNT = 1 << SURROGATE_BLOCK_BITS;
+    /** Number of index (stage 1) entries that belong to the BMP. */
+    protected static final int BMP_INDEX_LENGTH = 0x10000 >> INDEX_STAGE_1_SHIFT_;
+    /**
+    * Mask for the surrogate bits of a code point, used when retrieving
+    * supplementary values
+    */
+    protected static final int SURROGATE_MASK_ = 0x3FF;
+    /**
+    * Index array of the trie, held as UTF16 characters
+    */
+    protected char[] m_index_;
+    /**
+    * Parses the data values on behalf of the trie.
+    * The implementation is provided by the user of this class
+    */
+    protected DataManipulate m_dataManipulate_;
+    /**
+    * Offset at which the data portion of the trie starts. CharTrie keeps
+    * index and data in one char array, so this marks the initial offset
+    * of the data portion in it.
+    * Note this index always points to the initial value.
+    */
+    protected int m_dataOffset_;
+    /**
+    * Size of the data array
+    */
+    protected int m_dataLength_;
+
+    // protected methods -----------------------------------------------
+
+    /**
+    * Computes the data offset that a surrogate pair maps to.
+    * @param lead lead surrogate
+    * @param trail trailing surrogate
+    * @return offset to data
+    */
+    protected abstract int getSurrogateOffset(char lead, char trail);
+
+    /**
+    * Reads the data value stored at the given index.
+    * @param index position of the value to be retrieved
+    * @return 32 bit value
+    */
+    protected abstract int getValue(int index);
+
+    /**
+    * Returns the default initial value of the trie.
+    * @return 32 bit value
+    */
+    protected abstract int getInitialValue();
+
+    /**
+    * Computes the data offset for ch, looked up in the part of the index
+    * that starts at the given offset.
+    * For a non-supplementary character it is enough to call
+    * <p>
+    * getRawOffset(0, ch);
+    * </p>
+    * For a supplementary character formed by a lead and a trail
+    * surrogate, the index offset has to come from the folding offset of
+    * the lead surrogate instead. See getSurrogateOffset().
+    * @param offset position in the index at which the lookup for ch starts
+    * @param ch code unit whose upper bits select the index entry
+    * @return offset to the data
+    */
+    protected final int getRawOffset(int offset, char ch)
+    {
+        final int blockStart = m_index_[offset + (ch >> INDEX_STAGE_1_SHIFT_)]
+                               << INDEX_STAGE_2_SHIFT_;
+        return blockStart + (ch & INDEX_STAGE_3_MASK_);
+    }
+
+    /**
+    * Computes the data offset for a BMP character.
+    * A lead surrogate is treated as a normal code point here.
+    * @param ch BMP character
+    * @return offset to data
+    */
+    protected final int getBMPOffset(char ch)
+    {
+        final boolean isLead = ch >= UTF16.LEAD_SURROGATE_MIN_VALUE
+                               && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE;
+        // Lead surrogates, read as code points, are found in a displaced
+        // part of the index.
+        return getRawOffset(isLead ? LEAD_INDEX_OFFSET_ : 0, ch);
+    }
+
+    /**
+    * Computes the data offset for a lead surrogate taken as a code unit
+    * (plain index lookup, no lead surrogate displacement).
+    * The data found at the returned offset may carry the folding offset
+    * information needed for the trail surrogate that follows.
+    * @param ch lead surrogate character
+    * @return offset to data
+    */
+    protected final int getLeadOffset(char ch)
+    {
+        return getRawOffset(0, ch);
+    }
+
+    /**
+    * Computes the data offset for an arbitrary code point.
+    * Dispatches on the range of ch: below the surrogates, the rest of
+    * the BMP, or supplementary.
+    * @param ch codepoint
+    * @return offset to data, or -1 if ch is negative or greater than
+    *         UCharacter.MAX_VALUE
+    */
+    protected final int getCodePointOffset(int ch)
+    {
+        if (ch < 0) {
+            return -1;
+        }
+        if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
+            // Below the surrogates (D800) the plain index lookup is enough.
+            return getRawOffset(0, (char)ch);
+        }
+        if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
+            // Rest of the BMP; lead surrogates count as code points here.
+            return getBMPOffset((char)ch);
+        }
+        if (ch > UCharacter.MAX_VALUE) {
+            return -1;   // beyond the last code point
+        }
+        // Supplementary: pass the lead surrogate and the low ten bits
+        // of the code point on to the subclass.
+        final char lead = UTF16.getLeadSurrogate(ch);
+        return getSurrogateOffset(lead, (char)(ch & SURROGATE_MASK_));
+    }
+
+    /**
+    * <p>Reads m_dataOffset_ index entries from the input stream.</p>
+    * <p>Child classes override this method.</p>
+    * @param inputStream input stream containing the trie information
+    * @exception IOException thrown when data reading fails.
+    */
+    protected void unserialize(InputStream inputStream) throws IOException
+    {
+        // indexLength is a multiple of 1024 >> INDEX_STAGE_2_SHIFT_
+        m_index_ = new char[m_dataOffset_];
+        final DataInputStream reader = new DataInputStream(inputStream);
+        for (int slot = 0; slot < m_dataOffset_; slot++) {
+            m_index_[slot] = reader.readChar();
+        }
+    }
+
+    /**
+    * Tells whether the options mark this trie as holding 32 bit data.
+    * @return true if options specifies this is a 32 bit trie
+    */
+    protected final boolean isIntTrie()
+    {
+        return 0 != (m_options_ & HEADER_OPTIONS_DATA_IS_32_BIT_);
+    }
+
+    /**
+    * Tells whether this trie holds 16 bit data, i.e. is not an int trie.
+    * @return true if this is a 16 bit trie
+    */
+    protected final boolean isCharTrie()
+    {
+        return !isIntTrie();
+    }
+
+    // header constants and private data members -----------------------
+
+    /**
+    * Options bit that is set when Latin-1 data is stored linearly
+    */
+    protected static final int HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_ = 0x200;
+    /**
+    * Magic number that authenticates the byte block, "Trie" in US-ASCII
+    */
+    protected static final int HEADER_SIGNATURE_ = 0x54726965;
+    /**
+    * Layout of the shift values inside the header options
+    */
+    private static final int HEADER_OPTIONS_SHIFT_MASK_ = 0xF;
+    protected static final int HEADER_OPTIONS_INDEX_SHIFT_ = 4;
+    protected static final int HEADER_OPTIONS_DATA_IS_32_BIT_ = 0x100;
+
+    /**
+    * Whether the options flag the Latin-1 quick access data block as linear
+    */
+    private boolean m_isLatin1Linear_;
+
+    /**
+    * <p>Trie options field.</p>
+    * <p>options bit field:<br>
+    * 9  1 = Latin-1 data is stored linearly at data + DATA_BLOCK_LENGTH<br>
+    * 8  0 = 16-bit data, 1=32-bit data<br>
+    * 7..4  INDEX_STAGE_2_SHIFT_   // 0..INDEX_STAGE_1_SHIFT_<br>
+    * 3..0  INDEX_STAGE_1_SHIFT_   // 1..9</p>
+    */
+    private int m_options_;
+
+    // private methods ---------------------------------------------------
+
+    /**
+    * Validates the raw data header: the signature passed in and the shift
+    * values found in m_options_, which must already be set.
+    * @param signature magic number read from the start of the header
+    * @return true if signature and shift options are the expected ones
+    */
+    private boolean checkHeader(int signature)
+    {
+        // Magic number to authenticate the data: "Trie" in big-endian
+        // US-ASCII (0x54726965).
+        if (signature != HEADER_SIGNATURE_) {
+            return false;
+        }
+
+        // Bits 3..0 of the options hold the stage 1 shift, bits 7..4 the
+        // stage 2 shift; both must equal the values compiled into this
+        // class.
+        final int stage1Shift = m_options_ & HEADER_OPTIONS_SHIFT_MASK_;
+        final int stage2Shift = (m_options_ >> HEADER_OPTIONS_INDEX_SHIFT_)
+                                & HEADER_OPTIONS_SHIFT_MASK_;
+        final boolean shiftsMatch = stage1Shift == INDEX_STAGE_1_SHIFT_
+                                    && stage2Shift == INDEX_STAGE_2_SHIFT_;
+        return shiftsMatch;
+    }
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/TrieIterator.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/TrieIterator.java
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+/**
+ * <p>Class enabling iteration of the values in a Trie.</p>
+ * <p>Result of each iteration contains the interval of codepoints that have
+ * the same value type and the value type itself.</p>
+ * <p>The comparison of each codepoint value is done via extract(), which the
+ * default implementation is to return the value as it is.</p>
+ * <p>Method extract() can be overwritten to perform manipulations on
+ * codepoint values in order to perform specialized comparison.</p>
+ * <p>TrieIterator is designed to be a generic iterator for the CharTrie
+ * and the IntTrie, hence to accommodate both types of data, the return
+ * result will be in terms of int (32 bit) values.</p>
+ * <p>See com.ibm.icu.text.UCharacterTypeIterator for examples of use.</p>
+ * <p>Notes for porting utrie_enum from icu4c to icu4j:<br>
+ * Internally, icu4c's utrie_enum performs all iterations in its body. In Java
+ * sense, the caller will have to pass a object with a callback function
+ * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit,
+ * uint32_t value) into utrie_enum. utrie_enum will then find ranges of
+ * codepoints with the same value as determined by
+ * UTrieEnumValue(const void *context, uint32_t value). for each range,
+ * utrie_enum calls the callback function to perform a task. In this way,
+ * icu4c performs the iteration within utrie_enum.
+ * To follow the JDK model, icu4j is slightly different from icu4c.
+ * Instead of requesting the caller to implement an object for a callback.
+ * The caller will have to implement a subclass of TrieIterator, fleshing out
+ * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j,
+ * the caller will have to code his own iteration and flesh out the task
+ * (equivalent to UTrieEnumRange) to be performed in the iteration loop.
+ * </p>
+ * <p>There are basically 3 usage scenarios for porting:</p>
+ * <p>1) UTrieEnumValue is the only implemented callback then just implement a
+ * subclass of TrieIterator and override the extract(int) method. The
+ * extract(int) method is analogous to the UTrieEnumValue callback.
+ * </p>
+ * <p>2) UTrieEnumValue and UTrieEnumRange both are implemented then implement
+ * a subclass of TrieIterator, override the extract method and iterate, e.g
+ * </p>
+ * <p>utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange,
+ *               set);<br>
+ * In Java :<br>
+ * <pre>
+ * class TrieIteratorImpl extends TrieIterator{
+ *     public TrieIteratorImpl(Trie data){
+ *         super(data);
+ *     }
+ *     public int extract(int value){
+ *         // port the implementation of _enumPropertyStartsValue here
+ *     }
+ * }
+ * ....
+ * TrieIterator fcdIter  = new TrieIteratorImpl(fcdTrieImpl.fcdTrie);
+ * while(fcdIter.next(result)) {
+ *     // port the implementation of _enumPropertyStartsRange
+ * }
+ * </pre>
+ * </p>
+ * <p>3) UTrieEnumRange is the only implemented callback then just implement
+ * the while loop, when utrie_enum is called
+ * <pre>
+ * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
+ * TrieIterator fcdIter  = new TrieIterator(fcdTrieImpl.fcdTrie);
+ * while(fcdIter.next(result)){
+ *     set.add(result.start);
+ * }
+ * </pre>
+ * </p>
+ * @author synwee
+ * @see com.ibm.icu.impl.Trie
+ * @see com.ibm.icu.lang.UCharacterTypeIterator
+ * @since release 2.1, Jan 17 2002
+ */
+public class TrieIterator implements RangeValueIterator
+{
+
+    // public constructor ---------------------------------------------
+
+    /**
+    * TrieEnumeration constructor
+    * @param trie to be used
+    * @exception IllegalArgumentException throw when argument is null.
+    */
+    public TrieIterator(Trie trie)
+    {
+        if (trie == null) {
+            throw new IllegalArgumentException(
+                                          "Argument trie cannot be null");
+        }
+        m_trie_             = trie;
+        // synwee: check that extract belongs to the child class
+        m_initialValue_     = extract(m_trie_.getInitialValue());
+        reset();
+    }
+
+    // public methods -------------------------------------------------
+
+    /**
+    * <p>Advances to the next range of codepoints that share a value.</p>
+    * <p>The range is computed during this call and stored in the
+    * argument element. Once the iteration is exhausted the method
+    * returns false and leaves element untouched.</p>
+    * @param element receives the result
+    * @return true if a range was produced, false at the end of the
+    *         iteration.
+    * @see com.ibm.icu.util.RangeValueIterator.Element
+    */
+    public final boolean next(Element element)
+    {
+        if (m_nextCodepoint_ > UCharacter.MAX_VALUE) {
+            return false;
+        }
+        final boolean inBMP = m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE;
+        if (!inBMP || !calculateNextBMPElement(element)) {
+            // Either already past the BMP, or the BMP step asked to go on.
+            calculateNextSupplementaryElement(element);
+        }
+        return true;
+    }
+
+    /**
+    * Resets the iterator to the beginning of the iteration
+    */
+    public final void reset()
+    {
+        m_currentCodepoint_ = 0;
+        m_nextCodepoint_    = 0;
+        m_nextIndex_        = 0;
+        m_nextBlock_ = m_trie_.m_index_[0] << Trie.INDEX_STAGE_2_SHIFT_;
+        if (m_nextBlock_ == 0) {
+            m_nextValue_ = m_initialValue_;
+        }
+        else {
+            m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_));
+        }
+        m_nextBlockIndex_ = 0;
+        m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_;
+    }
+
+    // protected methods ----------------------------------------------
+
+    /**
+    * Maps a raw trie value to the 32 bit value that is used when values
+    * are compared during iteration.
+    * Override this method if special manipulation is needed to obtain a
+    * relevant comparison value.
+    * The default implementation returns the value unchanged.
+    * @param value a value from the trie
+    * @return extracted value
+    */
+    protected int extract(int value)
+    {
+        return value;
+    }
+
+    // private methods ------------------------------------------------
+
+    /**
+    * Stores one range in the result object
+    * @param element result object to fill in
+    * @param start first codepoint of the range
+    * @param limit codepoint right after the range (end + 1)
+    * @param value value shared by the range
+    */
+    private void setResult(Element element, int start, int limit,
+                           int value)
+    {
+        element.value = value;
+        element.limit = limit;
+        element.start = start;
+    }
+
+    /**
+    * Computes the next range of equal values inside the BMP.
+    * next() calls this while m_nextCodepoint_ is below
+    * UCharacter.SUPPLEMENTARY_MIN_VALUE.
+    * We always store the next element before it is requested.
+    * If the loop runs to the end of the BMP without closing a range,
+    * false is returned so that the supplementary planes are tried next.
+    * @param element return result object; set only when true is returned
+    * @return true if the next range is found, false if we have to proceed to
+    *         the supplementary range.
+    */
+    private final boolean calculateNextBMPElement(Element element)
+    {
+        int currentBlock    = m_nextBlock_;
+        int currentValue    = m_nextValue_;
+        m_currentCodepoint_ = m_nextCodepoint_;
+        m_nextCodepoint_ ++;
+        m_nextBlockIndex_ ++;
+        if (!checkBlockDetail(currentValue)) {
+            setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+                      currentValue);
+            return true;
+        }
+        // TODO(synwee): check that next block index == 0 here
+        // enumerate BMP - the main loop enumerates data blocks
+        while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) {
+            m_nextIndex_ ++;
+            // because of the way a character is split to form the index,
+            // the first lead surrogate and the first trail surrogate
+            // cannot fall in the middle of a block
+            if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_) {
+                // skip the index entries for lead surrogate code units,
+                // go to the entries for lead surrogate codepoints
+                m_nextIndex_ = BMP_INDEX_LENGTH_;
+            }
+            else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_) {
+                // go back to the entries for regular BMP code points
+                m_nextIndex_ = m_nextCodepoint_ >> Trie.INDEX_STAGE_1_SHIFT_;
+            }
+
+            m_nextBlockIndex_ = 0;
+            if (!checkBlock(currentBlock, currentValue)) {
+                setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+                          currentValue);
+                return true;
+            }
+        }
+        m_nextCodepoint_ --;   // step one back since this value has not
+        m_nextBlockIndex_ --;  // been retrieved yet.
+        return false;
+    }
+
+    /**
+    * Finds the next supplementary element.
+    * For each entry in the trie, the value to be delivered is passed through
+    * extract().
+    * We always store the next element before it is requested.
+    * Called after calculateNextBMPElement() completes its round of BMP
+    * characters. There is a slight difference in the usage of
+    * m_currentCodepoint_ here as compared to calculateNextBMPElement().
+    * Though both represent the lower bound of the next element, in
+    * calculateNextBMPElement() it gets set at the start of every call,
+    * whereas here m_currentCodepoint_ already contains the lower bound of
+    * the next element (passed down from calculateNextBMPElement()), so we
+    * keep it until a range is delivered and only then reset it.
+    * Note, if there are no more iterations, it will never get to here.
+    * Blocked out by next().
+    * @param element return result object
+    */
+    private final void calculateNextSupplementaryElement(Element element)
+    {
+        int currentValue = m_nextValue_;
+        int currentBlock = m_nextBlock_;
+        m_nextCodepoint_ ++;
+        m_nextBlockIndex_ ++;
+
+        if (UTF16.getTrailSurrogate(m_nextCodepoint_)
+                                        != UTF16.TRAIL_SURROGATE_MIN_VALUE) {
+            // this piece is only called when we are in the middle of a lead
+            // surrogate block
+            if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) {
+                setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+                          currentValue);
+                m_currentCodepoint_ = m_nextCodepoint_; // next range starts here
+                return;
+            }
+            // we have cleared one block
+            m_nextIndex_ ++;
+            m_nextTrailIndexOffset_ ++;
+            if (!checkTrailBlock(currentBlock, currentValue)) {
+                setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+                          currentValue);
+                m_currentCodepoint_ = m_nextCodepoint_;
+                return;
+            }
+        }
+        int nextLead  = UTF16.getLeadSurrogate(m_nextCodepoint_);
+        // enumerate supplementary code points
+        while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) {
+            // lead surrogate access
+            int leadBlock =
+                   m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
+                                                   Trie.INDEX_STAGE_2_SHIFT_;
+            if (leadBlock == m_trie_.m_dataOffset_) {
+                // no entries for a whole block of lead surrogates
+                if (currentValue != m_initialValue_) {
+                    m_nextValue_      = m_initialValue_; // value that follows
+                    m_nextBlock_      = 0;
+                    m_nextBlockIndex_ = 0;
+                    setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+                              currentValue);
+                    m_currentCodepoint_ = m_nextCodepoint_;
+                    return;
+                }
+
+                nextLead += DATA_BLOCK_LENGTH_;
+                // number of total affected supplementary codepoints in one
+                // block
+                // this is not a simple addition of
+                // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider
+                // that we might have moved some of the codepoints
+                m_nextCodepoint_ = UCharacterProperty.getRawSupplementary(
+                                     (char)nextLead,
+                                     (char)UTF16.TRAIL_SURROGATE_MIN_VALUE);
+                continue;
+            }
+            if (m_trie_.m_dataManipulate_ == null) {
+                throw new NullPointerException(
+                            "The field DataManipulate in this Trie is null");
+            }
+            // enumerate trail surrogates for this lead surrogate
+            m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
+                               m_trie_.getValue(leadBlock +
+                                   (nextLead & Trie.INDEX_STAGE_3_MASK_)));
+            if (m_nextIndex_ <= 0) {
+                // no data for this lead surrogate
+                if (currentValue != m_initialValue_) {
+                    m_nextValue_      = m_initialValue_; // value that follows
+                    m_nextBlock_      = 0;
+                    m_nextBlockIndex_ = 0;
+                    setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+                              currentValue);
+                    m_currentCodepoint_ = m_nextCodepoint_;
+                    return;
+                }
+                m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_;
+            } else {
+                m_nextTrailIndexOffset_ = 0;
+                if (!checkTrailBlock(currentBlock, currentValue)) {
+                    setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+                              currentValue);
+                    m_currentCodepoint_ = m_nextCodepoint_;
+                    return;
+                }
+            }
+            nextLead ++;
+         }
+
+         // deliver last range
+         setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1,
+                   currentValue);
+    }
+
+    /**
+    * Scans the remainder of the current data block.
+    * Starting at offset m_nextBlockIndex_ of m_nextBlock_, walks forward while
+    * the extracted values equal currentValue, advancing m_nextBlockIndex_ and
+    * m_nextCodepoint_ together.
+    * On entry the m_*_ members describe the next codepoint whose value has
+    * not been examined; when false is returned they describe the codepoint
+    * whose differing value was just stored in m_nextValue_.
+    * @param currentValue the value which other codepoints are tested against
+    * @return true if the rest of the block has the same value as
+    *         currentValue, false as soon as a different value is found.
+    */
+    private final boolean checkBlockDetail(int currentValue)
+    {
+        for (; m_nextBlockIndex_ < DATA_BLOCK_LENGTH_;
+               ++ m_nextBlockIndex_, ++ m_nextCodepoint_) {
+            final int dataOffset = m_nextBlock_ + m_nextBlockIndex_;
+            m_nextValue_ = extract(m_trie_.getValue(dataOffset));
+            if (m_nextValue_ != currentValue) {
+                // leave the counters on the codepoint that differs
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+    * Checks the data block selected by m_nextIndex_ against currentValue.
+    * A block that repeats the previous, uniformly filled block, or the
+    * all-initial-value block while the initial value is being matched, is
+    * skipped as a whole; any other block is scanned by checkBlockDetail().
+    * Note m_*_ variables at this point is the next codepoint whose value
+    * has not been calculated.
+    * @param currentBlock the initial block containing all currentValue
+    * @param currentValue the value which other codepoints are tested against
+    * @return true if the whole block has the same value as currentValue or if
+    *              the whole block has been calculated, false otherwise.
+    */
+    private final boolean checkBlock(int currentBlock, int currentValue)
+    {
+        m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] <<
+                                                  Trie.INDEX_STAGE_2_SHIFT_;
+        final boolean repeatsFilledBlock = m_nextBlock_ == currentBlock
+            && (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_;
+        if (repeatsFilledBlock) {
+            // the block is the same as the previous one, filled with
+            // currentValue
+            m_nextCodepoint_ += DATA_BLOCK_LENGTH_;
+            return true;
+        }
+        if (m_nextBlock_ != 0) {
+            // a regular data block has to be compared entry by entry
+            return checkBlockDetail(currentValue);
+        }
+        // this is the all-initial-value block
+        if (currentValue != m_initialValue_) {
+            // the range ends where this block begins
+            m_nextValue_      = m_initialValue_;
+            m_nextBlockIndex_ = 0;
+            return false;
+        }
+        m_nextCodepoint_ += DATA_BLOCK_LENGTH_;
+        return true;
+    }
+
+    /**
+    * Checks the data blocks of one lead surrogate against currentValue.
+    * Starting at entry m_nextTrailIndexOffset_, every remaining stage 1
+    * index entry of the lead surrogate is handed to checkBlock() until a
+    * block reports a different value.
+    * Note m_*_ variables at this point is the next codepoint whose value
+    * has not been calculated.
+    * @param currentBlock the initial block containing all currentValue
+    * @param currentValue the value which other codepoints are tested against
+    * @return true if all remaining blocks have the same value as
+    *         currentValue, false as soon as checkBlock() returns false.
+    */
+    private final boolean checkTrailBlock(int currentBlock,
+                                          int currentValue)
+    {
+        // enumerate code points for this lead surrogate; the offset within
+        // the lead surrogate and the index position advance together
+        for (; m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_;
+               m_nextTrailIndexOffset_ ++, m_nextIndex_ ++) {
+            // if we ever reach here, we are at the start of a new block
+            m_nextBlockIndex_ = 0;
+            final boolean blockMatches =
+                                   checkBlock(currentBlock, currentValue);
+            if (!blockMatches) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+    * Checks whether the offset to the trail indexes, m_nextIndex_, is zero
+    * or negative. If so, m_nextCodepoint_ is advanced by
+    * TRAIL_SURROGATE_COUNT_ - 1, m_nextIndex_ becomes the folding offset of
+    * the lead surrogate there minus one, and m_nextBlockIndex_ the block length.
+    * This is used only for the supplementary codepoints because
+    * the offset to the trail indexes could be 0.
+    * @return true if we are at the start of an initial block.
+    */
+    private final boolean checkNullNextTrailIndex()
+    {
+        if (m_nextIndex_ > 0) {
+            return false;
+        }
+        m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
+        final int lead = UTF16.getLeadSurrogate(m_nextCodepoint_);
+        final int stage1Entry =
+                          m_trie_.m_index_[lead >> Trie.INDEX_STAGE_1_SHIFT_];
+        final int leadBlock = stage1Entry << Trie.INDEX_STAGE_2_SHIFT_;
+        if (m_trie_.m_dataManipulate_ == null) {
+            throw new NullPointerException(
+                            "The field DataManipulate in this Trie is null");
+        }
+        // the folding offset of the lead surrogate, less one
+        m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
+                           m_trie_.getValue(leadBlock +
+                               (lead & Trie.INDEX_STAGE_3_MASK_))) - 1;
+        m_nextBlockIndex_ = DATA_BLOCK_LENGTH_;
+        return true;
+    }
+
+    // private data members --------------------------------------------
+
+    /**
+    * Number of stage 1 index entries that cover the BMP (0x10000 codepoints)
+    */
+    private static final int BMP_INDEX_LENGTH_ =
+                                        0x10000 >> Trie.INDEX_STAGE_1_SHIFT_;
+    /**
+    * Lead surrogate minimum value (U+D800)
+    */
+    private static final int LEAD_SURROGATE_MIN_VALUE_ = 0xD800;
+    /**
+    * Trail surrogate minimum value (U+DC00)
+    */
+    private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00;
+    /**
+    * Number of trail surrogates (0x400 = 1024)
+    */
+    private static final int TRAIL_SURROGATE_COUNT_ = 0x400;
+    /**
+    * Number of stage 1 indexes for supplementary calculations that map to
+    * each lead surrogate character.
+    * See second pass into getRawOffset for the trail surrogate character.
+    * 10 for significant number of bits for trail surrogates, 5 for what we
+    * discard during shifting.
+    */
+    private static final int TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_ =
+                                    1 << (10 - Trie.INDEX_STAGE_1_SHIFT_);
+    /**
+    * Number of data values in a stage 2 (data array) block.
+    */
+    private static final int DATA_BLOCK_LENGTH_ =
+                                              1 << Trie.INDEX_STAGE_1_SHIFT_;
+    /**
+    * Trie instance
+    */
+    private Trie m_trie_;
+    /**
+    * Initial value for trie values
+    */
+    private int m_initialValue_;
+    /**
+    * Next element results and data.
+    */
+    private int m_currentCodepoint_;     // lower bound of the range being built
+    private int m_nextCodepoint_;        // next codepoint to be examined
+    private int m_nextValue_;            // extracted value found at that codepoint
+    private int m_nextIndex_;            // position in the trie's m_index_ array
+    private int m_nextBlock_;            // data offset of the block being scanned
+    private int m_nextBlockIndex_;       // offset inside m_nextBlock_
+    private int m_nextTrailIndexOffset_; // index entries done for this lead surrogate
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/UBiDiProps.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/UBiDiProps.java
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+*   file name:  UBiDiProps.java
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005jan16
+*   created by: Markus W. Scherer
+*
+*   Low-level Unicode bidi/shaping properties access.
+*   Java port of ubidi_props.h/.c.
+*/
+
+package sun.text.normalizer;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+public final class UBiDiProps {
+    // constructors etc. --------------------------------------------------- ***
+    // port of ubidi_openProps(); loads DATA_FILE_NAME through readData()
+    public UBiDiProps() throws IOException{
+        InputStream is=ICUData.getStream(DATA_FILE_NAME);
+        BufferedInputStream b=new BufferedInputStream(is, 4096 /* data buffer size */);
+        try {
+            readData(b);
+        } finally {
+            b.close(); // also closes is; runs even when readData() throws
+        }
+    }
+
+    // reads header, indexes[], trie, mirrors[], jgArray[]; IOException if invalid
+    private void readData(InputStream is) throws IOException {
+        DataInputStream inputStream=new DataInputStream(is);
+
+        // read the header
+        ICUBinary.readHeader(inputStream, FMT, new IsAcceptable());
+
+        // read indexes[]; entry 0 is its length and must cover IX_JG_LIMIT
+        int i, count;
+        count=inputStream.readInt();
+        if(count<=IX_JG_LIMIT) {
+            throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
+        }
+        indexes=new int[count];
+        indexes[IX_INDEX_TOP]=count;
+        for(i=1; i<count; ++i) {
+            indexes[i]=inputStream.readInt();
+        }
+
+        // read the trie
+        trie=new CharTrie(inputStream, null);
+
+        // read mirrors[]
+        count=indexes[IX_MIRROR_LENGTH];
+        if(count>0) {
+            mirrors=new int[count];
+            for(i=0; i<count; ++i) {
+                mirrors[i]=inputStream.readInt();
+            }
+        }
+
+        // read jgArray[]; a negative length can only come from corrupt data
+        count=indexes[IX_JG_LIMIT]-indexes[IX_JG_START];
+        if(count<0) {
+            throw new IOException("jgArray limit before start in "+DATA_FILE_NAME);
+        }
+        jgArray=new byte[count];
+        inputStream.readFully(jgArray);
+    }
+
+    // implement ICUBinary.Authenticate; static, it uses no instance state
+    private static final class IsAcceptable implements ICUBinary.Authenticate {
+        public boolean isDataVersionAcceptable(byte version[]) {
+            return version[0]==1 &&
+                   version[2]==Trie.INDEX_STAGE_1_SHIFT_ && version[3]==Trie.INDEX_STAGE_2_SHIFT_;
+        }
+    }
+
+    // UBiDiProps singleton, created on first use by getSingleton()
+    private static UBiDiProps gBdp=null;
+
+    // port of ubidi_getSingleton(); IOException if the data cannot be loaded
+    public static final synchronized UBiDiProps getSingleton() throws IOException {
+        if(gBdp==null) {
+            gBdp=new UBiDiProps();
+        }
+        return gBdp;
+    }
+
+    // UBiDiProps dummy singleton
+    private static UBiDiProps gBdpDummy=null;
+
+    private UBiDiProps(boolean makeDummy) { // ignore makeDummy, only creates a unique signature
+        indexes=new int[IX_TOP];
+        indexes[0]=IX_TOP;
+        trie=new CharTrie(0, 0, null); // dummy trie, always returns 0
+    }
+
+    /**
+     * Get a singleton dummy object, one that works with no real data.
+     * This can be used when the real data is not available.
+     * Using the dummy can reduce checks for available data after an initial failure.
+     * Port of ucase_getDummy().
+     */
+    public static final synchronized UBiDiProps getDummy() {
+        if(gBdpDummy==null) {
+            gBdpDummy=new UBiDiProps(true);
+        }
+        return gBdpDummy;
+    }
+
+    // returns the trie value of code point c masked with CLASS_MASK
+    public final int getClass(int c) {
+        return getClassFromProps(trie.getCodePointValue(c));
+    }
+
+    // data members -------------------------------------------------------- ***
+    private int indexes[];
+    private int mirrors[];
+    private byte jgArray[];
+
+    private CharTrie trie;
+
+    // data format constants ----------------------------------------------- ***
+    private static final String DATA_FILE_NAME = "/sun/text/resources/ubidi.icu";
+
+    /* format "BiDi" */
+    private static final byte FMT[]={ 0x42, 0x69, 0x44, 0x69 };
+
+    /* indexes into indexes[] */
+    private static final int IX_INDEX_TOP=0;
+    private static final int IX_MIRROR_LENGTH=3;
+    private static final int IX_JG_START=4;
+    private static final int IX_JG_LIMIT=5;
+
+    private static final int IX_TOP=16;
+
+    private static final int CLASS_MASK=    0x0000001f;
+
+    private static final int getClassFromProps(int props) {
+        return props&CLASS_MASK;
+    }
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/UCharacter.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/UCharacter.java
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.io.IOException;
+import java.util.MissingResourceException;
+
+/**
+ * <p>
+ * The UCharacter class provides extensions to the
+ * <a href="https://docs.oracle.com/javase/1.5.0/docs/api/java/lang/Character.html">
+ * java.lang.Character</a> class. These extensions provide support for
+ * more Unicode properties and together with the <a href=../text/UTF16.html>UTF16</a>
+ * class, provide support for supplementary characters (those with code
+ * points above U+FFFF).
+ * Each ICU release supports the latest version of Unicode available at that time.
+ * </p>
+ * <p>
+ * Code points are represented in these API using ints. While it would be
+ * more convenient in Java to have a separate primitive datatype for them,
+ * ints suffice in the meantime.
+ * </p>
+ * <p>
+ * To use this class please add the jar file name icu4j.jar to the
+ * class path, since it contains data files which supply the information used
+ * by this file.<br>
+ * E.g. In Windows <br>
+ * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar</code>.<br>
+ * Otherwise, another method would be to copy the files uprops.dat and
+ * unames.icu from the icu4j source subdirectory
+ * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
+ * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
+ * </p>
+ * <p>
+ * Aside from the additions for UTF-16 support, and the updated Unicode
+ * properties, the main differences between UCharacter and Character are:
+ * <ul>
+ * <li> UCharacter is not designed to be a char wrapper and does not have
+ *      APIs that involve management of that single char.<br>
+ *      These include:
+ *      <ul>
+ *        <li> char charValue(),
+ *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
+ *      </ul>
+ * <li> UCharacter does not include Character APIs that are deprecated, nor
+ *      does it include the Java-specific character information, such as
+ *      boolean isJavaIdentifierPart(char ch).
+ * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
+ *      values '10' - '35'. UCharacter also does this in digit and
+ *      getNumericValue, to adhere to the java semantics of these
+ *      methods.  New methods unicodeDigit, and
+ *      getUnicodeNumericValue do not treat the above code points
+ *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
+ * </ul>
+ * <p>
+ * Further detail differences can be determined from the program
+ *        <a href="http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
+ *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
+ * </p>
+ * <p>
+ * In addition to Java compatibility functions, which calculate derived properties,
+ * this API provides low-level access to the Unicode Character Database.
+ * </p>
+ * <p>
+ * Unicode assigns each code point (not just assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ * </p>
+ * <p>
+ * For more information see
+ * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
+ * and the ICU User Guide chapter on Properties (http://www.icu-project.org/userguide/properties.html).
+ * </p>
+ * <p>
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ * </p>
+ * <p>
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ * </p>
+ * <p>
+ * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
+ * ICU implements them according to the Standard Recommendations in
+ * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
+ * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
+ * </p>
+ * <p>
+ * API access for C/POSIX character classes is as follows:
+ * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
+ * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
+ * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
+ * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|(1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|(1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
+ * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
+ * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
+ * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
+ * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
+ * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
+ * - cntrl:     getType(c)==CONTROL
+ * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
+ * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)
+ * </p>
+ * <p>
+ * The C/POSIX character classes are also available in UnicodeSet patterns,
+ * using patterns like [:graph:] or \p{graph}.
+ * </p>
+ * <p>
+ * Note: There are several ICU (and Java) whitespace functions.
+ * Comparison:
+ * - isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ *       most of general categories "Z" (separators) + most whitespace ISO controls
+ *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
+ * - isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * - isSpaceChar: just Z (including no-break spaces)
+ * </p>
+ * <p>
+ * This class is not subclassable
+ * </p>
+ * @author Syn Wee Quek
+ * @stable ICU 2.1
+ * @see com.ibm.icu.lang.UCharacterEnums
+ */
+
+public final class UCharacter
+{
+
+    /**
+     * Numeric Type constants.
+     * @see UProperty#NUMERIC_TYPE
+     * @stable ICU 2.4
+     */
+    public interface NumericType {
+        /**
+         * Decimal numeric type; digit() compares getNumericType() against it.
+         * @stable ICU 2.4
+         */
+        int DECIMAL = 1;
+    }
+
+    // public data members -----------------------------------------------
+
+    /**
+     * The lowest Unicode code point value.
+     * @stable ICU 2.1
+     */
+    public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE;
+
+    /**
+     * The highest Unicode code point value (scalar value) according to the
+     * Unicode Standard.
+     * This is a 21-bit value (21 bits, rounded up).<br>
+     * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE
+     * @stable ICU 2.1
+     */
+    public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE;
+
+    /**
+     * The minimum value for Supplementary code points
+     * @stable ICU 2.1
+     */
+    public static final int SUPPLEMENTARY_MIN_VALUE =
+        UTF16.SUPPLEMENTARY_MIN_VALUE;
+
+    // public methods ----------------------------------------------------
+
+    /**
+     * Returns the value of a code point read as a digit in the given radix.
+     * <br>This method observes the semantics of
+     * <code>java.lang.Character.digit()</code>.  Note that this
+     * will return positive values for code points for which isDigit
+     * returns false, just like java.lang.Character.
+     * <br><em>Semantic Change:</em> In release 1.3.1 and
+     * prior, this did not treat the European letters as having a
+     * digit value, and also treated numeric letters and other numbers as
+     * digits.
+     * This has been changed to conform to the java semantics.
+     * <br>A code point is a valid digit if and only if:
+     * <ul>
+     *   <li>ch is a decimal digit or one of the european letters, and
+     *   <li>the value of ch is less than the specified radix.
+     * </ul>
+     * @param ch the code point to query
+     * @param radix the radix
+     * @return the numeric value represented by the code point in the
+     * specified radix, or -1 if the code point is not a decimal digit
+     * or if its value is too large for the radix
+     * @stable ICU 2.1
+     */
+    public static int digit(int ch, int radix)
+    {
+        // when ch is out of bounds getProperty == 0
+        final int props = getProperty(ch);
+        final int value = (getNumericType(props) == NumericType.DECIMAL)
+                          ? UCharacterProperty.getUnsignedValue(props)
+                          : getEuropeanDigit(ch);
+        if (value < 0 || value >= radix) {
+            return -1;   // not a digit, or too large for the radix
+        }
+        return value;
+    }
+
+    /**
+     * Returns the Bidirectional property of a code point.
+     * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
+     * property.<br>
+     * Result returned belongs to the interface
+     * <a href=UCharacterDirection.html>UCharacterDirection</a>
+     * @param ch the code point whose direction is to be determined
+     * @return direction constant from UCharacterDirection.
+     * @stable ICU 2.1
+     */
+    public static int getDirection(int ch)
+    {
+        return gBdp.getClass(ch); // delegates to the UBiDiProps instance
+    }
+
+    /**
+     * Returns a code point corresponding to the two UTF16 characters.
+     * @param lead the lead char
+     * @param trail the trail char
+     * @return code point if surrogate characters are valid.
+     * @exception IllegalArgumentException thrown when argument characters do
+     *            not form a valid codepoint
+     * @stable ICU 2.1
+     */
+    public static int getCodePoint(char lead, char trail)
+    {
+        if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) {
+            throw new IllegalArgumentException("Illegal surrogate characters");
+        }
+        return UCharacterProperty.getRawSupplementary(lead, trail);
+    }
+
+    /**
+     * <p>Returns the "age" of a code point.</p>
+     * <p>The "age" is the Unicode version in which the code point was first
+     * designated (as a non-character or for Private Use) or assigned a
+     * character.</p>
+     * <p>Useful for not emitting code points to receivers that do not
+     * accept newer characters. The data is from the UCD file DerivedAge.txt.</p>
+     * @param ch The code point.
+     * @return the Unicode version number
+     * @exception IllegalArgumentException if ch is outside MIN_VALUE..MAX_VALUE
+     * @stable ICU 2.6
+     */
+    public static VersionInfo getAge(int ch)
+    {
+        if (!(MIN_VALUE <= ch && ch <= MAX_VALUE)) {
+            throw new IllegalArgumentException("Codepoint out of bounds");
+        }
+        return PROPERTY_.getAge(ch);
+    }
+
+    // private variables -------------------------------------------------
+
+    /**
+     * Database storing the sets of character property
+     */
+    private static final UCharacterProperty PROPERTY_;
+    /**
+     * For optimization
+     */
+    private static final char[] PROPERTY_TRIE_INDEX_;
+    private static final char[] PROPERTY_TRIE_DATA_;
+    private static final int PROPERTY_INITIAL_VALUE_;
+
+    private static final UBiDiProps gBdp;
+
+    // block to initialise character property database
+    static
+    {
+        try {
+            PROPERTY_ = UCharacterProperty.getInstance();
+            PROPERTY_TRIE_INDEX_ = PROPERTY_.m_trieIndex_;
+            PROPERTY_TRIE_DATA_ = PROPERTY_.m_trieData_;
+            PROPERTY_INITIAL_VALUE_ = PROPERTY_.m_trieInitialValue_;
+        } catch (Exception e) {
+            // same exception type and message as before, now chained to its cause
+            throw (MissingResourceException) new MissingResourceException(
+                                         e.getMessage(),"","").initCause(e);
+        }
+        // fall back to UBiDiProps.getDummy() when the singleton cannot be
+        // obtained because of an IOException
+        UBiDiProps bdp;
+        try {
+            bdp=UBiDiProps.getSingleton();
+        } catch(IOException e) {
+            bdp=UBiDiProps.getDummy();
+        }
+        gBdp=bdp;
+    }
+
+    /**
+     * Right-shift that moves the numeric type field of a property word to bit 0
+     */
+    private static final int NUMERIC_TYPE_SHIFT_ = 5;
+    /**
+     * Mask selecting the 3-bit numeric type field (bits 5..7) of a property word
+     */
+    private static final int NUMERIC_TYPE_MASK_ = 0x7 << NUMERIC_TYPE_SHIFT_;
+
+    // private methods ---------------------------------------------------
+
+    /**
+     * Getting the digit values of characters like 'A' - 'Z', normal,
+     * half-width and full-width. This method assumes that the other digit
+     * characters are checked by the calling method.
+     * @param ch character to test
+     * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
+     *         its corresponding digit will be returned.
+     */
+    private static int getEuropeanDigit(int ch) {
+        // ASCII 'A'..'Z' and 'a'..'z'
+        if (ch >= 0x41 && ch <= 0x5a) {
+            return ch + 10 - 0x41;
+        }
+        if (ch >= 0x61 && ch <= 0x7a) {
+            return ch + 10 - 0x61;
+        }
+        // U+FF21..U+FF3A and U+FF41..U+FF5A, the same letters in the FFxx range
+        if (ch >= 0xff21 && ch <= 0xff3a) {
+            return ch + 10 - 0xff21;
+        }
+        if (ch >= 0xff41 && ch <= 0xff5a) {
+            return ch + 10 - 0xff41;
+        }
+        return -1;
+    }
+
+    /**
+     * Extracts the numeric type field (mask, then right-shift) from a property word
+     * @param props 32 bit property
+     * @return the numeric type
+     */
+    private static int getNumericType(int props)
+    {
+        return (props & NUMERIC_TYPE_MASK_) >> NUMERIC_TYPE_SHIFT_;
+    }
+
+    /**
+     * Looks up the property word of a code point in the cached trie arrays.
+     * This is a hand-inlined duplicate of UCharacterProperty.getProperty that
+     * reads PROPERTY_TRIE_INDEX_ / PROPERTY_TRIE_DATA_ directly instead of
+     * going through the UCharacterProperty instance.
+     * Note this is a little different from CharTrie: the index into m_trieData_
+     * is never negative.
+     * Code points outside the handled ranges yield PROPERTY_INITIAL_VALUE_.
+     * @param ch code point whose property value is to be retrieved
+     * @return property value of code point
+     * @stable ICU 2.6
+     */
+    private static final int getProperty(int ch)
+    {
+        if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
+            || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE
+                && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
+            // BMP codepoint 0000..D7FF or DC00..FFFF (a negative ch lands here too)
+            try { // using try for ch < 0 is faster than using an if statement
+                return PROPERTY_TRIE_DATA_[
+                              (PROPERTY_TRIE_INDEX_[ch >> 5] << 2)
+                              + (ch & 0x1f)];
+            } catch (ArrayIndexOutOfBoundsException e) {
+                return PROPERTY_INITIAL_VALUE_;
+            }
+        }
+        if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+            // lead surrogate D800..DBFF (0x2800 >> 5: presumably Trie.LEAD_INDEX_OFFSET_ - confirm)
+            return PROPERTY_TRIE_DATA_[
+                              (PROPERTY_TRIE_INDEX_[(0x2800 >> 5) + (ch >> 5)] << 2)
+                              + (ch & 0x1f)];
+        }
+        // everything else that is still a valid code point goes through the trie object
+        if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
+            // supplementary code point 10000..10FFFF
+            // the lookup takes the lead surrogate of ch together with
+            // the low 10 bits of ch (ch & 0x3ff).
+            return PROPERTY_.m_trie_.getSurrogateValue(
+                                      UTF16.getLeadSurrogate(ch),
+                                      (char)(ch & 0x3ff));
+        }
+        // ch is above the highest code point: answer with the default
+        // (initial) value instead of reading the data array.
+        // The cached PROPERTY_INITIAL_VALUE_ is used because the initial
+        // value cannot be assumed to sit at data offset 0.
+        return PROPERTY_INITIAL_VALUE_;
+    }
+
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/UCharacterIterator.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/UCharacterIterator.java
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.text.CharacterIterator;
+
+/**
+ * Abstract class that defines an API for iteration on text objects. This is an
+ * interface for forward and backward iteration and random access into a text
+ * object. Forward iteration is done with post-increment and backward iteration
+ * is done with pre-decrement semantics, while the
+ * <code>java.text.CharacterIterator</code> interface methods provided forward
+ * iteration with "pre-increment" and backward iteration with pre-decrement
+ * semantics. This API is more efficient for forward iteration over code points.
+ * The other major difference is that this API can do both code unit and code point
+ * iteration, <code>java.text.CharacterIterator</code> can only iterate over
+ * code units and is limited to BMP (0 - 0xFFFF)
+ * @author Ram
+ * @stable ICU 2.4
+ */
+public abstract class UCharacterIterator
+                      implements Cloneable {
+
+    /**
+     * Protected default constructor for the subclasses
+     * @stable ICU 2.4
+     */
+    protected UCharacterIterator(){
+    }
+
+    /**
+     * Indicator that we have reached the ends of the UTF16 text.
+     * Moved from UForwardCharacterIterator.java
+     * @stable ICU 2.4
+     */
+    public static final int DONE = -1;
+
+    // static final methods ----------------------------------------------------
+
+    /**
+     * Returns a <code>UCharacterIterator</code> over the given source
+     * string, backed by a new <code>ReplaceableUCharacterIterator</code>.
+     * @param source a string
+     * @return UCharacterIterator object
+     * @exception IllegalArgumentException if the argument is null
+     * @stable ICU 2.4
+     */
+    public static final UCharacterIterator getInstance(String source){
+        return new ReplaceableUCharacterIterator(source);
+    }
+
+    //// for StringPrep
+    /**
+     * Returns a <code>UCharacterIterator</code> over the given source
+     * StringBuffer, backed by a new <code>ReplaceableUCharacterIterator</code>.
+     * @param source a string buffer of UTF-16 code units
+     * @return UCharacterIterator object
+     * @exception IllegalArgumentException if the argument is null
+     * @stable ICU 2.4
+     */
+    public static final UCharacterIterator getInstance(StringBuffer source){
+        return new ReplaceableUCharacterIterator(source);
+    }
+
+    /**
+     * Returns a <code>UCharacterIterator</code> over the given
+     * CharacterIterator, wrapped in a new <code>CharacterIteratorWrapper</code>.
+     * @param source a valid CharacterIterator object.
+     * @return UCharacterIterator object
+     * @exception IllegalArgumentException if the argument is null
+     * @stable ICU 2.4
+     */
+    public static final UCharacterIterator getInstance(CharacterIterator source){
+        return new CharacterIteratorWrapper(source);
+    }
+
+    // public methods ----------------------------------------------------------
+
+    /**
+     * Returns the code unit at the current index.  If index is out
+     * of range, returns DONE.  Index is not changed.
+     * @return current code unit
+     * @stable ICU 2.4
+     */
+    public abstract int current();
+
+    /**
+     * Returns the length of the text
+     * @return length of the text
+     * @stable ICU 2.4
+     */
+    public abstract int getLength();
+
+
+    /**
+     * Gets the current index in text.
+     * @return current index in text.
+     * @stable ICU 2.4
+     */
+    public abstract int getIndex();
+
+
+    /**
+     * Returns the UTF16 code unit at index, and increments to the next
+     * code unit (post-increment semantics).  If index is out of
+     * range, DONE is returned, and the iterator is reset to the limit
+     * of the text.
+     * @return the next UTF16 code unit, or DONE if the index is at the limit
+     *         of the text.
+     * @stable ICU 2.4
+     */
+    public abstract int next();
+
+    /**
+     * Returns the code point at index, and increments to the next code
+     * point (post-increment semantics).  If index does not point to a
+     * valid surrogate pair, the behavior is the same as
+     * <code>next()</code>.  Otherwise the iterator is incremented past
+     * the surrogate pair, and the code point represented by the pair
+     * is returned.
+     * @return the next codepoint in text, or DONE if the index is at
+     *         the limit of the text.
+     * @stable ICU 2.4
+     */
+    public int nextCodePoint(){
+        final int first = next();
+        if (!UTF16.isLeadSurrogate((char)first)) {
+            return first;
+        }
+        final int second = next();
+        if (UTF16.isTrailSurrogate((char)second)) {
+            return UCharacterProperty.getRawSupplementary((char)first, (char)second);
+        }
+        if (second != DONE) {
+            previous(); // unmatched lead surrogate: step back over the unit just read
+        }
+        return first;
+    }
+
+    /**
+     * Decrement to the position of the previous code unit in the
+     * text, and return it (pre-decrement semantics).  If the
+     * resulting index is less than 0, the index is reset to 0 and
+     * DONE is returned.
+     * @return the previous code unit in the text, or DONE if the new
+     *         index is before the start of the text.
+     * @stable ICU 2.4
+     */
+    public abstract int previous();
+
+    /**
+     * Sets the index to the specified index in the text.
+     * @param index the index within the text.
+     * @exception IndexOutOfBoundsException is thrown if an invalid index is
+     *            supplied
+     * @stable ICU 2.4
+     */
+    public abstract void setIndex(int index);
+
+    //// for StringPrep
+    /**
+     * Fills the buffer with the underlying text storage of the iterator
+     * If the buffer capacity is not enough an exception is thrown. The capacity
+     * of the fill in buffer should at least be equal to length of text in the
+     * iterator obtained by calling <code>getLength()</code>.
+     * <b>Usage:</b>
+     *
+     * <code>
+     * <pre>
+     *         UCharacterIterator iter = UCharacterIterator.getInstance(text);
+     *         char[] buf = new char[iter.getLength()];
+     *         iter.getText(buf);
+     *
+     *         OR
+     *         char[] buf= new char[1];
+     *         int len = 0;
+     *         for(;;){
+     *             try{
+     *                 len = iter.getText(buf);
+     *                 break;
+     *             }catch(IndexOutOfBoundsException e){
+     *                 buf = new char[iter.getLength()];
+     *             }
+     *         }
+     * </pre>
+     * </code>
+     *
+     * @param fillIn an array of chars to fill with the underlying UTF-16 code
+     *         units.
+     * @param offset the position within the array to start putting the data.
+     * @return the number of code units added to fillIn, as a convenience
+     * @exception IndexOutOfBoundsException if there is not enough
+     *            room after offset in the array, or if offset < 0.
+     * @stable ICU 2.4
+     */
+    public abstract int getText(char[] fillIn, int offset);
+
+    //// for StringPrep
+    /**
+     * Convenience overload of <code>getText(char[], int)</code> that provides
+     * an offset of 0.
+     * @param fillIn an array of chars to fill with the underlying UTF-16 code
+     *         units.
+     * @return the number of code units added to fillIn, as a convenience
+     * @exception IndexOutOfBoundsException if there is not enough
+     *            room in the array.
+     * @stable ICU 2.4
+     */
+    public final int getText(char[] fillIn) {
+        return getText(fillIn, 0);
+    }
+
+    //// for StringPrep
+    /**
+     * Returns the iterator's entire underlying text as a string.
+     * @return the underlying text storage in the iterator as a string
+     * @stable ICU 2.4
+     */
+    public String getText() {
+        final char[] buffer = new char[getLength()];
+        getText(buffer);
+        return String.valueOf(buffer);
+    }
+
+    /**
+     * Moves the current position by the number of code units
+     * specified, either forward or backward depending on the sign
+     * of delta (positive or negative respectively).  If the resulting
+     * index would be less than zero, the index is set to zero, and if
+     * the resulting index would be greater than limit, the index is
+     * set to limit.  The target is computed in 64-bit arithmetic so that
+     * an extreme delta is clamped rather than wrapping around.
+     * @param delta the number of code units to move the current
+     *              index.
+     * @return the new index.
+     * @exception IndexOutOfBoundsException is thrown if an invalid index is
+     *            supplied
+     * @stable ICU 2.4
+     */
+    public int moveIndex(int delta) {
+        final long target = (long) getIndex() + delta;
+        final int x = (int) Math.max(0L, Math.min(target, (long) getLength()));
+        setIndex(x);
+        return x;
+    }
+
+    /**
+     * Creates a copy of this iterator, independent from other iterators.
+     * @return copy of this iterator (documented as null if cloning is not possible)
+     * @throws CloneNotSupportedException propagated from <code>super.clone()</code>
+     * @stable ICU 2.4
+     */
+    public Object clone() throws CloneNotSupportedException{
+        return super.clone();
+    }
+
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/UCharacterProperty.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/UCharacterProperty.java
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.io.BufferedInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+import java.util.MissingResourceException;
+
+/**
+* <p>Internal class used for Unicode character property database.</p>
+* <p>This class stores binary data read from uprops.icu.
+* It does not have the capability to parse the data into more high-level
+* information. It only returns bytes of information when required.</p>
+* <p>Due to the form most commonly used for retrieval, array of char is used
+* to store the binary data.</p>
+* <p>UCharacterPropertyDB also contains information on accessing indexes to
+* significant points in the binary data.</p>
+* <p>Responsibility for molding the binary data into a more meaningful form lies with
+* <a href=UCharacter.html>UCharacter</a>.</p>
+* @author Syn Wee Quek
+* @since release 2.1, february 1st 2002
+*/
+
+public final class UCharacterProperty
+{
+    // public data members -----------------------------------------------
+
+    /**
+    * Trie data
+    */
+    public CharTrie m_trie_;
+    /**
+     * Optimization
+     * CharTrie index array
+     */
+    public char[] m_trieIndex_;
+    /**
+     * Optimization
+     * CharTrie data array
+     */
+    public char[] m_trieData_;
+    /**
+     * Optimization
+     * CharTrie data offset
+     */
+    public int m_trieInitialValue_;
+    /**
+    * Unicode version
+    */
+    public VersionInfo m_unicodeVersion_;
+
+    // uprops.h enum UPropertySource --------------------------------------- ***
+
+    /** From uchar.c/uprops.icu properties vectors trie */
+    public static final int SRC_PROPSVEC=2;
+    /** One more than the highest UPropertySource (SRC_) constant. */
+    public static final int SRC_COUNT=9;
+
+    // public methods ----------------------------------------------------
+
+    /**
+     * Java "friend" hook: caches the agent's index array, data array and initial value
+     */
+    public void setIndexData(CharTrie.FriendAgent friendagent)
+    {
+        m_trieIndex_ = friendagent.getPrivateIndex();
+        m_trieData_ = friendagent.getPrivateData();
+        m_trieInitialValue_ = friendagent.getPrivateInitialValue();
+    }
+
+    /**
+    * Gets the property value of a code point from the property trie.
+    * This is an optimized, inlined form of m_trie_.getCodePointValue(ch).
+    * Note this is a little different from CharTrie: the index into m_trieData_
+    * is never negative.
+    * @param ch code point whose property value is to be retrieved
+    * @return property value of code point, or m_trieInitialValue_ if ch is out of bounds
+    */
+    public final int getProperty(int ch)
+    {
+        if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
+            || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE
+                && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
+            // BMP codepoint 0000..D7FF or DC00..FFFF
+            // (a negative ch also lands here and is handled by the catch below)
+            try { // using try for ch < 0 is faster than using an if statement
+                return m_trieData_[
+                    (m_trieIndex_[ch >> Trie.INDEX_STAGE_1_SHIFT_]
+                          << Trie.INDEX_STAGE_2_SHIFT_)
+                    + (ch & Trie.INDEX_STAGE_3_MASK_)];
+            } catch (ArrayIndexOutOfBoundsException e) {
+                return m_trieInitialValue_;
+            }
+        }
+        if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+            // lead surrogate D800..DBFF: same lookup, index shifted by LEAD_INDEX_OFFSET_
+            return m_trieData_[
+                    (m_trieIndex_[Trie.LEAD_INDEX_OFFSET_
+                                  + (ch >> Trie.INDEX_STAGE_1_SHIFT_)]
+                          << Trie.INDEX_STAGE_2_SHIFT_)
+                    + (ch & Trie.INDEX_STAGE_3_MASK_)];
+        }
+        if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
+            // supplementary code point 10000..10FFFF
+            // the lookup takes the lead surrogate of ch together with
+            // the bits of ch selected by Trie.SURROGATE_MASK_.
+            return m_trie_.getSurrogateValue(
+                                          UTF16.getLeadSurrogate(ch),
+                                          (char)(ch & Trie.SURROGATE_MASK_));
+        }
+        // ch is out of bounds
+        // (above the highest code point): answer with the default
+        // (initial) value instead of reading the data array.
+        // The cached m_trieInitialValue_ is used because the initial
+        // value cannot be assumed to sit at data offset 0.
+        return m_trieInitialValue_;
+
+        // this all is an inlined form of return m_trie_.getCodePointValue(ch);
+    }
+
+    /**
+    * Extracts the unsigned numeric value embedded in a property word: the word
+    * is shifted right by VALUE_SHIFT_ and masked with UNSIGNED_VALUE_MASK_AFTER_SHIFT_
+    * @param prop the property word of a character
+    * @return unsigned numeric value
+    */
+    public static int getUnsignedValue(int prop)
+    {
+        return (prop >> VALUE_SHIFT_) & UNSIGNED_VALUE_MASK_AFTER_SHIFT_;
+    }
+
+    /**
+     * Gets the unicode additional properties.
+     * C version getUnicodeProperties.
+     * @param codepoint codepoint whose additional properties is to be
+     *                  retrieved
+     * @param column -1 selects the main property word; otherwise a column index
+     * @return the selected property word, or 0 when column is out of range
+     */
+    public int getAdditional(int codepoint, int column) {
+        if (column == -1) {
+            return getProperty(codepoint);
+        }
+        if (column < 0 || column >= m_additionalColumnsCount_) {
+            return 0;
+        }
+        final int vectorStart = m_additionalTrie_.getCodePointValue(codepoint);
+        return m_additionalVectors_[vectorStart + column];
+    }
+
+    /**
+     * <p>Returns the "age" of a code point.</p>
+     * <p>The "age" is the Unicode version in which the code point was first
+     * designated (as a non-character or for Private Use) or assigned a
+     * character.</p>
+     * <p>Useful for not emitting code points to receivers that do not
+     * accept newer characters. The data is from the UCD file DerivedAge.txt.</p>
+     * <p>This API does not check the validity of the codepoint.</p>
+     * @param codepoint The code point.
+     * @return the Unicode version number
+     */
+    public VersionInfo getAge(int codepoint)
+    {
+        // column 0 of the additional vectors carries the age above AGE_SHIFT_
+        final int version = getAdditional(codepoint, 0) >> AGE_SHIFT_;
+        final int highNibble = (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_;
+        final int lowNibble = version & LAST_NIBBLE_MASK_;
+        return VersionInfo.getInstance(highNibble, lowNibble, 0, 0);
+    }
+
+    /**
+    * Forms a supplementary code point from a surrogate pair: lead shifted left
+    * by LEAD_SURROGATE_SHIFT_, plus trail, plus SURROGATE_OFFSET_.<br>
+    * Note this is for internal use, hence the surrogates are not validated.
+    * @param lead lead surrogate character
+    * @param trail trailing surrogate character
+    * @return code point of the supplementary character
+    */
+    public static int getRawSupplementary(char lead, char trail)
+    {
+        return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_;
+    }
+
+    /**
+    * Returns the shared UCharacterProperty, loading the property data on first use.
+    * @throws MissingResourceException when data is missing or corrupted; the failure is its cause
+    */
+    public static UCharacterProperty getInstance()
+    {
+        if (INSTANCE_ == null) {
+            try {
+                INSTANCE_ = new UCharacterProperty();
+            } catch (Exception e) {
+                throw (MissingResourceException)
+                    new MissingResourceException(e.getMessage(), "", "").initCause(e);
+            }
+        }
+        return INSTANCE_;
+    }
+
+    /**
+     * Checks if the argument c is to be treated as a white space in ICU
+     * rules. Usually ICU rule white spaces are ignored unless quoted.
+     * Equivalent to test for Pattern_White_Space Unicode property.
+     * Stable set of characters, won't change.
+     * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
+     * @param c codepoint to check
+     * @return true if c is a ICU white space
+     */
+    public static boolean isRuleWhiteSpace(int c)
+    {
+        // Fixed Pattern_White_Space list from UAX #31, deliberately not
+        // driven by the Unicode property data:
+        //   U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, U+2028..U+2029
+        if (c < 0x0009 || c > 0x2029) {
+            return false;   // outside the span that covers the whole list
+        }
+        final boolean lowGroup = c <= 0x000D || c == 0x0020 || c == 0x0085;
+        final boolean highGroup = c == 0x200E || c == 0x200F || c >= 0x2028;
+        return lowGroup || highGroup;
+    }
+
+    // protected variables -----------------------------------------------
+
+    /**
+     * Extra property trie
+     */
+    CharTrie m_additionalTrie_;
+    /**
+     * Extra property vectors, 1st column for age and second for binary
+     * properties.
+     */
+    int m_additionalVectors_[];
+    /**
+     * Number of additional columns
+     */
+    int m_additionalColumnsCount_;
+    /**
+     * Maximum values for block, bits used as in vector word
+     * 0
+     */
+    int m_maxBlockScriptValue_;
+    /**
+     * Maximum values for script, bits used as in vector word
+     * 0
+     */
+     int m_maxJTGValue_;
+
+    // private variables -------------------------------------------------
+
+      /**
+     * UnicodeData.txt property object
+     */
+    private static UCharacterProperty INSTANCE_ = null;
+
+    /**
+    * Default name of the datafile
+    */
+    private static final String DATA_FILE_NAME_ = "/sun/text/resources/uprops.icu";
+
+    /**
+    * Default buffer size of datafile
+    */
+    private static final int DATA_BUFFER_SIZE_ = 25000;
+
+    /**
+    * Numeric value shift
+    */
+    private static final int VALUE_SHIFT_ = 8;
+
+    /**
+    * Mask to be applied after shifting to obtain an unsigned numeric value
+    */
+    private static final int UNSIGNED_VALUE_MASK_AFTER_SHIFT_ = 0xFF;
+
+    /**
+    * Shift value for lead surrogate to form a supplementary character.
+    */
+    private static final int LEAD_SURROGATE_SHIFT_ = 10;
+    /**
+    * Offset to add to combined surrogate pair to avoid masking.
+    */
+    private static final int SURROGATE_OFFSET_ =
+                           UTF16.SUPPLEMENTARY_MIN_VALUE -
+                           (UTF16.SURROGATE_MIN_VALUE <<
+                           LEAD_SURROGATE_SHIFT_) -
+                           UTF16.TRAIL_SURROGATE_MIN_VALUE;
+
+    // additional properties ----------------------------------------------
+
+    /**
+     * First nibble shift
+     */
+    private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
+    /**
+     * Second nibble mask
+     */
+    private static final int LAST_NIBBLE_MASK_ = 0xF;
+    /**
+     * Age value shift
+     */
+    private static final int AGE_SHIFT_ = 24;
+
+    // private constructors --------------------------------------------------
+
+    /**
+    * Constructor; the data stream is closed even when reading fails.
+    * @exception IOException thrown when data reading fails or data corrupted
+    */
+    private UCharacterProperty() throws IOException
+    {
+        InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_);
+        BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE_);
+        try {
+            new UCharacterPropertyReader(b).read(this);
+        } finally {
+            b.close();
+        }
+        m_trie_.putIndexData(this);
+    }
+
+    public void upropsvec_addPropertyStarts(UnicodeSet set) {
+        /* with no additional columns the properties vectors trie may not be there at all */
+        if (m_additionalColumnsCount_ <= 0) {
+            return;
+        }
+        /* add the start code point of each same-value range of the properties vectors trie */
+        TrieIterator rangeWalker = new TrieIterator(m_additionalTrie_);
+        RangeValueIterator.Element range = new RangeValueIterator.Element();
+        while (rangeWalker.next(range)) {
+            set.add(range.start);
+        }
+    }
+
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/UCharacterPropertyReader.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/UCharacterPropertyReader.java
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+* <p>Internal reader class for ICU data file uprops.icu containing
+* Unicode codepoint data.</p>
+* <p>This class simply reads uprops.icu, authenticates that it is a valid
+* ICU data file and splits its contents up into blocks of data for use in
+* <a href="UCharacterProperty.html">com.ibm.icu.impl.UCharacterProperty</a>.
+* </p>
+* <p>uprops.icu which is in big-endian format is jared together with this
+* package.</p>
+*
+* Unicode character properties file format see
+* (ICU4C)/source/tools/genprops/store.c
+*
+* @author Syn Wee Quek
+* @since release 2.1, February 1st 2002
+*/
+final class UCharacterPropertyReader implements ICUBinary.Authenticate
+{
+    // public methods ----------------------------------------------------
+
+    /**
+     * Checks a data format version against DATA_FORMAT_VERSION_.
+     * Bytes 0, 2 and 3 have to match; byte 1 is not compared.
+     * @param version data format version bytes
+     * @return true if bytes 0, 2 and 3 equal the expected values
+     */
+    public boolean isDataVersionAcceptable(byte version[])
+    {
+        if (version[0] != DATA_FORMAT_VERSION_[0]) {
+            return false;
+        }
+        if (version[2] != DATA_FORMAT_VERSION_[2]) {
+            return false;
+        }
+        return version[3] == DATA_FORMAT_VERSION_[3];
+    }
+
+    // protected constructor ---------------------------------------------
+
+    /**
+    * <p>Protected constructor. Passes the stream to ICUBinary.readHeader
+    * with this object as the authenticator, keeps the returned version
+    * bytes and wraps the stream for the later call to read.</p>
+    * @param inputStream ICU uprop.dat file input stream
+    * @exception IOException throw if data file fails authentication
+    */
+    protected UCharacterPropertyReader(InputStream inputStream)
+                                                        throws IOException
+    {
+        byte[] unicodeVersion =
+            ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, this);
+        this.m_unicodeVersion_ = unicodeVersion;
+        this.m_dataInputStream_ = new DataInputStream(inputStream);
+    }
+
+    // protected methods -------------------------------------------------
+
+    /**
+    * <p>Reads uprops.icu and parses it into the blocks of data stored in
+    * UCharacterProperty.</p>
+    * <p>The index, the main trie and, when additional columns are present,
+    * the additional trie and vectors are read; the properties, exceptions
+    * and case blocks are skipped. The input stream is closed before this
+    * method returns, whether or not reading succeeded.</p>
+    * @param ucharppty UCharacterProperty instance
+    * @exception IOException thrown when data reading fails, including when
+    *            the stream ends inside a block that is being skipped
+    */
+    protected void read(UCharacterProperty ucharppty) throws IOException
+    {
+        // try-with-resources releases the stream even when parsing fails
+        // part way through
+        try (DataInputStream in = m_dataInputStream_) {
+            // read the indexes
+            int count = INDEX_SIZE_;
+            m_propertyOffset_          = in.readInt();
+            count --;
+            m_exceptionOffset_         = in.readInt();
+            count --;
+            m_caseOffset_              = in.readInt();
+            count --;
+            m_additionalOffset_        = in.readInt();
+            count --;
+            m_additionalVectorsOffset_ = in.readInt();
+            count --;
+            m_additionalColumnsCount_  = in.readInt();
+            count --;
+            m_reservedOffset_          = in.readInt();
+            count --;
+            skipFully(in, 3 << 2);
+            count -= 3;
+            ucharppty.m_maxBlockScriptValue_ = in.readInt();
+            count --; // 10
+            ucharppty.m_maxJTGValue_ = in.readInt();
+            count --; // 11
+            // skip the index entries that are not used here
+            skipFully(in, count << 2);
+
+            // read the trie index block
+            // m_props_index_ in terms of ints
+            ucharppty.m_trie_ = new CharTrie(in, null);
+
+            // skip the 32 bit properties block
+            int size = m_exceptionOffset_ - m_propertyOffset_;
+            skipFully(in, size * 4);
+
+            // skip the 32 bit exceptions block
+            size = m_caseOffset_ - m_exceptionOffset_;
+            skipFully(in, size * 4);
+
+            // skip the case block: twice the offset difference, two bytes each
+            size = (m_additionalOffset_ - m_caseOffset_) << 1;
+            skipFully(in, size * 2);
+
+            if(m_additionalColumnsCount_ > 0) {
+                // reads the additional property block
+                ucharppty.m_additionalTrie_ = new CharTrie(in, null);
+
+                // additional properties
+                size = m_reservedOffset_ - m_additionalVectorsOffset_;
+                ucharppty.m_additionalVectors_ = new int[size];
+                for (int i = 0; i < size; i ++) {
+                    ucharppty.m_additionalVectors_[i] = in.readInt();
+                }
+            }
+        }
+
+        ucharppty.m_additionalColumnsCount_ = m_additionalColumnsCount_;
+        ucharppty.m_unicodeVersion_ = VersionInfo.getInstance(
+                         (int)m_unicodeVersion_[0], (int)m_unicodeVersion_[1],
+                         (int)m_unicodeVersion_[2], (int)m_unicodeVersion_[3]);
+    }
+
+    /**
+    * Skips exactly byteCount bytes of the stream; a zero or negative count
+    * skips nothing.
+    * DataInputStream.skipBytes is allowed to skip fewer bytes than asked,
+    * which would leave every later block misaligned, so a skip that makes
+    * no progress is replaced by a one byte read that fails at the real end
+    * of the stream.
+    * @param in stream to skip in
+    * @param byteCount number of bytes to skip
+    * @exception IOException thrown when the stream ends first or reading
+    *            fails
+    */
+    private static void skipFully(DataInputStream in, int byteCount)
+                                                        throws IOException
+    {
+        int remaining = byteCount;
+        while (remaining > 0) {
+            int skipped = in.skipBytes(remaining);
+            if (skipped <= 0) {
+                // no progress: readByte() throws EOFException at the end
+                in.readByte();
+                skipped = 1;
+            }
+            remaining -= skipped;
+        }
+    }
+
+    // private variables -------------------------------------------------
+
+    /**
+    * Index size
+    */
+    private static final int INDEX_SIZE_ = 16;
+
+    /**
+    * ICU data file input stream
+    */
+    private DataInputStream m_dataInputStream_;
+
+    /**
+    * Offset information in the indexes.
+    */
+    private int m_propertyOffset_;
+    private int m_exceptionOffset_;
+    private int m_caseOffset_;
+    private int m_additionalOffset_;
+    private int m_additionalVectorsOffset_;
+    private int m_additionalColumnsCount_;
+    private int m_reservedOffset_;
+    private byte m_unicodeVersion_[];
+
+    /**
+    * Data format "UPro".
+    */
+    private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x50,
+                                                    (byte)0x72, (byte)0x6F};
+    /**
+     * Format version; this code works with all versions with the same major
+     * version number and the same Trie bit distribution.
+     */
+    private static final byte DATA_FORMAT_VERSION_[] = {(byte)0x5, (byte)0,
+                                             (byte)Trie.INDEX_STAGE_1_SHIFT_,
+                                             (byte)Trie.INDEX_STAGE_2_SHIFT_};
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/UTF16.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/UTF16.java
@@ -0,0 +1,538 @@
+/*
+ * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+/**
+ * <p>Standalone utility class providing UTF16 character conversions and
+ * indexing conversions.</p>
+ * <p>Code that uses strings alone rarely needs modification.
+ * By design, UTF-16 does not allow overlap, so searching for strings is a safe
+ * operation. Similarly, concatenation is always safe. Substringing is safe if
+ * the start and end are both on UTF-32 boundaries. In normal code, the values
+ * for start and end are on those boundaries, since they arose from operations
+ * like searching. If not, the nearest UTF-32 boundaries can be determined
+ * using <code>bounds()</code>.</p>
+ * <strong>Examples:</strong>
+ * <p>The following examples illustrate use of some of these methods.
+ * <pre>
+ * // iteration forwards: Original
+ * for (int i = 0; i &lt; s.length(); ++i) {
+ *     char ch = s.charAt(i);
+ *     doSomethingWith(ch);
+ * }
+ *
+ * // iteration forwards: Changes for UTF-32
+ * int ch;
+ * for (int i = 0; i &lt; s.length(); i+=UTF16.getCharCount(ch)) {
+ *     ch = UTF16.charAt(s,i);
+ *     doSomethingWith(ch);
+ * }
+ *
+ * // iteration backwards: Original
+ * for (int i = s.length() -1; i >= 0; --i) {
+ *     char ch = s.charAt(i);
+ *     doSomethingWith(ch);
+ * }
+ *
+ * // iteration backwards: Changes for UTF-32
+ * int ch;
+ * for (int i = s.length() -1; i >= 0; i-=UTF16.getCharCount(ch)) {
+ *     ch = UTF16.charAt(s,i);
+ *     doSomethingWith(ch);
+ * }
+ * </pre>
+ * <strong>Notes:</strong>
+ * <ul>
+ *   <li>
+ *   <strong>Naming:</strong> For clarity, High and Low surrogates are called
+ *   <code>Lead</code> and <code>Trail</code> in the API, which gives a better
+ *   sense of their ordering in a string. <code>offset16</code> and
+ *   <code>offset32</code> are used to distinguish offsets to UTF-16
+ *   boundaries vs offsets to UTF-32 boundaries. <code>int char32</code> is
+ *   used to contain UTF-32 characters, as opposed to <code>char16</code>,
+ *   which is a UTF-16 code unit.
+ *   </li>
+ *   <li>
+ *   <strong>Roundtripping Offsets:</strong> You can always roundtrip from a
+ *   UTF-32 offset to a UTF-16 offset and back. Because of the difference in
+ *   structure, you can roundtrip from a UTF-16 offset to a UTF-32 offset and
+ *   back if and only if <code>bounds(string, offset16) != TRAIL</code>.
+ *   </li>
+ *   <li>
+ *    <strong>Exceptions:</strong> The error checking will throw an exception
+ *   if indices are out of bounds. Other than that, all methods will
+ *   behave reasonably, even if unmatched surrogates or out-of-bounds UTF-32
+ *   values are present. <code>UCharacter.isLegal()</code> can be used to check
+ *   for validity if desired.
+ *   </li>
+ *   <li>
+ *   <strong>Unmatched Surrogates:</strong> If the string contains unmatched
+ *   surrogates, then these are counted as one UTF-32 value. This matches
+ *   their iteration behavior, which is vital. It also matches common display
+ *   practice as missing glyphs (see the Unicode Standard Section 5.4, 5.5).
+ *   </li>
+ *   <li>
+ *     <strong>Optimization:</strong> The method implementations may need
+ *     optimization if the compiler doesn't fold static final methods. Since
+ *     surrogate pairs will form an exceedingly small percentage of all the text
+ *     in the world, the singleton case should always be optimized for.
+ *   </li>
+ * </ul>
+ * @author Mark Davis, with help from Markus Scherer
+ * @stable ICU 2.1
+ */
+
+public final class UTF16
+{
+    // public variables ---------------------------------------------------
+
+    /**
+     * The lowest Unicode code point value.
+     * @stable ICU 2.1
+     */
+    public static final int CODEPOINT_MIN_VALUE = 0;
+    /**
+     * The highest Unicode code point value (scalar value) according to the
+     * Unicode Standard.
+     * @stable ICU 2.1
+     */
+    public static final int CODEPOINT_MAX_VALUE = 0x10ffff;
+    /**
+     * The minimum value for Supplementary code points
+     * @stable ICU 2.1
+     */
+    public static final int SUPPLEMENTARY_MIN_VALUE  = 0x10000;
+    /**
+     * Lead surrogate minimum value
+     * @stable ICU 2.1
+     */
+    public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;
+    /**
+     * Trail surrogate minimum value
+     * @stable ICU 2.1
+     */
+    public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;
+    /**
+     * Lead surrogate maximum value
+     * @stable ICU 2.1
+     */
+    public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF;
+    /**
+     * Trail surrogate maximum value
+     * @stable ICU 2.1
+     */
+    public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF;
+    /**
+     * Surrogate minimum value
+     * @stable ICU 2.1
+     */
+    public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE;
+
+    // public method ------------------------------------------------------
+
+    /**
+     * Returns the UTF-32 value that contains the UTF-16 code unit at the
+     * given offset of a string.
+     * A code unit below the surrogate range is returned as is. Any other
+     * code unit is handed to the helper that pairs surrogates, so offset16
+     * may point at either half of a surrogate pair; an unmatched surrogate
+     * is returned unchanged. No validity check is done on the result; use
+     * <code><a href="../lang/UCharacter.html#isLegal(char)">
+     * UCharacter.isLegal()</a></code> on the return value if one is needed.
+     * @param source string of UTF-16 chars
+     * @param offset16 UTF-16 offset of the code unit to look at
+     * @return UTF-32 value that contains the char at offset16
+     * @exception IndexOutOfBoundsException thrown if offset16 is out of
+     *            bounds.
+     * @stable ICU 2.1
+     */
+    public static int charAt(String source, int offset16) {
+        final char unit = source.charAt(offset16);
+        // everything from the first lead surrogate upwards takes the slow path
+        return (unit >= LEAD_SURROGATE_MIN_VALUE)
+               ? _charAt(source, offset16, unit)
+               : unit;
+    }
+
+    // Slow path of charAt(String, int): single is the char at offset16.
+    // Looks at the neighbouring char to complete a surrogate pair and
+    // returns single itself when no pair can be formed.
+    private static int _charAt(String source, int offset16, char single) {
+        if (single > TRAIL_SURROGATE_MAX_VALUE) {
+            // above the surrogate range
+            return single;
+        }
+
+        if (single <= LEAD_SURROGATE_MAX_VALUE) {
+            // treated as a lead surrogate: the trail, if any, follows it
+            int next = offset16 + 1;
+            if (next != source.length()) {
+                char trail = source.charAt(next);
+                if (TRAIL_SURROGATE_MIN_VALUE <= trail && trail <= TRAIL_SURROGATE_MAX_VALUE) {
+                    return UCharacterProperty.getRawSupplementary(single, trail);
+                }
+            }
+            return single; // return unmatched surrogate
+        }
+
+        // single is a trail surrogate: the lead, if any, precedes it
+        int previous = offset16 - 1;
+        if (previous >= 0) {
+            char lead = source.charAt(previous);
+            if (LEAD_SURROGATE_MIN_VALUE <= lead && lead <= LEAD_SURROGATE_MAX_VALUE) {
+                return UCharacterProperty.getRawSupplementary(lead, single);
+            }
+        }
+        return single; // return unmatched surrogate
+    }
+
+    /**
+     * Returns the UTF-32 value that contains the UTF-16 code unit at the
+     * given offset of a subarray.
+     * offset16 may point at either half of a surrogate pair; the other half
+     * is only looked for inside the subarray. A char that is not a
+     * surrogate, or a surrogate without its partner, is returned unchanged.
+     * No validity check is done on the result; use
+     * <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()
+     * </a></code> on the return value if one is needed.
+     * @param source array of UTF-16 chars
+     * @param start index in source of the first char of the subarray
+     * @param limit index in source just past the last char of the subarray
+     * @param offset16 UTF-16 offset relative to start
+     * @return UTF-32 value that contains the char at offset16
+     * @exception IndexOutOfBoundsException thrown if offset16 is not within
+     *            the range of start and limit.
+     * @stable ICU 2.1
+     */
+    public static int charAt(char source[], int start, int limit,
+                             int offset16)
+    {
+        final int index = offset16 + start;
+        if (index < start || index >= limit) {
+            throw new ArrayIndexOutOfBoundsException(index);
+        }
+
+        final char single = source[index];
+        if (!isSurrogate(single)) {
+            return single;
+        }
+
+        if (single <= LEAD_SURROGATE_MAX_VALUE) {
+            // lead surrogate: the trail has to follow inside the subarray
+            final int next = index + 1;
+            if (next < limit) {
+                char trail = source[next];
+                if (isTrailSurrogate(trail)) {
+                    return UCharacterProperty.getRawSupplementary(single, trail);
+                }
+            }
+            return single; // return unmatched surrogate
+        }
+
+        // trail surrogate: the lead has to precede it inside the subarray
+        if (index != start) {
+            char lead = source[index - 1];
+            if (isLeadSurrogate(lead)) {
+                return UCharacterProperty.getRawSupplementary(lead, single);
+            }
+        }
+        return single; // return unmatched surrogate
+    }
+
+    /**
+     * Returns the number of chars needed to hold this char32.
+     * The value is not validated; use <code>
+     * <a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on
+     * char32 first if a validity check is required.
+     * @param char32 the input codepoint.
+     * @return 2 if char32 is at or above SUPPLEMENTARY_MIN_VALUE, otherwise 1.
+     * @stable ICU 2.1
+     */
+    public static int getCharCount(int char32)
+    {
+        return (char32 >= SUPPLEMENTARY_MIN_VALUE) ? 2 : 1;
+    }
+
+    /**
+     * Tells whether the code value is a surrogate, lead or trail.
+     * @param char16 the input character.
+     * @return true iff char16 lies between LEAD_SURROGATE_MIN_VALUE and
+     *         TRAIL_SURROGATE_MAX_VALUE, both included.
+     * @stable ICU 2.1
+     */
+    public static boolean isSurrogate(char char16)
+    {
+        return char16 >= LEAD_SURROGATE_MIN_VALUE
+            && char16 <= TRAIL_SURROGATE_MAX_VALUE;
+    }
+
+    /**
+     * Tells whether the character is a trail surrogate.
+     * @param char16 the input character.
+     * @return true iff char16 lies between TRAIL_SURROGATE_MIN_VALUE and
+     *         TRAIL_SURROGATE_MAX_VALUE, both included.
+     * @stable ICU 2.1
+     */
+    public static boolean isTrailSurrogate(char char16)
+    {
+        return char16 >= TRAIL_SURROGATE_MIN_VALUE
+            && char16 <= TRAIL_SURROGATE_MAX_VALUE;
+    }
+
+    /**
+     * Tells whether the character is a lead surrogate.
+     * @param char16 the input character.
+     * @return true iff char16 lies between LEAD_SURROGATE_MIN_VALUE and
+     *         LEAD_SURROGATE_MAX_VALUE, both included.
+     * @stable ICU 2.1
+     */
+    public static boolean isLeadSurrogate(char char16)
+    {
+        return char16 >= LEAD_SURROGATE_MIN_VALUE
+            && char16 <= LEAD_SURROGATE_MAX_VALUE;
+    }
+
+    /**
+     * Returns the lead surrogate of a code point.
+     * The value is not validated; use
+     * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code>
+     * on char32 first if a validity check is required.
+     * @param char32 the input character.
+     * @return lead surrogate if char32 is at or above
+     *         SUPPLEMENTARY_MIN_VALUE; <br>
+     *         and 0 otherwise (note: 0 is not a valid lead surrogate).
+     * @stable ICU 2.1
+     */
+    public static char getLeadSurrogate(int char32)
+    {
+        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
+            return 0;
+        }
+
+        int highBits = char32 >> LEAD_SURROGATE_SHIFT_;
+        return (char)(LEAD_SURROGATE_OFFSET_ + highBits);
+    }
+
+    /**
+     * Returns the trail surrogate of a code point.
+     * The value is not validated; use
+     * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code>
+     * on char32 first if a validity check is required.
+     * @param char32 the input character.
+     * @return the trail surrogate if char32 is at or above
+     *         SUPPLEMENTARY_MIN_VALUE; <br>otherwise char32 cast to char
+     * @stable ICU 2.1
+     */
+    public static char getTrailSurrogate(int char32)
+    {
+        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
+            return (char)char32;
+        }
+
+        int lowBits = char32 & TRAIL_SURROGATE_MASK_;
+        return (char)(TRAIL_SURROGATE_MIN_VALUE + lowBits);
+    }
+
+    /**
+     * Convenience method corresponding to String.valueOf(char). Returns a one
+     * or two char string containing the UTF-32 value in UTF16 format.
+     * Only the range of char32 is checked here; use
+     * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code>
+     * on char32 first if a full validity check is required.
+     * @param char32 the input character.
+     * @return string value of char32 in UTF16 format
+     * @exception IllegalArgumentException thrown if char32 lies outside
+     *            CODEPOINT_MIN_VALUE to CODEPOINT_MAX_VALUE.
+     * @stable ICU 2.1
+     */
+    public static String valueOf(int char32)
+    {
+        boolean inRange = CODEPOINT_MIN_VALUE <= char32
+                          && char32 <= CODEPOINT_MAX_VALUE;
+        if (inRange) {
+            return toString(char32);
+        }
+        throw new IllegalArgumentException("Illegal codepoint");
+    }
+
+    /**
+     * Appends a single UTF-32 value to the end of a StringBuffer, as one
+     * char or as a lead and trail surrogate.
+     * Only the range of char32 is checked here; use
+     * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code>
+     * on char32 first if a full validity check is required.
+     * @param target the buffer to append to
+     * @param char32 value to append.
+     * @return the updated StringBuffer
+     * @exception IllegalArgumentException thrown when char32 does not lie
+     *            within the range of the Unicode codepoints
+     * @stable ICU 2.1
+     */
+    public static StringBuffer append(StringBuffer target, int char32)
+    {
+        // Check for irregular values
+        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
+            throw new IllegalArgumentException("Illegal codepoint: " + Integer.toHexString(char32));
+        }
+
+        // Write the UTF-16 values
+        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
+            target.append((char)char32);
+            return target;
+        }
+
+        target.append(getLeadSurrogate(char32));
+        target.append(getTrailSurrogate(char32));
+        return target;
+    }
+
+    //// for StringPrep
+    /**
+     * Shifts offset16 by shift32 code points within the subarray [start, limit).
+     * @param source char array
+     * @param start index in source of the first char of the subarray
+     * @param limit index in source just past the last char of the subarray
+     * @param offset16 UTF16 position to shift relative to start
+     * @param shift32 number of codepoints to shift; zero or less moves backwards
+     * @return new shifted offset16 relative to start
+     * @exception IndexOutOfBoundsException if the new offset16 is out of
+     *            bounds with respect to the subarray or the subarray bounds
+     *            are out of range.
+     * @stable ICU 2.1
+     */
+    public static int moveCodePointOffset(char source[], int start, int limit,
+                                          int offset16, int shift32)
+    {
+        int         size = source.length;
+        int         count; // code points still to be stepped over
+        char        ch;
+        int         result = offset16 + start; // absolute index into source
+        if (start<0 || limit<start) {
+            throw new StringIndexOutOfBoundsException(start);
+        }
+        if (limit>size) {
+            throw new StringIndexOutOfBoundsException(limit);
+        }
+        if (offset16<0 || result>limit) {
+            throw new StringIndexOutOfBoundsException(offset16);
+        }
+        if (shift32 > 0 ) { // forward
+            if (shift32 + result > size) { // early rejection against the array length
+                throw new StringIndexOutOfBoundsException(result);
+            }
+            count = shift32;
+            while (result < limit && count > 0)
+            {
+                ch = source[result];
+                if (isLeadSurrogate(ch) && (result+1 < limit) && // lead + trail inside the subarray
+                        isTrailSurrogate(source[result+1])) {
+                    result ++; // the pair counts as one code point
+                }
+                count --;
+                result ++;
+            }
+        } else { // backward, or no shift at all
+            if (result + shift32 < start) { // early rejection against the subarray start
+                throw new StringIndexOutOfBoundsException(result);
+            }
+            for (count=-shift32; count>0; count--) {
+                result--;
+                if (result<start) {
+                    break; // leaves count non-zero, which is rejected below
+                }
+                ch = source[result];
+                if (isTrailSurrogate(ch) && result>start && isLeadSurrogate(source[result-1])) {
+                    result--; // also step over the lead of the pair
+                }
+            }
+        }
+        if (count != 0)  { // the subarray ended before the whole shift was done
+            throw new StringIndexOutOfBoundsException(shift32);
+        }
+        result -= start; // back to an offset relative to start
+        return result;
+    }
+
+    // private data members -------------------------------------------------
+
+    /**
+     * Shift value for lead surrogate to form a supplementary character.
+     */
+    private static final int LEAD_SURROGATE_SHIFT_ = 10;
+
+    /**
+     * Mask to retrieve the significant value from a trail surrogate.
+     */
+    private static final int TRAIL_SURROGATE_MASK_     = 0x3FF;
+
+    /**
+     * Value that all lead surrogate starts with
+     */
+    private static final int LEAD_SURROGATE_OFFSET_ =
+        LEAD_SURROGATE_MIN_VALUE -
+        (SUPPLEMENTARY_MIN_VALUE
+         >> LEAD_SURROGATE_SHIFT_);
+
+    // private methods ------------------------------------------------------
+
+    /**
+     * <p>Returns a String holding the argument code point in UTF16
+     * format.</p>
+     * <p>The code point is not validated; the result is not guaranteed if
+     * an invalid code point is passed in.</p>
+     * <p>The result has length 1 when ch is below SUPPLEMENTARY_MIN_VALUE
+     * and length 2 otherwise.</p>
+     * @param ch code point
+     * @return string representation of the code point
+     */
+    private static String toString(int ch)
+    {
+        if (ch >= SUPPLEMENTARY_MIN_VALUE) {
+            // lead first, then trail
+            char[] pair = { getLeadSurrogate(ch), getTrailSurrogate(ch) };
+            return new String(pair);
+        }
+
+        return String.valueOf((char)ch);
+    }
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/UnicodeMatcher.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/UnicodeMatcher.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+/**
+ * <code>UnicodeMatcher</code> is the protocol for objects that can
+ * match a range of characters in a Replaceable string.
+ * @stable ICU 2.0
+ */
+public interface UnicodeMatcher {
+
+    /**
+     * ETHER stands for the character at any index i where
+     * i < contextStart || i >= contextLimit. Rules and UnicodeSets can
+     * therefore explicitly match text outside the context, which in
+     * traditional terms means anchoring at the start and/or end.
+     * @stable ICU 2.0
+     */
+    char ETHER = '\uFFFF';
+
+}
+
+//eof
--- a/jdkSrc/jdk8/sun/text/normalizer/UnicodeSet.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/UnicodeSet.java
--- a/jdkSrc/jdk8/sun/text/normalizer/UnicodeSetIterator.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/UnicodeSetIterator.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.util.Iterator;
+
+/**
+ * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
+ * iterates over either code points or code point ranges.  After all
+ * code points or ranges have been returned, it returns the
+ * multicharacter strings of the UnicodeSet, if any.
+ *
+ * <p>To iterate over code points, use a loop like this:
+ * <pre>
+ * UnicodeSetIterator it = new UnicodeSetIterator(set);
+ * while (it.next()) {
+ *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
+ *     processCodepoint(it.codepoint);
+ *   } else {
+ *     processString(it.string);
+ *   }
+ * }
+ * </pre>
+ *
+ * <p>To iterate over code point ranges, use a loop like this:
+ * <pre>
+ * UnicodeSetIterator it = new UnicodeSetIterator(set);
+ * while (it.nextRange()) {
+ *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
+ *     processCodepointRange(it.codepoint, it.codepointEnd);
+ *   } else {
+ *     processString(it.string);
+ *   }
+ * }
+ * </pre>
+ * @author M. Davis
+ * @stable ICU 2.0
+ */
+public class UnicodeSetIterator {
+
+    /**
+     * Sentinel stored in <tt>codepoint</tt> when the current element is a
+     * string; in that case read <tt>string</tt> for the iteration result.
+     * Declared <tt>final</tt> so that no caller can reassign the sentinel.
+     * @stable ICU 2.0
+     */
+    public static final int IS_STRING = -1;
+
+    /**
+     * Current code point, or the special value <tt>IS_STRING</tt>, if
+     * the iterator points to a string.  Assigned by <tt>nextRange()</tt>.
+     * @stable ICU 2.0
+     */
+    public int codepoint;
+
+    /**
+     * When iterating over ranges using <tt>nextRange()</tt>,
+     * <tt>codepointEnd</tt> contains the inclusive end of the
+     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
+     * <tt>codepoint == IS_STRING</tt>, <tt>nextRange()</tt> does not
+     * assign this field, so the value of <tt>codepointEnd</tt> must be
+     * treated as undefined.
+     * @stable ICU 2.0
+     */
+    public int codepointEnd;
+
+    /**
+     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
+     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
+     * value of <tt>string</tt> is undefined.
+     * @stable ICU 2.0
+     */
+    public String string;
+
+    /**
+     * Create an iterator over the given set, positioned at its start.
+     * @param set set to iterate over
+     * @stable ICU 2.0
+     */
+    public UnicodeSetIterator(UnicodeSet set) {
+        reset(set); // NOTE(review): overridable method invoked from the constructor
+    }
+
+    /**
+     * Advances to the next element of the set, which is either a range
+     * of code points or a string, and reports whether one was found.
+     * When <tt>codepoint == IS_STRING</tt> afterwards, the element is
+     * the string held in the <tt>string</tt> field.  Otherwise it is the
+     * range of one or more code points from <tt>codepoint</tt> to
+     * <tt>codepointEnd</tt> inclusive.
+     *
+     * <p>All code point ranges are visited first, in sorted order,
+     * followed by all strings in sorted order.  Ranges are disjoint
+     * and non-contiguous.  <tt>string</tt> is undefined unless
+     * <tt>codepoint == IS_STRING</tt>.  Do not mix calls to
+     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
+     * <tt>reset()</tt> between them.  The results of doing so are
+     * undefined.
+     *
+     * @return true if there was another element in the set and this
+     * object contains the element; false once the set is exhausted.
+     * @stable ICU 2.0
+     */
+    public boolean nextRange() {
+        if (nextElement > endElement) {
+            // Current range is used up: advance to the next one, or fall
+            // back to the strings once every range has been visited.
+            if (range >= endRange) {
+                // stringIterator == null iff no string elements remain
+                if (stringIterator == null) {
+                    return false;
+                }
+                codepoint = IS_STRING; // signal that value is actually a string
+                string = stringIterator.next();
+                if (!stringIterator.hasNext()) {
+                    stringIterator = null;
+                }
+                return true;
+            }
+            loadRange(++range);
+        }
+        codepointEnd = endElement;
+        codepoint = nextElement;
+        nextElement = endElement+1;
+        return true;
+    }
+
+    /**
+     * Sets this iterator to visit the elements of the given set and
+     * resets it to the start of that set.  The iterator is valid only
+     * so long as <tt>uset</tt> is valid.
+     * @param uset the set to iterate over.
+     * @stable ICU 2.0
+     */
+    public void reset(UnicodeSet uset) {
+        set = uset;
+        reset();
+    }
+
+    /**
+     * Rewinds this iterator to the start of the current set.
+     * @stable ICU 2.0
+     */
+    public void reset() {
+        endRange = set.getRangeCount() - 1;
+        range = 0;
+        // nextElement > endElement marks "no range loaded" for empty sets.
+        endElement = -1;
+        nextElement = 0;
+        if (endRange >= 0) {
+            loadRange(range);
+        }
+        // Keep the invariant: a null iterator means no strings remain.
+        final Iterator<String> all =
+            (set.strings == null) ? null : set.strings.iterator();
+        stringIterator = (all != null && all.hasNext()) ? all : null;
+    }
+
+    // ======================= PRIVATES ===========================
+
+    private UnicodeSet set;    // set being iterated; assigned by reset(UnicodeSet)
+    private int endRange = 0;  // index of the last range; -1 when the set has none
+    private int range = 0;     // index of the range currently loaded
+    /**
+     * @internal
+     */
+    protected int endElement;  // end of the loaded range, published as codepointEnd
+    /**
+     * @internal
+     */
+    protected int nextElement; // next value nextRange() publishes as codepoint
+    private Iterator<String> stringIterator = null;
+
+    /**
+     * Invariant: stringIterator is null when there are no (more) strings remaining
+     */
+
+    /**
+     * @internal
+     */
+    protected void loadRange(int aRange) {
+        nextElement = set.getRangeStart(aRange); // start of range number aRange
+        endElement = set.getRangeEnd(aRange);    // its end, later published as codepointEnd
+    }
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/Utility.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/Utility.java
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+public final class Utility {
+
+    /**
+     * Tests whether two <code>char[]</code> regions of length len are equal.
+     * @param len the length to compare.
+     * The start indices and start+len must be valid.
+     */
+    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
+                                            char[] target, int targetStart,
+                                            int len)
+    {
+        final int sourceEnd = sourceStart + len;
+        int t = targetStart;
+        for (int s = sourceStart; s < sourceEnd; s++, t++) {
+            if (source[s] != target[t]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Returns s with every character outside U+0020..U+007F replaced by
+     * a Unicode escape, and every backslash doubled.
+     */
+    public static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        int pos = 0;
+        while (pos < s.length()) {
+            int cp = UTF16.charAt(s, pos);
+            pos += UTF16.getCharCount(cp);
+            if (cp < ' ' || cp > 0x007F) {
+                // Outside U+0020..U+007F: four hex digits for values up to
+                // U+FFFF, eight hex digits for anything above.
+                boolean bmp = cp <= 0xFFFF;
+                hex(cp, bmp ? 4 : 8, out.append(bmp ? "\\u" : "\\U"));
+            } else if (cp == '\\') {
+                out.append("\\\\"); // That is, "\\"
+            } else {
+                out.append((char)cp);
+            }
+        }
+        return out.toString();
+    }
+
+    /* Flat (escape letter, value) pairs; must be in ASCENDING ORDER OF THE ESCAPE CODE */
+    static private final char[] UNESCAPE_MAP = {
+        /*"   0x22, 0x22 */
+        /*'   0x27, 0x27 */
+        /*?   0x3F, 0x3F */
+        /*\   0x5C, 0x5C */
+        /*a*/ 0x61, 0x07,
+        /*b*/ 0x62, 0x08,
+        /*e*/ 0x65, 0x1b,
+        /*f*/ 0x66, 0x0c,
+        /*n*/ 0x6E, 0x0a,
+        /*r*/ 0x72, 0x0d,
+        /*t*/ 0x74, 0x09,
+        /*v*/ 0x76, 0x0b
+    };
+
+    /**
+     * Convert an escape to a 32-bit code point value, paralleling the
+     * icu4c unescapeAt() function.
+     * @param s the string containing the escape sequence
+     * @param offset16 holds the offset of the character <em>after</em> the
+     * backslash; moved past the escape on success, unchanged on error.
+     * @return character value from 0 to 10FFFF, or -1 on error.
+     */
+    public static int unescapeAt(String s, int[] offset16) {
+        int c;
+        int result = 0;
+        int n = 0;
+        int minDig = 0;
+        int maxDig = 0;
+        int bitsPerDigit = 4;
+        int dig;
+        int i;
+        boolean braces = false;
+
+        /* Check that offset is in range */
+        int offset = offset16[0];
+        int length = s.length();
+        if (offset < 0 || offset >= length) {
+            return -1;
+        }
+
+        /* Fetch first UChar after '\\' */
+        c = UTF16.charAt(s, offset);
+        offset += UTF16.getCharCount(c);
+
+        /* Convert hexadecimal and octal escapes */
+        switch (c) {
+        case 'u':
+            minDig = maxDig = 4;
+            break;
+        case 'U':
+            minDig = maxDig = 8;
+            break;
+        case 'x':
+            minDig = 1;
+            if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
+                ++offset;
+                braces = true;
+                maxDig = 8;
+            } else {
+                maxDig = 2;
+            }
+            break;
+        default:
+            dig = UCharacter.digit(c, 8);
+            if (dig >= 0) {
+                minDig = 1;
+                maxDig = 3;
+                n = 1; /* Already have first octal digit */
+                bitsPerDigit = 3;
+                result = dig;
+            }
+            break;
+        }
+        if (minDig != 0) {
+            while (offset < length && n < maxDig) {
+                c = UTF16.charAt(s, offset);
+                dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
+                if (dig < 0) {
+                    break;
+                }
+                result = (result << bitsPerDigit) | dig;
+                offset += UTF16.getCharCount(c);
+                ++n;
+            }
+            if (n < minDig) {
+                return -1;
+            }
+            if (braces) {
+                if (c != 0x7D /*}*/) {
+                    return -1;
+                }
+                ++offset;
+            }
+            if (result < 0 || result >= 0x110000) {
+                return -1;
+            }
+            // If the escape yields a BMP lead surrogate and a trail
+            // surrogate follows (escaped or literal), join them into a
+            // supplementary.  The <= 0xFFFF guards keep the (char) casts
+            // from truncating supplementary values into surrogates.
+            if (offset < length && result <= 0xFFFF &&
+                UTF16.isLeadSurrogate((char) result)) {
+                int ahead = offset+1;
+                c = s.charAt(offset); // [sic] get 16-bit code unit
+                if (c == '\\' && ahead < length) {
+                    int o[] = new int[] { ahead };
+                    c = unescapeAt(s, o);
+                    ahead = o[0];
+                }
+                if (c <= 0xFFFF && UTF16.isTrailSurrogate((char) c)) {
+                    offset = ahead;
+                    result = UCharacterProperty.getRawSupplementary(
+                                  (char) result, (char) c);
+                }
+            }
+            offset16[0] = offset;
+            return result;
+        }
+
+        /* Convert C-style escapes in table */
+        for (i=0; i<UNESCAPE_MAP.length; i+=2) {
+            if (c == UNESCAPE_MAP[i]) {
+                offset16[0] = offset;
+                return UNESCAPE_MAP[i+1];
+            } else if (c < UNESCAPE_MAP[i]) {
+                break;
+            }
+        }
+
+        /* Map \cX to control-X: X & 0x1F */
+        if (c == 'c' && offset < length) {
+            c = UTF16.charAt(s, offset);
+            offset16[0] = offset + UTF16.getCharCount(c);
+            return 0x1F & c;
+        }
+
+        /* If no special forms are recognized, then consider
+         * the backslash to generically escape the next character. */
+        offset16[0] = offset;
+        return c;
+    }
+
+    /**
+     * Convert an integer to size width (minimum) hex uppercase digits.
+     * E.g., hex('A', 4, str) => "0041".
+     * Append the output to the given StringBuffer.
+     * If the integer requires more than width digits, more are appended.
+     */
+    public static StringBuffer hex(int ch, int width, StringBuffer output) {
+        return appendNumber(output, ch, 16, width);
+    }
+
+    /**
+     * Convert an integer to size width (minimum) hex uppercase digits.
+     * E.g., hex('A', 4) => "0041".  If the integer requires more
+     * than width digits, more will be used.
+     */
+    public static String hex(int ch, int width) {
+        StringBuffer buf = new StringBuffer();
+        return appendNumber(buf, ch, 16, width).toString();
+    }
+
+    /**
+     * Returns the index of the first non-white-space character at or after
+     * pos; if there is none, returns the larger of pos and str.length().
+     */
+    public static int skipWhitespace(String str, int pos) {
+        int index = pos;
+        for (int end = str.length(); index < end; ) {
+            int cp = UTF16.charAt(str, index);
+            if (!UCharacterProperty.isRuleWhiteSpace(cp)) {
+                return index;
+            }
+            index += UTF16.getCharCount(cp);
+        }
+        return index;
+    }
+
+    static final char DIGITS[] = { // digit characters for values 0..35, uppercase
+        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+        'U', 'V', 'W', 'X', 'Y', 'Z'
+    };
+
+    /**
+     * Append the digits of a non-negative number to the given
+     * <code>StringBuffer</code> in the given radix. This is
+     * done recursively since it is easiest to generate the low-
+     * order digit first, but it must be appended last.
+     *
+     * @param result is the <code>StringBuffer</code> to append to
+     * @param n is the non-negative number (long so that 2^31 fits)
+     * @param radix is the radix, from 2 to 36 inclusive
+     * @param minDigits is the minimum number of digits to append.
+     */
+    private static void recursiveAppendNumber(StringBuffer result, long n,
+                                                int radix, int minDigits)
+    {
+        int digit = (int) (n % radix);
+
+        if (n >= radix || minDigits > 1) {
+            recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
+        }
+
+        result.append(DIGITS[digit]);
+    }
+
+    /**
+     * Append a number to the given StringBuffer in the given radix, using
+     * digits '0'-'9' and, for radices 11 through 36, letters 'A'-'Z'.
+     * @param result the digits of the number are appended here
+     * @param n the number to be converted to digits; may be negative
+     * (including Integer.MIN_VALUE), in which case a '-' is prepended.
+     * @param radix a radix from 2 to 36 inclusive.
+     * @param minDigits the minimum number of digits, not including
+     * any '-', to produce.  Values less than 2 have no effect.  One
+     * digit is always emitted regardless of this parameter.
+     * @return a reference to result
+     * @throws IllegalArgumentException if radix is outside 2..36
+     */
+    public static StringBuffer appendNumber(StringBuffer result, int n,
+                                             int radix, int minDigits)
+        throws IllegalArgumentException
+    {
+        if (radix < 2 || radix > 36) {
+            throw new IllegalArgumentException("Illegal radix " + radix);
+        }
+        // Negate as a long: -Integer.MIN_VALUE overflows in int arithmetic
+        // and would index DIGITS with a negative value (or print "-0").
+        long abs = n;
+
+        if (n < 0) {
+            abs = -abs;
+            result.append("-");
+        }
+
+        recursiveAppendNumber(result, abs, radix, minDigits);
+
+        return result;
+    }
+
+    /**
+     * Return true if the character is NOT printable ASCII, i.e. lies
+     * outside U+0020..U+007E.  Tab and newline count as unprintable.
+     */
+    public static boolean isUnprintable(int c) {
+        return c < 0x20 || c > 0x7E;
+    }
+
+    /**
+     * Appends an escape for c when it is not printable ASCII, using
+     * <backslash>uxxxx notation for U+0000 to U+FFFF and
+     * <backslash>Uxxxxxxxx for U+10000 and above.  Printable ASCII is
+     * left alone: nothing is appended and FALSE is returned.
+     * @return TRUE if an escape was appended, FALSE otherwise.
+     */
+    public static boolean escapeUnprintable(StringBuffer result, int c) {
+        if (!isUnprintable(c)) {
+            return false;
+        }
+        result.append('\\');
+        // Pick the marker letter and the shift of the most significant
+        // hex digit: four digits for U+0000..U+FFFF, eight otherwise.
+        int shift;
+        if ((c & ~0xFFFF) == 0) {
+            result.append('u');
+            shift = 12;
+        } else {
+            result.append('U');
+            shift = 28;
+        }
+        for (; shift >= 0; shift -= 4) {
+            result.append(DIGITS[0xF&(c>>shift)]);
+        }
+        return true;
+    }
+
+    /**
+     * Copies characters like StringBuffer.getChars, version 1.3.
+     * JDK 1.2 implements StringBuffer.getChars differently, so this method
+     * exists to provide consistent results.
+     * To be removed after JDK 1.2 ceased to be the reference platform.
+     * @param src source string buffer
+     * @param srcBegin offset to the start of the src to retrieve from
+     * @param srcEnd offset to the end of the src to retrieve from
+     * @param dst char array to store the retrieved chars
+     * @param dstBegin offset to the start of the destination char array to
+     *                 store the retrieved chars
+     */
+    public static void getChars(StringBuffer src, int srcBegin, int srcEnd,
+                                char dst[], int dstBegin)
+    {
+        // Empty range: return without calling StringBuffer.getChars.
+        if (srcBegin != srcEnd) {
+            src.getChars(srcBegin, srcEnd, dst, dstBegin);
+        }
+    }
+
+}
--- a/jdkSrc/jdk8/sun/text/normalizer/VersionInfo.java
+++ b/jdkSrc/jdk8/sun/text/normalizer/VersionInfo.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package sun.text.normalizer;
+
+import java.util.HashMap;
+
+/**
+ * Class to store version numbers of the form major.minor.milli.micro.
+ * @author synwee
+ * @stable ICU 2.6
+ */
+public final class VersionInfo
+{
+
+    // public methods ------------------------------------------------------
+
+    /**
+     * Returns an instance of VersionInfo with the argument version.
+     * @param version version String in the format of "major.minor.milli.micro"
+     *                or "major.minor.milli" or "major.minor" or "major",
+     *                where major, minor, milli, micro are non-negative numbers
+     *                <= 255. If the trailing version numbers are
+     *                not specified they are taken as 0s. E.g. Version "3.1" is
+     *                equivalent to "3.1.0.0".
+     * @return an instance of VersionInfo with the argument version.
+     * @throws IllegalArgumentException when the argument version is not in
+     *                the right format or a number exceeds 255
+     * @stable ICU 2.6
+     */
+    public static VersionInfo getInstance(String version)
+    {
+        int length  = version.length();
+        int array[] = {0, 0, 0, 0};
+        int count   = 0;
+        int index   = 0;
+
+        while (count < 4 && index < length) {
+            char c = version.charAt(index);
+            if (c == '.') {
+                count ++;
+            }
+            else {
+                int digit = c - '0';
+                if (digit < 0 || digit > 9) {
+                    throw new IllegalArgumentException(INVALID_VERSION_NUMBER_);
+                }
+                array[count] = array[count] * 10 + digit;
+                // Reject as soon as the field leaves 0..255; checking only
+                // after the loop lets long digit runs wrap around int.
+                if (array[count] > 255) {
+                    throw new IllegalArgumentException(INVALID_VERSION_NUMBER_);
+                }
+            }
+            index ++;
+        }
+        // Characters left over after the fourth '.' mean too many fields.
+        if (index != length) {
+            throw new IllegalArgumentException(
+                                               "Invalid version number: String '" + version + "' exceeds version format");
+        }
+
+        return getInstance(array[0], array[1], array[2], array[3]);
+    }
+
+    /**
+     * Returns the shared VersionInfo instance for the given components,
+     * creating and caching it on first use.
+     * @param major major version, non-negative number <= 255.
+     * @param minor minor version, non-negative number <= 255.
+     * @param milli milli version, non-negative number <= 255.
+     * @param micro micro version, non-negative number <= 255.
+     * @throws IllegalArgumentException if any argument is negative or > 255
+     * @stable ICU 2.6
+     */
+    public static VersionInfo getInstance(int major, int minor, int milli,
+                                          int micro)
+    {
+        // Each component has to fit in one unsigned byte of the packed int.
+        for (int part : new int[] {major, minor, milli, micro}) {
+            if (part < 0 || part > 255) {
+                throw new IllegalArgumentException(INVALID_VERSION_NUMBER_);
+            }
+        }
+        Integer key = Integer.valueOf(getInt(major, minor, milli, micro));
+        VersionInfo cached = (VersionInfo)MAP_.get(key);
+        if (cached != null) {
+            return cached;
+        }
+        VersionInfo created = new VersionInfo(key.intValue());
+        MAP_.put(key, created);
+        return created;
+    }
+
+    /**
+     * Compares other with this VersionInfo.  The packed versions are
+     * compared as unsigned ints: a major number >= 128 sets the sign
+     * bit, so a signed comparison or subtraction would misorder them.
+     * @param other VersionInfo to be compared
+     * @return 0 if the argument has version information equal to this
+     *           object.  Less than 0 if the argument has version
+     *           information greater than this object.  Greater than 0
+     *           if the argument has version information less than this.
+     * @stable ICU 2.6
+     */
+    public int compareTo(VersionInfo other)
+    {
+        return Integer.compareUnsigned(m_version_, other.m_version_);
+    }
+
+    // private data members ----------------------------------------------
+
+    /**
+     * Version number stored as a byte for each of the major, minor, milli and
+     * micro numbers in the 32 bit int.
+     * Most significant for the major and the least significant contains the
+     * micro numbers.
+     */
+    private int m_version_;
+    /**
+     * Instance cache keyed by packed version. NOTE(review): unsynchronized.
+     */
+    private static final HashMap<Integer, Object> MAP_ = new HashMap<>();
+    /**
+     * Message of the IllegalArgumentException thrown for invalid numbers
+     */
+    private static final String INVALID_VERSION_NUMBER_ =
+        "Invalid version number: Version number may be negative or greater than 255";
+
+    // private constructor -----------------------------------------------
+
+    /**
+     * Constructor with int, reached only through the getInstance factories.
+     * @param compactversion a 32 bit int with each byte representing a number
+     */
+    private VersionInfo(int compactversion)
+    {
+        m_version_ = compactversion;
+    }
+
+    /**
+     * Packs the version numbers into one int: major in the top byte, micro lowest
+     * @param major non-negative version number
+     * @param minor non-negative version number
+     * @param milli non-negative version number
+     * @param micro non-negative version number
+     */
+    private static int getInt(int major, int minor, int milli, int micro)
+    {
+        return (major << 24) | (minor << 16) | (milli << 8) | micro;
+    }
+}