some changes
This commit is contained in:
parent
6cb3116b81
commit
98e7e333bb
Binary file not shown.
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
/**
|
||||
* Defines common decoding methods for byte array decoders.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public interface BinaryDecoder extends Decoder {
|
||||
|
||||
/**
|
||||
* Decodes a byte array and returns the results as a byte array.
|
||||
*
|
||||
* @param source
|
||||
* A byte array which has been encoded with the appropriate encoder
|
||||
* @return a byte array that contains decoded content
|
||||
* @throws DecoderException
|
||||
* A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
|
||||
*/
|
||||
byte[] decode(byte[] source) throws DecoderException;
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
/**
|
||||
* Defines common encoding methods for byte array encoders.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public interface BinaryEncoder extends Encoder {
|
||||
|
||||
/**
|
||||
* Encodes a byte array and return the encoded data as a byte array.
|
||||
*
|
||||
* @param source
|
||||
* Data to be encoded
|
||||
* @return A byte array containing the encoded data
|
||||
* @throws EncoderException
|
||||
* thrown if the Encoder encounters a failure condition during the encoding process.
|
||||
*/
|
||||
byte[] encode(byte[] source) throws EncoderException;
|
||||
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
/**
 * Character encoding names required of every implementation of the Java platform.
 *
 * From the Java documentation <a
 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
 * release documentation for your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <ul>
 * <li><code>US-ASCII</code><br>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</li>
 * </ul>
 *
 * This perhaps would best belong in the [lang] project. Even if a similar interface is defined in [lang], it is not
 * foreseen that [codec] would be made to depend on [lang].
 *
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 1.4
 * @version $Id$
 */
public class CharEncoding {

    /**
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String ISO_8859_1 = "ISO-8859-1";

    /**
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String US_ASCII = "US-ASCII";

    /**
     * Sixteen-bit Unicode Transformation Format, the byte order specified by a mandatory initial byte-order mark
     * (either order accepted on input, big-endian used on output).
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16 = "UTF-16";

    /**
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16BE = "UTF-16BE";

    /**
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16LE = "UTF-16LE";

    /**
     * Eight-bit Unicode Transformation Format.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_8 = "UTF-8";
}
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.codec;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
* Charsets required of every implementation of the Java platform.
|
||||
*
|
||||
* From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
|
||||
* charsets</a>:
|
||||
* <p>
|
||||
* <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
|
||||
* release documentation for your implementation to see if any other encodings are supported. Consult the release
|
||||
* documentation for your implementation to see if any other encodings are supported. </cite>
|
||||
* </p>
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>US-ASCII</code><br>
|
||||
* Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
|
||||
* <li><code>ISO-8859-1</code><br>
|
||||
* ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
|
||||
* <li><code>UTF-8</code><br>
|
||||
* Eight-bit Unicode Transformation Format.</li>
|
||||
* <li><code>UTF-16BE</code><br>
|
||||
* Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
|
||||
* <li><code>UTF-16LE</code><br>
|
||||
* Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
|
||||
* <li><code>UTF-16</code><br>
|
||||
* Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
|
||||
* accepted on input, big-endian used on output.)</li>
|
||||
* </ul>
|
||||
*
|
||||
* This perhaps would best belong in the Commons Lang project. Even if a similar class is defined in Commons Lang, it is
|
||||
* not foreseen that Commons Codec would be made to depend on Commons Lang.
|
||||
*
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
* </p>
|
||||
*
|
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @since 1.7
|
||||
* @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $
|
||||
*/
|
||||
public class Charsets {
|
||||
|
||||
//
|
||||
// This class should only contain Charset instances for required encodings. This guarantees that it will load
|
||||
// correctly and without delay on all Java platforms.
|
||||
//
|
||||
|
||||
/**
|
||||
* Returns the given Charset or the default Charset if the given Charset is null.
|
||||
*
|
||||
* @param charset
|
||||
* A charset or null.
|
||||
* @return the given Charset or the default Charset if the given Charset is null
|
||||
*/
|
||||
public static Charset toCharset(final Charset charset) {
|
||||
return charset == null ? Charset.defaultCharset() : charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Charset for the named charset. If the name is null, return the default Charset.
|
||||
*
|
||||
* @param charset
|
||||
* The name of the requested charset, may be null.
|
||||
* @return a Charset for the named charset
|
||||
* @throws java.nio.charset.UnsupportedCharsetException
|
||||
* If the named charset is unavailable
|
||||
*/
|
||||
public static Charset toCharset(final String charset) {
|
||||
return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is required to support this character encoding.
|
||||
*
|
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets.ISO_8859_1} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1);
|
||||
|
||||
/**
|
||||
* Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is required to support this character encoding.
|
||||
*
|
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets.US_ASCII} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII);
|
||||
|
||||
/**
|
||||
* Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark
|
||||
* (either order accepted on input, big-endian used on output)
|
||||
* <p>
|
||||
* Every implementation of the Java platform is required to support this character encoding.
|
||||
*
|
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets.UTF_16} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16);
|
||||
|
||||
/**
|
||||
* Sixteen-bit Unicode Transformation Format, big-endian byte order.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is required to support this character encoding.
|
||||
*
|
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets.UTF_16BE} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE);
|
||||
|
||||
/**
|
||||
* Sixteen-bit Unicode Transformation Format, little-endian byte order.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is required to support this character encoding.
|
||||
*
|
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets.UTF_16LE} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE);
|
||||
|
||||
/**
|
||||
* Eight-bit Unicode Transformation Format.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is required to support this character encoding.
|
||||
*
|
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets.UTF_8}
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8);
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
/**
|
||||
* Provides the highest level of abstraction for Decoders.
|
||||
* <p>
|
||||
* This is the sister interface of {@link Encoder}. All Decoders implement this common generic interface.
|
||||
* Allows a user to pass a generic Object to any Decoder implementation in the codec package.
|
||||
* <p>
|
||||
* One of the two interfaces at the center of the codec package.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public interface Decoder {
|
||||
|
||||
/**
|
||||
* Decodes an "encoded" Object and returns a "decoded" Object. Note that the implementation of this interface will
|
||||
* try to cast the Object parameter to the specific type expected by a particular Decoder implementation. If a
|
||||
* {@link ClassCastException} occurs this decode method will throw a DecoderException.
|
||||
*
|
||||
* @param source
|
||||
* the object to decode
|
||||
* @return a 'decoded" object
|
||||
* @throws DecoderException
|
||||
* a decoder exception can be thrown for any number of reasons. Some good candidates are that the
|
||||
* parameter passed to this method is null, a param cannot be cast to the appropriate type for a
|
||||
* specific encoder.
|
||||
*/
|
||||
Object decode(Object source) throws DecoderException;
|
||||
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
/**
 * Signals a failure condition during the decoding process. A {@link Decoder} throws this exception when it hits a
 * decoding-specific problem such as invalid data or characters outside of the expected range.
 *
 * @version $Id$
 */
public class DecoderException extends Exception {

    /**
     * Declares the Serial Version Uid.
     *
     * @see <a href="http://c2.com/cgi/wiki?AlwaysDeclareSerialVersionUid">Always Declare Serial Version Uid</a>
     */
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception that wraps the given cause. The detail message becomes
     * {@code (cause == null ? null : cause.toString())}, which typically carries the class and detail message of
     * {@code cause}. Handy for exceptions that are little more than wrappers around other throwables.
     *
     * @param cause
     *            The cause, retrievable later through {@link #getCause()}. {@code null} is permitted and means the
     *            cause is nonexistent or unknown.
     * @since 1.4
     */
    public DecoderException(final Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception carrying both a detail message and a cause.
     * <p>
     * The detail message of {@code cause} is <em>not</em> folded into this exception's own detail message.
     * </p>
     *
     * @param message
     *            The detail message, retrievable later through {@link #getMessage()}.
     * @param cause
     *            The cause, retrievable later through {@link #getCause()}. {@code null} is permitted and means the
     *            cause is nonexistent or unknown.
     * @since 1.4
     */
    public DecoderException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception carrying only a detail message. The cause is left uninitialized and may be supplied later
     * through {@link #initCause}.
     *
     * @param message
     *            The detail message, retrievable later through {@link #getMessage()}.
     */
    public DecoderException(final String message) {
        super(message);
    }

    /**
     * Creates an exception whose detail message is {@code null}. The cause is left uninitialized and may be supplied
     * later through {@link #initCause}.
     *
     * @since 1.4
     */
    public DecoderException() {
        // The no-argument Exception constructor is invoked implicitly.
    }
}
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
/**
|
||||
* Provides the highest level of abstraction for Encoders.
|
||||
* <p>
|
||||
* This is the sister interface of {@link Decoder}. Every implementation of Encoder provides this
|
||||
* common generic interface which allows a user to pass a generic Object to any Encoder implementation
|
||||
* in the codec package.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public interface Encoder {
|
||||
|
||||
/**
|
||||
* Encodes an "Object" and returns the encoded content as an Object. The Objects here may just be
|
||||
* <code>byte[]</code> or <code>String</code>s depending on the implementation used.
|
||||
*
|
||||
* @param source
|
||||
* An object to encode
|
||||
* @return An "encoded" Object
|
||||
* @throws EncoderException
|
||||
* An encoder exception is thrown if the encoder experiences a failure condition during the encoding
|
||||
* process.
|
||||
*/
|
||||
Object encode(Object source) throws EncoderException;
|
||||
}
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
/**
 * Signals a failure condition during the encoding process. An {@link Encoder} throws this exception when it hits an
 * encoding-specific problem such as invalid data, inability to calculate a checksum, or characters outside of the
 * expected range.
 *
 * @version $Id$
 */
public class EncoderException extends Exception {

    /**
     * Declares the Serial Version Uid.
     *
     * @see <a href="http://c2.com/cgi/wiki?AlwaysDeclareSerialVersionUid">Always Declare Serial Version Uid</a>
     */
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception that wraps the given cause. The detail message becomes
     * {@code (cause == null ? null : cause.toString())}, which typically carries the class and detail message of
     * {@code cause}. Handy for exceptions that are little more than wrappers around other throwables.
     *
     * @param cause
     *            The cause, retrievable later through {@link #getCause()}. {@code null} is permitted and means the
     *            cause is nonexistent or unknown.
     * @since 1.4
     */
    public EncoderException(final Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception carrying both a detail message and a cause.
     * <p>
     * The detail message of {@code cause} is <em>not</em> folded into this exception's own detail message.
     * </p>
     *
     * @param message
     *            The detail message, retrievable later through {@link #getMessage()}.
     * @param cause
     *            The cause, retrievable later through {@link #getCause()}. {@code null} is permitted and means the
     *            cause is nonexistent or unknown.
     * @since 1.4
     */
    public EncoderException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception carrying only a detail message. The cause is left uninitialized and may be supplied later
     * through {@link #initCause}.
     *
     * @param message
     *            a useful message relating to the encoder-specific error.
     */
    public EncoderException(final String message) {
        super(message);
    }

    /**
     * Creates an exception whose detail message is {@code null}. The cause is left uninitialized and may be supplied
     * later through {@link #initCause}.
     *
     * @since 1.4
     */
    public EncoderException() {
        // The no-argument Exception constructor is invoked implicitly.
    }
}
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
/**
|
||||
* Defines common decoding methods for String decoders.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public interface StringDecoder extends Decoder {
|
||||
|
||||
/**
|
||||
* Decodes a String and returns a String.
|
||||
*
|
||||
* @param source
|
||||
* the String to decode
|
||||
* @return the encoded String
|
||||
* @throws DecoderException
|
||||
* thrown if there is an error condition during the Encoding process.
|
||||
*/
|
||||
String decode(String source) throws DecoderException;
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
/**
|
||||
* Defines common encoding methods for String encoders.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public interface StringEncoder extends Encoder {
|
||||
|
||||
/**
|
||||
* Encodes a String and returns a String.
|
||||
*
|
||||
* @param source
|
||||
* the String to encode
|
||||
* @return the encoded String
|
||||
* @throws EncoderException
|
||||
* thrown if there is an error condition during the encoding process.
|
||||
*/
|
||||
String encode(String source) throws EncoderException;
|
||||
}
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
/**
|
||||
* Compares Strings using a {@link StringEncoder}. This comparator is used to sort Strings by an encoding scheme such as
|
||||
* Soundex, Metaphone, etc. This class can come in handy if one needs to sort Strings by an encoded form of a name such
|
||||
* as Soundex.
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
@SuppressWarnings("rawtypes")
|
||||
// TODO ought to implement Comparator<String> but that's not possible whilst maintaining binary compatibility.
|
||||
public class StringEncoderComparator implements Comparator {
|
||||
|
||||
/**
|
||||
* Internal encoder instance.
|
||||
*/
|
||||
private final StringEncoder stringEncoder;
|
||||
|
||||
/**
|
||||
* Constructs a new instance.
|
||||
*
|
||||
* @deprecated Creating an instance without a {@link StringEncoder} leads to a {@link NullPointerException}. Will be
|
||||
* removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
public StringEncoderComparator() {
|
||||
this.stringEncoder = null; // Trying to use this will cause things to break
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new instance with the given algorithm.
|
||||
*
|
||||
* @param stringEncoder
|
||||
* the StringEncoder used for comparisons.
|
||||
*/
|
||||
public StringEncoderComparator(final StringEncoder stringEncoder) {
|
||||
this.stringEncoder = stringEncoder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two strings based not on the strings themselves, but on an encoding of the two strings using the
|
||||
* StringEncoder this Comparator was created with.
|
||||
*
|
||||
* If an {@link EncoderException} is encountered, return <code>0</code>.
|
||||
*
|
||||
* @param o1
|
||||
* the object to compare
|
||||
* @param o2
|
||||
* the object to compare to
|
||||
* @return the Comparable.compareTo() return code or 0 if an encoding error was caught.
|
||||
* @see Comparable
|
||||
*/
|
||||
@Override
|
||||
public int compare(final Object o1, final Object o2) {
|
||||
|
||||
int compareCode = 0;
|
||||
|
||||
try {
|
||||
@SuppressWarnings("unchecked") // May fail with CCE if encode returns something that is not Comparable
|
||||
// However this was always the case.
|
||||
final Comparable<Comparable<?>> s1 = (Comparable<Comparable<?>>) this.stringEncoder.encode(o1);
|
||||
final Comparable<?> s2 = (Comparable<?>) this.stringEncoder.encode(o2);
|
||||
compareCode = s1.compareTo(s2);
|
||||
} catch (final EncoderException ee) {
|
||||
compareCode = 0;
|
||||
}
|
||||
return compareCode;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,539 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
/**
|
||||
* Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
|
||||
*
|
||||
* <p>
|
||||
* The class can be parameterized in the following manner with various constructors:
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
|
||||
* <li>Line length: Default 0 (no chunking). Line lengths that aren't multiples of 8 will still essentially end up
|
||||
* being multiples of 8 in the encoded data.</li>
|
||||
* <li>Line separator: Default is CRLF ("\r\n")</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* This class operates directly on byte streams, and not character streams.
|
||||
* </p>
|
||||
* <p>
|
||||
* This class is thread-safe.
|
||||
* </p>
|
||||
*
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
|
||||
*
|
||||
* @since 1.5
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Base32 extends BaseNCodec {
|
||||
|
||||
/**
|
||||
* BASE32 characters are 5 bits in length.
|
||||
* They are formed by taking a block of five octets to form a 40-bit string,
|
||||
* which is converted into eight BASE32 characters.
|
||||
*/
|
||||
private static final int BITS_PER_ENCODED_BYTE = 5;
|
||||
private static final int BYTES_PER_ENCODED_BLOCK = 8;
|
||||
private static final int BYTES_PER_UNENCODED_BLOCK = 5;
|
||||
|
||||
/**
|
||||
* Chunk separator per RFC 2045 section 2.1.
|
||||
*
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
|
||||
*/
|
||||
private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};
|
||||
|
||||
/**
|
||||
* This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified
|
||||
* in Table 3 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the Base32
|
||||
* alphabet but fall within the bounds of the array are translated to -1.
|
||||
*/
|
||||
private static final byte[] DECODE_TABLE = {
|
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
|
||||
-1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a P-Z
|
||||
};
|
||||
|
||||
/**
|
||||
* This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet"
|
||||
* equivalents as specified in Table 3 of RFC 4648.
|
||||
*/
|
||||
private static final byte[] ENCODE_TABLE = {
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'2', '3', '4', '5', '6', '7',
|
||||
};
|
||||
|
||||
/**
|
||||
* This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as
|
||||
* specified in Table 4 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the
|
||||
* Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
|
||||
*/
|
||||
private static final byte[] HEX_DECODE_TABLE = {
|
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
|
||||
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
|
||||
25, 26, 27, 28, 29, 30, 31, // 50-57 P-V
|
||||
};
|
||||
|
||||
/**
|
||||
* This array is a lookup table that translates 5-bit positive integer index values into their
|
||||
* "Base32 Hex Alphabet" equivalents as specified in Table 4 of RFC 4648.
|
||||
*/
|
||||
private static final byte[] HEX_ENCODE_TABLE = {
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
|
||||
};
|
||||
|
||||
/** Mask used to extract 5 bits, used when encoding Base32 bytes */
|
||||
private static final int MASK_5BITS = 0x1f;
|
||||
|
||||
// The static final fields above are used for the original static byte[] methods on Base32.
|
||||
// The private member fields below are used with the new streaming approach, which requires
|
||||
// some state be preserved between calls of encode() and decode().
|
||||
|
||||
/**
|
||||
* Place holder for the bytes we're dealing with for our based logic.
|
||||
* Bitwise operations store and extract the encoding or decoding from this variable.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
|
||||
* <code>decodeSize = {@link #BYTES_PER_ENCODED_BLOCK} - 1 + lineSeparator.length;</code>
|
||||
*/
|
||||
private final int decodeSize;
|
||||
|
||||
/**
|
||||
* Decode table to use.
|
||||
*/
|
||||
private final byte[] decodeTable;
|
||||
|
||||
/**
|
||||
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
|
||||
* <code>encodeSize = {@link #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;</code>
|
||||
*/
|
||||
private final int encodeSize;
|
||||
|
||||
/**
|
||||
* Encode table to use.
|
||||
*/
|
||||
private final byte[] encodeTable;
|
||||
|
||||
/**
|
||||
* Line separator for encoding. Not used when decoding. Only used if lineLength > 0.
|
||||
*/
|
||||
private final byte[] lineSeparator;
|
||||
|
||||
/**
|
||||
* Creates a Base32 codec used for decoding and encoding.
|
||||
* <p>
|
||||
* When encoding the line length is 0 (no chunking).
|
||||
* </p>
|
||||
*
|
||||
*/
|
||||
public Base32() {
|
||||
this(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32 codec used for decoding and encoding.
|
||||
* <p>
|
||||
* When encoding the line length is 0 (no chunking).
|
||||
* </p>
|
||||
* @param pad byte used as padding byte.
|
||||
*/
|
||||
public Base32(final byte pad) {
|
||||
this(false, pad);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32 codec used for decoding and encoding.
|
||||
* <p>
|
||||
* When encoding the line length is 0 (no chunking).
|
||||
* </p>
|
||||
* @param useHex if {@code true} then use Base32 Hex alphabet
|
||||
*/
|
||||
public Base32(final boolean useHex) {
|
||||
this(0, null, useHex, PAD_DEFAULT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32 codec used for decoding and encoding.
|
||||
* <p>
|
||||
* When encoding the line length is 0 (no chunking).
|
||||
* </p>
|
||||
* @param useHex if {@code true} then use Base32 Hex alphabet
|
||||
* @param pad byte used as padding byte.
|
||||
*/
|
||||
public Base32(final boolean useHex, final byte pad) {
|
||||
this(0, null, useHex, pad);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32 codec used for decoding and encoding.
|
||||
* <p>
|
||||
* When encoding the line length is given in the constructor, the line separator is CRLF.
|
||||
* </p>
|
||||
*
|
||||
* @param lineLength
|
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
|
||||
* 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
|
||||
* decoding.
|
||||
*/
|
||||
public Base32(final int lineLength) {
|
||||
this(lineLength, CHUNK_SEPARATOR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32 codec used for decoding and encoding.
|
||||
* <p>
|
||||
* When encoding the line length and line separator are given in the constructor.
|
||||
* </p>
|
||||
* <p>
|
||||
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
|
||||
* </p>
|
||||
*
|
||||
* @param lineLength
|
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
|
||||
* 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
|
||||
* decoding.
|
||||
* @param lineSeparator
|
||||
* Each line of encoded data will end with this sequence of bytes.
|
||||
* @throws IllegalArgumentException
|
||||
* The provided lineSeparator included some Base32 characters. That's not going to work!
|
||||
*/
|
||||
public Base32(final int lineLength, final byte[] lineSeparator) {
|
||||
this(lineLength, lineSeparator, false, PAD_DEFAULT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32 / Base32 Hex codec used for decoding and encoding.
|
||||
* <p>
|
||||
* When encoding the line length and line separator are given in the constructor.
|
||||
* </p>
|
||||
* <p>
|
||||
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
|
||||
* </p>
|
||||
*
|
||||
* @param lineLength
|
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
|
||||
* 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
|
||||
* decoding.
|
||||
* @param lineSeparator
|
||||
* Each line of encoded data will end with this sequence of bytes.
|
||||
* @param useHex
|
||||
* if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
|
||||
* @throws IllegalArgumentException
|
||||
* The provided lineSeparator included some Base32 characters. That's not going to work! Or the
|
||||
* lineLength > 0 and lineSeparator is null.
|
||||
*/
|
||||
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
|
||||
this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32 / Base32 Hex codec used for decoding and encoding.
|
||||
* <p>
|
||||
* When encoding the line length and line separator are given in the constructor.
|
||||
* </p>
|
||||
* <p>
|
||||
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
|
||||
* </p>
|
||||
*
|
||||
* @param lineLength
|
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
|
||||
* 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
|
||||
* decoding.
|
||||
* @param lineSeparator
|
||||
* Each line of encoded data will end with this sequence of bytes.
|
||||
* @param useHex
|
||||
* if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
|
||||
* @param pad byte used as padding byte.
|
||||
* @throws IllegalArgumentException
|
||||
* The provided lineSeparator included some Base32 characters. That's not going to work! Or the
|
||||
* lineLength > 0 and lineSeparator is null.
|
||||
*/
|
||||
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte pad) {
|
||||
super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength,
|
||||
lineSeparator == null ? 0 : lineSeparator.length, pad);
|
||||
if (useHex) {
|
||||
this.encodeTable = HEX_ENCODE_TABLE;
|
||||
this.decodeTable = HEX_DECODE_TABLE;
|
||||
} else {
|
||||
this.encodeTable = ENCODE_TABLE;
|
||||
this.decodeTable = DECODE_TABLE;
|
||||
}
|
||||
if (lineLength > 0) {
|
||||
if (lineSeparator == null) {
|
||||
throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null");
|
||||
}
|
||||
// Must be done after initializing the tables
|
||||
if (containsAlphabetOrPad(lineSeparator)) {
|
||||
final String sep = StringUtils.newStringUtf8(lineSeparator);
|
||||
throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
|
||||
}
|
||||
this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
|
||||
this.lineSeparator = new byte[lineSeparator.length];
|
||||
System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
|
||||
} else {
|
||||
this.encodeSize = BYTES_PER_ENCODED_BLOCK;
|
||||
this.lineSeparator = null;
|
||||
}
|
||||
this.decodeSize = this.encodeSize - 1;
|
||||
|
||||
if (isInAlphabet(pad) || isWhiteSpace(pad)) {
|
||||
throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
|
||||
* with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
|
||||
* call is not necessary when decoding, but it doesn't hurt, either.
|
||||
* </p>
|
||||
* <p>
|
||||
* Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
|
||||
* silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
|
||||
* garbage-out philosophy: it will not check the provided data for validity.
|
||||
* </p>
|
||||
*
|
||||
* @param in
|
||||
* byte[] array of ascii data to Base32 decode.
|
||||
* @param inPos
|
||||
* Position to start reading data from.
|
||||
* @param inAvail
|
||||
* Amount of bytes available from input for encoding.
|
||||
* @param context the context to be used
|
||||
*
|
||||
* Output is written to {@link Context#buffer} as 8-bit octets, using {@link Context#pos} as the buffer position
|
||||
*/
|
||||
@Override
|
||||
void decode(final byte[] in, int inPos, final int inAvail, final Context context) {
|
||||
// package protected for access from I/O streams
|
||||
|
||||
if (context.eof) {
|
||||
return;
|
||||
}
|
||||
if (inAvail < 0) {
|
||||
context.eof = true;
|
||||
}
|
||||
for (int i = 0; i < inAvail; i++) {
|
||||
final byte b = in[inPos++];
|
||||
if (b == pad) {
|
||||
// We're done.
|
||||
context.eof = true;
|
||||
break;
|
||||
} else {
|
||||
final byte[] buffer = ensureBufferSize(decodeSize, context);
|
||||
if (b >= 0 && b < this.decodeTable.length) {
|
||||
final int result = this.decodeTable[b];
|
||||
if (result >= 0) {
|
||||
context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
|
||||
// collect decoded bytes
|
||||
context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
|
||||
if (context.modulus == 0) { // we can output the 5 bytes
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 32) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Two forms of EOF as far as Base32 decoder is concerned: actual
|
||||
// EOF (-1) and first time '=' character is encountered in stream.
|
||||
// This approach makes the '=' padding characters completely optional.
|
||||
if (context.eof && context.modulus >= 2) { // if modulus < 2, nothing to do
|
||||
final byte[] buffer = ensureBufferSize(decodeSize, context);
|
||||
|
||||
// we ignore partial bytes, i.e. only multiples of 8 count
|
||||
switch (context.modulus) {
|
||||
case 2 : // 10 bits, drop 2 and output one byte
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 2) & MASK_8BITS);
|
||||
break;
|
||||
case 3 : // 15 bits, drop 7 and output 1 byte
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 7) & MASK_8BITS);
|
||||
break;
|
||||
case 4 : // 20 bits = 2*8 + 4
|
||||
context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
|
||||
break;
|
||||
case 5 : // 25bits = 3*8 + 1
|
||||
context.lbitWorkArea = context.lbitWorkArea >> 1;
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
|
||||
break;
|
||||
case 6 : // 30bits = 3*8 + 6
|
||||
context.lbitWorkArea = context.lbitWorkArea >> 6;
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
|
||||
break;
|
||||
case 7 : // 35 = 4*8 +3
|
||||
context.lbitWorkArea = context.lbitWorkArea >> 3;
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
|
||||
break;
|
||||
default:
|
||||
// modulus can be 0-7, and we excluded 0,1 already
|
||||
throw new IllegalStateException("Impossible modulus "+context.modulus);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
|
||||
* the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last
|
||||
* remaining bytes (if not multiple of 5).
|
||||
* </p>
|
||||
*
|
||||
* @param in
|
||||
* byte[] array of binary data to Base32 encode.
|
||||
* @param inPos
|
||||
* Position to start reading data from.
|
||||
* @param inAvail
|
||||
* Amount of bytes available from input for encoding.
|
||||
* @param context the context to be used
|
||||
*/
|
||||
@Override
|
||||
void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
|
||||
// package protected for access from I/O streams
|
||||
|
||||
if (context.eof) {
|
||||
return;
|
||||
}
|
||||
// inAvail < 0 is how we're informed of EOF in the underlying data we're
|
||||
// encoding.
|
||||
if (inAvail < 0) {
|
||||
context.eof = true;
|
||||
if (0 == context.modulus && lineLength == 0) {
|
||||
return; // no leftovers to process and not using chunking
|
||||
}
|
||||
final byte[] buffer = ensureBufferSize(encodeSize, context);
|
||||
final int savedPos = context.pos;
|
||||
switch (context.modulus) { // % 5
|
||||
case 0 :
|
||||
break;
|
||||
case 1 : // Only 1 octet; take top 5 bits then remainder
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
break;
|
||||
case 2 : // 2 octets = 16 bits to use
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
break;
|
||||
case 3 : // 3 octets = 24 bits to use
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
break;
|
||||
case 4 : // 4 octets = 32 bits to use
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3
|
||||
buffer[context.pos++] = pad;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Impossible modulus "+context.modulus);
|
||||
}
|
||||
context.currentLinePos += context.pos - savedPos; // keep track of current line position
|
||||
// if currentPos == 0 we are at the start of a line, so don't add CRLF
|
||||
if (lineLength > 0 && context.currentLinePos > 0){ // add chunk separator if required
|
||||
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
|
||||
context.pos += lineSeparator.length;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < inAvail; i++) {
|
||||
final byte[] buffer = ensureBufferSize(encodeSize, context);
|
||||
context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
|
||||
int b = in[inPos++];
|
||||
if (b < 0) {
|
||||
b += 256;
|
||||
}
|
||||
context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
|
||||
if (0 == context.modulus) { // we have enough bytes to create our output
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 35) & MASK_5BITS];
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 30) & MASK_5BITS];
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 25) & MASK_5BITS];
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 20) & MASK_5BITS];
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 15) & MASK_5BITS];
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 10) & MASK_5BITS];
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 5) & MASK_5BITS];
|
||||
buffer[context.pos++] = encodeTable[(int)context.lbitWorkArea & MASK_5BITS];
|
||||
context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
|
||||
if (lineLength > 0 && lineLength <= context.currentLinePos) {
|
||||
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
|
||||
context.pos += lineSeparator.length;
|
||||
context.currentLinePos = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether or not the {@code octet} is in the Base32 alphabet.
|
||||
*
|
||||
* @param octet
|
||||
* The value to test
|
||||
* @return {@code true} if the value is defined in the the Base32 alphabet {@code false} otherwise.
|
||||
*/
|
||||
@Override
|
||||
public boolean isInAlphabet(final byte octet) {
|
||||
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* Provides Base32 encoding and decoding in a streaming fashion (unlimited size). When encoding, the output is not
|
||||
* divided into lines by default; a lineLength and lineSeparator can be supplied by using the appropriate
|
||||
* constructor.
|
||||
* <p>
|
||||
* The default behaviour of the Base32InputStream is to DECODE, whereas the default behaviour of the Base32OutputStream
|
||||
* is to ENCODE, but this behaviour can be overridden by using a different constructor.
|
||||
* </p>
|
||||
* <p>
|
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
|
||||
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
|
||||
* </p>
|
||||
*
|
||||
* @version $Id$
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
|
||||
* @since 1.5
|
||||
*/
|
||||
public class Base32InputStream extends BaseNCodecInputStream {
|
||||
|
||||
/**
|
||||
* Creates a Base32InputStream such that all data read is Base32-decoded from the original provided InputStream.
|
||||
*
|
||||
* @param in
|
||||
* InputStream to wrap.
|
||||
*/
|
||||
public Base32InputStream(final InputStream in) {
|
||||
this(in, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32InputStream such that all data read is either Base32-encoded or Base32-decoded from the original
|
||||
* provided InputStream.
|
||||
*
|
||||
* @param in
|
||||
* InputStream to wrap.
|
||||
* @param doEncode
|
||||
* true if we should encode all data read from us, false if we should decode.
|
||||
*/
|
||||
public Base32InputStream(final InputStream in, final boolean doEncode) {
|
||||
super(in, new Base32(false), doEncode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32InputStream such that all data read is either Base32-encoded or Base32-decoded from the original
|
||||
* provided InputStream.
|
||||
*
|
||||
* @param in
|
||||
* InputStream to wrap.
|
||||
* @param doEncode
|
||||
* true if we should encode all data read from us, false if we should decode.
|
||||
* @param lineLength
|
||||
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
|
||||
* nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode
|
||||
* is false, lineLength is ignored.
|
||||
* @param lineSeparator
|
||||
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
|
||||
* If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.
|
||||
*/
|
||||
public Base32InputStream(final InputStream in, final boolean doEncode,
|
||||
final int lineLength, final byte[] lineSeparator) {
|
||||
super(in, new Base32(lineLength, lineSeparator), doEncode);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* Provides Base32 encoding and decoding in a streaming fashion (unlimited size). When encoding the default lineLength
|
||||
* is 76 characters and the default lineEnding is CRLF, but these can be overridden by using the appropriate
|
||||
* constructor.
|
||||
* <p>
|
||||
* The default behaviour of the Base32OutputStream is to ENCODE, whereas the default behaviour of the Base32InputStream
|
||||
* is to DECODE. But this behaviour can be overridden by using a different constructor.
|
||||
* </p>
|
||||
* <p>
|
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
|
||||
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>Note:</b> It is mandatory to close the stream after the last byte has been written to it, otherwise the
|
||||
* final padding will be omitted and the resulting data will be incomplete/inconsistent.
|
||||
* </p>
|
||||
*
|
||||
* @version $Id$
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
|
||||
* @since 1.5
|
||||
*/
|
||||
public class Base32OutputStream extends BaseNCodecOutputStream {
|
||||
|
||||
/**
|
||||
* Creates a Base32OutputStream such that all data written is Base32-encoded to the original provided OutputStream.
|
||||
*
|
||||
* @param out
|
||||
* OutputStream to wrap.
|
||||
*/
|
||||
public Base32OutputStream(final OutputStream out) {
|
||||
this(out, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32OutputStream such that all data written is either Base32-encoded or Base32-decoded to the
|
||||
* original provided OutputStream.
|
||||
*
|
||||
* @param out
|
||||
* OutputStream to wrap.
|
||||
* @param doEncode
|
||||
* true if we should encode all data written to us, false if we should decode.
|
||||
*/
|
||||
public Base32OutputStream(final OutputStream out, final boolean doEncode) {
|
||||
super(out, new Base32(false), doEncode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base32OutputStream such that all data written is either Base32-encoded or Base32-decoded to the
|
||||
* original provided OutputStream.
|
||||
*
|
||||
* @param out
|
||||
* OutputStream to wrap.
|
||||
* @param doEncode
|
||||
* true if we should encode all data written to us, false if we should decode.
|
||||
* @param lineLength
|
||||
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
|
||||
* nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode
|
||||
* is false, lineLength is ignored.
|
||||
* @param lineSeparator
|
||||
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
|
||||
* If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.
|
||||
*/
|
||||
public Base32OutputStream(final OutputStream out, final boolean doEncode,
|
||||
final int lineLength, final byte[] lineSeparator) {
|
||||
super(out, new Base32(lineLength, lineSeparator), doEncode);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,786 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import java.math.BigInteger;
|
||||
|
||||
/**
|
||||
* Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
|
||||
*
|
||||
* <p>
|
||||
* This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
|
||||
* Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
|
||||
* </p>
|
||||
* <p>
|
||||
* The class can be parameterized in the following manner with various constructors:
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>URL-safe mode: Default off.</li>
|
||||
* <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples of
|
||||
* 4 in the encoded data.</li>
|
||||
* <li>Line separator: Default is CRLF ("\r\n")</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
|
||||
* </p>
|
||||
* <p>
|
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
|
||||
* encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
|
||||
* UTF-8, etc).
|
||||
* </p>
|
||||
* <p>
|
||||
* This class is thread-safe.
|
||||
* </p>
|
||||
*
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
|
||||
* @since 1.0
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Base64 extends BaseNCodec {
|
||||
|
||||
/**
|
||||
* BASE64 characters are 6 bits in length.
|
||||
* They are formed by taking a block of 3 octets to form a 24-bit string,
|
||||
* which is converted into 4 BASE64 characters.
|
||||
*/
|
||||
private static final int BITS_PER_ENCODED_BYTE = 6;
|
||||
private static final int BYTES_PER_UNENCODED_BLOCK = 3;
|
||||
private static final int BYTES_PER_ENCODED_BLOCK = 4;
|
||||
|
||||
/**
|
||||
* Chunk separator per RFC 2045 section 2.1.
|
||||
*
|
||||
* <p>
|
||||
* N.B. The next major release may break compatibility and make this field private.
|
||||
* </p>
|
||||
*
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
|
||||
*/
|
||||
static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};
|
||||
|
||||
/**
|
||||
* This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet"
|
||||
* equivalents as specified in Table 1 of RFC 2045.
|
||||
*
|
||||
* Thanks to "commons" project in ws.apache.org for this code.
|
||||
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
|
||||
*/
|
||||
private static final byte[] STANDARD_ENCODE_TABLE = {
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
|
||||
};
|
||||
|
||||
/**
|
||||
* This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
|
||||
* changed to - and _ to make the encoded Base64 results more URL-SAFE.
|
||||
* This table is only used when the Base64's mode is set to URL-SAFE.
|
||||
*/
|
||||
private static final byte[] URL_SAFE_ENCODE_TABLE = {
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
|
||||
};
|
||||
|
||||
/**
|
||||
* This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
|
||||
* in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
|
||||
* alphabet but fall within the bounds of the array are translated to -1.
|
||||
*
|
||||
* Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
|
||||
* URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
|
||||
*
|
||||
* Thanks to "commons" project in ws.apache.org for this code.
|
||||
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
|
||||
*/
|
||||
private static final byte[] DECODE_TABLE = {
|
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
|
||||
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
|
||||
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z
|
||||
};
|
||||
|
||||
/**
|
||||
* Base64 uses 6-bit fields.
|
||||
* Mask used to extract 6 bits, used when encoding.
|
||||
*/
|
||||
private static final int MASK_6BITS = 0x3f;
|
||||
|
||||
// The static final fields above are used for the original static byte[] methods on Base64.
|
||||
// The private member fields below are used with the new streaming approach, which requires
|
||||
// some state be preserved between calls of encode() and decode().
|
||||
|
||||
/**
|
||||
* Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
|
||||
* to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
|
||||
* between the two modes.
|
||||
*/
|
||||
private final byte[] encodeTable;
|
||||
|
||||
// Only one decode table currently; keep for consistency with Base32 code
|
||||
private final byte[] decodeTable = DECODE_TABLE;
|
||||
|
||||
/**
|
||||
* Line separator for encoding. Not used when decoding. Only used if lineLength > 0.
|
||||
*/
|
||||
private final byte[] lineSeparator;
|
||||
|
||||
/**
|
||||
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
|
||||
* <code>decodeSize = 3 + lineSeparator.length;</code>
|
||||
*/
|
||||
private final int decodeSize;
|
||||
|
||||
/**
|
||||
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
|
||||
* <code>encodeSize = 4 + lineSeparator.length;</code>
|
||||
*/
|
||||
private final int encodeSize;
|
||||
|
||||
/**
 * Builds a Base64 codec that decodes every supported variant and encodes with the standard (URL-unsafe) alphabet.
 * <p>
 * Encoding is not chunked: the line length is 0, so no line separators are written, and the encoding table is
 * STANDARD_ENCODE_TABLE.
 * </p>
 *
 * <p>
 * When decoding all variants are supported.
 * </p>
 */
public Base64() {
    // A line length of 0 switches chunking off.
    this(0);
}
|
||||
|
||||
/**
 * Builds a Base64 codec that decodes every supported variant and encodes with the alphabet chosen by
 * {@code urlSafe}.
 * <p>
 * Encoding is chunked: the line length is MIME_CHUNK_SIZE and each line ends with CHUNK_SEPARATOR (CRLF). The
 * encoding table is URL_SAFE_ENCODE_TABLE when {@code urlSafe} is true and STANDARD_ENCODE_TABLE otherwise.
 * </p>
 *
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param urlSafe
 *            if <code>true</code>, URL-safe encoding is used. In most cases this should be set to
 *            <code>false</code>.
 * @since 1.4
 */
public Base64(final boolean urlSafe) {
    // MIME-style chunking; only the alphabet is caller-selectable here.
    this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
}
|
||||
|
||||
/**
 * Builds a Base64 codec that decodes every supported variant and encodes with the standard (URL-unsafe)
 * alphabet, chunking the output at the requested line length.
 * <p>
 * When encoding, the line length is the one given here, the line separator is CHUNK_SEPARATOR (CRLF), and the
 * encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
 *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
 *            decoding.
 * @since 1.4
 */
public Base64(final int lineLength) {
    // Use the default CRLF separator with the caller's line length.
    this(lineLength, CHUNK_SEPARATOR);
}
|
||||
|
||||
/**
 * Builds a Base64 codec that decodes every supported variant and encodes with the standard (URL-unsafe)
 * alphabet, chunking the output with the given line length and line separator.
 * <p>
 * When encoding, the line length and line separator are the ones given here and the encoding table is
 * STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
 *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
 *            decoding.
 * @param lineSeparator
 *            Each line of encoded data will end with this sequence of bytes.
 * @throws IllegalArgumentException
 *             Thrown when the provided lineSeparator included some base64 characters.
 * @since 1.4
 */
public Base64(final int lineLength, final byte[] lineSeparator) {
    // 'false' selects the standard, non-URL-safe encode table.
    this(lineLength, lineSeparator, false);
}
|
||||
|
||||
/**
 * Builds a Base64 codec that decodes every supported variant and encodes with the alphabet chosen by
 * {@code urlSafe}, chunking the output with the given line length and line separator.
 * <p>
 * When encoding, the encoding table is URL_SAFE_ENCODE_TABLE if {@code urlSafe} is true and
 * STANDARD_ENCODE_TABLE otherwise. Chunking is active only when {@code lineSeparator} is non-null and
 * {@code lineLength} is greater than 0; in every other case no separator is kept and the output is not chunked.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
 *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
 *            decoding.
 * @param lineSeparator
 *            Each line of encoded data will end with this sequence of bytes. A private copy is stored. May be
 *            null, which disables chunking.
 * @param urlSafe
 *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
 *            operations. Decoding seamlessly handles both modes.
 *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
 * @throws IllegalArgumentException
 *             The provided lineSeparator included some base64 characters. The check is applied to every
 *             non-null separator, even when lineLength &lt;= 0 means it will not be used.
 * @since 1.4
 */
public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
    super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
            lineLength,
            lineSeparator == null ? 0 : lineSeparator.length);

    // A separator made of alphabet or pad bytes is rejected regardless of the line length
    // (see test case Base64Test.testConstructors()).
    if (lineSeparator != null && containsAlphabetOrPad(lineSeparator)) {
        throw new IllegalArgumentException("lineSeparator must not contain base64 characters: ["
                + StringUtils.newStringUtf8(lineSeparator) + "]");
    }

    // A null separator or a non-positive line length silently disables chunking instead of failing.
    final boolean chunking = lineSeparator != null && lineLength > 0;
    if (chunking) {
        // Defensive copy so later changes to the caller's array cannot affect this codec.
        this.lineSeparator = lineSeparator.clone();
        this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
    } else {
        this.lineSeparator = null;
        this.encodeSize = BYTES_PER_ENCODED_BLOCK;
    }

    // A decoded block is one byte shorter than an encoded block.
    this.decodeSize = this.encodeSize - 1;
    this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
}
|
||||
|
||||
/**
|
||||
* Returns our current encode mode. True if we're URL-SAFE, false otherwise.
|
||||
*
|
||||
* @return true if we're in URL-SAFE mode, false otherwise.
|
||||
* @since 1.4
|
||||
*/
|
||||
public boolean isUrlSafe() {
|
||||
return this.encodeTable == URL_SAFE_ENCODE_TABLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
|
||||
* the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
|
||||
* remaining bytes (if not multiple of 3).
|
||||
* </p>
|
||||
* <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p>
|
||||
* <p>
|
||||
* Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
|
||||
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
|
||||
* </p>
|
||||
*
|
||||
* @param in
|
||||
* byte[] array of binary data to base64 encode.
|
||||
* @param inPos
|
||||
* Position to start reading data from.
|
||||
* @param inAvail
|
||||
* Amount of bytes available from input for encoding.
|
||||
* @param context
|
||||
* the context to be used
|
||||
*/
|
||||
@Override
|
||||
void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
|
||||
if (context.eof) {
|
||||
return;
|
||||
}
|
||||
// inAvail < 0 is how we're informed of EOF in the underlying data we're
|
||||
// encoding.
|
||||
if (inAvail < 0) {
|
||||
context.eof = true;
|
||||
if (0 == context.modulus && lineLength == 0) {
|
||||
return; // no leftovers to process and not using chunking
|
||||
}
|
||||
final byte[] buffer = ensureBufferSize(encodeSize, context);
|
||||
final int savedPos = context.pos;
|
||||
switch (context.modulus) { // 0-2
|
||||
case 0 : // nothing to do here
|
||||
break;
|
||||
case 1 : // 8 bits = 6 + 2
|
||||
// top 6 bits:
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS];
|
||||
// remaining 2:
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS];
|
||||
// URL-SAFE skips the padding to further reduce size.
|
||||
if (encodeTable == STANDARD_ENCODE_TABLE) {
|
||||
buffer[context.pos++] = pad;
|
||||
buffer[context.pos++] = pad;
|
||||
}
|
||||
break;
|
||||
|
||||
case 2 : // 16 bits = 6 + 6 + 4
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
|
||||
// URL-SAFE skips the padding to further reduce size.
|
||||
if (encodeTable == STANDARD_ENCODE_TABLE) {
|
||||
buffer[context.pos++] = pad;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Impossible modulus "+context.modulus);
|
||||
}
|
||||
context.currentLinePos += context.pos - savedPos; // keep track of current line position
|
||||
// if currentPos == 0 we are at the start of a line, so don't add CRLF
|
||||
if (lineLength > 0 && context.currentLinePos > 0) {
|
||||
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
|
||||
context.pos += lineSeparator.length;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < inAvail; i++) {
|
||||
final byte[] buffer = ensureBufferSize(encodeSize, context);
|
||||
context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
|
||||
int b = in[inPos++];
|
||||
if (b < 0) {
|
||||
b += 256;
|
||||
}
|
||||
context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
|
||||
if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
|
||||
buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
|
||||
context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
|
||||
if (lineLength > 0 && lineLength <= context.currentLinePos) {
|
||||
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
|
||||
context.pos += lineSeparator.length;
|
||||
context.currentLinePos = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
|
||||
* with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
|
||||
* call is not necessary when decoding, but it doesn't hurt, either.
|
||||
* </p>
|
||||
* <p>
|
||||
* Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
|
||||
* silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
|
||||
* garbage-out philosophy: it will not check the provided data for validity.
|
||||
* </p>
|
||||
* <p>
|
||||
* Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
|
||||
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
|
||||
* </p>
|
||||
*
|
||||
* @param in
|
||||
* byte[] array of ascii data to base64 decode.
|
||||
* @param inPos
|
||||
* Position to start reading data from.
|
||||
* @param inAvail
|
||||
* Amount of bytes available from input for encoding.
|
||||
* @param context
|
||||
* the context to be used
|
||||
*/
|
||||
@Override
|
||||
void decode(final byte[] in, int inPos, final int inAvail, final Context context) {
|
||||
if (context.eof) {
|
||||
return;
|
||||
}
|
||||
if (inAvail < 0) {
|
||||
context.eof = true;
|
||||
}
|
||||
for (int i = 0; i < inAvail; i++) {
|
||||
final byte[] buffer = ensureBufferSize(decodeSize, context);
|
||||
final byte b = in[inPos++];
|
||||
if (b == pad) {
|
||||
// We're done.
|
||||
context.eof = true;
|
||||
break;
|
||||
} else {
|
||||
if (b >= 0 && b < DECODE_TABLE.length) {
|
||||
final int result = DECODE_TABLE[b];
|
||||
if (result >= 0) {
|
||||
context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
|
||||
context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
|
||||
if (context.modulus == 0) {
|
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Two forms of EOF as far as base64 decoder is concerned: actual
|
||||
// EOF (-1) and first time '=' character is encountered in stream.
|
||||
// This approach makes the '=' padding characters completely optional.
|
||||
if (context.eof && context.modulus != 0) {
|
||||
final byte[] buffer = ensureBufferSize(decodeSize, context);
|
||||
|
||||
// We have some spare bits remaining
|
||||
// Output all whole multiples of 8 bits and ignore the rest
|
||||
switch (context.modulus) {
|
||||
// case 0 : // impossible, as excluded above
|
||||
case 1 : // 6 bits - ignore entirely
|
||||
// TODO not currently tested; perhaps it is impossible?
|
||||
break;
|
||||
case 2 : // 12 bits = 8 + 4
|
||||
context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
|
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
|
||||
break;
|
||||
case 3 : // 18 bits = 8 + 8 + 2
|
||||
context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
|
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
|
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Impossible modulus "+context.modulus);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
|
||||
* method treats whitespace as valid.
|
||||
*
|
||||
* @param arrayOctet
|
||||
* byte array to test
|
||||
* @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
|
||||
* <code>false</code>, otherwise
|
||||
* @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
public static boolean isArrayByteBase64(final byte[] arrayOctet) {
|
||||
return isBase64(arrayOctet);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether or not the <code>octet</code> is in the base 64 alphabet.
|
||||
*
|
||||
* @param octet
|
||||
* The value to test
|
||||
* @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
|
||||
* @since 1.4
|
||||
*/
|
||||
public static boolean isBase64(final byte octet) {
|
||||
return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
|
||||
* method treats whitespace as valid.
|
||||
*
|
||||
* @param base64
|
||||
* String to test
|
||||
* @return <code>true</code> if all characters in the String are valid characters in the Base64 alphabet or if
|
||||
* the String is empty; <code>false</code>, otherwise
|
||||
* @since 1.5
|
||||
*/
|
||||
public static boolean isBase64(final String base64) {
|
||||
return isBase64(StringUtils.getBytesUtf8(base64));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
|
||||
* method treats whitespace as valid.
|
||||
*
|
||||
* @param arrayOctet
|
||||
* byte array to test
|
||||
* @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
|
||||
* <code>false</code>, otherwise
|
||||
* @since 1.5
|
||||
*/
|
||||
public static boolean isBase64(final byte[] arrayOctet) {
|
||||
for (int i = 0; i < arrayOctet.length; i++) {
|
||||
if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes binary data using the base64 algorithm but does not chunk the output.
|
||||
*
|
||||
* @param binaryData
|
||||
* binary data to encode
|
||||
* @return byte[] containing Base64 characters in their UTF-8 representation.
|
||||
*/
|
||||
public static byte[] encodeBase64(final byte[] binaryData) {
|
||||
return encodeBase64(binaryData, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes binary data using the base64 algorithm but does not chunk the output.
|
||||
*
|
||||
* NOTE: We changed the behaviour of this method from multi-line chunking (commons-codec-1.4) to
|
||||
* single-line non-chunking (commons-codec-1.5).
|
||||
*
|
||||
* @param binaryData
|
||||
* binary data to encode
|
||||
* @return String containing Base64 characters.
|
||||
* @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not).
|
||||
*/
|
||||
public static String encodeBase64String(final byte[] binaryData) {
|
||||
return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
|
||||
* url-safe variation emits - and _ instead of + and / characters.
|
||||
* <b>Note: no padding is added.</b>
|
||||
* @param binaryData
|
||||
* binary data to encode
|
||||
* @return byte[] containing Base64 characters in their UTF-8 representation.
|
||||
* @since 1.4
|
||||
*/
|
||||
public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
|
||||
return encodeBase64(binaryData, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
|
||||
* url-safe variation emits - and _ instead of + and / characters.
|
||||
* <b>Note: no padding is added.</b>
|
||||
* @param binaryData
|
||||
* binary data to encode
|
||||
* @return String containing Base64 characters
|
||||
* @since 1.4
|
||||
*/
|
||||
public static String encodeBase64URLSafeString(final byte[] binaryData) {
|
||||
return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
|
||||
*
|
||||
* @param binaryData
|
||||
* binary data to encode
|
||||
* @return Base64 characters chunked in 76 character blocks
|
||||
*/
|
||||
public static byte[] encodeBase64Chunked(final byte[] binaryData) {
|
||||
return encodeBase64(binaryData, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
|
||||
*
|
||||
* @param binaryData
|
||||
* Array containing binary data to encode.
|
||||
* @param isChunked
|
||||
* if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
|
||||
* @return Base64-encoded data.
|
||||
* @throws IllegalArgumentException
|
||||
* Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
|
||||
*/
|
||||
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
|
||||
return encodeBase64(binaryData, isChunked, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
|
||||
*
|
||||
* @param binaryData
|
||||
* Array containing binary data to encode.
|
||||
* @param isChunked
|
||||
* if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
|
||||
* @param urlSafe
|
||||
* if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
|
||||
* <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
|
||||
* @return Base64-encoded data.
|
||||
* @throws IllegalArgumentException
|
||||
* Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
|
||||
* @since 1.4
|
||||
*/
|
||||
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
|
||||
return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
|
||||
*
|
||||
* @param binaryData
|
||||
* Array containing binary data to encode.
|
||||
* @param isChunked
|
||||
* if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
|
||||
* @param urlSafe
|
||||
* if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
|
||||
* <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
|
||||
* @param maxResultSize
|
||||
* The maximum result size to accept.
|
||||
* @return Base64-encoded data.
|
||||
* @throws IllegalArgumentException
|
||||
* Thrown when the input array needs an output array bigger than maxResultSize
|
||||
* @since 1.4
|
||||
*/
|
||||
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
|
||||
final boolean urlSafe, final int maxResultSize) {
|
||||
if (binaryData == null || binaryData.length == 0) {
|
||||
return binaryData;
|
||||
}
|
||||
|
||||
// Create this so can use the super-class method
|
||||
// Also ensures that the same roundings are performed by the ctor and the code
|
||||
final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
|
||||
final long len = b64.getEncodedLength(binaryData);
|
||||
if (len > maxResultSize) {
|
||||
throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
|
||||
len +
|
||||
") than the specified maximum size of " +
|
||||
maxResultSize);
|
||||
}
|
||||
|
||||
return b64.encode(binaryData);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a Base64 String into octets.
|
||||
* <p>
|
||||
* <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
|
||||
* </p>
|
||||
*
|
||||
* @param base64String
|
||||
* String containing Base64 data
|
||||
* @return Array containing decoded data.
|
||||
* @since 1.4
|
||||
*/
|
||||
public static byte[] decodeBase64(final String base64String) {
|
||||
return new Base64().decode(base64String);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes Base64 data into octets.
|
||||
* <p>
|
||||
* <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
|
||||
* </p>
|
||||
*
|
||||
* @param base64Data
|
||||
* Byte array containing Base64 data
|
||||
* @return Array containing decoded data.
|
||||
*/
|
||||
public static byte[] decodeBase64(final byte[] base64Data) {
|
||||
return new Base64().decode(base64Data);
|
||||
}
|
||||
|
||||
// Implementation of the Encoder Interface
|
||||
|
||||
// Implementation of integer encoding used for crypto
|
||||
/**
|
||||
* Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
|
||||
*
|
||||
* @param pArray
|
||||
* a byte array containing base64 character data
|
||||
* @return A BigInteger
|
||||
* @since 1.4
|
||||
*/
|
||||
public static BigInteger decodeInteger(final byte[] pArray) {
|
||||
return new BigInteger(1, decodeBase64(pArray));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
|
||||
*
|
||||
* @param bigInt
|
||||
* a BigInteger
|
||||
* @return A byte array containing base64 character data
|
||||
* @throws NullPointerException
|
||||
* if null is passed in
|
||||
* @since 1.4
|
||||
*/
|
||||
public static byte[] encodeInteger(final BigInteger bigInt) {
|
||||
if (bigInt == null) {
|
||||
throw new NullPointerException("encodeInteger called with null parameter");
|
||||
}
|
||||
return encodeBase64(toIntegerBytes(bigInt), false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a byte-array representation of a <code>BigInteger</code> without sign bit.
|
||||
*
|
||||
* @param bigInt
|
||||
* <code>BigInteger</code> to be converted
|
||||
* @return a byte array representation of the BigInteger parameter
|
||||
*/
|
||||
static byte[] toIntegerBytes(final BigInteger bigInt) {
|
||||
int bitlen = bigInt.bitLength();
|
||||
// round bitlen
|
||||
bitlen = ((bitlen + 7) >> 3) << 3;
|
||||
final byte[] bigBytes = bigInt.toByteArray();
|
||||
|
||||
if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
|
||||
return bigBytes;
|
||||
}
|
||||
// set up params for copying everything but sign bit
|
||||
int startSrc = 0;
|
||||
int len = bigBytes.length;
|
||||
|
||||
// if bigInt is exactly byte-aligned, just skip signbit in copy
|
||||
if ((bigInt.bitLength() % 8) == 0) {
|
||||
startSrc = 1;
|
||||
len--;
|
||||
}
|
||||
final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
|
||||
final byte[] resizedBytes = new byte[bitlen / 8];
|
||||
System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
|
||||
return resizedBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether or not the <code>octet</code> is in the Base64 alphabet.
|
||||
*
|
||||
* @param octet
|
||||
* The value to test
|
||||
* @return <code>true</code> if the value is defined in the the Base64 alphabet <code>false</code> otherwise.
|
||||
*/
|
||||
@Override
|
||||
protected boolean isInAlphabet(final byte octet) {
|
||||
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* Provides Base64 encoding and decoding in a streaming fashion (unlimited size). When encoding the default lineLength
|
||||
* is 76 characters and the default lineEnding is CRLF, but these can be overridden by using the appropriate
|
||||
* constructor.
|
||||
* <p>
|
||||
* The default behaviour of the Base64InputStream is to DECODE, whereas the default behaviour of the Base64OutputStream
|
||||
* is to ENCODE, but this behaviour can be overridden by using a different constructor.
|
||||
* </p>
|
||||
* <p>
|
||||
* This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
|
||||
* Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
|
||||
* </p>
|
||||
* <p>
|
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
|
||||
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
|
||||
* </p>
|
||||
*
|
||||
* @version $Id$
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
|
||||
* @since 1.4
|
||||
*/
|
||||
public class Base64InputStream extends BaseNCodecInputStream {
|
||||
|
||||
/**
|
||||
* Creates a Base64InputStream such that all data read is Base64-decoded from the original provided InputStream.
|
||||
*
|
||||
* @param in
|
||||
* InputStream to wrap.
|
||||
*/
|
||||
public Base64InputStream(final InputStream in) {
|
||||
this(in, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base64InputStream such that all data read is either Base64-encoded or Base64-decoded from the original
|
||||
* provided InputStream.
|
||||
*
|
||||
* @param in
|
||||
* InputStream to wrap.
|
||||
* @param doEncode
|
||||
* true if we should encode all data read from us, false if we should decode.
|
||||
*/
|
||||
public Base64InputStream(final InputStream in, final boolean doEncode) {
|
||||
super(in, new Base64(false), doEncode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base64InputStream such that all data read is either Base64-encoded or Base64-decoded from the original
|
||||
* provided InputStream.
|
||||
*
|
||||
* @param in
|
||||
* InputStream to wrap.
|
||||
* @param doEncode
|
||||
* true if we should encode all data read from us, false if we should decode.
|
||||
* @param lineLength
|
||||
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
|
||||
* nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode
|
||||
* is false, lineLength is ignored.
|
||||
* @param lineSeparator
|
||||
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
|
||||
* If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.
|
||||
*/
|
||||
public Base64InputStream(final InputStream in, final boolean doEncode,
|
||||
final int lineLength, final byte[] lineSeparator) {
|
||||
super(in, new Base64(lineLength, lineSeparator), doEncode);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* Provides Base64 encoding and decoding in a streaming fashion (unlimited size). When encoding the default lineLength
|
||||
* is 76 characters and the default lineEnding is CRLF, but these can be overridden by using the appropriate
|
||||
* constructor.
|
||||
* <p>
|
||||
* The default behaviour of the Base64OutputStream is to ENCODE, whereas the default behaviour of the Base64InputStream
|
||||
* is to DECODE. But this behaviour can be overridden by using a different constructor.
|
||||
* </p>
|
||||
* <p>
|
||||
* This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
|
||||
* Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
|
||||
* </p>
|
||||
* <p>
|
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
|
||||
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>Note:</b> It is mandatory to close the stream after the last byte has been written to it, otherwise the
|
||||
* final padding will be omitted and the resulting data will be incomplete/inconsistent.
|
||||
* </p>
|
||||
*
|
||||
* @version $Id$
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
|
||||
* @since 1.4
|
||||
*/
|
||||
public class Base64OutputStream extends BaseNCodecOutputStream {
|
||||
|
||||
/**
|
||||
* Creates a Base64OutputStream such that all data written is Base64-encoded to the original provided OutputStream.
|
||||
*
|
||||
* @param out
|
||||
* OutputStream to wrap.
|
||||
*/
|
||||
public Base64OutputStream(final OutputStream out) {
|
||||
this(out, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base64OutputStream such that all data written is either Base64-encoded or Base64-decoded to the
|
||||
* original provided OutputStream.
|
||||
*
|
||||
* @param out
|
||||
* OutputStream to wrap.
|
||||
* @param doEncode
|
||||
* true if we should encode all data written to us, false if we should decode.
|
||||
*/
|
||||
public Base64OutputStream(final OutputStream out, final boolean doEncode) {
|
||||
super(out,new Base64(false), doEncode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Base64OutputStream such that all data written is either Base64-encoded or Base64-decoded to the
|
||||
* original provided OutputStream.
|
||||
*
|
||||
* @param out
|
||||
* OutputStream to wrap.
|
||||
* @param doEncode
|
||||
* true if we should encode all data written to us, false if we should decode.
|
||||
* @param lineLength
|
||||
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
|
||||
* nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode
|
||||
* is false, lineLength is ignored.
|
||||
* @param lineSeparator
|
||||
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
|
||||
* If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.
|
||||
*/
|
||||
public Base64OutputStream(final OutputStream out, final boolean doEncode,
|
||||
final int lineLength, final byte[] lineSeparator) {
|
||||
super(out, new Base64(lineLength, lineSeparator), doEncode);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,525 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.commons.codec.BinaryDecoder;
|
||||
import org.apache.commons.codec.BinaryEncoder;
|
||||
import org.apache.commons.codec.DecoderException;
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
|
||||
/**
|
||||
* Abstract superclass for Base-N encoders and decoders.
|
||||
*
|
||||
* <p>
|
||||
* This class is thread-safe.
|
||||
* </p>
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
|
||||
|
||||
/**
|
||||
* Holds thread context so classes can be thread-safe.
|
||||
*
|
||||
* This class is not itself thread-safe; each thread must allocate its own copy.
|
||||
*
|
||||
* @since 1.7
|
||||
*/
|
||||
static class Context {
|
||||
|
||||
/**
|
||||
* Place holder for the bytes we're dealing with for our based logic.
|
||||
* Bitwise operations store and extract the encoding or decoding from this variable.
|
||||
*/
|
||||
int ibitWorkArea;
|
||||
|
||||
/**
|
||||
* Place holder for the bytes we're dealing with for our based logic.
|
||||
* Bitwise operations store and extract the encoding or decoding from this variable.
|
||||
*/
|
||||
long lbitWorkArea;
|
||||
|
||||
/**
|
||||
* Buffer for streaming.
|
||||
*/
|
||||
byte[] buffer;
|
||||
|
||||
/**
|
||||
* Position where next character should be written in the buffer.
|
||||
*/
|
||||
int pos;
|
||||
|
||||
/**
|
||||
* Position where next character should be read from the buffer.
|
||||
*/
|
||||
int readPos;
|
||||
|
||||
/**
|
||||
* Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
|
||||
* and must be thrown away.
|
||||
*/
|
||||
boolean eof;
|
||||
|
||||
/**
|
||||
* Variable tracks how many characters have been written to the current line. Only used when encoding. We use
|
||||
* it to make sure each encoded line never goes beyond lineLength (if lineLength > 0).
|
||||
*/
|
||||
int currentLinePos;
|
||||
|
||||
/**
|
||||
* Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
|
||||
* variable helps track that.
|
||||
*/
|
||||
int modulus;
|
||||
|
||||
Context() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a String useful for debugging (especially within a debugger.)
|
||||
*
|
||||
* @return a String useful for debugging.
|
||||
*/
|
||||
@SuppressWarnings("boxing") // OK to ignore boxing here
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
|
||||
"modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
|
||||
currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* EOF
|
||||
*
|
||||
* @since 1.7
|
||||
*/
|
||||
static final int EOF = -1;
|
||||
|
||||
/**
|
||||
* MIME chunk size per RFC 2045 section 6.8.
|
||||
*
|
||||
* <p>
|
||||
* The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
|
||||
* equal signs.
|
||||
* </p>
|
||||
*
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
|
||||
*/
|
||||
public static final int MIME_CHUNK_SIZE = 76;
|
||||
|
||||
/**
|
||||
* PEM chunk size per RFC 1421 section 4.3.2.4.
|
||||
*
|
||||
* <p>
|
||||
* The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
|
||||
* equal signs.
|
||||
* </p>
|
||||
*
|
||||
* @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
|
||||
*/
|
||||
public static final int PEM_CHUNK_SIZE = 64;
|
||||
|
||||
private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
|
||||
|
||||
/**
|
||||
* Defines the default buffer size - currently {@value}
|
||||
* - must be large enough for at least one encoded block+separator
|
||||
*/
|
||||
private static final int DEFAULT_BUFFER_SIZE = 8192;
|
||||
|
||||
/** Mask used to extract 8 bits, used in decoding bytes */
|
||||
protected static final int MASK_8BITS = 0xff;
|
||||
|
||||
/**
|
||||
* Byte used to pad output.
|
||||
*/
|
||||
protected static final byte PAD_DEFAULT = '='; // Allow static access to default
|
||||
|
||||
/**
|
||||
* @deprecated Use {@link #pad}. Will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
|
||||
|
||||
protected final byte pad; // instance variable just in case it needs to vary later
|
||||
|
||||
/** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
|
||||
private final int unencodedBlockSize;
|
||||
|
||||
/** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
|
||||
private final int encodedBlockSize;
|
||||
|
||||
/**
|
||||
* Chunksize for encoding. Not used when decoding.
|
||||
* A value of zero or less implies no chunking of the encoded data.
|
||||
* Rounded down to nearest multiple of encodedBlockSize.
|
||||
*/
|
||||
protected final int lineLength;
|
||||
|
||||
/**
|
||||
* Size of chunk separator. Not used unless {@link #lineLength} > 0.
|
||||
*/
|
||||
private final int chunkSeparatorLength;
|
||||
|
||||
/**
 * Creates a codec that pads with {@link #PAD_DEFAULT}.
 * <p>
 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}.
 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
 * </p>
 *
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
 * @param chunkSeparatorLength the chunk separator length, if relevant
 */
protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                     final int lineLength, final int chunkSeparatorLength) {
    // Same as the five-argument constructor, with the default padding byte filled in.
    this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
}
|
||||
|
||||
/**
 * Creates a codec with an explicit padding byte.
 * <p>
 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}.
 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
 * </p>
 *
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
 * @param chunkSeparatorLength the chunk separator length, if relevant
 * @param pad byte used as padding byte.
 */
protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                     final int lineLength, final int chunkSeparatorLength, final byte pad) {
    this.unencodedBlockSize = unencodedBlockSize;
    this.encodedBlockSize = encodedBlockSize;
    this.chunkSeparatorLength = chunkSeparatorLength;
    this.pad = pad;

    // Chunking needs both a positive line length and a non-empty separator.
    if (lineLength > 0 && chunkSeparatorLength > 0) {
        // Integer division then multiplication rounds down to a whole number of encoded blocks.
        this.lineLength = (lineLength / encodedBlockSize) * encodedBlockSize;
    } else {
        this.lineLength = 0;
    }
}
|
||||
|
||||
/**
|
||||
* Returns true if this object has buffered data for reading.
|
||||
*
|
||||
* @param context the context to be used
|
||||
* @return true if there is data still available for reading.
|
||||
*/
|
||||
boolean hasData(final Context context) { // package protected for access from I/O streams
|
||||
return context.buffer != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the amount of buffered data available for reading.
|
||||
*
|
||||
* @param context the context to be used
|
||||
* @return The amount of buffered data available for reading.
|
||||
*/
|
||||
int available(final Context context) { // package protected for access from I/O streams
|
||||
return context.buffer != null ? context.pos - context.readPos : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the default buffer size. Can be overridden.
|
||||
*
|
||||
* @return {@link #DEFAULT_BUFFER_SIZE}
|
||||
*/
|
||||
protected int getDefaultBufferSize() {
|
||||
return DEFAULT_BUFFER_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
|
||||
* @param context the context to be used
|
||||
*/
|
||||
private byte[] resizeBuffer(final Context context) {
|
||||
if (context.buffer == null) {
|
||||
context.buffer = new byte[getDefaultBufferSize()];
|
||||
context.pos = 0;
|
||||
context.readPos = 0;
|
||||
} else {
|
||||
final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
|
||||
System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
|
||||
context.buffer = b;
|
||||
}
|
||||
return context.buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure that the buffer has room for <code>size</code> bytes
|
||||
*
|
||||
* @param size minimum spare space required
|
||||
* @param context the context to be used
|
||||
* @return the buffer
|
||||
*/
|
||||
protected byte[] ensureBufferSize(final int size, final Context context){
|
||||
if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
|
||||
return resizeBuffer(context);
|
||||
}
|
||||
return context.buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
|
||||
* bytes. Returns how many bytes were actually extracted.
|
||||
* <p>
|
||||
* Package protected for access from I/O streams.
|
||||
*
|
||||
* @param b
|
||||
* byte[] array to extract the buffered data into.
|
||||
* @param bPos
|
||||
* position in byte[] array to start extraction at.
|
||||
* @param bAvail
|
||||
* amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
|
||||
* @param context
|
||||
* the context to be used
|
||||
* @return The number of bytes successfully extracted into the provided byte[] array.
|
||||
*/
|
||||
int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
|
||||
if (context.buffer != null) {
|
||||
final int len = Math.min(available(context), bAvail);
|
||||
System.arraycopy(context.buffer, context.readPos, b, bPos, len);
|
||||
context.readPos += len;
|
||||
if (context.readPos >= context.pos) {
|
||||
context.buffer = null; // so hasData() will return false, and this method can return -1
|
||||
}
|
||||
return len;
|
||||
}
|
||||
return context.eof ? EOF : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a byte value is whitespace or not.
|
||||
* Whitespace is taken to mean: space, tab, CR, LF
|
||||
* @param byteToCheck
|
||||
* the byte to check
|
||||
* @return true if byte is whitespace, false otherwise
|
||||
*/
|
||||
protected static boolean isWhiteSpace(final byte byteToCheck) {
|
||||
switch (byteToCheck) {
|
||||
case ' ' :
|
||||
case '\n' :
|
||||
case '\r' :
|
||||
case '\t' :
|
||||
return true;
|
||||
default :
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
|
||||
* the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
|
||||
*
|
||||
* @param obj
|
||||
* Object to encode
|
||||
* @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
|
||||
* @throws EncoderException
|
||||
* if the parameter supplied is not of type byte[]
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (!(obj instanceof byte[])) {
|
||||
throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
|
||||
}
|
||||
return encode((byte[]) obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
|
||||
* Uses UTF8 encoding.
|
||||
*
|
||||
* @param pArray
|
||||
* a byte array containing binary data
|
||||
* @return A String containing only Base-N character data
|
||||
*/
|
||||
public String encodeToString(final byte[] pArray) {
|
||||
return StringUtils.newStringUtf8(encode(pArray));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
|
||||
* Uses UTF8 encoding.
|
||||
*
|
||||
* @param pArray a byte array containing binary data
|
||||
* @return String containing only character data in the appropriate alphabet.
|
||||
*/
|
||||
public String encodeAsString(final byte[] pArray){
|
||||
return StringUtils.newStringUtf8(encode(pArray));
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
|
||||
* the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
|
||||
*
|
||||
* @param obj
|
||||
* Object to decode
|
||||
* @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
|
||||
* supplied.
|
||||
* @throws DecoderException
|
||||
* if the parameter supplied is not of type byte[]
|
||||
*/
|
||||
@Override
|
||||
public Object decode(final Object obj) throws DecoderException {
|
||||
if (obj instanceof byte[]) {
|
||||
return decode((byte[]) obj);
|
||||
} else if (obj instanceof String) {
|
||||
return decode((String) obj);
|
||||
} else {
|
||||
throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a String containing characters in the Base-N alphabet.
|
||||
*
|
||||
* @param pArray
|
||||
* A String containing Base-N character data
|
||||
* @return a byte array containing binary data
|
||||
*/
|
||||
public byte[] decode(final String pArray) {
|
||||
return decode(StringUtils.getBytesUtf8(pArray));
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a byte[] containing characters in the Base-N alphabet.
|
||||
*
|
||||
* @param pArray
|
||||
* A byte array containing Base-N character data
|
||||
* @return a byte array containing binary data
|
||||
*/
|
||||
@Override
|
||||
public byte[] decode(final byte[] pArray) {
|
||||
if (pArray == null || pArray.length == 0) {
|
||||
return pArray;
|
||||
}
|
||||
final Context context = new Context();
|
||||
decode(pArray, 0, pArray.length, context);
|
||||
decode(pArray, 0, EOF, context); // Notify decoder of EOF.
|
||||
final byte[] result = new byte[context.pos];
|
||||
readResults(result, 0, result.length, context);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
|
||||
*
|
||||
* @param pArray
|
||||
* a byte array containing binary data
|
||||
* @return A byte array containing only the basen alphabetic character data
|
||||
*/
|
||||
@Override
|
||||
public byte[] encode(final byte[] pArray) {
|
||||
if (pArray == null || pArray.length == 0) {
|
||||
return pArray;
|
||||
}
|
||||
final Context context = new Context();
|
||||
encode(pArray, 0, pArray.length, context);
|
||||
encode(pArray, 0, EOF, context); // Notify encoder of EOF.
|
||||
final byte[] buf = new byte[context.pos - context.readPos];
|
||||
readResults(buf, 0, buf.length, context);
|
||||
return buf;
|
||||
}
|
||||
|
||||
// package-private for access from the I/O stream classes in this package
|
||||
abstract void encode(byte[] pArray, int i, int length, Context context);
|
||||
|
||||
// package-private for access from the I/O stream classes in this package
|
||||
abstract void decode(byte[] pArray, int i, int length, Context context);
|
||||
|
||||
/**
|
||||
* Returns whether or not the <code>value</code> is in the current alphabet.
|
||||
* Does not allow whitespace or pad.
|
||||
*
|
||||
* @param value The value to test
|
||||
*
|
||||
* @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
|
||||
*/
|
||||
protected abstract boolean isInAlphabet(byte value);
|
||||
|
||||
/**
|
||||
* Tests a given byte array to see if it contains only valid characters within the alphabet.
|
||||
* The method optionally treats whitespace and pad as valid.
|
||||
*
|
||||
* @param arrayOctet byte array to test
|
||||
* @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
|
||||
*
|
||||
* @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
|
||||
* <code>false</code>, otherwise
|
||||
*/
|
||||
public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
|
||||
for (int i = 0; i < arrayOctet.length; i++) {
|
||||
if (!isInAlphabet(arrayOctet[i]) &&
|
||||
(!allowWSPad || (arrayOctet[i] != pad) && !isWhiteSpace(arrayOctet[i]))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests a given String to see if it contains only valid characters within the alphabet.
|
||||
* The method treats whitespace and PAD as valid.
|
||||
*
|
||||
* @param basen String to test
|
||||
* @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
|
||||
* the String is empty; <code>false</code>, otherwise
|
||||
* @see #isInAlphabet(byte[], boolean)
|
||||
*/
|
||||
public boolean isInAlphabet(final String basen) {
|
||||
return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests a given byte array to see if it contains any characters within the alphabet or PAD.
|
||||
*
|
||||
* Intended for use in checking line-ending arrays
|
||||
*
|
||||
* @param arrayOctet
|
||||
* byte array to test
|
||||
* @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
|
||||
*/
|
||||
protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
|
||||
if (arrayOctet == null) {
|
||||
return false;
|
||||
}
|
||||
for (final byte element : arrayOctet) {
|
||||
if (pad == element || isInAlphabet(element)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the amount of space needed to encode the supplied array.
|
||||
*
|
||||
* @param pArray byte[] array which will later be encoded
|
||||
*
|
||||
* @return amount of space needed to encoded the supplied array.
|
||||
* Returns a long since a max-len array will require > Integer.MAX_VALUE
|
||||
*/
|
||||
public long getEncodedLength(final byte[] pArray) {
|
||||
// Calculate non-chunked size - rounded up to allow for padding
|
||||
// cast to long is needed to avoid possibility of overflow
|
||||
long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize;
|
||||
if (lineLength > 0) { // We're using chunking
|
||||
// Round up to nearest multiple
|
||||
len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import static org.apache.commons.codec.binary.BaseNCodec.EOF;
|
||||
|
||||
import java.io.FilterInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.commons.codec.binary.BaseNCodec.Context;
|
||||
|
||||
/**
|
||||
* Abstract superclass for Base-N input streams.
|
||||
*
|
||||
* @since 1.5
|
||||
* @version $Id$
|
||||
*/
|
||||
public class BaseNCodecInputStream extends FilterInputStream {
|
||||
|
||||
private final BaseNCodec baseNCodec;
|
||||
|
||||
private final boolean doEncode;
|
||||
|
||||
private final byte[] singleByte = new byte[1];
|
||||
|
||||
private final Context context = new Context();
|
||||
|
||||
protected BaseNCodecInputStream(final InputStream in, final BaseNCodec baseNCodec, final boolean doEncode) {
|
||||
super(in);
|
||||
this.doEncode = doEncode;
|
||||
this.baseNCodec = baseNCodec;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @return <code>0</code> if the {@link InputStream} has reached <code>EOF</code>,
|
||||
* <code>1</code> otherwise
|
||||
* @since 1.7
|
||||
*/
|
||||
@Override
|
||||
public int available() throws IOException {
|
||||
// Note: the logic is similar to the InflaterInputStream:
|
||||
// as long as we have not reached EOF, indicate that there is more
|
||||
// data available. As we do not know for sure how much data is left,
|
||||
// just return 1 as a safe guess.
|
||||
|
||||
return context.eof ? 0 : 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Marks the current position in this input stream.
|
||||
* <p>The {@link #mark} method of {@link BaseNCodecInputStream} does nothing.</p>
|
||||
*
|
||||
* @param readLimit the maximum limit of bytes that can be read before the mark position becomes invalid.
|
||||
* @since 1.7
|
||||
*/
|
||||
@Override
|
||||
public synchronized void mark(final int readLimit) {
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @return always returns <code>false</code>
|
||||
*/
|
||||
@Override
|
||||
public boolean markSupported() {
|
||||
return false; // not an easy job to support marks
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads one <code>byte</code> from this input stream.
|
||||
*
|
||||
* @return the byte as an integer in the range 0 to 255. Returns -1 if EOF has been reached.
|
||||
* @throws IOException
|
||||
* if an I/O error occurs.
|
||||
*/
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
int r = read(singleByte, 0, 1);
|
||||
while (r == 0) {
|
||||
r = read(singleByte, 0, 1);
|
||||
}
|
||||
if (r > 0) {
|
||||
final byte b = singleByte[0];
|
||||
return b < 0 ? 256 + b : b;
|
||||
}
|
||||
return EOF;
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to read <code>len</code> bytes into the specified <code>b</code> array starting at <code>offset</code>
|
||||
* from this InputStream.
|
||||
*
|
||||
* @param b
|
||||
* destination byte array
|
||||
* @param offset
|
||||
* where to start writing the bytes
|
||||
* @param len
|
||||
* maximum number of bytes to read
|
||||
*
|
||||
* @return number of bytes read
|
||||
* @throws IOException
|
||||
* if an I/O error occurs.
|
||||
* @throws NullPointerException
|
||||
* if the byte array parameter is null
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if offset, len or buffer size are invalid
|
||||
*/
|
||||
@Override
|
||||
public int read(final byte b[], final int offset, final int len) throws IOException {
|
||||
if (b == null) {
|
||||
throw new NullPointerException();
|
||||
} else if (offset < 0 || len < 0) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
} else if (offset > b.length || offset + len > b.length) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
} else if (len == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
int readLen = 0;
|
||||
/*
|
||||
Rationale for while-loop on (readLen == 0):
|
||||
-----
|
||||
Base32.readResults() usually returns > 0 or EOF (-1). In the
|
||||
rare case where it returns 0, we just keep trying.
|
||||
|
||||
This is essentially an undocumented contract for InputStream
|
||||
implementors that want their code to work properly with
|
||||
java.io.InputStreamReader, since the latter hates it when
|
||||
InputStream.read(byte[]) returns a zero. Unfortunately our
|
||||
readResults() call must return 0 if a large amount of the data
|
||||
being decoded was non-base32, so this while-loop enables proper
|
||||
interop with InputStreamReader for that scenario.
|
||||
-----
|
||||
This is a fix for CODEC-101
|
||||
*/
|
||||
while (readLen == 0) {
|
||||
if (!baseNCodec.hasData(context)) {
|
||||
final byte[] buf = new byte[doEncode ? 4096 : 8192];
|
||||
final int c = in.read(buf);
|
||||
if (doEncode) {
|
||||
baseNCodec.encode(buf, 0, c, context);
|
||||
} else {
|
||||
baseNCodec.decode(buf, 0, c, context);
|
||||
}
|
||||
}
|
||||
readLen = baseNCodec.readResults(b, offset, len, context);
|
||||
}
|
||||
return readLen;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Repositions this stream to the position at the time the mark method was last called on this input stream.
|
||||
* <p>
|
||||
* The {@link #reset} method of {@link BaseNCodecInputStream} does nothing except throw an {@link IOException}.
|
||||
*
|
||||
* @throws IOException if this method is invoked
|
||||
* @since 1.7
|
||||
*/
|
||||
@Override
|
||||
public synchronized void reset() throws IOException {
|
||||
throw new IOException("mark/reset not supported");
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @throws IllegalArgumentException if the provided skip length is negative
|
||||
* @since 1.7
|
||||
*/
|
||||
@Override
|
||||
public long skip(final long n) throws IOException {
|
||||
if (n < 0) {
|
||||
throw new IllegalArgumentException("Negative skip length: " + n);
|
||||
}
|
||||
|
||||
// skip in chunks of 512 bytes
|
||||
final byte[] b = new byte[512];
|
||||
long todo = n;
|
||||
|
||||
while (todo > 0) {
|
||||
int len = (int) Math.min(b.length, todo);
|
||||
len = this.read(b, 0, len);
|
||||
if (len == EOF) {
|
||||
break;
|
||||
}
|
||||
todo -= len;
|
||||
}
|
||||
|
||||
return n - todo;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,176 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import static org.apache.commons.codec.binary.BaseNCodec.EOF;
|
||||
|
||||
import java.io.FilterOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.commons.codec.binary.BaseNCodec.Context;
|
||||
|
||||
/**
|
||||
* Abstract superclass for Base-N output streams.
|
||||
* <p>
|
||||
* To write the EOF marker without closing the stream, call {@link #eof()} or use an <a
|
||||
* href="https://commons.apache.org/proper/commons-io/">Apache Commons IO</a> <a href=
|
||||
* "https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/output/CloseShieldOutputStream.html"
|
||||
* >CloseShieldOutputStream</a>.
|
||||
* </p>
|
||||
*
|
||||
* @since 1.5
|
||||
* @version $Id$
|
||||
*/
|
||||
public class BaseNCodecOutputStream extends FilterOutputStream {
|
||||
|
||||
private final boolean doEncode;
|
||||
|
||||
private final BaseNCodec baseNCodec;
|
||||
|
||||
private final byte[] singleByte = new byte[1];
|
||||
|
||||
private final Context context = new Context();
|
||||
|
||||
// TODO should this be protected?
|
||||
public BaseNCodecOutputStream(final OutputStream out, final BaseNCodec basedCodec, final boolean doEncode) {
|
||||
super(out);
|
||||
this.baseNCodec = basedCodec;
|
||||
this.doEncode = doEncode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the specified <code>byte</code> to this output stream.
|
||||
*
|
||||
* @param i
|
||||
* source byte
|
||||
* @throws IOException
|
||||
* if an I/O error occurs.
|
||||
*/
|
||||
@Override
|
||||
public void write(final int i) throws IOException {
|
||||
singleByte[0] = (byte) i;
|
||||
write(singleByte, 0, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes <code>len</code> bytes from the specified <code>b</code> array starting at <code>offset</code> to this
|
||||
* output stream.
|
||||
*
|
||||
* @param b
|
||||
* source byte array
|
||||
* @param offset
|
||||
* where to start reading the bytes
|
||||
* @param len
|
||||
* maximum number of bytes to write
|
||||
*
|
||||
* @throws IOException
|
||||
* if an I/O error occurs.
|
||||
* @throws NullPointerException
|
||||
* if the byte array parameter is null
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if offset, len or buffer size are invalid
|
||||
*/
|
||||
@Override
|
||||
public void write(final byte b[], final int offset, final int len) throws IOException {
|
||||
if (b == null) {
|
||||
throw new NullPointerException();
|
||||
} else if (offset < 0 || len < 0) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
} else if (offset > b.length || offset + len > b.length) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
} else if (len > 0) {
|
||||
if (doEncode) {
|
||||
baseNCodec.encode(b, offset, len, context);
|
||||
} else {
|
||||
baseNCodec.decode(b, offset, len, context);
|
||||
}
|
||||
flush(false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Flushes this output stream and forces any buffered output bytes to be written out to the stream. If propagate is
|
||||
* true, the wrapped stream will also be flushed.
|
||||
*
|
||||
* @param propagate
|
||||
* boolean flag to indicate whether the wrapped OutputStream should also be flushed.
|
||||
* @throws IOException
|
||||
* if an I/O error occurs.
|
||||
*/
|
||||
private void flush(final boolean propagate) throws IOException {
|
||||
final int avail = baseNCodec.available(context);
|
||||
if (avail > 0) {
|
||||
final byte[] buf = new byte[avail];
|
||||
final int c = baseNCodec.readResults(buf, 0, avail, context);
|
||||
if (c > 0) {
|
||||
out.write(buf, 0, c);
|
||||
}
|
||||
}
|
||||
if (propagate) {
|
||||
out.flush();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Flushes this output stream and forces any buffered output bytes to be written out to the stream.
|
||||
*
|
||||
* @throws IOException
|
||||
* if an I/O error occurs.
|
||||
*/
|
||||
@Override
|
||||
public void flush() throws IOException {
|
||||
flush(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes this output stream and releases any system resources associated with the stream.
|
||||
* <p>
|
||||
* To write the EOF marker without closing the stream, call {@link #eof()} or use an
|
||||
* <a href="https://commons.apache.org/proper/commons-io/">Apache Commons IO</a> <a href=
|
||||
* "https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/output/CloseShieldOutputStream.html"
|
||||
* >CloseShieldOutputStream</a>.
|
||||
* </p>
|
||||
*
|
||||
* @throws IOException
|
||||
* if an I/O error occurs.
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
eof();
|
||||
flush();
|
||||
out.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes EOF.
|
||||
*
|
||||
* @throws IOException
|
||||
* if an I/O error occurs.
|
||||
* @since 1.11
|
||||
*/
|
||||
public void eof() throws IOException {
|
||||
// Notify encoder of EOF (-1).
|
||||
if (doEncode) {
|
||||
baseNCodec.encode(singleByte, 0, EOF, context);
|
||||
} else {
|
||||
baseNCodec.decode(singleByte, 0, EOF, context);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,301 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import org.apache.commons.codec.BinaryDecoder;
|
||||
import org.apache.commons.codec.BinaryEncoder;
|
||||
import org.apache.commons.codec.DecoderException;
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
|
||||
/**
|
||||
* Converts between byte arrays and strings of "0"s and "1"s.
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*
|
||||
* TODO: may want to add more bit vector functions like and/or/xor/nand
|
||||
* TODO: also might be good to generate boolean[] from byte[] et cetera.
|
||||
*
|
||||
* @since 1.3
|
||||
* @version $Id$
|
||||
*/
|
||||
public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
|
||||
/*
|
||||
* tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
|
||||
* it.
|
||||
*/
|
||||
/** Empty char array. */
|
||||
private static final char[] EMPTY_CHAR_ARRAY = new char[0];
|
||||
|
||||
/** Empty byte array. */
|
||||
private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
|
||||
|
||||
/** Mask for bit 0 of a byte. */
|
||||
private static final int BIT_0 = 1;
|
||||
|
||||
/** Mask for bit 1 of a byte. */
|
||||
private static final int BIT_1 = 0x02;
|
||||
|
||||
/** Mask for bit 2 of a byte. */
|
||||
private static final int BIT_2 = 0x04;
|
||||
|
||||
/** Mask for bit 3 of a byte. */
|
||||
private static final int BIT_3 = 0x08;
|
||||
|
||||
/** Mask for bit 4 of a byte. */
|
||||
private static final int BIT_4 = 0x10;
|
||||
|
||||
/** Mask for bit 5 of a byte. */
|
||||
private static final int BIT_5 = 0x20;
|
||||
|
||||
/** Mask for bit 6 of a byte. */
|
||||
private static final int BIT_6 = 0x40;
|
||||
|
||||
/** Mask for bit 7 of a byte. */
|
||||
private static final int BIT_7 = 0x80;
|
||||
|
||||
private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
|
||||
|
||||
/**
|
||||
* Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
|
||||
*
|
||||
* @param raw
|
||||
* the raw binary data to convert
|
||||
* @return 0 and 1 ASCII character bytes one for each bit of the argument
|
||||
* @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
|
||||
*/
|
||||
@Override
|
||||
public byte[] encode(final byte[] raw) {
|
||||
return toAsciiBytes(raw);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
|
||||
*
|
||||
* @param raw
|
||||
* the raw binary data to convert
|
||||
* @return 0 and 1 ASCII character chars one for each bit of the argument
|
||||
* @throws EncoderException
|
||||
* if the argument is not a byte[]
|
||||
* @see org.apache.commons.codec.Encoder#encode(Object)
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object raw) throws EncoderException {
|
||||
if (!(raw instanceof byte[])) {
|
||||
throw new EncoderException("argument not a byte array");
|
||||
}
|
||||
return toAsciiChars((byte[]) raw);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a byte array where each byte represents an ASCII '0' or '1'.
|
||||
*
|
||||
* @param ascii
|
||||
* each byte represents an ASCII '0' or '1'
|
||||
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
|
||||
* @throws DecoderException
|
||||
* if argument is not a byte[], char[] or String
|
||||
* @see org.apache.commons.codec.Decoder#decode(Object)
|
||||
*/
|
||||
@Override
|
||||
public Object decode(final Object ascii) throws DecoderException {
|
||||
if (ascii == null) {
|
||||
return EMPTY_BYTE_ARRAY;
|
||||
}
|
||||
if (ascii instanceof byte[]) {
|
||||
return fromAscii((byte[]) ascii);
|
||||
}
|
||||
if (ascii instanceof char[]) {
|
||||
return fromAscii((char[]) ascii);
|
||||
}
|
||||
if (ascii instanceof String) {
|
||||
return fromAscii(((String) ascii).toCharArray());
|
||||
}
|
||||
throw new DecoderException("argument not a byte array");
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a byte array where each byte represents an ASCII '0' or '1'.
|
||||
*
|
||||
* @param ascii
|
||||
* each byte represents an ASCII '0' or '1'
|
||||
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
|
||||
* @see org.apache.commons.codec.Decoder#decode(Object)
|
||||
*/
|
||||
@Override
|
||||
public byte[] decode(final byte[] ascii) {
|
||||
return fromAscii(ascii);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a String where each char of the String represents an ASCII '0' or '1'.
|
||||
*
|
||||
* @param ascii
|
||||
* String of '0' and '1' characters
|
||||
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
|
||||
* @see org.apache.commons.codec.Decoder#decode(Object)
|
||||
*/
|
||||
public byte[] toByteArray(final String ascii) {
|
||||
if (ascii == null) {
|
||||
return EMPTY_BYTE_ARRAY;
|
||||
}
|
||||
return fromAscii(ascii.toCharArray());
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
//
|
||||
// static codec operations
|
||||
//
|
||||
// ------------------------------------------------------------------------
|
||||
/**
|
||||
* Decodes a char array where each char represents an ASCII '0' or '1'.
|
||||
*
|
||||
* @param ascii
|
||||
* each char represents an ASCII '0' or '1'
|
||||
* @return the raw encoded binary where each bit corresponds to a char in the char array argument
|
||||
*/
|
||||
public static byte[] fromAscii(final char[] ascii) {
|
||||
if (ascii == null || ascii.length == 0) {
|
||||
return EMPTY_BYTE_ARRAY;
|
||||
}
|
||||
// get length/8 times bytes with 3 bit shifts to the right of the length
|
||||
final byte[] l_raw = new byte[ascii.length >> 3];
|
||||
/*
|
||||
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
|
||||
* loop.
|
||||
*/
|
||||
for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
|
||||
for (int bits = 0; bits < BITS.length; ++bits) {
|
||||
if (ascii[jj - bits] == '1') {
|
||||
l_raw[ii] |= BITS[bits];
|
||||
}
|
||||
}
|
||||
}
|
||||
return l_raw;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a byte array where each byte represents an ASCII '0' or '1'.
|
||||
*
|
||||
* @param ascii
|
||||
* each byte represents an ASCII '0' or '1'
|
||||
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
|
||||
*/
|
||||
public static byte[] fromAscii(final byte[] ascii) {
|
||||
if (isEmpty(ascii)) {
|
||||
return EMPTY_BYTE_ARRAY;
|
||||
}
|
||||
// get length/8 times bytes with 3 bit shifts to the right of the length
|
||||
final byte[] l_raw = new byte[ascii.length >> 3];
|
||||
/*
|
||||
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
|
||||
* loop.
|
||||
*/
|
||||
for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
|
||||
for (int bits = 0; bits < BITS.length; ++bits) {
|
||||
if (ascii[jj - bits] == '1') {
|
||||
l_raw[ii] |= BITS[bits];
|
||||
}
|
||||
}
|
||||
}
|
||||
return l_raw;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> if the given array is <code>null</code> or empty (size 0.)
|
||||
*
|
||||
* @param array
|
||||
* the source array
|
||||
* @return <code>true</code> if the given array is <code>null</code> or empty (size 0.)
|
||||
*/
|
||||
private static boolean isEmpty(final byte[] array) {
|
||||
return array == null || array.length == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
|
||||
* char.
|
||||
*
|
||||
* @param raw
|
||||
* the raw binary data to convert
|
||||
* @return an array of 0 and 1 character bytes for each bit of the argument
|
||||
* @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
|
||||
*/
|
||||
public static byte[] toAsciiBytes(final byte[] raw) {
|
||||
if (isEmpty(raw)) {
|
||||
return EMPTY_BYTE_ARRAY;
|
||||
}
|
||||
// get 8 times the bytes with 3 bit shifts to the left of the length
|
||||
final byte[] l_ascii = new byte[raw.length << 3];
|
||||
/*
|
||||
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
|
||||
* loop.
|
||||
*/
|
||||
for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
|
||||
for (int bits = 0; bits < BITS.length; ++bits) {
|
||||
if ((raw[ii] & BITS[bits]) == 0) {
|
||||
l_ascii[jj - bits] = '0';
|
||||
} else {
|
||||
l_ascii[jj - bits] = '1';
|
||||
}
|
||||
}
|
||||
}
|
||||
return l_ascii;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
|
||||
*
|
||||
* @param raw
|
||||
* the raw binary data to convert
|
||||
* @return an array of 0 and 1 characters for each bit of the argument
|
||||
* @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
|
||||
*/
|
||||
public static char[] toAsciiChars(final byte[] raw) {
|
||||
if (isEmpty(raw)) {
|
||||
return EMPTY_CHAR_ARRAY;
|
||||
}
|
||||
// get 8 times the bytes with 3 bit shifts to the left of the length
|
||||
final char[] l_ascii = new char[raw.length << 3];
|
||||
/*
|
||||
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
|
||||
* loop.
|
||||
*/
|
||||
for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
|
||||
for (int bits = 0; bits < BITS.length; ++bits) {
|
||||
if ((raw[ii] & BITS[bits]) == 0) {
|
||||
l_ascii[jj - bits] = '0';
|
||||
} else {
|
||||
l_ascii[jj - bits] = '1';
|
||||
}
|
||||
}
|
||||
}
|
||||
return l_ascii;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
|
||||
*
|
||||
* @param raw
|
||||
* the raw binary data to convert
|
||||
* @return a String of 0 and 1 characters representing the binary data
|
||||
* @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
|
||||
*/
|
||||
public static String toAsciiString(final byte[] raw) {
|
||||
return new String(toAsciiChars(raw));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
/**
 * <p>
 * Operations on {@link CharSequence} that are <code>null</code> safe.
 * </p>
 * <p>
 * Copied from Apache Commons Lang r1586295 on April 10, 2014 (day of 3.3.2 release).
 * </p>
 *
 * @see CharSequence
 * @since 1.10
 */
public class CharSequenceUtils {

    /**
     * Green implementation of regionMatches.
     * <p>
     * When both arguments are Strings the call is delegated to
     * {@link String#regionMatches(boolean, int, String, int, int)}. For any other {@link CharSequence} the same
     * rules are applied by hand, including the range validation: a negative start index, or a region that
     * extends beyond the end of either sequence, yields <code>false</code> rather than an exception.
     * </p>
     *
     * @param cs
     *            the <code>CharSequence</code> to be processed
     * @param ignoreCase
     *            whether or not to be case insensitive
     * @param thisStart
     *            the index to start on the <code>cs</code> CharSequence
     * @param substring
     *            the <code>CharSequence</code> to be looked for
     * @param start
     *            the index to start on the <code>substring</code> CharSequence
     * @param length
     *            character length of the region
     * @return whether the region matched
     * @throws NullPointerException
     *             if <code>cs</code> or <code>substring</code> is <code>null</code>
     */
    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
            final CharSequence substring, final int start, final int length) {
        if (cs instanceof String && substring instanceof String) {
            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
        }

        // Same range validation as String.regionMatches(), so the answer does not depend on the
        // CharSequence implementation. The subtraction is done in long so that it cannot overflow.
        if (thisStart < 0 || start < 0
                || thisStart > (long) cs.length() - length
                || start > (long) substring.length() - length) {
            return false;
        }

        int index1 = thisStart;
        int index2 = start;
        int tmpLen = length;

        while (tmpLen-- > 0) {
            final char c1 = cs.charAt(index1++);
            final char c2 = substring.charAt(index2++);

            if (c1 == c2) {
                continue;
            }

            if (!ignoreCase) {
                return false;
            }

            // The same check as in String.regionMatches():
            if (Character.toUpperCase(c1) != Character.toUpperCase(c2) &&
                    Character.toLowerCase(c1) != Character.toLowerCase(c2)) {
                return false;
            }
        }

        return true;
    }
}
|
|
@ -0,0 +1,443 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.commons.codec.BinaryDecoder;
|
||||
import org.apache.commons.codec.BinaryEncoder;
|
||||
import org.apache.commons.codec.CharEncoding;
|
||||
import org.apache.commons.codec.Charsets;
|
||||
import org.apache.commons.codec.DecoderException;
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
|
||||
/**
|
||||
* Converts hexadecimal Strings. The charset used for certain operation can be set, the default is set in
|
||||
* {@link #DEFAULT_CHARSET_NAME}
|
||||
*
|
||||
* This class is thread-safe.
|
||||
*
|
||||
* @since 1.1
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Hex implements BinaryEncoder, BinaryDecoder {
|
||||
|
||||
/**
|
||||
* Default charset is {@link Charsets#UTF_8}
|
||||
*
|
||||
* @since 1.7
|
||||
*/
|
||||
public static final Charset DEFAULT_CHARSET = Charsets.UTF_8;
|
||||
|
||||
/**
|
||||
* Default charset name is {@link CharEncoding#UTF_8}
|
||||
*
|
||||
* @since 1.4
|
||||
*/
|
||||
public static final String DEFAULT_CHARSET_NAME = CharEncoding.UTF_8;
|
||||
|
||||
/**
|
||||
* Used to build output as Hex
|
||||
*/
|
||||
private static final char[] DIGITS_LOWER =
|
||||
{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
|
||||
|
||||
/**
|
||||
* Used to build output as Hex
|
||||
*/
|
||||
private static final char[] DIGITS_UPPER =
|
||||
{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
|
||||
|
||||
/**
|
||||
* Converts an array of characters representing hexadecimal values into an array of bytes of those same values. The
|
||||
* returned array will be half the length of the passed array, as it takes two characters to represent any given
|
||||
* byte. An exception is thrown if the passed char array has an odd number of elements.
|
||||
*
|
||||
* @param data
|
||||
* An array of characters containing hexadecimal digits
|
||||
* @return A byte array containing binary data decoded from the supplied char array.
|
||||
* @throws DecoderException
|
||||
* Thrown if an odd number or illegal of characters is supplied
|
||||
*/
|
||||
public static byte[] decodeHex(final char[] data) throws DecoderException {
|
||||
|
||||
final int len = data.length;
|
||||
|
||||
if ((len & 0x01) != 0) {
|
||||
throw new DecoderException("Odd number of characters.");
|
||||
}
|
||||
|
||||
final byte[] out = new byte[len >> 1];
|
||||
|
||||
// two characters form the hex value.
|
||||
for (int i = 0, j = 0; j < len; i++) {
|
||||
int f = toDigit(data[j], j) << 4;
|
||||
j++;
|
||||
f = f | toDigit(data[j], j);
|
||||
j++;
|
||||
out[i] = (byte) (f & 0xFF);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
|
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any
|
||||
* given byte.
|
||||
*
|
||||
* @param data
|
||||
* a byte[] to convert to Hex characters
|
||||
* @return A char[] containing hexadecimal characters
|
||||
*/
|
||||
public static char[] encodeHex(final byte[] data) {
|
||||
return encodeHex(data, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order.
|
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any
|
||||
* given byte.
|
||||
*
|
||||
* @param data
|
||||
* a byte buffer to convert to Hex characters
|
||||
* @return A char[] containing hexadecimal characters
|
||||
* @since 1.11
|
||||
*/
|
||||
public static char[] encodeHex(final ByteBuffer data) {
|
||||
return encodeHex(data, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
|
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any
|
||||
* given byte.
|
||||
*
|
||||
* @param data
|
||||
* a byte[] to convert to Hex characters
|
||||
* @param toLowerCase
|
||||
* <code>true</code> converts to lowercase, <code>false</code> to uppercase
|
||||
* @return A char[] containing hexadecimal characters
|
||||
* @since 1.4
|
||||
*/
|
||||
public static char[] encodeHex(final byte[] data, final boolean toLowerCase) {
|
||||
return encodeHex(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order.
|
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any
|
||||
* given byte.
|
||||
*
|
||||
* @param data
|
||||
* a byte buffer to convert to Hex characters
|
||||
* @param toLowerCase
|
||||
* <code>true</code> converts to lowercase, <code>false</code> to uppercase
|
||||
* @return A char[] containing hexadecimal characters
|
||||
* @since 1.11
|
||||
*/
|
||||
public static char[] encodeHex(final ByteBuffer data, final boolean toLowerCase) {
|
||||
return encodeHex(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
|
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any
|
||||
* given byte.
|
||||
*
|
||||
* @param data
|
||||
* a byte[] to convert to Hex characters
|
||||
* @param toDigits
|
||||
* the output alphabet
|
||||
* @return A char[] containing hexadecimal characters
|
||||
* @since 1.4
|
||||
*/
|
||||
protected static char[] encodeHex(final byte[] data, final char[] toDigits) {
|
||||
final int l = data.length;
|
||||
final char[] out = new char[l << 1];
|
||||
// two characters form the hex value.
|
||||
for (int i = 0, j = 0; i < l; i++) {
|
||||
out[j++] = toDigits[(0xF0 & data[i]) >>> 4];
|
||||
out[j++] = toDigits[0x0F & data[i]];
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order.
|
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any
|
||||
* given byte.
|
||||
*
|
||||
* @param data
|
||||
* a byte buffer to convert to Hex characters
|
||||
* @param toDigits
|
||||
* the output alphabet
|
||||
* @return A char[] containing hexadecimal characters
|
||||
* @since 1.11
|
||||
*/
|
||||
protected static char[] encodeHex(final ByteBuffer data, final char[] toDigits) {
|
||||
return encodeHex(data.array(), toDigits);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an array of bytes into a String representing the hexadecimal values of each byte in order. The returned
|
||||
* String will be double the length of the passed array, as it takes two characters to represent any given byte.
|
||||
*
|
||||
* @param data
|
||||
* a byte[] to convert to Hex characters
|
||||
* @return A String containing hexadecimal characters
|
||||
* @since 1.4
|
||||
*/
|
||||
public static String encodeHexString(final byte[] data) {
|
||||
return new String(encodeHex(data));
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a byte buffer into a String representing the hexadecimal values of each byte in order. The returned
|
||||
* String will be double the length of the passed array, as it takes two characters to represent any given byte.
|
||||
*
|
||||
* @param data
|
||||
* a byte buffer to convert to Hex characters
|
||||
* @return A String containing hexadecimal characters
|
||||
* @since 1.11
|
||||
*/
|
||||
public static String encodeHexString(final ByteBuffer data) {
|
||||
return new String(encodeHex(data));
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a hexadecimal character to an integer.
|
||||
*
|
||||
* @param ch
|
||||
* A character to convert to an integer digit
|
||||
* @param index
|
||||
* The index of the character in the source
|
||||
* @return An integer
|
||||
* @throws DecoderException
|
||||
* Thrown if ch is an illegal hex character
|
||||
*/
|
||||
protected static int toDigit(final char ch, final int index) throws DecoderException {
|
||||
final int digit = Character.digit(ch, 16);
|
||||
if (digit == -1) {
|
||||
throw new DecoderException("Illegal hexadecimal character " + ch + " at index " + index);
|
||||
}
|
||||
return digit;
|
||||
}
|
||||
|
||||
private final Charset charset;
|
||||
|
||||
/**
 * Creates a new codec that uses the default charset, {@link #DEFAULT_CHARSET}.
 */
public Hex() {
    this(DEFAULT_CHARSET);
}
|
||||
|
||||
/**
 * Creates a new codec that converts between characters and bytes with the given Charset.
 *
 * @param charset
 *            the charset used by the instance encode/decode methods.
 * @since 1.7
 */
public Hex(final Charset charset) {
    this.charset = charset;
}
|
||||
|
||||
/**
 * Creates a new codec with the charset looked up by name through {@link Charset#forName(String)}.
 * <p>
 * As of 1.7 this constructor throws UnsupportedCharsetException if the named charset is unavailable.
 * </p>
 *
 * @param charsetName
 *            the charset name.
 * @throws java.nio.charset.UnsupportedCharsetException
 *             If the named charset is unavailable
 * @since 1.4
 */
public Hex(final String charsetName) {
    this(Charset.forName(charsetName));
}
|
||||
|
||||
/**
|
||||
* Converts an array of character bytes representing hexadecimal values into an array of bytes of those same values.
|
||||
* The returned array will be half the length of the passed array, as it takes two characters to represent any given
|
||||
* byte. An exception is thrown if the passed char array has an odd number of elements.
|
||||
*
|
||||
* @param array
|
||||
* An array of character bytes containing hexadecimal digits
|
||||
* @return A byte array containing binary data decoded from the supplied byte array (representing characters).
|
||||
* @throws DecoderException
|
||||
* Thrown if an odd number of characters is supplied to this function
|
||||
* @see #decodeHex(char[])
|
||||
*/
|
||||
@Override
|
||||
public byte[] decode(final byte[] array) throws DecoderException {
|
||||
return decodeHex(new String(array, getCharset()).toCharArray());
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a buffer of character bytes representing hexadecimal values into an array of bytes of those same values.
|
||||
* The returned array will be half the length of the passed array, as it takes two characters to represent any given
|
||||
* byte. An exception is thrown if the passed char array has an odd number of elements.
|
||||
*
|
||||
* @param buffer
|
||||
* An array of character bytes containing hexadecimal digits
|
||||
* @return A byte array containing binary data decoded from the supplied byte array (representing characters).
|
||||
* @throws DecoderException
|
||||
* Thrown if an odd number of characters is supplied to this function
|
||||
* @see #decodeHex(char[])
|
||||
* @since 1.11
|
||||
*/
|
||||
public byte[] decode(final ByteBuffer buffer) throws DecoderException {
|
||||
return decodeHex(new String(buffer.array(), getCharset()).toCharArray());
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a String or an array of character bytes representing hexadecimal values into an array of bytes of those
|
||||
* same values. The returned array will be half the length of the passed String or array, as it takes two characters
|
||||
* to represent any given byte. An exception is thrown if the passed char array has an odd number of elements.
|
||||
*
|
||||
* @param object
|
||||
* A String, ByteBuffer, byte[], or an array of character bytes containing hexadecimal digits
|
||||
* @return A byte array containing binary data decoded from the supplied byte array (representing characters).
|
||||
* @throws DecoderException
|
||||
* Thrown if an odd number of characters is supplied to this function or the object is not a String or
|
||||
* char[]
|
||||
* @see #decodeHex(char[])
|
||||
*/
|
||||
@Override
|
||||
public Object decode(final Object object) throws DecoderException {
|
||||
if (object instanceof String) {
|
||||
return decode(((String) object).toCharArray());
|
||||
} else if (object instanceof byte[]) {
|
||||
return decode((byte[]) object);
|
||||
} else if (object instanceof ByteBuffer) {
|
||||
return decode((ByteBuffer) object);
|
||||
} else {
|
||||
try {
|
||||
return decodeHex((char[]) object);
|
||||
} catch (final ClassCastException e) {
|
||||
throw new DecoderException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an array of bytes into an array of bytes for the characters representing the hexadecimal values of each
|
||||
* byte in order. The returned array will be double the length of the passed array, as it takes two characters to
|
||||
* represent any given byte.
|
||||
* <p>
|
||||
* The conversion from hexadecimal characters to the returned bytes is performed with the charset named by
|
||||
* {@link #getCharset()}.
|
||||
* </p>
|
||||
*
|
||||
* @param array
|
||||
* a byte[] to convert to Hex characters
|
||||
* @return A byte[] containing the bytes of the hexadecimal characters
|
||||
* @since 1.7 No longer throws IllegalStateException if the charsetName is invalid.
|
||||
* @see #encodeHex(byte[])
|
||||
*/
|
||||
@Override
|
||||
public byte[] encode(final byte[] array) {
|
||||
return encodeHexString(array).getBytes(this.getCharset());
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts byte buffer into an array of bytes for the characters representing the hexadecimal values of each
|
||||
* byte in order. The returned array will be double the length of the passed array, as it takes two characters to
|
||||
* represent any given byte.
|
||||
* <p>
|
||||
* The conversion from hexadecimal characters to the returned bytes is performed with the charset named by
|
||||
* {@link #getCharset()}.
|
||||
* </p>
|
||||
*
|
||||
* @param array
|
||||
* a byte buffer to convert to Hex characters
|
||||
* @return A byte[] containing the bytes of the hexadecimal characters
|
||||
* @see #encodeHex(byte[])
|
||||
* @since 1.11
|
||||
*/
|
||||
public byte[] encode(final ByteBuffer array) {
|
||||
return encodeHexString(array).getBytes(this.getCharset());
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a String or an array of bytes into an array of characters representing the hexadecimal values of each
|
||||
* byte in order. The returned array will be double the length of the passed String or array, as it takes two
|
||||
* characters to represent any given byte.
|
||||
* <p>
|
||||
* The conversion from hexadecimal characters to bytes to be encoded to performed with the charset named by
|
||||
* {@link #getCharset()}.
|
||||
* </p>
|
||||
*
|
||||
* @param object
|
||||
* a String, ByteBuffer, or byte[] to convert to Hex characters
|
||||
* @return A char[] containing hexadecimal characters
|
||||
* @throws EncoderException
|
||||
* Thrown if the given object is not a String or byte[]
|
||||
* @see #encodeHex(byte[])
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object object) throws EncoderException {
|
||||
byte[] byteArray;
|
||||
if (object instanceof String) {
|
||||
byteArray = ((String) object).getBytes(this.getCharset());
|
||||
} else if (object instanceof ByteBuffer) {
|
||||
byteArray = ((ByteBuffer) object).array();
|
||||
} else {
|
||||
try {
|
||||
byteArray = (byte[]) object;
|
||||
} catch (final ClassCastException e) {
|
||||
throw new EncoderException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
return encodeHex(byteArray);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the charset.
|
||||
*
|
||||
* @return the charset.
|
||||
* @since 1.7
|
||||
*/
|
||||
public Charset getCharset() {
|
||||
return this.charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the charset name.
|
||||
*
|
||||
* @return the charset name.
|
||||
* @since 1.4
|
||||
*/
|
||||
public String getCharsetName() {
|
||||
return this.charset.name();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string representation of the object, which includes the charset name.
|
||||
*
|
||||
* @return a string representation of the object.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return super.toString() + "[charsetName=" + this.charset + "]";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,422 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.binary;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.commons.codec.CharEncoding;
|
||||
import org.apache.commons.codec.Charsets;
|
||||
|
||||
/**
|
||||
* Converts String to and from bytes using the encodings required by the Java specification. These encodings are
|
||||
* specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
|
||||
* Standard charsets</a>.
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*
|
||||
* @see CharEncoding
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @version $Id$
|
||||
* @since 1.4
|
||||
*/
|
||||
public class StringUtils {
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
|
||||
* The comparison is case sensitive.
|
||||
* </p>
|
||||
*
|
||||
* <pre>
|
||||
* StringUtils.equals(null, null) = true
|
||||
* StringUtils.equals(null, "abc") = false
|
||||
* StringUtils.equals("abc", null) = false
|
||||
* StringUtils.equals("abc", "abc") = true
|
||||
* StringUtils.equals("abc", "ABC") = false
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
|
||||
* </p>
|
||||
*
|
||||
* @see Object#equals(Object)
|
||||
* @param cs1
|
||||
* the first CharSequence, may be <code>null</code>
|
||||
* @param cs2
|
||||
* the second CharSequence, may be <code>null</code>
|
||||
* @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
|
||||
* @since 1.10
|
||||
*/
|
||||
public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
|
||||
if (cs1 == cs2) {
|
||||
return true;
|
||||
}
|
||||
if (cs1 == null || cs2 == null) {
|
||||
return false;
|
||||
}
|
||||
if (cs1 instanceof String && cs2 instanceof String) {
|
||||
return cs1.equals(cs2);
|
||||
}
|
||||
return CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, Math.max(cs1.length(), cs2.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link String#getBytes(Charset)}
|
||||
*
|
||||
* @param string
|
||||
* The string to encode (if null, return null).
|
||||
* @param charset
|
||||
* The {@link Charset} to encode the <code>String</code>
|
||||
* @return the encoded bytes
|
||||
*/
|
||||
private static byte[] getBytes(final String string, final Charset charset) {
|
||||
if (string == null) {
|
||||
return null;
|
||||
}
|
||||
return string.getBytes(charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link String#getBytes(Charset)}
|
||||
*
|
||||
* @param string
|
||||
* The string to encode (if null, return null).
|
||||
* @param charset
|
||||
* The {@link Charset} to encode the <code>String</code>
|
||||
* @return the encoded bytes
|
||||
* @since 1.11
|
||||
*/
|
||||
private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
|
||||
if (string == null) {
|
||||
return null;
|
||||
}
|
||||
return ByteBuffer.wrap(string.getBytes(charset));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
|
||||
* array.
|
||||
*
|
||||
* @param string
|
||||
* the String to encode, may be <code>null</code>
|
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @see #getBytesUnchecked(String, String)
|
||||
* @since 1.11
|
||||
*/
|
||||
public static ByteBuffer getByteBufferUtf8(final String string) {
|
||||
return getByteBuffer(string, Charsets.UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
|
||||
* byte array.
|
||||
*
|
||||
* @param string
|
||||
* the String to encode, may be <code>null</code>
|
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @see #getBytesUnchecked(String, String)
|
||||
*/
|
||||
public static byte[] getBytesIso8859_1(final String string) {
|
||||
return getBytes(string, Charsets.ISO_8859_1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
|
||||
* array.
|
||||
* <p>
|
||||
* This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
|
||||
* should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
|
||||
* </p>
|
||||
*
|
||||
* @param string
|
||||
* the String to encode, may be <code>null</code>
|
||||
* @param charsetName
|
||||
* The name of a required {@link java.nio.charset.Charset}
|
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
|
||||
* @throws IllegalStateException
|
||||
* Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
|
||||
* required charset name.
|
||||
* @see CharEncoding
|
||||
* @see String#getBytes(String)
|
||||
*/
|
||||
public static byte[] getBytesUnchecked(final String string, final String charsetName) {
|
||||
if (string == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return string.getBytes(charsetName);
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw StringUtils.newIllegalStateException(charsetName, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
|
||||
* array.
|
||||
*
|
||||
* @param string
|
||||
* the String to encode, may be <code>null</code>
|
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @see #getBytesUnchecked(String, String)
|
||||
*/
|
||||
public static byte[] getBytesUsAscii(final String string) {
|
||||
return getBytes(string, Charsets.US_ASCII);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
|
||||
* array.
|
||||
*
|
||||
* @param string
|
||||
* the String to encode, may be <code>null</code>
|
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @see #getBytesUnchecked(String, String)
|
||||
*/
|
||||
public static byte[] getBytesUtf16(final String string) {
|
||||
return getBytes(string, Charsets.UTF_16);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
|
||||
* array.
|
||||
*
|
||||
* @param string
|
||||
* the String to encode, may be <code>null</code>
|
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @see #getBytesUnchecked(String, String)
|
||||
*/
|
||||
public static byte[] getBytesUtf16Be(final String string) {
|
||||
return getBytes(string, Charsets.UTF_16BE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
|
||||
* array.
|
||||
*
|
||||
* @param string
|
||||
* the String to encode, may be <code>null</code>
|
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @see #getBytesUnchecked(String, String)
|
||||
*/
|
||||
public static byte[] getBytesUtf16Le(final String string) {
|
||||
return getBytes(string, Charsets.UTF_16LE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
|
||||
* array.
|
||||
*
|
||||
* @param string
|
||||
* the String to encode, may be <code>null</code>
|
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @see #getBytesUnchecked(String, String)
|
||||
*/
|
||||
public static byte[] getBytesUtf8(final String string) {
|
||||
return getBytes(string, Charsets.UTF_8);
|
||||
}
|
||||
|
||||
private static IllegalStateException newIllegalStateException(final String charsetName,
|
||||
final UnsupportedEncodingException e) {
|
||||
return new IllegalStateException(charsetName + ": " + e);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
* @param charset
|
||||
* The {@link Charset} to encode the <code>String</code>
|
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
|
||||
* or <code>null</code> if the input byte array was <code>null</code>.
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
*/
|
||||
private static String newString(final byte[] bytes, final Charset charset) {
|
||||
return bytes == null ? null : new String(bytes, charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
|
||||
* <p>
|
||||
* This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
|
||||
* should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
|
||||
* </p>
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters, may be <code>null</code>
|
||||
* @param charsetName
|
||||
* The name of a required {@link java.nio.charset.Charset}
|
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
|
||||
* or <code>null</code> if the input byte array was <code>null</code>.
|
||||
* @throws IllegalStateException
|
||||
* Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
|
||||
* required charset name.
|
||||
* @see CharEncoding
|
||||
* @see String#String(byte[], String)
|
||||
*/
|
||||
public static String newString(final byte[] bytes, final String charsetName) {
|
||||
if (bytes == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return new String(bytes, charsetName);
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw StringUtils.newIllegalStateException(charsetName, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters, may be <code>null</code>
|
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
|
||||
* <code>null</code> if the input byte array was <code>null</code>.
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
*/
|
||||
public static String newStringIso8859_1(final byte[] bytes) {
|
||||
return new String(bytes, Charsets.ISO_8859_1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
|
||||
* or <code>null</code> if the input byte array was <code>null</code>.
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
*/
|
||||
public static String newStringUsAscii(final byte[] bytes) {
|
||||
return new String(bytes, Charsets.US_ASCII);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
|
||||
* or <code>null</code> if the input byte array was <code>null</code>.
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
*/
|
||||
public static String newStringUtf16(final byte[] bytes) {
|
||||
return new String(bytes, Charsets.UTF_16);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
|
||||
* or <code>null</code> if the input byte array was <code>null</code>.
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
*/
|
||||
public static String newStringUtf16Be(final byte[] bytes) {
|
||||
return new String(bytes, Charsets.UTF_16BE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
|
||||
* or <code>null</code> if the input byte array was <code>null</code>.
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
*/
|
||||
public static String newStringUtf16Le(final byte[] bytes) {
|
||||
return new String(bytes, Charsets.UTF_16LE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
|
||||
* or <code>null</code> if the input byte array was <code>null</code>.
|
||||
* @throws NullPointerException
|
||||
* Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
|
||||
* required by the Java platform specification.
|
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
|
||||
*/
|
||||
public static String newStringUtf8(final byte[] bytes) {
|
||||
return newString(bytes, Charsets.UTF_8);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<body>
|
||||
Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.codec.digest;
|
||||
|
||||
import java.security.SecureRandom;
import java.util.Random;
|
||||
|
||||
/**
 * Base64 like method to convert binary bytes into ASCII chars.
 *
 * TODO: Can Base64 be reused?
 *
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @version $Id$
 * @since 1.7
 */
class B64 {

    /**
     * Table with characters for Base64 transformation.
     */
    static final String B64T = "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    /**
     * Base64 like conversion of bytes to ASCII chars.
     * <p>
     * The three bytes are packed into one 24-bit word ({@code b2} in the highest position, {@code b0} in the
     * lowest) and the word is then emitted six bits at a time, lowest six bits first, as characters taken from
     * {@link #B64T}.
     * </p>
     *
     * @param b2
     *            A byte from the result.
     * @param b1
     *            A byte from the result.
     * @param b0
     *            A byte from the result.
     * @param outLen
     *            The number of expected output chars.
     * @param buffer
     *            Where the output chars are appended to.
     */
    static void b64from24bit(final byte b2, final byte b1, final byte b0, final int outLen,
            final StringBuilder buffer) {
        // The bit masking is necessary because the JVM byte type is signed!
        int w = ((b2 << 16) & 0x00ffffff) | ((b1 << 8) & 0x00ffff) | (b0 & 0xff);
        // It's effectively a "for" loop but kept to resemble the original C code.
        int n = outLen;
        while (n-- > 0) {
            buffer.append(B64T.charAt(w & 0x3f));
            w >>= 6;
        }
    }

    /**
     * Generates a string of random chars from the B64T set.
     * <p>
     * Salts feed password hashing, so the characters are drawn from a {@link SecureRandom}. A single generator is
     * created per call rather than one per character.
     * </p>
     *
     * @param num
     *            Number of chars to generate; values less than one yield an empty string.
     * @return a string of {@code num} characters, each taken from {@link #B64T}
     */
    static String getRandomSalt(final int num) {
        final SecureRandom random = new SecureRandom();
        final StringBuilder saltString = new StringBuilder();
        for (int i = 1; i <= num; i++) {
            saltString.append(B64T.charAt(random.nextInt(B64T.length())));
        }
        return saltString.toString();
    }
}
|
|
@ -0,0 +1,151 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.codec.digest;
|
||||
|
||||
import org.apache.commons.codec.Charsets;
|
||||
|
||||
/**
|
||||
* GNU libc crypt(3) compatible hash method.
|
||||
* <p>
|
||||
* See {@link #crypt(String, String)} for further details.
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since 1.7
|
||||
*/
|
||||
public class Crypt {
|
||||
|
||||
/**
|
||||
* Encrypts a password in a crypt(3) compatible way.
|
||||
* <p>
|
||||
* A random salt and the default algorithm (currently SHA-512) are used. See {@link #crypt(String, String)} for
|
||||
* details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext password
|
||||
* @return hash value
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String crypt(final byte[] keyBytes) {
|
||||
return crypt(keyBytes, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encrypts a password in a crypt(3) compatible way.
|
||||
* <p>
|
||||
* If no salt is provided, a random salt and the default algorithm (currently SHA-512) will be used. See
|
||||
* {@link #crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext password
|
||||
* @param salt
|
||||
* salt value
|
||||
* @return hash value
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String crypt(final byte[] keyBytes, final String salt) {
|
||||
if (salt == null) {
|
||||
return Sha2Crypt.sha512Crypt(keyBytes);
|
||||
} else if (salt.startsWith(Sha2Crypt.SHA512_PREFIX)) {
|
||||
return Sha2Crypt.sha512Crypt(keyBytes, salt);
|
||||
} else if (salt.startsWith(Sha2Crypt.SHA256_PREFIX)) {
|
||||
return Sha2Crypt.sha256Crypt(keyBytes, salt);
|
||||
} else if (salt.startsWith(Md5Crypt.MD5_PREFIX)) {
|
||||
return Md5Crypt.md5Crypt(keyBytes, salt);
|
||||
} else {
|
||||
return UnixCrypt.crypt(keyBytes, salt);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the digest using the strongest crypt(3) algorithm.
|
||||
* <p>
|
||||
* A random salt and the default algorithm (currently SHA-512) are used.
|
||||
*
|
||||
* @see #crypt(String, String)
|
||||
* @param key
|
||||
* plaintext password
|
||||
* @return hash value
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String crypt(final String key) {
|
||||
return crypt(key, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encrypts a password in a crypt(3) compatible way.
|
||||
* <p>
|
||||
* The exact algorithm depends on the format of the salt string:
|
||||
* <ul>
|
||||
* <li>SHA-512 salts start with {@code $6$} and are up to 16 chars long.
|
||||
* <li>SHA-256 salts start with {@code $5$} and are up to 16 chars long
|
||||
* <li>MD5 salts start with {@code $1$} and are up to 8 chars long
|
||||
* <li>DES, the traditional UnixCrypt algorithm is used with only 2 chars
|
||||
* <li>Only the first 8 chars of the passwords are used in the DES algorithm!
|
||||
* </ul>
|
||||
* The magic strings {@code "$apr1$"} and {@code "$2a$"} are not recognized by this method as its output should be
|
||||
* identical with that of the libc implementation.
|
||||
* <p>
|
||||
* The rest of the salt string is drawn from the set {@code [a-zA-Z0-9./]} and is cut at the maximum length of if a
|
||||
* {@code "$"} sign is encountered. It is therefore valid to enter a complete hash value as salt to e.g. verify a
|
||||
* password with:
|
||||
*
|
||||
* <pre>
|
||||
* storedPwd.equals(crypt(enteredPwd, storedPwd))
|
||||
* </pre>
|
||||
* <p>
|
||||
* The resulting string starts with the marker string ({@code $6$}), continues with the salt value and ends with a
|
||||
* {@code "$"} sign followed by the actual hash value. For DES the string only contains the salt and actual hash.
|
||||
* It's total length is dependent on the algorithm used:
|
||||
* <ul>
|
||||
* <li>SHA-512: 106 chars
|
||||
* <li>SHA-256: 63 chars
|
||||
* <li>MD5: 34 chars
|
||||
* <li>DES: 13 chars
|
||||
* </ul>
|
||||
* <p>
|
||||
* Example:
|
||||
*
|
||||
* <pre>
|
||||
* crypt("secret", "$1$xxxx") => "$1$xxxx$aMkevjfEIpa35Bh3G4bAc."
|
||||
* crypt("secret", "xx") => "xxWAum7tHdIUw"
|
||||
* </pre>
|
||||
* <p>
|
||||
* This method comes in a variation that accepts a byte[] array to support input strings that are not encoded in
|
||||
* UTF-8 but e.g. in ISO-8859-1 where equal characters result in different byte values.
|
||||
*
|
||||
* @see "The man page of the libc crypt (3) function."
|
||||
* @param key
|
||||
* plaintext password as entered by the used
|
||||
* @param salt
|
||||
* salt value
|
||||
* @return hash value, i.e. encrypted password including the salt string
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. *
|
||||
*/
|
||||
public static String crypt(final String key, final String salt) {
|
||||
return crypt(key.getBytes(Charsets.UTF_8), salt);
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.digest;
|
||||
|
||||
/**
 * Standard {@link HmacUtils} algorithm names from the <cite>Java Cryptography Architecture Standard Algorithm Name
 * Documentation</cite>.
 *
 * <p>
 * <strong>Note: Not all JCE implementations support all algorithms in this enum.</strong>
 * </p>
 *
 * @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/StandardNames.html">Java Cryptography
 *      Architecture Standard Algorithm Name Documentation</a>
 * @since 1.10
 * @version $Id$
 */
public enum HmacAlgorithms {

    /**
     * The HmacMD5 Message Authentication Code (MAC) algorithm specified in RFC 2104 and RFC 1321.
     * <p>
     * Every implementation of the Java platform is required to support this standard Mac algorithm.
     * </p>
     */
    HMAC_MD5("HmacMD5"),

    /**
     * The HmacSHA1 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is required to support this standard Mac algorithm.
     * </p>
     */
    HMAC_SHA_1("HmacSHA1"),

    /**
     * The HmacSHA256 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is required to support this standard Mac algorithm.
     * </p>
     */
    HMAC_SHA_256("HmacSHA256"),

    /**
     * The HmacSHA384 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm.
     * </p>
     */
    HMAC_SHA_384("HmacSHA384"),

    /**
     * The HmacSHA512 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm.
     * </p>
     */
    HMAC_SHA_512("HmacSHA512");

    /** The JCA algorithm name of this constant, as returned by {@link #toString()}. */
    private final String algorithm;

    // Enum constructors are implicitly private, so no access modifier is written here.
    HmacAlgorithms(final String algorithm) {
        this.algorithm = algorithm;
    }

    /**
     * The algorithm name
     *
     * @see <a
     *      href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/SunProviders.html#SunJCEProvider">Java
     *      Cryptography Architecture Sun Providers Documentation</a>
     * @return The algorithm name ("HmacSHA512" for example)
     */
    @Override
    public String toString() {
        return algorithm;
    }

}
|
|
@ -0,0 +1,794 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.digest;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.security.InvalidKeyException;
|
||||
import java.security.Key;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
|
||||
import javax.crypto.Mac;
|
||||
import javax.crypto.spec.SecretKeySpec;
|
||||
|
||||
import org.apache.commons.codec.binary.Hex;
|
||||
import org.apache.commons.codec.binary.StringUtils;
|
||||
|
||||
/**
|
||||
* Simplifies common {@link javax.crypto.Mac} tasks. This class is immutable and thread-safe.
|
||||
*
|
||||
*
|
||||
* <p>
|
||||
* <strong>Note: Not all JCE implementations support all algorithms. If not supported, an IllegalArgumentException is
|
||||
* thrown.</strong>
|
||||
* </p>
|
||||
*
|
||||
* @since 1.10
|
||||
* @version $Id$
|
||||
*/
|
||||
public final class HmacUtils {
|
||||
|
||||
private static final int STREAM_BUFFER_LENGTH = 1024;
|
||||
|
||||
/**
|
||||
* Returns an initialized <code>Mac</code> for the HmacMD5 algorithm.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is required to support this standard Mac algorithm.
|
||||
* </p>
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @return A Mac instance initialized with the given key.
|
||||
* @see Mac#getInstance(String)
|
||||
* @see Mac#init(Key)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static Mac getHmacMd5(final byte[] key) {
|
||||
return getInitializedMac(HmacAlgorithms.HMAC_MD5, key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an initialized <code>Mac</code> for the HmacSHA1 algorithm.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is required to support this standard Mac algorithm.
|
||||
* </p>
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @return A Mac instance initialized with the given key.
|
||||
* @see Mac#getInstance(String)
|
||||
* @see Mac#init(Key)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static Mac getHmacSha1(final byte[] key) {
|
||||
return getInitializedMac(HmacAlgorithms.HMAC_SHA_1, key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an initialized <code>Mac</code> for the HmacSHA256 algorithm.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is required to support this standard Mac algorithm.
|
||||
* </p>
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @return A Mac instance initialized with the given key.
|
||||
* @see Mac#getInstance(String)
|
||||
* @see Mac#init(Key)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static Mac getHmacSha256(final byte[] key) {
|
||||
return getInitializedMac(HmacAlgorithms.HMAC_SHA_256, key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an initialized <code>Mac</code> for the HmacSHA384 algorithm.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm.
|
||||
* </p>
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @return A Mac instance initialized with the given key.
|
||||
* @see Mac#getInstance(String)
|
||||
* @see Mac#init(Key)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static Mac getHmacSha384(final byte[] key) {
|
||||
return getInitializedMac(HmacAlgorithms.HMAC_SHA_384, key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an initialized <code>Mac</code> for the HmacSHA512 algorithm.
|
||||
* <p>
|
||||
* Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm.
|
||||
* </p>
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @return A Mac instance initialized with the given key.
|
||||
* @see Mac#getInstance(String)
|
||||
* @see Mac#init(Key)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static Mac getHmacSha512(final byte[] key) {
|
||||
return getInitializedMac(HmacAlgorithms.HMAC_SHA_512, key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an initialized <code>Mac</code> for the given <code>algorithm</code>.
|
||||
*
|
||||
* @param algorithm
|
||||
* the name of the algorithm requested. See <a href=
|
||||
* "http://docs.oracle.com/javase/6/docs/technotes/guides/security/crypto/CryptoSpec.html#AppA" >Appendix
|
||||
* A in the Java Cryptography Architecture Reference Guide</a> for information about standard algorithm
|
||||
* names.
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @return A Mac instance initialized with the given key.
|
||||
* @see Mac#getInstance(String)
|
||||
* @see Mac#init(Key)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static Mac getInitializedMac(final HmacAlgorithms algorithm, final byte[] key) {
|
||||
return getInitializedMac(algorithm.toString(), key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an initialized <code>Mac</code> for the given <code>algorithm</code>.
|
||||
*
|
||||
* @param algorithm
|
||||
* the name of the algorithm requested. See <a href=
|
||||
* "http://docs.oracle.com/javase/6/docs/technotes/guides/security/crypto/CryptoSpec.html#AppA" >Appendix
|
||||
* A in the Java Cryptography Architecture Reference Guide</a> for information about standard algorithm
|
||||
* names.
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @return A Mac instance initialized with the given key.
|
||||
* @see Mac#getInstance(String)
|
||||
* @see Mac#init(Key)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static Mac getInitializedMac(final String algorithm, final byte[] key) {
|
||||
|
||||
if (key == null) {
|
||||
throw new IllegalArgumentException("Null key");
|
||||
}
|
||||
|
||||
try {
|
||||
final SecretKeySpec keySpec = new SecretKeySpec(key, algorithm);
|
||||
final Mac mac = Mac.getInstance(algorithm);
|
||||
mac.init(keySpec);
|
||||
return mac;
|
||||
} catch (final NoSuchAlgorithmException e) {
|
||||
throw new IllegalArgumentException(e);
|
||||
} catch (final InvalidKeyException e) {
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
// hmacMd5
|
||||
|
||||
/**
|
||||
* Returns a HmacMD5 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacMD5 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacMd5(final byte[] key, final byte[] valueToDigest) {
|
||||
try {
|
||||
return getHmacMd5(key).doFinal(valueToDigest);
|
||||
} catch (final IllegalStateException e) {
|
||||
// cannot happen
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacMD5 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacMD5 MAC for the given key and value
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacMd5(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return updateHmac(getHmacMd5(key), valueToDigest).doFinal();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacMD5 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacMD5 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacMd5(final String key, final String valueToDigest) {
|
||||
return hmacMd5(StringUtils.getBytesUtf8(key), StringUtils.getBytesUtf8(valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacMD5 Message Authentication Code (MAC) as a hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacMD5 MAC for the given key and value as a hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacMd5Hex(final byte[] key, final byte[] valueToDigest) {
|
||||
return Hex.encodeHexString(hmacMd5(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacMD5 Message Authentication Code (MAC) as a hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacMD5 MAC for the given key and value as a hex string (lowercase)
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacMd5Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return Hex.encodeHexString(hmacMd5(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacMD5 Message Authentication Code (MAC) as a hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacMD5 MAC for the given key and value as a hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacMd5Hex(final String key, final String valueToDigest) {
|
||||
return Hex.encodeHexString(hmacMd5(key, valueToDigest));
|
||||
}
|
||||
|
||||
// hmacSha1
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA1 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA1 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha1(final byte[] key, final byte[] valueToDigest) {
|
||||
try {
|
||||
return getHmacSha1(key).doFinal(valueToDigest);
|
||||
} catch (final IllegalStateException e) {
|
||||
// cannot happen
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA1 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacSHA1 MAC for the given key and value
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha1(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return updateHmac(getHmacSha1(key), valueToDigest).doFinal();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA1 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA1 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha1(final String key, final String valueToDigest) {
|
||||
return hmacSha1(StringUtils.getBytesUtf8(key), StringUtils.getBytesUtf8(valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA1 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA1 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha1Hex(final byte[] key, final byte[] valueToDigest) {
|
||||
return Hex.encodeHexString(hmacSha1(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA1 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacSHA1 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha1Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return Hex.encodeHexString(hmacSha1(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA1 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA1 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha1Hex(final String key, final String valueToDigest) {
|
||||
return Hex.encodeHexString(hmacSha1(key, valueToDigest));
|
||||
}
|
||||
|
||||
// hmacSha256
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA256 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA256 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha256(final byte[] key, final byte[] valueToDigest) {
|
||||
try {
|
||||
return getHmacSha256(key).doFinal(valueToDigest);
|
||||
} catch (final IllegalStateException e) {
|
||||
// cannot happen
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA256 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacSHA256 MAC for the given key and value
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
s * @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha256(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return updateHmac(getHmacSha256(key), valueToDigest).doFinal();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA256 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA256 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha256(final String key, final String valueToDigest) {
|
||||
return hmacSha256(StringUtils.getBytesUtf8(key), StringUtils.getBytesUtf8(valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA256 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA256 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha256Hex(final byte[] key, final byte[] valueToDigest) {
|
||||
return Hex.encodeHexString(hmacSha256(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA256 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacSHA256 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha256Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return Hex.encodeHexString(hmacSha256(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA256 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA256 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha256Hex(final String key, final String valueToDigest) {
|
||||
return Hex.encodeHexString(hmacSha256(key, valueToDigest));
|
||||
}
|
||||
|
||||
// hmacSha384
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA384 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA384 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha384(final byte[] key, final byte[] valueToDigest) {
|
||||
try {
|
||||
return getHmacSha384(key).doFinal(valueToDigest);
|
||||
} catch (final IllegalStateException e) {
|
||||
// cannot happen
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA384 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacSHA384 MAC for the given key and value
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha384(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return updateHmac(getHmacSha384(key), valueToDigest).doFinal();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA384 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA384 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha384(final String key, final String valueToDigest) {
|
||||
return hmacSha384(StringUtils.getBytesUtf8(key), StringUtils.getBytesUtf8(valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA384 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA384 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha384Hex(final byte[] key, final byte[] valueToDigest) {
|
||||
return Hex.encodeHexString(hmacSha384(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA384 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacSHA384 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha384Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return Hex.encodeHexString(hmacSha384(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA384 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA384 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha384Hex(final String key, final String valueToDigest) {
|
||||
return Hex.encodeHexString(hmacSha384(key, valueToDigest));
|
||||
}
|
||||
|
||||
// hmacSha512
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA512 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA512 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha512(final byte[] key, final byte[] valueToDigest) {
|
||||
try {
|
||||
return getHmacSha512(key).doFinal(valueToDigest);
|
||||
} catch (final IllegalStateException e) {
|
||||
// cannot happen
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA512 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacSHA512 MAC for the given key and value
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha512(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return updateHmac(getHmacSha512(key), valueToDigest).doFinal();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA512 Message Authentication Code (MAC) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA512 MAC for the given key and value
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static byte[] hmacSha512(final String key, final String valueToDigest) {
|
||||
return hmacSha512(StringUtils.getBytesUtf8(key), StringUtils.getBytesUtf8(valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA512 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA512 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha512Hex(final byte[] key, final byte[] valueToDigest) {
|
||||
return Hex.encodeHexString(hmacSha512(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA512 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return HmacSHA512 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha512Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
|
||||
return Hex.encodeHexString(hmacSha512(key, valueToDigest));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a HmacSHA512 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
|
||||
*
|
||||
* @param key
|
||||
* They key for the keyed digest (must not be null)
|
||||
* @param valueToDigest
|
||||
* The value (data) which should to digest (maybe empty or null)
|
||||
* @return HmacSHA512 MAC for the given key and value as hex string (lowercase)
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
|
||||
*/
|
||||
public static String hmacSha512Hex(final String key, final String valueToDigest) {
|
||||
return Hex.encodeHexString(hmacSha512(key, valueToDigest));
|
||||
}
|
||||
|
||||
// update
|
||||
|
||||
/**
|
||||
* Updates the given {@link Mac}. This generates a digest for valueToDigest and the key the Mac was initialized
|
||||
*
|
||||
* @param mac
|
||||
* the initialized {@link Mac} to update
|
||||
* @param valueToDigest
|
||||
* the value to update the {@link Mac} with (maybe null or empty)
|
||||
* @return the updated {@link Mac}
|
||||
* @throws IllegalStateException
|
||||
* if the Mac was not initialized
|
||||
* @since 1.x
|
||||
*/
|
||||
public static Mac updateHmac(final Mac mac, final byte[] valueToDigest) {
|
||||
mac.reset();
|
||||
mac.update(valueToDigest);
|
||||
return mac;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the given {@link Mac}. This generates a digest for valueToDigest and the key the Mac was initialized
|
||||
*
|
||||
* @param mac
|
||||
* the initialized {@link Mac} to update
|
||||
* @param valueToDigest
|
||||
* the value to update the {@link Mac} with
|
||||
* <p>
|
||||
* The InputStream must not be null and will not be closed
|
||||
* </p>
|
||||
* @return the updated {@link Mac}
|
||||
* @throws IOException
|
||||
* If an I/O error occurs.
|
||||
* @throws IllegalStateException
|
||||
* If the Mac was not initialized
|
||||
* @since 1.x
|
||||
*/
|
||||
public static Mac updateHmac(final Mac mac, final InputStream valueToDigest) throws IOException {
|
||||
mac.reset();
|
||||
final byte[] buffer = new byte[STREAM_BUFFER_LENGTH];
|
||||
int read = valueToDigest.read(buffer, 0, STREAM_BUFFER_LENGTH);
|
||||
|
||||
while (read > -1) {
|
||||
mac.update(buffer, 0, read);
|
||||
read = valueToDigest.read(buffer, 0, STREAM_BUFFER_LENGTH);
|
||||
}
|
||||
|
||||
return mac;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the given {@link Mac}. This generates a digest for valueToDigest and the key the Mac was initialized
|
||||
*
|
||||
* @param mac
|
||||
* the initialized {@link Mac} to update
|
||||
* @param valueToDigest
|
||||
* the value to update the {@link Mac} with (maybe null or empty)
|
||||
* @return the updated {@link Mac}
|
||||
* @throws IllegalStateException
|
||||
* if the Mac was not initialized
|
||||
* @since 1.x
|
||||
*/
|
||||
public static Mac updateHmac(final Mac mac, final String valueToDigest) {
|
||||
mac.reset();
|
||||
mac.update(StringUtils.getBytesUtf8(valueToDigest));
|
||||
return mac;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,302 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.codec.digest;
|
||||
|
||||
import java.security.MessageDigest;
|
||||
import java.util.Arrays;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.codec.Charsets;
|
||||
|
||||
/**
|
||||
* The libc crypt() "$1$" and Apache "$apr1$" MD5-based hash algorithm.
|
||||
* <p>
|
||||
* Based on the public domain ("beer-ware") C implementation from Poul-Henning Kamp which was found at: <a
|
||||
* href="http://www.freebsd.org/cgi/cvsweb.cgi/src/lib/libcrypt/crypt-md5.c?rev=1.1;content-type=text%2Fplain">
|
||||
* crypt-md5.c @ freebsd.org</a><br>
|
||||
* <p>
|
||||
* Source:
|
||||
*
|
||||
* <pre>
|
||||
* $FreeBSD: src/lib/libcrypt/crypt-md5.c,v 1.1 1999/01/21 13:50:09 brandon Exp $
|
||||
* </pre>
|
||||
* <p>
|
||||
* Conversion to Kotlin and from there to Java in 2012.
|
||||
* <p>
|
||||
* The C style comments are from the original C code, the ones with "//" from the port.
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since 1.7
|
||||
*/
|
||||
public class Md5Crypt {
|
||||
|
||||
/** The Identifier of the Apache variant. */
|
||||
static final String APR1_PREFIX = "$apr1$";
|
||||
|
||||
/** The number of bytes of the final hash. */
|
||||
private static final int BLOCKSIZE = 16;
|
||||
|
||||
/** The Identifier of this crypt() variant. */
|
||||
static final String MD5_PREFIX = "$1$";
|
||||
|
||||
/** The number of rounds of the big loop. */
|
||||
private static final int ROUNDS = 1000;
|
||||
|
||||
/**
|
||||
* See {@link #apr1Crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext string to hash.
|
||||
* @return the hash value
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. *
|
||||
*/
|
||||
public static String apr1Crypt(final byte[] keyBytes) {
|
||||
return apr1Crypt(keyBytes, APR1_PREFIX + B64.getRandomSalt(8));
|
||||
}
|
||||
|
||||
/**
|
||||
* See {@link #apr1Crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext string to hash.
|
||||
* @param salt An APR1 salt.
|
||||
* @return the hash value
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String apr1Crypt(final byte[] keyBytes, String salt) {
|
||||
// to make the md5Crypt regex happy
|
||||
if (salt != null && !salt.startsWith(APR1_PREFIX)) {
|
||||
salt = APR1_PREFIX + salt;
|
||||
}
|
||||
return Md5Crypt.md5Crypt(keyBytes, salt, APR1_PREFIX);
|
||||
}
|
||||
|
||||
/**
|
||||
* See {@link #apr1Crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext string to hash.
|
||||
* @return the hash value
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String apr1Crypt(final String keyBytes) {
|
||||
return apr1Crypt(keyBytes.getBytes(Charsets.UTF_8));
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates an Apache htpasswd compatible "$apr1$" MD5 based hash value.
|
||||
* <p>
|
||||
* The algorithm is identical to the crypt(3) "$1$" one but produces different outputs due to the different salt
|
||||
* prefix.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext string to hash.
|
||||
* @param salt
|
||||
* salt string including the prefix and optionally garbage at the end. Will be generated randomly if
|
||||
* null.
|
||||
* @return the hash value
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String apr1Crypt(final String keyBytes, final String salt) {
|
||||
return apr1Crypt(keyBytes.getBytes(Charsets.UTF_8), salt);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a libc6 crypt() compatible "$1$" hash value.
|
||||
* <p>
|
||||
* See {@link Crypt#crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext string to hash.
|
||||
* @return the hash value
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String md5Crypt(final byte[] keyBytes) {
|
||||
return md5Crypt(keyBytes, MD5_PREFIX + B64.getRandomSalt(8));
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a libc crypt() compatible "$1$" MD5 based hash value.
|
||||
* <p>
|
||||
* See {@link Crypt#crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext string to hash.
|
||||
* @param salt
|
||||
* salt string including the prefix and optionally garbage at the end. Will be generated randomly if
|
||||
* null.
|
||||
* @return the hash value
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String md5Crypt(final byte[] keyBytes, final String salt) {
|
||||
return md5Crypt(keyBytes, salt, MD5_PREFIX);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a libc6 crypt() "$1$" or Apache htpasswd "$apr1$" hash value.
|
||||
* <p>
|
||||
* See {@link Crypt#crypt(String, String)} or {@link #apr1Crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext string to hash.
|
||||
* @param salt May be null.
|
||||
* @param prefix salt prefix
|
||||
* @return the hash value
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String md5Crypt(final byte[] keyBytes, final String salt, final String prefix) {
|
||||
final int keyLen = keyBytes.length;
|
||||
|
||||
// Extract the real salt from the given string which can be a complete hash string.
|
||||
String saltString;
|
||||
if (salt == null) {
|
||||
saltString = B64.getRandomSalt(8);
|
||||
} else {
|
||||
final Pattern p = Pattern.compile("^" + prefix.replace("$", "\\$") + "([\\.\\/a-zA-Z0-9]{1,8}).*");
|
||||
final Matcher m = p.matcher(salt);
|
||||
if (m == null || !m.find()) {
|
||||
throw new IllegalArgumentException("Invalid salt value: " + salt);
|
||||
}
|
||||
saltString = m.group(1);
|
||||
}
|
||||
final byte[] saltBytes = saltString.getBytes(Charsets.UTF_8);
|
||||
|
||||
final MessageDigest ctx = DigestUtils.getMd5Digest();
|
||||
|
||||
/*
|
||||
* The password first, since that is what is most unknown
|
||||
*/
|
||||
ctx.update(keyBytes);
|
||||
|
||||
/*
|
||||
* Then our magic string
|
||||
*/
|
||||
ctx.update(prefix.getBytes(Charsets.UTF_8));
|
||||
|
||||
/*
|
||||
* Then the raw salt
|
||||
*/
|
||||
ctx.update(saltBytes);
|
||||
|
||||
/*
|
||||
* Then just as many characters of the MD5(pw,salt,pw)
|
||||
*/
|
||||
MessageDigest ctx1 = DigestUtils.getMd5Digest();
|
||||
ctx1.update(keyBytes);
|
||||
ctx1.update(saltBytes);
|
||||
ctx1.update(keyBytes);
|
||||
byte[] finalb = ctx1.digest();
|
||||
int ii = keyLen;
|
||||
while (ii > 0) {
|
||||
ctx.update(finalb, 0, ii > 16 ? 16 : ii);
|
||||
ii -= 16;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't leave anything around in vm they could use.
|
||||
*/
|
||||
Arrays.fill(finalb, (byte) 0);
|
||||
|
||||
/*
|
||||
* Then something really weird...
|
||||
*/
|
||||
ii = keyLen;
|
||||
final int j = 0;
|
||||
while (ii > 0) {
|
||||
if ((ii & 1) == 1) {
|
||||
ctx.update(finalb[j]);
|
||||
} else {
|
||||
ctx.update(keyBytes[j]);
|
||||
}
|
||||
ii >>= 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now make the output string
|
||||
*/
|
||||
final StringBuilder passwd = new StringBuilder(prefix + saltString + "$");
|
||||
finalb = ctx.digest();
|
||||
|
||||
/*
|
||||
* and now, just to make sure things don't run too fast On a 60 Mhz Pentium this takes 34 msec, so you would
|
||||
* need 30 seconds to build a 1000 entry dictionary...
|
||||
*/
|
||||
for (int i = 0; i < ROUNDS; i++) {
|
||||
ctx1 = DigestUtils.getMd5Digest();
|
||||
if ((i & 1) != 0) {
|
||||
ctx1.update(keyBytes);
|
||||
} else {
|
||||
ctx1.update(finalb, 0, BLOCKSIZE);
|
||||
}
|
||||
|
||||
if (i % 3 != 0) {
|
||||
ctx1.update(saltBytes);
|
||||
}
|
||||
|
||||
if (i % 7 != 0) {
|
||||
ctx1.update(keyBytes);
|
||||
}
|
||||
|
||||
if ((i & 1) != 0) {
|
||||
ctx1.update(finalb, 0, BLOCKSIZE);
|
||||
} else {
|
||||
ctx1.update(keyBytes);
|
||||
}
|
||||
finalb = ctx1.digest();
|
||||
}
|
||||
|
||||
// The following was nearly identical to the Sha2Crypt code.
|
||||
// Again, the buflen is not really needed.
|
||||
// int buflen = MD5_PREFIX.length() - 1 + salt_string.length() + 1 + BLOCKSIZE + 1;
|
||||
B64.b64from24bit(finalb[0], finalb[6], finalb[12], 4, passwd);
|
||||
B64.b64from24bit(finalb[1], finalb[7], finalb[13], 4, passwd);
|
||||
B64.b64from24bit(finalb[2], finalb[8], finalb[14], 4, passwd);
|
||||
B64.b64from24bit(finalb[3], finalb[9], finalb[15], 4, passwd);
|
||||
B64.b64from24bit(finalb[4], finalb[10], finalb[5], 4, passwd);
|
||||
B64.b64from24bit((byte) 0, (byte) 0, finalb[11], 2, passwd);
|
||||
|
||||
/*
|
||||
* Don't leave anything around in vm they could use.
|
||||
*/
|
||||
// Is there a better way to do this with the JVM?
|
||||
ctx.reset();
|
||||
ctx1.reset();
|
||||
Arrays.fill(keyBytes, (byte) 0);
|
||||
Arrays.fill(saltBytes, (byte) 0);
|
||||
Arrays.fill(finalb, (byte) 0);
|
||||
|
||||
return passwd.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.digest;
|
||||
|
||||
import java.security.MessageDigest;
|
||||
|
||||
/**
|
||||
* Standard {@link MessageDigest} algorithm names from the <cite>Java Cryptography Architecture Standard Algorithm Name
|
||||
* Documentation</cite>.
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
* </p>
|
||||
* TODO 2.0 This should be an enum.
|
||||
*
|
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/StandardNames.html">Java Cryptography
|
||||
* Architecture Standard Algorithm Name Documentation</a>
|
||||
* @since 1.7
|
||||
* @version $Id$
|
||||
*/
|
||||
public class MessageDigestAlgorithms {

    /** The MD2 message digest algorithm defined in RFC 1319. */
    public static final String MD2 = "MD2";

    /** The MD5 message digest algorithm defined in RFC 1321. */
    public static final String MD5 = "MD5";

    /** The SHA-1 hash algorithm defined in the FIPS PUB 180-2. */
    public static final String SHA_1 = "SHA-1";

    /**
     * The SHA-224 hash algorithm defined in the FIPS PUB 180-4.
     * <p>
     * Java 8 only.
     * </p>
     *
     * @since 1.11
     */
    public static final String SHA_224 = "SHA-224";

    /** The SHA-256 hash algorithm defined in the FIPS PUB 180-2. */
    public static final String SHA_256 = "SHA-256";

    /** The SHA-384 hash algorithm defined in the FIPS PUB 180-2. */
    public static final String SHA_384 = "SHA-384";

    /** The SHA-512 hash algorithm defined in the FIPS PUB 180-2. */
    public static final String SHA_512 = "SHA-512";

    /** Holder of constants only; cannot be instantiated. */
    private MessageDigestAlgorithms() {
        // cannot be instantiated.
    }

}
|
|
@ -0,0 +1,545 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.codec.digest;
|
||||
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Arrays;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.codec.Charsets;
|
||||
|
||||
/**
|
||||
* SHA2-based Unix crypt implementation.
|
||||
* <p>
|
||||
* Based on the C implementation released into the Public Domain by Ulrich Drepper <drepper@redhat.com>
|
||||
* http://www.akkadia.org/drepper/SHA-crypt.txt
|
||||
* <p>
|
||||
* Conversion to Kotlin and from there to Java in 2012 by Christian Hammers <ch@lathspell.de> and likewise put
|
||||
* into the Public Domain.
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since 1.7
|
||||
*/
|
||||
public class Sha2Crypt {
|
||||
|
||||
/** Default number of rounds if not explicitly specified. */
|
||||
private static final int ROUNDS_DEFAULT = 5000;
|
||||
|
||||
/** Maximum number of rounds. */
|
||||
private static final int ROUNDS_MAX = 999999999;
|
||||
|
||||
/** Minimum number of rounds. */
|
||||
private static final int ROUNDS_MIN = 1000;
|
||||
|
||||
/** Prefix for optional rounds specification. */
|
||||
private static final String ROUNDS_PREFIX = "rounds=";
|
||||
|
||||
/** The number of bytes the final hash value will have (SHA-256 variant). */
|
||||
private static final int SHA256_BLOCKSIZE = 32;
|
||||
|
||||
/** The prefixes that can be used to identify this crypt() variant (SHA-256). */
|
||||
static final String SHA256_PREFIX = "$5$";
|
||||
|
||||
/** The number of bytes the final hash value will have (SHA-512 variant). */
|
||||
private static final int SHA512_BLOCKSIZE = 64;
|
||||
|
||||
/** The prefixes that can be used to identify this crypt() variant (SHA-512). */
|
||||
static final String SHA512_PREFIX = "$6$";
|
||||
|
||||
/** The pattern to match valid salt values. */
|
||||
private static final Pattern SALT_PATTERN = Pattern
|
||||
.compile("^\\$([56])\\$(rounds=(\\d+)\\$)?([\\.\\/a-zA-Z0-9]{1,16}).*");
|
||||
|
||||
/**
|
||||
* Generates a libc crypt() compatible "$5$" hash value with random salt.
|
||||
* <p>
|
||||
* See {@link Crypt#crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext to hash
|
||||
* @return complete hash value
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String sha256Crypt(final byte[] keyBytes) {
|
||||
return sha256Crypt(keyBytes, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a libc6 crypt() compatible "$5$" hash value.
|
||||
* <p>
|
||||
* See {@link Crypt#crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext to hash
|
||||
* @param salt
|
||||
* real salt value without prefix or "rounds="
|
||||
* @return complete hash value including salt
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String sha256Crypt(final byte[] keyBytes, String salt) {
|
||||
if (salt == null) {
|
||||
salt = SHA256_PREFIX + B64.getRandomSalt(8);
|
||||
}
|
||||
return sha2Crypt(keyBytes, salt, SHA256_PREFIX, SHA256_BLOCKSIZE, MessageDigestAlgorithms.SHA_256);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a libc6 crypt() compatible "$5$" or "$6$" SHA2 based hash value.
|
||||
* <p>
|
||||
* This is a nearly line by line conversion of the original C function. The numbered comments are from the algorithm
|
||||
* description, the short C-style ones from the original C code and the ones with "Remark" from me.
|
||||
* <p>
|
||||
* See {@link Crypt#crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext to hash
|
||||
* @param salt
|
||||
* real salt value without prefix or "rounds="
|
||||
* @param saltPrefix
|
||||
* either $5$ or $6$
|
||||
* @param blocksize
|
||||
* a value that differs between $5$ and $6$
|
||||
* @param algorithm
|
||||
* {@link MessageDigest} algorithm identifier string
|
||||
* @return complete hash value including prefix and salt
|
||||
* @throws IllegalArgumentException
|
||||
* if the given salt is <code>null</code> or does not match the allowed pattern
|
||||
* @throws IllegalArgumentException
|
||||
* when a {@link NoSuchAlgorithmException} is caught
|
||||
* @see MessageDigestAlgorithms
|
||||
*/
|
||||
private static String sha2Crypt(final byte[] keyBytes, final String salt, final String saltPrefix,
|
||||
final int blocksize, final String algorithm) {
|
||||
|
||||
final int keyLen = keyBytes.length;
|
||||
|
||||
// Extracts effective salt and the number of rounds from the given salt.
|
||||
int rounds = ROUNDS_DEFAULT;
|
||||
boolean roundsCustom = false;
|
||||
if (salt == null) {
|
||||
throw new IllegalArgumentException("Salt must not be null");
|
||||
}
|
||||
|
||||
final Matcher m = SALT_PATTERN.matcher(salt);
|
||||
if (m == null || !m.find()) {
|
||||
throw new IllegalArgumentException("Invalid salt value: " + salt);
|
||||
}
|
||||
if (m.group(3) != null) {
|
||||
rounds = Integer.parseInt(m.group(3));
|
||||
rounds = Math.max(ROUNDS_MIN, Math.min(ROUNDS_MAX, rounds));
|
||||
roundsCustom = true;
|
||||
}
|
||||
final String saltString = m.group(4);
|
||||
final byte[] saltBytes = saltString.getBytes(Charsets.UTF_8);
|
||||
final int saltLen = saltBytes.length;
|
||||
|
||||
// 1. start digest A
|
||||
// Prepare for the real work.
|
||||
MessageDigest ctx = DigestUtils.getDigest(algorithm);
|
||||
|
||||
// 2. the password string is added to digest A
|
||||
/*
|
||||
* Add the key string.
|
||||
*/
|
||||
ctx.update(keyBytes);
|
||||
|
||||
// 3. the salt string is added to digest A. This is just the salt string
|
||||
// itself without the enclosing '$', without the magic salt_prefix $5$ and
|
||||
// $6$ respectively and without the rounds=<N> specification.
|
||||
//
|
||||
// NB: the MD5 algorithm did add the $1$ salt_prefix. This is not deemed
|
||||
// necessary since it is a constant string and does not add security
|
||||
// and /possibly/ allows a plain text attack. Since the rounds=<N>
|
||||
// specification should never be added this would also create an
|
||||
// inconsistency.
|
||||
/*
|
||||
* The last part is the salt string. This must be at most 16 characters and it ends at the first `$' character
|
||||
* (for compatibility with existing implementations).
|
||||
*/
|
||||
ctx.update(saltBytes);
|
||||
|
||||
// 4. start digest B
|
||||
/*
|
||||
* Compute alternate sha512 sum with input KEY, SALT, and KEY. The final result will be added to the first
|
||||
* context.
|
||||
*/
|
||||
MessageDigest altCtx = DigestUtils.getDigest(algorithm);
|
||||
|
||||
// 5. add the password to digest B
|
||||
/*
|
||||
* Add key.
|
||||
*/
|
||||
altCtx.update(keyBytes);
|
||||
|
||||
// 6. add the salt string to digest B
|
||||
/*
|
||||
* Add salt.
|
||||
*/
|
||||
altCtx.update(saltBytes);
|
||||
|
||||
// 7. add the password again to digest B
|
||||
/*
|
||||
* Add key again.
|
||||
*/
|
||||
altCtx.update(keyBytes);
|
||||
|
||||
// 8. finish digest B
|
||||
/*
|
||||
* Now get result of this (32 bytes) and add it to the other context.
|
||||
*/
|
||||
byte[] altResult = altCtx.digest();
|
||||
|
||||
// 9. For each block of 32 or 64 bytes in the password string (excluding
|
||||
// the terminating NUL in the C representation), add digest B to digest A
|
||||
/*
|
||||
* Add for any character in the key one byte of the alternate sum.
|
||||
*/
|
||||
/*
|
||||
* (Remark: the C code comment seems wrong for key length > 32!)
|
||||
*/
|
||||
int cnt = keyBytes.length;
|
||||
while (cnt > blocksize) {
|
||||
ctx.update(altResult, 0, blocksize);
|
||||
cnt -= blocksize;
|
||||
}
|
||||
|
||||
// 10. For the remaining N bytes of the password string add the first
|
||||
// N bytes of digest B to digest A
|
||||
ctx.update(altResult, 0, cnt);
|
||||
|
||||
// 11. For each bit of the binary representation of the length of the
|
||||
// password string up to and including the highest 1-digit, starting
|
||||
// from to lowest bit position (numeric value 1):
|
||||
//
|
||||
// a) for a 1-digit add digest B to digest A
|
||||
//
|
||||
// b) for a 0-digit add the password string
|
||||
//
|
||||
// NB: this step differs significantly from the MD5 algorithm. It
|
||||
// adds more randomness.
|
||||
/*
|
||||
* Take the binary representation of the length of the key and for every 1 add the alternate sum, for every 0
|
||||
* the key.
|
||||
*/
|
||||
cnt = keyBytes.length;
|
||||
while (cnt > 0) {
|
||||
if ((cnt & 1) != 0) {
|
||||
ctx.update(altResult, 0, blocksize);
|
||||
} else {
|
||||
ctx.update(keyBytes);
|
||||
}
|
||||
cnt >>= 1;
|
||||
}
|
||||
|
||||
// 12. finish digest A
|
||||
/*
|
||||
* Create intermediate result.
|
||||
*/
|
||||
altResult = ctx.digest();
|
||||
|
||||
// 13. start digest DP
|
||||
/*
|
||||
* Start computation of P byte sequence.
|
||||
*/
|
||||
altCtx = DigestUtils.getDigest(algorithm);
|
||||
|
||||
// 14. for every byte in the password (excluding the terminating NUL byte
|
||||
// in the C representation of the string)
|
||||
//
|
||||
// add the password to digest DP
|
||||
/*
|
||||
* For every character in the password add the entire password.
|
||||
*/
|
||||
for (int i = 1; i <= keyLen; i++) {
|
||||
altCtx.update(keyBytes);
|
||||
}
|
||||
|
||||
// 15. finish digest DP
|
||||
/*
|
||||
* Finish the digest.
|
||||
*/
|
||||
byte[] tempResult = altCtx.digest();
|
||||
|
||||
// 16. produce byte sequence P of the same length as the password where
|
||||
//
|
||||
// a) for each block of 32 or 64 bytes of length of the password string
|
||||
// the entire digest DP is used
|
||||
//
|
||||
// b) for the remaining N (up to 31 or 63) bytes use the first N
|
||||
// bytes of digest DP
|
||||
/*
|
||||
* Create byte sequence P.
|
||||
*/
|
||||
final byte[] pBytes = new byte[keyLen];
|
||||
int cp = 0;
|
||||
while (cp < keyLen - blocksize) {
|
||||
System.arraycopy(tempResult, 0, pBytes, cp, blocksize);
|
||||
cp += blocksize;
|
||||
}
|
||||
System.arraycopy(tempResult, 0, pBytes, cp, keyLen - cp);
|
||||
|
||||
// 17. start digest DS
|
||||
/*
|
||||
* Start computation of S byte sequence.
|
||||
*/
|
||||
altCtx = DigestUtils.getDigest(algorithm);
|
||||
|
||||
// 18. repeast the following 16+A[0] times, where A[0] represents the first
|
||||
// byte in digest A interpreted as an 8-bit unsigned value
|
||||
//
|
||||
// add the salt to digest DS
|
||||
/*
|
||||
* For every character in the password add the entire password.
|
||||
*/
|
||||
for (int i = 1; i <= 16 + (altResult[0] & 0xff); i++) {
|
||||
altCtx.update(saltBytes);
|
||||
}
|
||||
|
||||
// 19. finish digest DS
|
||||
/*
|
||||
* Finish the digest.
|
||||
*/
|
||||
tempResult = altCtx.digest();
|
||||
|
||||
// 20. produce byte sequence S of the same length as the salt string where
|
||||
//
|
||||
// a) for each block of 32 or 64 bytes of length of the salt string
|
||||
// the entire digest DS is used
|
||||
//
|
||||
// b) for the remaining N (up to 31 or 63) bytes use the first N
|
||||
// bytes of digest DS
|
||||
/*
|
||||
* Create byte sequence S.
|
||||
*/
|
||||
// Remark: The salt is limited to 16 chars, how does this make sense?
|
||||
final byte[] sBytes = new byte[saltLen];
|
||||
cp = 0;
|
||||
while (cp < saltLen - blocksize) {
|
||||
System.arraycopy(tempResult, 0, sBytes, cp, blocksize);
|
||||
cp += blocksize;
|
||||
}
|
||||
System.arraycopy(tempResult, 0, sBytes, cp, saltLen - cp);
|
||||
|
||||
// 21. repeat a loop according to the number specified in the rounds=<N>
|
||||
// specification in the salt (or the default value if none is
|
||||
// present). Each round is numbered, starting with 0 and up to N-1.
|
||||
//
|
||||
// The loop uses a digest as input. In the first round it is the
|
||||
// digest produced in step 12. In the latter steps it is the digest
|
||||
// produced in step 21.h. The following text uses the notation
|
||||
// "digest A/C" to describe this behavior.
|
||||
/*
|
||||
* Repeatedly run the collected hash value through sha512 to burn CPU cycles.
|
||||
*/
|
||||
for (int i = 0; i <= rounds - 1; i++) {
|
||||
// a) start digest C
|
||||
/*
|
||||
* New context.
|
||||
*/
|
||||
ctx = DigestUtils.getDigest(algorithm);
|
||||
|
||||
// b) for odd round numbers add the byte sequense P to digest C
|
||||
// c) for even round numbers add digest A/C
|
||||
/*
|
||||
* Add key or last result.
|
||||
*/
|
||||
if ((i & 1) != 0) {
|
||||
ctx.update(pBytes, 0, keyLen);
|
||||
} else {
|
||||
ctx.update(altResult, 0, blocksize);
|
||||
}
|
||||
|
||||
// d) for all round numbers not divisible by 3 add the byte sequence S
|
||||
/*
|
||||
* Add salt for numbers not divisible by 3.
|
||||
*/
|
||||
if (i % 3 != 0) {
|
||||
ctx.update(sBytes, 0, saltLen);
|
||||
}
|
||||
|
||||
// e) for all round numbers not divisible by 7 add the byte sequence P
|
||||
/*
|
||||
* Add key for numbers not divisible by 7.
|
||||
*/
|
||||
if (i % 7 != 0) {
|
||||
ctx.update(pBytes, 0, keyLen);
|
||||
}
|
||||
|
||||
// f) for odd round numbers add digest A/C
|
||||
// g) for even round numbers add the byte sequence P
|
||||
/*
|
||||
* Add key or last result.
|
||||
*/
|
||||
if ((i & 1) != 0) {
|
||||
ctx.update(altResult, 0, blocksize);
|
||||
} else {
|
||||
ctx.update(pBytes, 0, keyLen);
|
||||
}
|
||||
|
||||
// h) finish digest C.
|
||||
/*
|
||||
* Create intermediate result.
|
||||
*/
|
||||
altResult = ctx.digest();
|
||||
}
|
||||
|
||||
// 22. Produce the output string. This is an ASCII string of the maximum
|
||||
// size specified above, consisting of multiple pieces:
|
||||
//
|
||||
// a) the salt salt_prefix, $5$ or $6$ respectively
|
||||
//
|
||||
// b) the rounds=<N> specification, if one was present in the input
|
||||
// salt string. A trailing '$' is added in this case to separate
|
||||
// the rounds specification from the following text.
|
||||
//
|
||||
// c) the salt string truncated to 16 characters
|
||||
//
|
||||
// d) a '$' character
|
||||
/*
|
||||
* Now we can construct the result string. It consists of three parts.
|
||||
*/
|
||||
final StringBuilder buffer = new StringBuilder(saltPrefix);
|
||||
if (roundsCustom) {
|
||||
buffer.append(ROUNDS_PREFIX);
|
||||
buffer.append(rounds);
|
||||
buffer.append("$");
|
||||
}
|
||||
buffer.append(saltString);
|
||||
buffer.append("$");
|
||||
|
||||
// e) the base-64 encoded final C digest. The encoding used is as
|
||||
// follows:
|
||||
// [...]
|
||||
//
|
||||
// Each group of three bytes from the digest produces four
|
||||
// characters as output:
|
||||
//
|
||||
// 1. character: the six low bits of the first byte
|
||||
// 2. character: the two high bits of the first byte and the
|
||||
// four low bytes from the second byte
|
||||
// 3. character: the four high bytes from the second byte and
|
||||
// the two low bits from the third byte
|
||||
// 4. character: the six high bits from the third byte
|
||||
//
|
||||
// The groups of three bytes are as follows (in this sequence).
|
||||
// These are the indices into the byte array containing the
|
||||
// digest, starting with index 0. For the last group there are
|
||||
// not enough bytes left in the digest and the value zero is used
|
||||
// in its place. This group also produces only three or two
|
||||
// characters as output for SHA-512 and SHA-512 respectively.
|
||||
|
||||
// This was just a safeguard in the C implementation:
|
||||
// int buflen = salt_prefix.length() - 1 + ROUNDS_PREFIX.length() + 9 + 1 + salt_string.length() + 1 + 86 + 1;
|
||||
|
||||
if (blocksize == 32) {
|
||||
B64.b64from24bit(altResult[0], altResult[10], altResult[20], 4, buffer);
|
||||
B64.b64from24bit(altResult[21], altResult[1], altResult[11], 4, buffer);
|
||||
B64.b64from24bit(altResult[12], altResult[22], altResult[2], 4, buffer);
|
||||
B64.b64from24bit(altResult[3], altResult[13], altResult[23], 4, buffer);
|
||||
B64.b64from24bit(altResult[24], altResult[4], altResult[14], 4, buffer);
|
||||
B64.b64from24bit(altResult[15], altResult[25], altResult[5], 4, buffer);
|
||||
B64.b64from24bit(altResult[6], altResult[16], altResult[26], 4, buffer);
|
||||
B64.b64from24bit(altResult[27], altResult[7], altResult[17], 4, buffer);
|
||||
B64.b64from24bit(altResult[18], altResult[28], altResult[8], 4, buffer);
|
||||
B64.b64from24bit(altResult[9], altResult[19], altResult[29], 4, buffer);
|
||||
B64.b64from24bit((byte) 0, altResult[31], altResult[30], 3, buffer);
|
||||
} else {
|
||||
B64.b64from24bit(altResult[0], altResult[21], altResult[42], 4, buffer);
|
||||
B64.b64from24bit(altResult[22], altResult[43], altResult[1], 4, buffer);
|
||||
B64.b64from24bit(altResult[44], altResult[2], altResult[23], 4, buffer);
|
||||
B64.b64from24bit(altResult[3], altResult[24], altResult[45], 4, buffer);
|
||||
B64.b64from24bit(altResult[25], altResult[46], altResult[4], 4, buffer);
|
||||
B64.b64from24bit(altResult[47], altResult[5], altResult[26], 4, buffer);
|
||||
B64.b64from24bit(altResult[6], altResult[27], altResult[48], 4, buffer);
|
||||
B64.b64from24bit(altResult[28], altResult[49], altResult[7], 4, buffer);
|
||||
B64.b64from24bit(altResult[50], altResult[8], altResult[29], 4, buffer);
|
||||
B64.b64from24bit(altResult[9], altResult[30], altResult[51], 4, buffer);
|
||||
B64.b64from24bit(altResult[31], altResult[52], altResult[10], 4, buffer);
|
||||
B64.b64from24bit(altResult[53], altResult[11], altResult[32], 4, buffer);
|
||||
B64.b64from24bit(altResult[12], altResult[33], altResult[54], 4, buffer);
|
||||
B64.b64from24bit(altResult[34], altResult[55], altResult[13], 4, buffer);
|
||||
B64.b64from24bit(altResult[56], altResult[14], altResult[35], 4, buffer);
|
||||
B64.b64from24bit(altResult[15], altResult[36], altResult[57], 4, buffer);
|
||||
B64.b64from24bit(altResult[37], altResult[58], altResult[16], 4, buffer);
|
||||
B64.b64from24bit(altResult[59], altResult[17], altResult[38], 4, buffer);
|
||||
B64.b64from24bit(altResult[18], altResult[39], altResult[60], 4, buffer);
|
||||
B64.b64from24bit(altResult[40], altResult[61], altResult[19], 4, buffer);
|
||||
B64.b64from24bit(altResult[62], altResult[20], altResult[41], 4, buffer);
|
||||
B64.b64from24bit((byte) 0, (byte) 0, altResult[63], 2, buffer);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the buffer for the intermediate result so that people attaching to processes or reading core dumps
|
||||
* cannot get any information.
|
||||
*/
|
||||
// Is there a better way to do this with the JVM?
|
||||
Arrays.fill(tempResult, (byte) 0);
|
||||
Arrays.fill(pBytes, (byte) 0);
|
||||
Arrays.fill(sBytes, (byte) 0);
|
||||
ctx.reset();
|
||||
altCtx.reset();
|
||||
Arrays.fill(keyBytes, (byte) 0);
|
||||
Arrays.fill(saltBytes, (byte) 0);
|
||||
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a libc crypt() compatible "$6$" hash value with random salt.
|
||||
* <p>
|
||||
* See {@link Crypt#crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext to hash
|
||||
* @return complete hash value
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String sha512Crypt(final byte[] keyBytes) {
|
||||
return sha512Crypt(keyBytes, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a libc6 crypt() compatible "$6$" hash value.
|
||||
* <p>
|
||||
* See {@link Crypt#crypt(String, String)} for details.
|
||||
*
|
||||
* @param keyBytes
|
||||
* plaintext to hash
|
||||
* @param salt
|
||||
* real salt value without prefix or "rounds="
|
||||
* @return complete hash value including salt
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
* @throws RuntimeException
|
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught.
|
||||
*/
|
||||
public static String sha512Crypt(final byte[] keyBytes, String salt) {
|
||||
if (salt == null) {
|
||||
salt = SHA512_PREFIX + B64.getRandomSalt(8);
|
||||
}
|
||||
return sha2Crypt(keyBytes, salt, SHA512_PREFIX, SHA512_BLOCKSIZE, MessageDigestAlgorithms.SHA_512);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,413 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.codec.digest;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.codec.Charsets;
|
||||
|
||||
/**
|
||||
* Unix crypt(3) algorithm implementation.
|
||||
* <p>
|
||||
* This class only implements the traditional 56 bit DES based algorithm. Please use DigestUtils.crypt() for a method
|
||||
* that distinguishes between all the algorithms supported in the current glibc's crypt().
|
||||
* <p>
|
||||
* The Java implementation was taken from the JetSpeed Portal project (see
|
||||
* org.apache.jetspeed.services.security.ldap.UnixCrypt).
|
||||
* <p>
|
||||
* This class is slightly incompatible if the given salt contains characters that are not part of the allowed range
|
||||
* [a-zA-Z0-9./].
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since 1.7
|
||||
*/
|
||||
public class UnixCrypt {
|
||||
|
||||
private static final int CON_SALT[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 5, 6,
|
||||
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
|
||||
34, 35, 36, 37, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
|
||||
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 0, 0, 0, 0 };
|
||||
|
||||
private static final int COV2CHAR[] = { 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 65, 66, 67, 68, 69, 70,
|
||||
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102,
|
||||
103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122 };
|
||||
|
||||
private static final char SALT_CHARS[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./"
|
||||
.toCharArray();
|
||||
|
||||
private static final boolean SHIFT2[] = { false, false, true, true, true, true, true, true, false, true, true,
|
||||
true, true, true, true, false };
|
||||
|
||||
private static final int SKB[][] = {
|
||||
{ 0, 16, 0x20000000, 0x20000010, 0x10000, 0x10010, 0x20010000, 0x20010010, 2048, 2064, 0x20000800,
|
||||
0x20000810, 0x10800, 0x10810, 0x20010800, 0x20010810, 32, 48, 0x20000020, 0x20000030, 0x10020,
|
||||
0x10030, 0x20010020, 0x20010030, 2080, 2096, 0x20000820, 0x20000830, 0x10820, 0x10830, 0x20010820,
|
||||
0x20010830, 0x80000, 0x80010, 0x20080000, 0x20080010, 0x90000, 0x90010, 0x20090000, 0x20090010,
|
||||
0x80800, 0x80810, 0x20080800, 0x20080810, 0x90800, 0x90810, 0x20090800, 0x20090810, 0x80020,
|
||||
0x80030, 0x20080020, 0x20080030, 0x90020, 0x90030, 0x20090020, 0x20090030, 0x80820, 0x80830,
|
||||
0x20080820, 0x20080830, 0x90820, 0x90830, 0x20090820, 0x20090830 },
|
||||
{ 0, 0x2000000, 8192, 0x2002000, 0x200000, 0x2200000, 0x202000, 0x2202000, 4, 0x2000004, 8196, 0x2002004,
|
||||
0x200004, 0x2200004, 0x202004, 0x2202004, 1024, 0x2000400, 9216, 0x2002400, 0x200400, 0x2200400,
|
||||
0x202400, 0x2202400, 1028, 0x2000404, 9220, 0x2002404, 0x200404, 0x2200404, 0x202404, 0x2202404,
|
||||
0x10000000, 0x12000000, 0x10002000, 0x12002000, 0x10200000, 0x12200000, 0x10202000, 0x12202000,
|
||||
0x10000004, 0x12000004, 0x10002004, 0x12002004, 0x10200004, 0x12200004, 0x10202004, 0x12202004,
|
||||
0x10000400, 0x12000400, 0x10002400, 0x12002400, 0x10200400, 0x12200400, 0x10202400, 0x12202400,
|
||||
0x10000404, 0x12000404, 0x10002404, 0x12002404, 0x10200404, 0x12200404, 0x10202404, 0x12202404 },
|
||||
{ 0, 1, 0x40000, 0x40001, 0x1000000, 0x1000001, 0x1040000, 0x1040001, 2, 3, 0x40002, 0x40003, 0x1000002,
|
||||
0x1000003, 0x1040002, 0x1040003, 512, 513, 0x40200, 0x40201, 0x1000200, 0x1000201, 0x1040200,
|
||||
0x1040201, 514, 515, 0x40202, 0x40203, 0x1000202, 0x1000203, 0x1040202, 0x1040203, 0x8000000,
|
||||
0x8000001, 0x8040000, 0x8040001, 0x9000000, 0x9000001, 0x9040000, 0x9040001, 0x8000002, 0x8000003,
|
||||
0x8040002, 0x8040003, 0x9000002, 0x9000003, 0x9040002, 0x9040003, 0x8000200, 0x8000201, 0x8040200,
|
||||
0x8040201, 0x9000200, 0x9000201, 0x9040200, 0x9040201, 0x8000202, 0x8000203, 0x8040202, 0x8040203,
|
||||
0x9000202, 0x9000203, 0x9040202, 0x9040203 },
|
||||
{ 0, 0x100000, 256, 0x100100, 8, 0x100008, 264, 0x100108, 4096, 0x101000, 4352, 0x101100, 4104, 0x101008,
|
||||
4360, 0x101108, 0x4000000, 0x4100000, 0x4000100, 0x4100100, 0x4000008, 0x4100008, 0x4000108,
|
||||
0x4100108, 0x4001000, 0x4101000, 0x4001100, 0x4101100, 0x4001008, 0x4101008, 0x4001108, 0x4101108,
|
||||
0x20000, 0x120000, 0x20100, 0x120100, 0x20008, 0x120008, 0x20108, 0x120108, 0x21000, 0x121000,
|
||||
0x21100, 0x121100, 0x21008, 0x121008, 0x21108, 0x121108, 0x4020000, 0x4120000, 0x4020100,
|
||||
0x4120100, 0x4020008, 0x4120008, 0x4020108, 0x4120108, 0x4021000, 0x4121000, 0x4021100, 0x4121100,
|
||||
0x4021008, 0x4121008, 0x4021108, 0x4121108 },
|
||||
{ 0, 0x10000000, 0x10000, 0x10010000, 4, 0x10000004, 0x10004, 0x10010004, 0x20000000, 0x30000000,
|
||||
0x20010000, 0x30010000, 0x20000004, 0x30000004, 0x20010004, 0x30010004, 0x100000, 0x10100000,
|
||||
0x110000, 0x10110000, 0x100004, 0x10100004, 0x110004, 0x10110004, 0x20100000, 0x30100000,
|
||||
0x20110000, 0x30110000, 0x20100004, 0x30100004, 0x20110004, 0x30110004, 4096, 0x10001000, 0x11000,
|
||||
0x10011000, 4100, 0x10001004, 0x11004, 0x10011004, 0x20001000, 0x30001000, 0x20011000, 0x30011000,
|
||||
0x20001004, 0x30001004, 0x20011004, 0x30011004, 0x101000, 0x10101000, 0x111000, 0x10111000,
|
||||
0x101004, 0x10101004, 0x111004, 0x10111004, 0x20101000, 0x30101000, 0x20111000, 0x30111000,
|
||||
0x20101004, 0x30101004, 0x20111004, 0x30111004 },
|
||||
{ 0, 0x8000000, 8, 0x8000008, 1024, 0x8000400, 1032, 0x8000408, 0x20000, 0x8020000, 0x20008, 0x8020008,
|
||||
0x20400, 0x8020400, 0x20408, 0x8020408, 1, 0x8000001, 9, 0x8000009, 1025, 0x8000401, 1033,
|
||||
0x8000409, 0x20001, 0x8020001, 0x20009, 0x8020009, 0x20401, 0x8020401, 0x20409, 0x8020409,
|
||||
0x2000000, 0xa000000, 0x2000008, 0xa000008, 0x2000400, 0xa000400, 0x2000408, 0xa000408, 0x2020000,
|
||||
0xa020000, 0x2020008, 0xa020008, 0x2020400, 0xa020400, 0x2020408, 0xa020408, 0x2000001, 0xa000001,
|
||||
0x2000009, 0xa000009, 0x2000401, 0xa000401, 0x2000409, 0xa000409, 0x2020001, 0xa020001, 0x2020009,
|
||||
0xa020009, 0x2020401, 0xa020401, 0x2020409, 0xa020409 },
|
||||
{ 0, 256, 0x80000, 0x80100, 0x1000000, 0x1000100, 0x1080000, 0x1080100, 16, 272, 0x80010, 0x80110,
|
||||
0x1000010, 0x1000110, 0x1080010, 0x1080110, 0x200000, 0x200100, 0x280000, 0x280100, 0x1200000,
|
||||
0x1200100, 0x1280000, 0x1280100, 0x200010, 0x200110, 0x280010, 0x280110, 0x1200010, 0x1200110,
|
||||
0x1280010, 0x1280110, 512, 768, 0x80200, 0x80300, 0x1000200, 0x1000300, 0x1080200, 0x1080300, 528,
|
||||
784, 0x80210, 0x80310, 0x1000210, 0x1000310, 0x1080210, 0x1080310, 0x200200, 0x200300, 0x280200,
|
||||
0x280300, 0x1200200, 0x1200300, 0x1280200, 0x1280300, 0x200210, 0x200310, 0x280210, 0x280310,
|
||||
0x1200210, 0x1200310, 0x1280210, 0x1280310 },
|
||||
{ 0, 0x4000000, 0x40000, 0x4040000, 2, 0x4000002, 0x40002, 0x4040002, 8192, 0x4002000, 0x42000, 0x4042000,
|
||||
8194, 0x4002002, 0x42002, 0x4042002, 32, 0x4000020, 0x40020, 0x4040020, 34, 0x4000022, 0x40022,
|
||||
0x4040022, 8224, 0x4002020, 0x42020, 0x4042020, 8226, 0x4002022, 0x42022, 0x4042022, 2048,
|
||||
0x4000800, 0x40800, 0x4040800, 2050, 0x4000802, 0x40802, 0x4040802, 10240, 0x4002800, 0x42800,
|
||||
0x4042800, 10242, 0x4002802, 0x42802, 0x4042802, 2080, 0x4000820, 0x40820, 0x4040820, 2082,
|
||||
0x4000822, 0x40822, 0x4040822, 10272, 0x4002820, 0x42820, 0x4042820, 10274, 0x4002822, 0x42822,
|
||||
0x4042822 } };
|
||||
|
||||
private static final int SPTRANS[][] = {
|
||||
{ 0x820200, 0x20000, 0x80800000, 0x80820200, 0x800000, 0x80020200, 0x80020000, 0x80800000, 0x80020200,
|
||||
0x820200, 0x820000, 0x80000200, 0x80800200, 0x800000, 0, 0x80020000, 0x20000, 0x80000000,
|
||||
0x800200, 0x20200, 0x80820200, 0x820000, 0x80000200, 0x800200, 0x80000000, 512, 0x20200,
|
||||
0x80820000, 512, 0x80800200, 0x80820000, 0, 0, 0x80820200, 0x800200, 0x80020000, 0x820200,
|
||||
0x20000, 0x80000200, 0x800200, 0x80820000, 512, 0x20200, 0x80800000, 0x80020200, 0x80000000,
|
||||
0x80800000, 0x820000, 0x80820200, 0x20200, 0x820000, 0x80800200, 0x800000, 0x80000200, 0x80020000,
|
||||
0, 0x20000, 0x800000, 0x80800200, 0x820200, 0x80000000, 0x80820000, 512, 0x80020200 },
|
||||
{ 0x10042004, 0, 0x42000, 0x10040000, 0x10000004, 8196, 0x10002000, 0x42000, 8192, 0x10040004, 4,
|
||||
0x10002000, 0x40004, 0x10042000, 0x10040000, 4, 0x40000, 0x10002004, 0x10040004, 8192, 0x42004,
|
||||
0x10000000, 0, 0x40004, 0x10002004, 0x42004, 0x10042000, 0x10000004, 0x10000000, 0x40000, 8196,
|
||||
0x10042004, 0x40004, 0x10042000, 0x10002000, 0x42004, 0x10042004, 0x40004, 0x10000004, 0,
|
||||
0x10000000, 8196, 0x40000, 0x10040004, 8192, 0x10000000, 0x42004, 0x10002004, 0x10042000, 8192, 0,
|
||||
0x10000004, 4, 0x10042004, 0x42000, 0x10040000, 0x10040004, 0x40000, 8196, 0x10002000, 0x10002004,
|
||||
4, 0x10040000, 0x42000 },
|
||||
{ 0x41000000, 0x1010040, 64, 0x41000040, 0x40010000, 0x1000000, 0x41000040, 0x10040, 0x1000040, 0x10000,
|
||||
0x1010000, 0x40000000, 0x41010040, 0x40000040, 0x40000000, 0x41010000, 0, 0x40010000, 0x1010040,
|
||||
64, 0x40000040, 0x41010040, 0x10000, 0x41000000, 0x41010000, 0x1000040, 0x40010040, 0x1010000,
|
||||
0x10040, 0, 0x1000000, 0x40010040, 0x1010040, 64, 0x40000000, 0x10000, 0x40000040, 0x40010000,
|
||||
0x1010000, 0x41000040, 0, 0x1010040, 0x10040, 0x41010000, 0x40010000, 0x1000000, 0x41010040,
|
||||
0x40000000, 0x40010040, 0x41000000, 0x1000000, 0x41010040, 0x10000, 0x1000040, 0x41000040,
|
||||
0x10040, 0x1000040, 0, 0x41010000, 0x40000040, 0x41000000, 0x40010040, 64, 0x1010000 },
|
||||
{ 0x100402, 0x4000400, 2, 0x4100402, 0, 0x4100000, 0x4000402, 0x100002, 0x4100400, 0x4000002, 0x4000000,
|
||||
1026, 0x4000002, 0x100402, 0x100000, 0x4000000, 0x4100002, 0x100400, 1024, 2, 0x100400, 0x4000402,
|
||||
0x4100000, 1024, 1026, 0, 0x100002, 0x4100400, 0x4000400, 0x4100002, 0x4100402, 0x100000,
|
||||
0x4100002, 1026, 0x100000, 0x4000002, 0x100400, 0x4000400, 2, 0x4100000, 0x4000402, 0, 1024,
|
||||
0x100002, 0, 0x4100002, 0x4100400, 1024, 0x4000000, 0x4100402, 0x100402, 0x100000, 0x4100402, 2,
|
||||
0x4000400, 0x100402, 0x100002, 0x100400, 0x4100000, 0x4000402, 1026, 0x4000000, 0x4000002,
|
||||
0x4100400 },
|
||||
{ 0x2000000, 16384, 256, 0x2004108, 0x2004008, 0x2000100, 16648, 0x2004000, 16384, 8, 0x2000008, 16640,
|
||||
0x2000108, 0x2004008, 0x2004100, 0, 16640, 0x2000000, 16392, 264, 0x2000100, 16648, 0, 0x2000008,
|
||||
8, 0x2000108, 0x2004108, 16392, 0x2004000, 256, 264, 0x2004100, 0x2004100, 0x2000108, 16392,
|
||||
0x2004000, 16384, 8, 0x2000008, 0x2000100, 0x2000000, 16640, 0x2004108, 0, 16648, 0x2000000, 256,
|
||||
16392, 0x2000108, 256, 0, 0x2004108, 0x2004008, 0x2004100, 264, 16384, 16640, 0x2004008,
|
||||
0x2000100, 264, 8, 16648, 0x2004000, 0x2000008 },
|
||||
{ 0x20000010, 0x80010, 0, 0x20080800, 0x80010, 2048, 0x20000810, 0x80000, 2064, 0x20080810, 0x80800,
|
||||
0x20000000, 0x20000800, 0x20000010, 0x20080000, 0x80810, 0x80000, 0x20000810, 0x20080010, 0, 2048,
|
||||
16, 0x20080800, 0x20080010, 0x20080810, 0x20080000, 0x20000000, 2064, 16, 0x80800, 0x80810,
|
||||
0x20000800, 2064, 0x20000000, 0x20000800, 0x80810, 0x20080800, 0x80010, 0, 0x20000800, 0x20000000,
|
||||
2048, 0x20080010, 0x80000, 0x80010, 0x20080810, 0x80800, 16, 0x20080810, 0x80800, 0x80000,
|
||||
0x20000810, 0x20000010, 0x20080000, 0x80810, 0, 2048, 0x20000010, 0x20000810, 0x20080800,
|
||||
0x20080000, 2064, 16, 0x20080010 },
|
||||
{ 4096, 128, 0x400080, 0x400001, 0x401081, 4097, 4224, 0, 0x400000, 0x400081, 129, 0x401000, 1, 0x401080,
|
||||
0x401000, 129, 0x400081, 4096, 4097, 0x401081, 0, 0x400080, 0x400001, 4224, 0x401001, 4225,
|
||||
0x401080, 1, 4225, 0x401001, 128, 0x400000, 4225, 0x401000, 0x401001, 129, 4096, 128, 0x400000,
|
||||
0x401001, 0x400081, 4225, 4224, 0, 128, 0x400001, 1, 0x400080, 0, 0x400081, 0x400080, 4224, 129,
|
||||
4096, 0x401081, 0x400000, 0x401080, 1, 4097, 0x401081, 0x400001, 0x401080, 0x401000, 4097 },
|
||||
{ 0x8200020, 0x8208000, 32800, 0, 0x8008000, 0x200020, 0x8200000, 0x8208020, 32, 0x8000000, 0x208000,
|
||||
32800, 0x208020, 0x8008020, 0x8000020, 0x8200000, 32768, 0x208020, 0x200020, 0x8008000, 0x8208020,
|
||||
0x8000020, 0, 0x208000, 0x8000000, 0x200000, 0x8008020, 0x8200020, 0x200000, 32768, 0x8208000, 32,
|
||||
0x200000, 32768, 0x8000020, 0x8208020, 32800, 0x8000000, 0, 0x208000, 0x8200020, 0x8008020,
|
||||
0x8008000, 0x200020, 0x8208000, 32, 0x200020, 0x8008000, 0x8208020, 0x200000, 0x8200000,
|
||||
0x8000020, 0x208000, 32800, 0x8008020, 0x8200000, 32, 0x8208000, 0x208020, 0, 0x8000000,
|
||||
0x8200020, 32768, 0x208020 } };
|
||||
|
||||
/**
|
||||
* Generates a crypt(3) compatible hash using the DES algorithm.
|
||||
* <p>
|
||||
* As no salt is given, a random one will be used.
|
||||
*
|
||||
* @param original
|
||||
* plaintext password
|
||||
* @return a 13 character string starting with the salt string
|
||||
*/
|
||||
public static String crypt(final byte[] original) {
|
||||
return crypt(original, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a crypt(3) compatible hash using the DES algorithm.
|
||||
* <p>
|
||||
* Using unspecified characters as salt results incompatible hash values.
|
||||
*
|
||||
* @param original
|
||||
* plaintext password
|
||||
* @param salt
|
||||
* a two character string drawn from [a-zA-Z0-9./] or null for a random one
|
||||
* @return a 13 character string starting with the salt string
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
*/
|
||||
public static String crypt(final byte[] original, String salt) {
|
||||
if (salt == null) {
|
||||
final Random randomGenerator = new Random();
|
||||
final int numSaltChars = SALT_CHARS.length;
|
||||
salt = "" + SALT_CHARS[randomGenerator.nextInt(numSaltChars)] +
|
||||
SALT_CHARS[randomGenerator.nextInt(numSaltChars)];
|
||||
} else if (!salt.matches("^[" + B64.B64T + "]{2,}$")) {
|
||||
throw new IllegalArgumentException("Invalid salt value: " + salt);
|
||||
}
|
||||
|
||||
final StringBuilder buffer = new StringBuilder(" ");
|
||||
final char charZero = salt.charAt(0);
|
||||
final char charOne = salt.charAt(1);
|
||||
buffer.setCharAt(0, charZero);
|
||||
buffer.setCharAt(1, charOne);
|
||||
final int eSwap0 = CON_SALT[charZero];
|
||||
final int eSwap1 = CON_SALT[charOne] << 4;
|
||||
final byte key[] = new byte[8];
|
||||
for (int i = 0; i < key.length; i++) {
|
||||
key[i] = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < key.length && i < original.length; i++) {
|
||||
final int iChar = original[i];
|
||||
key[i] = (byte) (iChar << 1);
|
||||
}
|
||||
|
||||
final int schedule[] = desSetKey(key);
|
||||
final int out[] = body(schedule, eSwap0, eSwap1);
|
||||
final byte b[] = new byte[9];
|
||||
intToFourBytes(out[0], b, 0);
|
||||
intToFourBytes(out[1], b, 4);
|
||||
b[8] = 0;
|
||||
int i = 2;
|
||||
int y = 0;
|
||||
int u = 128;
|
||||
for (; i < 13; i++) {
|
||||
int j = 0;
|
||||
int c = 0;
|
||||
for (; j < 6; j++) {
|
||||
c <<= 1;
|
||||
if ((b[y] & u) != 0) {
|
||||
c |= 0x1;
|
||||
}
|
||||
u >>>= 1;
|
||||
if (u == 0) {
|
||||
y++;
|
||||
u = 128;
|
||||
}
|
||||
buffer.setCharAt(i, (char) COV2CHAR[c]);
|
||||
}
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a crypt(3) compatible hash using the DES algorithm.
|
||||
* <p>
|
||||
* As no salt is given, a random one is used.
|
||||
*
|
||||
* @param original
|
||||
* plaintext password
|
||||
* @return a 13 character string starting with the salt string
|
||||
*/
|
||||
public static String crypt(final String original) {
|
||||
return crypt(original.getBytes(Charsets.UTF_8));
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a crypt(3) compatible hash using the DES algorithm.
|
||||
*
|
||||
* @param original
|
||||
* plaintext password
|
||||
* @param salt
|
||||
* a two character string drawn from [a-zA-Z0-9./] or null for a random one
|
||||
* @return a 13 character string starting with the salt string
|
||||
* @throws IllegalArgumentException
|
||||
* if the salt does not match the allowed pattern
|
||||
*/
|
||||
public static String crypt(final String original, final String salt) {
|
||||
return crypt(original.getBytes(Charsets.UTF_8), salt);
|
||||
}
|
||||
|
||||
private static int[] body(final int schedule[], final int eSwap0, final int eSwap1) {
|
||||
int left = 0;
|
||||
int right = 0;
|
||||
int t = 0;
|
||||
for (int j = 0; j < 25; j++) {
|
||||
for (int i = 0; i < 32; i += 4) {
|
||||
left = dEncrypt(left, right, i, eSwap0, eSwap1, schedule);
|
||||
right = dEncrypt(right, left, i + 2, eSwap0, eSwap1, schedule);
|
||||
}
|
||||
t = left;
|
||||
left = right;
|
||||
right = t;
|
||||
}
|
||||
|
||||
t = right;
|
||||
right = left >>> 1 | left << 31;
|
||||
left = t >>> 1 | t << 31;
|
||||
final int results[] = new int[2];
|
||||
permOp(right, left, 1, 0x55555555, results);
|
||||
right = results[0];
|
||||
left = results[1];
|
||||
permOp(left, right, 8, 0xff00ff, results);
|
||||
left = results[0];
|
||||
right = results[1];
|
||||
permOp(right, left, 2, 0x33333333, results);
|
||||
right = results[0];
|
||||
left = results[1];
|
||||
permOp(left, right, 16, 65535, results);
|
||||
left = results[0];
|
||||
right = results[1];
|
||||
permOp(right, left, 4, 0xf0f0f0f, results);
|
||||
right = results[0];
|
||||
left = results[1];
|
||||
final int out[] = new int[2];
|
||||
out[0] = left;
|
||||
out[1] = right;
|
||||
return out;
|
||||
}
|
||||
|
||||
private static int byteToUnsigned(final byte b) {
|
||||
final int value = b;
|
||||
return value < 0 ? value + 256 : value;
|
||||
}
|
||||
|
||||
private static int dEncrypt(int el, final int r, final int s, final int e0, final int e1, final int sArr[]) {
|
||||
int v = r ^ r >>> 16;
|
||||
int u = v & e0;
|
||||
v &= e1;
|
||||
u = u ^ u << 16 ^ r ^ sArr[s];
|
||||
int t = v ^ v << 16 ^ r ^ sArr[s + 1];
|
||||
t = t >>> 4 | t << 28;
|
||||
el ^= SPTRANS[1][t & 0x3f] | SPTRANS[3][t >>> 8 & 0x3f] | SPTRANS[5][t >>> 16 & 0x3f] |
|
||||
SPTRANS[7][t >>> 24 & 0x3f] | SPTRANS[0][u & 0x3f] | SPTRANS[2][u >>> 8 & 0x3f] |
|
||||
SPTRANS[4][u >>> 16 & 0x3f] | SPTRANS[6][u >>> 24 & 0x3f];
|
||||
return el;
|
||||
}
|
||||
|
||||
private static int[] desSetKey(final byte key[]) {
|
||||
final int schedule[] = new int[32];
|
||||
int c = fourBytesToInt(key, 0);
|
||||
int d = fourBytesToInt(key, 4);
|
||||
final int results[] = new int[2];
|
||||
permOp(d, c, 4, 0xf0f0f0f, results);
|
||||
d = results[0];
|
||||
c = results[1];
|
||||
c = hPermOp(c, -2, 0xcccc0000);
|
||||
d = hPermOp(d, -2, 0xcccc0000);
|
||||
permOp(d, c, 1, 0x55555555, results);
|
||||
d = results[0];
|
||||
c = results[1];
|
||||
permOp(c, d, 8, 0xff00ff, results);
|
||||
c = results[0];
|
||||
d = results[1];
|
||||
permOp(d, c, 1, 0x55555555, results);
|
||||
d = results[0];
|
||||
c = results[1];
|
||||
d = (d & 0xff) << 16 | d & 0xff00 | (d & 0xff0000) >>> 16 | (c & 0xf0000000) >>> 4;
|
||||
c &= 0xfffffff;
|
||||
int j = 0;
|
||||
for (int i = 0; i < 16; i++) {
|
||||
if (SHIFT2[i]) {
|
||||
c = c >>> 2 | c << 26;
|
||||
d = d >>> 2 | d << 26;
|
||||
} else {
|
||||
c = c >>> 1 | c << 27;
|
||||
d = d >>> 1 | d << 27;
|
||||
}
|
||||
c &= 0xfffffff;
|
||||
d &= 0xfffffff;
|
||||
int s = SKB[0][c & 0x3f] | SKB[1][c >>> 6 & 0x3 | c >>> 7 & 0x3c] |
|
||||
SKB[2][c >>> 13 & 0xf | c >>> 14 & 0x30] |
|
||||
SKB[3][c >>> 20 & 0x1 | c >>> 21 & 0x6 | c >>> 22 & 0x38];
|
||||
final int t = SKB[4][d & 0x3f] | SKB[5][d >>> 7 & 0x3 | d >>> 8 & 0x3c] | SKB[6][d >>> 15 & 0x3f] |
|
||||
SKB[7][d >>> 21 & 0xf | d >>> 22 & 0x30];
|
||||
schedule[j++] = (t << 16 | s & 0xffff);
|
||||
s = s >>> 16 | t & 0xffff0000;
|
||||
s = s << 4 | s >>> 28;
|
||||
schedule[j++] = s;
|
||||
}
|
||||
|
||||
return schedule;
|
||||
}
|
||||
|
||||
private static int fourBytesToInt(final byte b[], int offset) {
|
||||
int value = byteToUnsigned(b[offset++]);
|
||||
value |= byteToUnsigned(b[offset++]) << 8;
|
||||
value |= byteToUnsigned(b[offset++]) << 16;
|
||||
value |= byteToUnsigned(b[offset++]) << 24;
|
||||
return value;
|
||||
}
|
||||
|
||||
private static int hPermOp(int a, final int n, final int m) {
|
||||
final int t = (a << 16 - n ^ a) & m;
|
||||
a = a ^ t ^ t >>> 16 - n;
|
||||
return a;
|
||||
}
|
||||
|
||||
private static void intToFourBytes(final int iValue, final byte b[], int offset) {
|
||||
b[offset++] = (byte) (iValue & 0xff);
|
||||
b[offset++] = (byte) (iValue >>> 8 & 0xff);
|
||||
b[offset++] = (byte) (iValue >>> 16 & 0xff);
|
||||
b[offset++] = (byte) (iValue >>> 24 & 0xff);
|
||||
}
|
||||
|
||||
private static void permOp(int a, int b, final int n, final int m, final int results[]) {
|
||||
final int t = (a >>> n ^ b) & m;
|
||||
a ^= t << n;
|
||||
b ^= t;
|
||||
results[0] = a;
|
||||
results[1] = b;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<body>
|
||||
Simplifies common {@link java.security.MessageDigest} tasks and
|
||||
includes a libc crypt(3) compatible crypt method that supports DES,
|
||||
MD5, SHA-256 and SHA-512 based algorithms as well as the Apache
|
||||
specific "$apr1$" variant.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Encodes a string into a Caverphone value.
|
||||
*
|
||||
* This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0
|
||||
* algorithm:
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*
|
||||
* @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $
|
||||
* @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
|
||||
* @since 1.5
|
||||
*/
|
||||
public abstract class AbstractCaverphone implements StringEncoder {
|
||||
|
||||
/**
|
||||
* Creates an instance of the Caverphone encoder
|
||||
*/
|
||||
public AbstractCaverphone() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an Object using the caverphone algorithm. This method is provided in order to satisfy the requirements of
|
||||
* the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String.
|
||||
*
|
||||
* @param source
|
||||
* Object to encode
|
||||
* @return An object (or type java.lang.String) containing the caverphone code which corresponds to the String
|
||||
* supplied.
|
||||
* @throws EncoderException
|
||||
* if the parameter supplied is not of type java.lang.String
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object source) throws EncoderException {
|
||||
if (!(source instanceof String)) {
|
||||
throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String");
|
||||
}
|
||||
return this.encode((String) source);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests if the encodings of two strings are equal.
|
||||
*
|
||||
* This method might be promoted to a new AbstractStringEncoder superclass.
|
||||
*
|
||||
* @param str1
|
||||
* First of two strings to compare
|
||||
* @param str2
|
||||
* Second of two strings to compare
|
||||
* @return <code>true</code> if the encodings of these strings are identical, <code>false</code> otherwise.
|
||||
* @throws EncoderException
|
||||
* thrown if there is an error condition during the encoding process.
|
||||
*/
|
||||
public boolean isEncodeEqual(final String str1, final String str2) throws EncoderException {
|
||||
return this.encode(str1).equals(this.encode(str2));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Encodes a string into a Caverphone 2.0 value. Delegate to a {@link Caverphone2} instance.
|
||||
*
|
||||
* This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0
|
||||
* algorithm:
|
||||
*
|
||||
* @version $Id: Caverphone.java 1079535 2011-03-08 20:54:37Z ggregory $
|
||||
* @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
|
||||
* @see <a href="http://caversham.otago.ac.nz/files/working/ctp150804.pdf">Caverphone 2.0 specification</a>
|
||||
* @since 1.4
|
||||
* @deprecated 1.5 Replaced by {@link Caverphone2}, will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
public class Caverphone implements StringEncoder {
|
||||
|
||||
/**
|
||||
* Delegate to a {@link Caverphone2} instance to avoid code duplication.
|
||||
*/
|
||||
final private Caverphone2 encoder = new Caverphone2();
|
||||
|
||||
/**
|
||||
* Creates an instance of the Caverphone encoder
|
||||
*/
|
||||
public Caverphone() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given String into a Caverphone value.
|
||||
*
|
||||
* @param source
|
||||
* String the source string
|
||||
* @return A caverphone code for the given String
|
||||
*/
|
||||
public String caverphone(final String source) {
|
||||
return this.encoder.encode(source);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an Object using the caverphone algorithm. This method is provided in order to satisfy the requirements of
|
||||
* the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String.
|
||||
*
|
||||
* @param obj
|
||||
* Object to encode
|
||||
* @return An object (or type java.lang.String) containing the caverphone code which corresponds to the String
|
||||
* supplied.
|
||||
* @throws EncoderException
|
||||
* if the parameter supplied is not of type java.lang.String
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (!(obj instanceof String)) {
|
||||
throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String");
|
||||
}
|
||||
return this.caverphone((String) obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a String using the Caverphone algorithm.
|
||||
*
|
||||
* @param str
|
||||
* String object to encode
|
||||
* @return The caverphone code corresponding to the String supplied
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String str) {
|
||||
return this.caverphone(str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests if the caverphones of two strings are identical.
|
||||
*
|
||||
* @param str1
|
||||
* First of two strings to compare
|
||||
* @param str2
|
||||
* Second of two strings to compare
|
||||
* @return <code>true</code> if the caverphones of these strings are identical, <code>false</code> otherwise.
|
||||
*/
|
||||
public boolean isCaverphoneEqual(final String str1, final String str2) {
|
||||
return this.caverphone(str1).equals(this.caverphone(str2));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
/**
|
||||
* Encodes a string into a Caverphone 1.0 value.
|
||||
*
|
||||
* This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 1.0
|
||||
* algorithm:
|
||||
*
|
||||
* @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $
|
||||
* @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
|
||||
* @see <a href="http://caversham.otago.ac.nz/files/working/ctp060902.pdf">Caverphone 1.0 specification</a>
|
||||
* @since 1.5
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*/
|
||||
public class Caverphone1 extends AbstractCaverphone {
|
||||
|
||||
private static final String SIX_1 = "111111";
|
||||
|
||||
/**
|
||||
* Encodes the given String into a Caverphone value.
|
||||
*
|
||||
* @param source
|
||||
* String the source string
|
||||
* @return A caverphone code for the given String
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String source) {
|
||||
String txt = source;
|
||||
if (txt == null || txt.length() == 0) {
|
||||
return SIX_1;
|
||||
}
|
||||
|
||||
// 1. Convert to lowercase
|
||||
txt = txt.toLowerCase(java.util.Locale.ENGLISH);
|
||||
|
||||
// 2. Remove anything not A-Z
|
||||
txt = txt.replaceAll("[^a-z]", "");
|
||||
|
||||
// 3. Handle various start options
|
||||
// 2 is a temporary placeholder to indicate a consonant which we are no longer interested in.
|
||||
txt = txt.replaceAll("^cough", "cou2f");
|
||||
txt = txt.replaceAll("^rough", "rou2f");
|
||||
txt = txt.replaceAll("^tough", "tou2f");
|
||||
txt = txt.replaceAll("^enough", "enou2f");
|
||||
txt = txt.replaceAll("^gn", "2n");
|
||||
|
||||
// End
|
||||
txt = txt.replaceAll("mb$", "m2");
|
||||
|
||||
// 4. Handle replacements
|
||||
txt = txt.replaceAll("cq", "2q");
|
||||
txt = txt.replaceAll("ci", "si");
|
||||
txt = txt.replaceAll("ce", "se");
|
||||
txt = txt.replaceAll("cy", "sy");
|
||||
txt = txt.replaceAll("tch", "2ch");
|
||||
txt = txt.replaceAll("c", "k");
|
||||
txt = txt.replaceAll("q", "k");
|
||||
txt = txt.replaceAll("x", "k");
|
||||
txt = txt.replaceAll("v", "f");
|
||||
txt = txt.replaceAll("dg", "2g");
|
||||
txt = txt.replaceAll("tio", "sio");
|
||||
txt = txt.replaceAll("tia", "sia");
|
||||
txt = txt.replaceAll("d", "t");
|
||||
txt = txt.replaceAll("ph", "fh");
|
||||
txt = txt.replaceAll("b", "p");
|
||||
txt = txt.replaceAll("sh", "s2");
|
||||
txt = txt.replaceAll("z", "s");
|
||||
txt = txt.replaceAll("^[aeiou]", "A");
|
||||
// 3 is a temporary placeholder marking a vowel
|
||||
txt = txt.replaceAll("[aeiou]", "3");
|
||||
txt = txt.replaceAll("3gh3", "3kh3");
|
||||
txt = txt.replaceAll("gh", "22");
|
||||
txt = txt.replaceAll("g", "k");
|
||||
txt = txt.replaceAll("s+", "S");
|
||||
txt = txt.replaceAll("t+", "T");
|
||||
txt = txt.replaceAll("p+", "P");
|
||||
txt = txt.replaceAll("k+", "K");
|
||||
txt = txt.replaceAll("f+", "F");
|
||||
txt = txt.replaceAll("m+", "M");
|
||||
txt = txt.replaceAll("n+", "N");
|
||||
txt = txt.replaceAll("w3", "W3");
|
||||
txt = txt.replaceAll("wy", "Wy"); // 1.0 only
|
||||
txt = txt.replaceAll("wh3", "Wh3");
|
||||
txt = txt.replaceAll("why", "Why"); // 1.0 only
|
||||
txt = txt.replaceAll("w", "2");
|
||||
txt = txt.replaceAll("^h", "A");
|
||||
txt = txt.replaceAll("h", "2");
|
||||
txt = txt.replaceAll("r3", "R3");
|
||||
txt = txt.replaceAll("ry", "Ry"); // 1.0 only
|
||||
txt = txt.replaceAll("r", "2");
|
||||
txt = txt.replaceAll("l3", "L3");
|
||||
txt = txt.replaceAll("ly", "Ly"); // 1.0 only
|
||||
txt = txt.replaceAll("l", "2");
|
||||
txt = txt.replaceAll("j", "y"); // 1.0 only
|
||||
txt = txt.replaceAll("y3", "Y3"); // 1.0 only
|
||||
txt = txt.replaceAll("y", "2"); // 1.0 only
|
||||
|
||||
// 5. Handle removals
|
||||
txt = txt.replaceAll("2", "");
|
||||
txt = txt.replaceAll("3", "");
|
||||
|
||||
// 6. put ten 1s on the end
|
||||
txt = txt + SIX_1;
|
||||
|
||||
// 7. take the first six characters as the code
|
||||
return txt.substring(0, SIX_1.length());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
/**
|
||||
* Encodes a string into a Caverphone 2.0 value.
|
||||
*
|
||||
* This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0
|
||||
* algorithm:
|
||||
*
|
||||
* @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $
|
||||
* @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
|
||||
* @see <a href="http://caversham.otago.ac.nz/files/working/ctp150804.pdf">Caverphone 2.0 specification</a>
|
||||
* @since 1.5
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*/
|
||||
public class Caverphone2 extends AbstractCaverphone {
|
||||
|
||||
private static final String TEN_1 = "1111111111";
|
||||
|
||||
/**
|
||||
* Encodes the given String into a Caverphone 2.0 value.
|
||||
*
|
||||
* @param source
|
||||
* String the source string
|
||||
* @return A caverphone code for the given String
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String source) {
|
||||
String txt = source;
|
||||
if (txt == null || txt.length() == 0) {
|
||||
return TEN_1;
|
||||
}
|
||||
|
||||
// 1. Convert to lowercase
|
||||
txt = txt.toLowerCase(java.util.Locale.ENGLISH);
|
||||
|
||||
// 2. Remove anything not A-Z
|
||||
txt = txt.replaceAll("[^a-z]", "");
|
||||
|
||||
// 2.5. Remove final e
|
||||
txt = txt.replaceAll("e$", ""); // 2.0 only
|
||||
|
||||
// 3. Handle various start options
|
||||
txt = txt.replaceAll("^cough", "cou2f");
|
||||
txt = txt.replaceAll("^rough", "rou2f");
|
||||
txt = txt.replaceAll("^tough", "tou2f");
|
||||
txt = txt.replaceAll("^enough", "enou2f"); // 2.0 only
|
||||
txt = txt.replaceAll("^trough", "trou2f"); // 2.0 only
|
||||
// note the spec says ^enough here again, c+p error I assume
|
||||
txt = txt.replaceAll("^gn", "2n");
|
||||
|
||||
// End
|
||||
txt = txt.replaceAll("mb$", "m2");
|
||||
|
||||
// 4. Handle replacements
|
||||
txt = txt.replaceAll("cq", "2q");
|
||||
txt = txt.replaceAll("ci", "si");
|
||||
txt = txt.replaceAll("ce", "se");
|
||||
txt = txt.replaceAll("cy", "sy");
|
||||
txt = txt.replaceAll("tch", "2ch");
|
||||
txt = txt.replaceAll("c", "k");
|
||||
txt = txt.replaceAll("q", "k");
|
||||
txt = txt.replaceAll("x", "k");
|
||||
txt = txt.replaceAll("v", "f");
|
||||
txt = txt.replaceAll("dg", "2g");
|
||||
txt = txt.replaceAll("tio", "sio");
|
||||
txt = txt.replaceAll("tia", "sia");
|
||||
txt = txt.replaceAll("d", "t");
|
||||
txt = txt.replaceAll("ph", "fh");
|
||||
txt = txt.replaceAll("b", "p");
|
||||
txt = txt.replaceAll("sh", "s2");
|
||||
txt = txt.replaceAll("z", "s");
|
||||
txt = txt.replaceAll("^[aeiou]", "A");
|
||||
txt = txt.replaceAll("[aeiou]", "3");
|
||||
txt = txt.replaceAll("j", "y"); // 2.0 only
|
||||
txt = txt.replaceAll("^y3", "Y3"); // 2.0 only
|
||||
txt = txt.replaceAll("^y", "A"); // 2.0 only
|
||||
txt = txt.replaceAll("y", "3"); // 2.0 only
|
||||
txt = txt.replaceAll("3gh3", "3kh3");
|
||||
txt = txt.replaceAll("gh", "22");
|
||||
txt = txt.replaceAll("g", "k");
|
||||
txt = txt.replaceAll("s+", "S");
|
||||
txt = txt.replaceAll("t+", "T");
|
||||
txt = txt.replaceAll("p+", "P");
|
||||
txt = txt.replaceAll("k+", "K");
|
||||
txt = txt.replaceAll("f+", "F");
|
||||
txt = txt.replaceAll("m+", "M");
|
||||
txt = txt.replaceAll("n+", "N");
|
||||
txt = txt.replaceAll("w3", "W3");
|
||||
txt = txt.replaceAll("wh3", "Wh3");
|
||||
txt = txt.replaceAll("w$", "3"); // 2.0 only
|
||||
txt = txt.replaceAll("w", "2");
|
||||
txt = txt.replaceAll("^h", "A");
|
||||
txt = txt.replaceAll("h", "2");
|
||||
txt = txt.replaceAll("r3", "R3");
|
||||
txt = txt.replaceAll("r$", "3"); // 2.0 only
|
||||
txt = txt.replaceAll("r", "2");
|
||||
txt = txt.replaceAll("l3", "L3");
|
||||
txt = txt.replaceAll("l$", "3"); // 2.0 only
|
||||
txt = txt.replaceAll("l", "2");
|
||||
|
||||
// 5. Handle removals
|
||||
txt = txt.replaceAll("2", "");
|
||||
txt = txt.replaceAll("3$", "A"); // 2.0 only
|
||||
txt = txt.replaceAll("3", "");
|
||||
|
||||
// 6. put ten 1s on the end
|
||||
txt = txt + TEN_1;
|
||||
|
||||
// 7. take the first ten characters as the code
|
||||
return txt.substring(0, TEN_1.length());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,445 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Encodes a string into a Cologne Phonetic value.
|
||||
* <p>
|
||||
* Implements the <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">Kölner Phonetik</a> (Cologne
|
||||
* Phonetic) algorithm issued by Hans Joachim Postel in 1969.
|
||||
* </p>
|
||||
* <p>
|
||||
* The <i>Kölner Phonetik</i> is a phonetic algorithm which is optimized for the German language. It is related to
|
||||
* the well-known soundex algorithm.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Algorithm</h2>
|
||||
*
|
||||
* <ul>
|
||||
*
|
||||
* <li>
|
||||
* <h3>Step 1:</h3>
|
||||
* After preprocessing (conversion to upper case, transcription of <a
|
||||
* href="http://en.wikipedia.org/wiki/Germanic_umlaut">germanic umlauts</a>, removal of non alphabetical characters) the
|
||||
* letters of the supplied text are replaced by their phonetic code according to the following table.
|
||||
* <table border="1">
|
||||
* <caption style="caption-side: bottom"><small><i>(Source: <a
|
||||
* href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik#Buchstabencodes">Wikipedia (de): Kölner Phonetik --
|
||||
* Buchstabencodes</a>)</i></small></caption> <tbody>
|
||||
* <tr>
|
||||
* <th>Letter</th>
|
||||
* <th>Context</th>
|
||||
* <th align="center">Code</th>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>A, E, I, J, O, U, Y</td>
|
||||
* <td></td>
|
||||
* <td align="center">0</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
*
|
||||
* <td>H</td>
|
||||
* <td></td>
|
||||
* <td align="center">-</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>B</td>
|
||||
* <td></td>
|
||||
* <td rowspan="2" align="center">1</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>P</td>
|
||||
* <td>not before H</td>
|
||||
*
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>D, T</td>
|
||||
* <td>not before C, S, Z</td>
|
||||
* <td align="center">2</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>F, V, W</td>
|
||||
* <td></td>
|
||||
* <td rowspan="2" align="center">3</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
*
|
||||
* <td>P</td>
|
||||
* <td>before H</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>G, K, Q</td>
|
||||
* <td></td>
|
||||
* <td rowspan="3" align="center">4</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td rowspan="2">C</td>
|
||||
* <td>at onset before A, H, K, L, O, Q, R, U, X</td>
|
||||
*
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>before A, H, K, O, Q, U, X except after S, Z</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>X</td>
|
||||
* <td>not after C, K, Q</td>
|
||||
* <td align="center">48</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>L</td>
|
||||
* <td></td>
|
||||
*
|
||||
* <td align="center">5</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>M, N</td>
|
||||
* <td></td>
|
||||
* <td align="center">6</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>R</td>
|
||||
* <td></td>
|
||||
* <td align="center">7</td>
|
||||
* </tr>
|
||||
*
|
||||
* <tr>
|
||||
* <td>S, Z</td>
|
||||
* <td></td>
|
||||
* <td rowspan="6" align="center">8</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td rowspan="3">C</td>
|
||||
* <td>after S, Z</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>at onset except before A, H, K, L, O, Q, R, U, X</td>
|
||||
* </tr>
|
||||
*
|
||||
* <tr>
|
||||
* <td>not before A, H, K, O, Q, U, X</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>D, T</td>
|
||||
* <td>before C, S, Z</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>X</td>
|
||||
* <td>after C, K, Q</td>
|
||||
* </tr>
|
||||
* </tbody>
|
||||
* </table>
|
||||
*
|
||||
* <h4>Example:</h4>
|
||||
*
|
||||
* <code>"M</code>ü<code>ller-L</code>ü
|
||||
* <code>denscheidt" => "MULLERLUDENSCHEIDT" => "6005507500206880022"</code>
|
||||
*
|
||||
* </li>
|
||||
*
|
||||
* <li>
|
||||
* <h3>Step 2:</h3>
|
||||
* Collapse of all multiple consecutive code digits.
|
||||
* <h4>Example:</h4>
|
||||
* <code>"6005507500206880022" => "6050750206802"</code></li>
|
||||
*
|
||||
* <li>
|
||||
* <h3>Step 3:</h3>
|
||||
* Removal of all codes "0" except at the beginning. This means that two or more identical consecutive digits can occur
|
||||
* if they occur after removing the "0" digits.
|
||||
*
|
||||
* <h4>Example:</h4>
|
||||
* <code>"6050750206802" => "65752682"</code></li>
|
||||
*
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* This class is thread-safe.
|
||||
* </p>
|
||||
*
|
||||
* @see <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">Wikipedia (de): Kölner Phonetik (in German)</a>
|
||||
* @since 1.5
|
||||
*/
|
||||
public class ColognePhonetic implements StringEncoder {
|
||||
|
||||
// Predefined char arrays for better performance and less GC load
|
||||
private static final char[] AEIJOUY = new char[] { 'A', 'E', 'I', 'J', 'O', 'U', 'Y' };
|
||||
private static final char[] SCZ = new char[] { 'S', 'C', 'Z' };
|
||||
private static final char[] WFPV = new char[] { 'W', 'F', 'P', 'V' };
|
||||
private static final char[] GKQ = new char[] { 'G', 'K', 'Q' };
|
||||
private static final char[] CKQ = new char[] { 'C', 'K', 'Q' };
|
||||
private static final char[] AHKLOQRUX = new char[] { 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X' };
|
||||
private static final char[] SZ = new char[] { 'S', 'Z' };
|
||||
private static final char[] AHOUKQX = new char[] { 'A', 'H', 'O', 'U', 'K', 'Q', 'X' };
|
||||
private static final char[] TDX = new char[] { 'T', 'D', 'X' };
|
||||
|
||||
/**
|
||||
* This class is not thread-safe; the field {@link #length} is mutable.
|
||||
* However, it is not shared between threads, as it is constructed on demand
|
||||
* by the method {@link ColognePhonetic#colognePhonetic(String)}
|
||||
*/
|
||||
private abstract class CologneBuffer {
|
||||
|
||||
protected final char[] data;
|
||||
|
||||
protected int length = 0;
|
||||
|
||||
public CologneBuffer(final char[] data) {
|
||||
this.data = data;
|
||||
this.length = data.length;
|
||||
}
|
||||
|
||||
public CologneBuffer(final int buffSize) {
|
||||
this.data = new char[buffSize];
|
||||
this.length = 0;
|
||||
}
|
||||
|
||||
protected abstract char[] copyData(int start, final int length);
|
||||
|
||||
public int length() {
|
||||
return length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new String(copyData(0, length));
|
||||
}
|
||||
}
|
||||
|
||||
private class CologneOutputBuffer extends CologneBuffer {
|
||||
|
||||
public CologneOutputBuffer(final int buffSize) {
|
||||
super(buffSize);
|
||||
}
|
||||
|
||||
public void addRight(final char chr) {
|
||||
data[length] = chr;
|
||||
length++;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected char[] copyData(final int start, final int length) {
|
||||
final char[] newData = new char[length];
|
||||
System.arraycopy(data, start, newData, 0, length);
|
||||
return newData;
|
||||
}
|
||||
}
|
||||
|
||||
private class CologneInputBuffer extends CologneBuffer {
|
||||
|
||||
public CologneInputBuffer(final char[] data) {
|
||||
super(data);
|
||||
}
|
||||
|
||||
public void addLeft(final char ch) {
|
||||
length++;
|
||||
data[getNextPos()] = ch;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected char[] copyData(final int start, final int length) {
|
||||
final char[] newData = new char[length];
|
||||
System.arraycopy(data, data.length - this.length + start, newData, 0, length);
|
||||
return newData;
|
||||
}
|
||||
|
||||
public char getNextChar() {
|
||||
return data[getNextPos()];
|
||||
}
|
||||
|
||||
protected int getNextPos() {
|
||||
return data.length - length;
|
||||
}
|
||||
|
||||
public char removeNext() {
|
||||
final char ch = getNextChar();
|
||||
length--;
|
||||
return ch;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps some Germanic characters to plain for internal processing. The following characters are mapped:
|
||||
* <ul>
|
||||
* <li>capital a, umlaut mark</li>
|
||||
* <li>capital u, umlaut mark</li>
|
||||
* <li>capital o, umlaut mark</li>
|
||||
* <li>small sharp s, German</li>
|
||||
* </ul>
|
||||
*/
|
||||
private static final char[][] PREPROCESS_MAP = new char[][]{
|
||||
{'\u00C4', 'A'}, // capital a, umlaut mark
|
||||
{'\u00DC', 'U'}, // capital u, umlaut mark
|
||||
{'\u00D6', 'O'}, // capital o, umlaut mark
|
||||
{'\u00DF', 'S'} // small sharp s, German
|
||||
};
|
||||
|
||||
/*
|
||||
* Returns whether the array contains the key, or not.
|
||||
*/
|
||||
private static boolean arrayContains(final char[] arr, final char key) {
|
||||
for (final char element : arr) {
|
||||
if (element == key) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Implements the <i>Kölner Phonetik</i> algorithm.
|
||||
* </p>
|
||||
* <p>
|
||||
* In contrast to the initial description of the algorithm, this implementation does the encoding in one pass.
|
||||
* </p>
|
||||
*
|
||||
* @param text The source text to encode
|
||||
* @return the corresponding encoding according to the <i>Kölner Phonetik</i> algorithm
|
||||
*/
|
||||
public String colognePhonetic(String text) {
|
||||
if (text == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
text = preprocess(text);
|
||||
|
||||
final CologneOutputBuffer output = new CologneOutputBuffer(text.length() * 2);
|
||||
final CologneInputBuffer input = new CologneInputBuffer(text.toCharArray());
|
||||
|
||||
char nextChar;
|
||||
|
||||
char lastChar = '-';
|
||||
char lastCode = '/';
|
||||
char code;
|
||||
char chr;
|
||||
|
||||
int rightLength = input.length();
|
||||
|
||||
while (rightLength > 0) {
|
||||
chr = input.removeNext();
|
||||
|
||||
if ((rightLength = input.length()) > 0) {
|
||||
nextChar = input.getNextChar();
|
||||
} else {
|
||||
nextChar = '-';
|
||||
}
|
||||
|
||||
if (arrayContains(AEIJOUY, chr)) {
|
||||
code = '0';
|
||||
} else if (chr == 'H' || chr < 'A' || chr > 'Z') {
|
||||
if (lastCode == '/') {
|
||||
continue;
|
||||
}
|
||||
code = '-';
|
||||
} else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) {
|
||||
code = '1';
|
||||
} else if ((chr == 'D' || chr == 'T') && !arrayContains(SCZ, nextChar)) {
|
||||
code = '2';
|
||||
} else if (arrayContains(WFPV, chr)) {
|
||||
code = '3';
|
||||
} else if (arrayContains(GKQ, chr)) {
|
||||
code = '4';
|
||||
} else if (chr == 'X' && !arrayContains(CKQ, lastChar)) {
|
||||
code = '4';
|
||||
input.addLeft('S');
|
||||
rightLength++;
|
||||
} else if (chr == 'S' || chr == 'Z') {
|
||||
code = '8';
|
||||
} else if (chr == 'C') {
|
||||
if (lastCode == '/') {
|
||||
if (arrayContains(AHKLOQRUX, nextChar)) {
|
||||
code = '4';
|
||||
} else {
|
||||
code = '8';
|
||||
}
|
||||
} else {
|
||||
if (arrayContains(SZ, lastChar) || !arrayContains(AHOUKQX, nextChar)) {
|
||||
code = '8';
|
||||
} else {
|
||||
code = '4';
|
||||
}
|
||||
}
|
||||
} else if (arrayContains(TDX, chr)) {
|
||||
code = '8';
|
||||
} else if (chr == 'R') {
|
||||
code = '7';
|
||||
} else if (chr == 'L') {
|
||||
code = '5';
|
||||
} else if (chr == 'M' || chr == 'N') {
|
||||
code = '6';
|
||||
} else {
|
||||
code = chr;
|
||||
}
|
||||
|
||||
if (code != '-' && (lastCode != code && (code != '0' || lastCode == '/') || code < '0' || code > '8')) {
|
||||
output.addRight(code);
|
||||
}
|
||||
|
||||
lastChar = chr;
|
||||
lastCode = code;
|
||||
}
|
||||
return output.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object encode(final Object object) throws EncoderException {
|
||||
if (!(object instanceof String)) {
|
||||
throw new EncoderException("This method's parameter was expected to be of the type " +
|
||||
String.class.getName() +
|
||||
". But actually it was of the type " +
|
||||
object.getClass().getName() +
|
||||
".");
|
||||
}
|
||||
return encode((String) object);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String encode(final String text) {
|
||||
return colognePhonetic(text);
|
||||
}
|
||||
|
||||
public boolean isEncodeEqual(final String text1, final String text2) {
|
||||
return colognePhonetic(text1).equals(colognePhonetic(text2));
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the string to upper case and replaces germanic characters as defined in {@link #PREPROCESS_MAP}.
|
||||
*/
|
||||
private String preprocess(String text) {
|
||||
text = text.toUpperCase(Locale.GERMAN);
|
||||
|
||||
final char[] chrs = text.toCharArray();
|
||||
|
||||
for (int index = 0; index < chrs.length; index++) {
|
||||
if (chrs[index] > 'Z') {
|
||||
for (final char[] element : PREPROCESS_MAP) {
|
||||
if (chrs[index] == element[0]) {
|
||||
chrs[index] = element[1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new String(chrs);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,561 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.codec.CharEncoding;
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Encodes a string into a Daitch-Mokotoff Soundex value.
|
||||
* <p>
|
||||
* The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and American Soundex algorithms, yielding greater
|
||||
* accuracy in matching especially Slavish and Yiddish surnames with similar pronunciation but differences in spelling.
|
||||
* </p>
|
||||
* <p>
|
||||
* The main differences compared to the other soundex variants are:
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>coded names are 6 digits long
|
||||
* <li>the initial character of the name is coded
|
||||
* <li>rules to encoded multi-character n-grams
|
||||
* <li>multiple possible encodings for the same name (branching)
|
||||
* </ul>
|
||||
* <p>
|
||||
* This implementation supports branching, depending on the used method:
|
||||
* <ul>
|
||||
* <li>{@link #encode(String)} - branching disabled, only the first code will be returned
|
||||
* <li>{@link #soundex(String)} - branching enabled, all codes will be returned, separated by '|'
|
||||
* </ul>
|
||||
* <p>
|
||||
* Note: this implementation has additional branching rules compared to the original description of the algorithm. The
|
||||
* rules can be customized by overriding the default rules contained in the resource file
|
||||
* {@code org/apache/commons/codec/language/dmrules.txt}.
|
||||
* </p>
|
||||
* <p>
|
||||
* This class is thread-safe.
|
||||
* </p>
|
||||
*
|
||||
* @see Soundex
|
||||
* @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a>
|
||||
* @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since 1.10
|
||||
*/
|
||||
public class DaitchMokotoffSoundex implements StringEncoder {
|
||||
|
||||
/**
|
||||
* Inner class representing a branch during DM soundex encoding.
|
||||
*/
|
||||
private static final class Branch {
|
||||
private final StringBuilder builder;
|
||||
private String cachedString;
|
||||
private String lastReplacement;
|
||||
|
||||
private Branch() {
|
||||
builder = new StringBuilder();
|
||||
lastReplacement = null;
|
||||
cachedString = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new branch, identical to this branch.
|
||||
*
|
||||
* @return a new, identical branch
|
||||
*/
|
||||
public Branch createBranch() {
|
||||
final Branch branch = new Branch();
|
||||
branch.builder.append(toString());
|
||||
branch.lastReplacement = this.lastReplacement;
|
||||
return branch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object other) {
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
if (!(other instanceof Branch)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return toString().equals(((Branch) other).toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Finish this branch by appending '0's until the maximum code length has been reached.
|
||||
*/
|
||||
public void finish() {
|
||||
while (builder.length() < MAX_LENGTH) {
|
||||
builder.append('0');
|
||||
cachedString = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return toString().hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the next replacement to be added to this branch.
|
||||
*
|
||||
* @param replacement
|
||||
* the next replacement to append
|
||||
* @param forceAppend
|
||||
* indicates if the default processing shall be overridden
|
||||
*/
|
||||
public void processNextReplacement(final String replacement, final boolean forceAppend) {
|
||||
final boolean append = lastReplacement == null || !lastReplacement.endsWith(replacement) || forceAppend;
|
||||
|
||||
if (append && builder.length() < MAX_LENGTH) {
|
||||
builder.append(replacement);
|
||||
// remove all characters after the maximum length
|
||||
if (builder.length() > MAX_LENGTH) {
|
||||
builder.delete(MAX_LENGTH, builder.length());
|
||||
}
|
||||
cachedString = null;
|
||||
}
|
||||
|
||||
lastReplacement = replacement;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (cachedString == null) {
|
||||
cachedString = builder.toString();
|
||||
}
|
||||
return cachedString;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Inner class for storing rules.
|
||||
*/
|
||||
private static final class Rule {
|
||||
private final String pattern;
|
||||
private final String[] replacementAtStart;
|
||||
private final String[] replacementBeforeVowel;
|
||||
private final String[] replacementDefault;
|
||||
|
||||
protected Rule(final String pattern, final String replacementAtStart, final String replacementBeforeVowel,
|
||||
final String replacementDefault) {
|
||||
this.pattern = pattern;
|
||||
this.replacementAtStart = replacementAtStart.split("\\|");
|
||||
this.replacementBeforeVowel = replacementBeforeVowel.split("\\|");
|
||||
this.replacementDefault = replacementDefault.split("\\|");
|
||||
}
|
||||
|
||||
public int getPatternLength() {
|
||||
return pattern.length();
|
||||
}
|
||||
|
||||
public String[] getReplacements(final String context, final boolean atStart) {
|
||||
if (atStart) {
|
||||
return replacementAtStart;
|
||||
}
|
||||
|
||||
final int nextIndex = getPatternLength();
|
||||
final boolean nextCharIsVowel = nextIndex < context.length() ? isVowel(context.charAt(nextIndex)) : false;
|
||||
if (nextCharIsVowel) {
|
||||
return replacementBeforeVowel;
|
||||
}
|
||||
|
||||
return replacementDefault;
|
||||
}
|
||||
|
||||
private boolean isVowel(final char ch) {
|
||||
return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u';
|
||||
}
|
||||
|
||||
public boolean matches(final String context) {
|
||||
return context.startsWith(pattern);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s=(%s,%s,%s)", pattern, Arrays.asList(replacementAtStart),
|
||||
Arrays.asList(replacementBeforeVowel), Arrays.asList(replacementDefault));
|
||||
}
|
||||
}
|
||||
|
||||
private static final String COMMENT = "//";
|
||||
private static final String DOUBLE_QUOTE = "\"";
|
||||
|
||||
private static final String MULTILINE_COMMENT_END = "*/";
|
||||
|
||||
private static final String MULTILINE_COMMENT_START = "/*";
|
||||
|
||||
/** The resource file containing the replacement and folding rules */
|
||||
private static final String RESOURCE_FILE = "org/apache/commons/codec/language/dmrules.txt";
|
||||
|
||||
/** The code length of a DM soundex value. */
|
||||
private static final int MAX_LENGTH = 6;
|
||||
|
||||
/** Transformation rules indexed by the first character of their pattern. */
|
||||
private static final Map<Character, List<Rule>> RULES = new HashMap<Character, List<Rule>>();
|
||||
|
||||
/** Folding rules. */
|
||||
private static final Map<Character, Character> FOLDINGS = new HashMap<Character, Character>();
|
||||
|
||||
static {
|
||||
final InputStream rulesIS = DaitchMokotoffSoundex.class.getClassLoader().getResourceAsStream(RESOURCE_FILE);
|
||||
if (rulesIS == null) {
|
||||
throw new IllegalArgumentException("Unable to load resource: " + RESOURCE_FILE);
|
||||
}
|
||||
|
||||
final Scanner scanner = new Scanner(rulesIS, CharEncoding.UTF_8);
|
||||
parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
|
||||
scanner.close();
|
||||
|
||||
// sort RULES by pattern length in descending order
|
||||
for (final Map.Entry<Character, List<Rule>> rule : RULES.entrySet()) {
|
||||
final List<Rule> ruleList = rule.getValue();
|
||||
Collections.sort(ruleList, new Comparator<Rule>() {
|
||||
@Override
|
||||
public int compare(final Rule rule1, final Rule rule2) {
|
||||
return rule2.getPatternLength() - rule1.getPatternLength();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private static void parseRules(final Scanner scanner, final String location,
|
||||
final Map<Character, List<Rule>> ruleMapping, final Map<Character, Character> asciiFoldings) {
|
||||
int currentLine = 0;
|
||||
boolean inMultilineComment = false;
|
||||
|
||||
while (scanner.hasNextLine()) {
|
||||
currentLine++;
|
||||
final String rawLine = scanner.nextLine();
|
||||
String line = rawLine;
|
||||
|
||||
if (inMultilineComment) {
|
||||
if (line.endsWith(MULTILINE_COMMENT_END)) {
|
||||
inMultilineComment = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (line.startsWith(MULTILINE_COMMENT_START)) {
|
||||
inMultilineComment = true;
|
||||
} else {
|
||||
// discard comments
|
||||
final int cmtI = line.indexOf(COMMENT);
|
||||
if (cmtI >= 0) {
|
||||
line = line.substring(0, cmtI);
|
||||
}
|
||||
|
||||
// trim leading-trailing whitespace
|
||||
line = line.trim();
|
||||
|
||||
if (line.length() == 0) {
|
||||
continue; // empty lines can be safely skipped
|
||||
}
|
||||
|
||||
if (line.contains("=")) {
|
||||
// folding
|
||||
final String[] parts = line.split("=");
|
||||
if (parts.length != 2) {
|
||||
throw new IllegalArgumentException("Malformed folding statement split into " + parts.length +
|
||||
" parts: " + rawLine + " in " + location);
|
||||
} else {
|
||||
final String leftCharacter = parts[0];
|
||||
final String rightCharacter = parts[1];
|
||||
|
||||
if (leftCharacter.length() != 1 || rightCharacter.length() != 1) {
|
||||
throw new IllegalArgumentException("Malformed folding statement - " +
|
||||
"patterns are not single characters: " + rawLine + " in " + location);
|
||||
}
|
||||
|
||||
asciiFoldings.put(leftCharacter.charAt(0), rightCharacter.charAt(0));
|
||||
}
|
||||
} else {
|
||||
// rule
|
||||
final String[] parts = line.split("\\s+");
|
||||
if (parts.length != 4) {
|
||||
throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
|
||||
" parts: " + rawLine + " in " + location);
|
||||
} else {
|
||||
try {
|
||||
final String pattern = stripQuotes(parts[0]);
|
||||
final String replacement1 = stripQuotes(parts[1]);
|
||||
final String replacement2 = stripQuotes(parts[2]);
|
||||
final String replacement3 = stripQuotes(parts[3]);
|
||||
|
||||
final Rule r = new Rule(pattern, replacement1, replacement2, replacement3);
|
||||
final char patternKey = r.pattern.charAt(0);
|
||||
List<Rule> rules = ruleMapping.get(patternKey);
|
||||
if (rules == null) {
|
||||
rules = new ArrayList<Rule>();
|
||||
ruleMapping.put(patternKey, rules);
|
||||
}
|
||||
rules.add(r);
|
||||
} catch (final IllegalArgumentException e) {
|
||||
throw new IllegalStateException(
|
||||
"Problem parsing line '" + currentLine + "' in " + location, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static String stripQuotes(String str) {
|
||||
if (str.startsWith(DOUBLE_QUOTE)) {
|
||||
str = str.substring(1);
|
||||
}
|
||||
|
||||
if (str.endsWith(DOUBLE_QUOTE)) {
|
||||
str = str.substring(0, str.length() - 1);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
/** Whether to use ASCII folding prior to encoding. */
|
||||
private final boolean folding;
|
||||
|
||||
/**
|
||||
* Creates a new instance with ASCII-folding enabled.
|
||||
*/
|
||||
public DaitchMokotoffSoundex() {
|
||||
this(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new instance.
|
||||
* <p>
|
||||
* With ASCII-folding enabled, certain accented characters will be transformed to equivalent ASCII characters, e.g.
|
||||
* è -> e.
|
||||
* </p>
|
||||
*
|
||||
* @param folding
|
||||
* if ASCII-folding shall be performed before encoding
|
||||
*/
|
||||
public DaitchMokotoffSoundex(final boolean folding) {
|
||||
this.folding = folding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a cleanup of the input string before the actual soundex transformation.
|
||||
* <p>
|
||||
* Removes all whitespace characters and performs ASCII folding if enabled.
|
||||
* </p>
|
||||
*
|
||||
* @param input
|
||||
* the input string to cleanup
|
||||
* @return a cleaned up string
|
||||
*/
|
||||
private String cleanup(final String input) {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
for (char ch : input.toCharArray()) {
|
||||
if (Character.isWhitespace(ch)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ch = Character.toLowerCase(ch);
|
||||
if (folding && FOLDINGS.containsKey(ch)) {
|
||||
ch = FOLDINGS.get(ch);
|
||||
}
|
||||
sb.append(ch);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
|
||||
* <p>
|
||||
* This method is provided in order to satisfy the requirements of the Encoder interface, and will throw an
|
||||
* EncoderException if the supplied object is not of type java.lang.String.
|
||||
* </p>
|
||||
*
|
||||
* @see #soundex(String)
|
||||
*
|
||||
* @param obj
|
||||
* Object to encode
|
||||
* @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String
|
||||
* supplied.
|
||||
* @throws EncoderException
|
||||
* if the parameter supplied is not of type java.lang.String
|
||||
* @throws IllegalArgumentException
|
||||
* if a character is not mapped
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (!(obj instanceof String)) {
|
||||
throw new EncoderException(
|
||||
"Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String");
|
||||
}
|
||||
return encode((String) obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a String using the Daitch-Mokotoff soundex algorithm without branching.
|
||||
*
|
||||
* @see #soundex(String)
|
||||
*
|
||||
* @param source
|
||||
* A String object to encode
|
||||
* @return A DM Soundex code corresponding to the String supplied
|
||||
* @throws IllegalArgumentException
|
||||
* if a character is not mapped
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String source) {
|
||||
if (source == null) {
|
||||
return null;
|
||||
}
|
||||
return soundex(source, false)[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a String using the Daitch-Mokotoff soundex algorithm with branching.
|
||||
* <p>
|
||||
* In case a string is encoded into multiple codes (see branching rules), the result will contain all codes,
|
||||
* separated by '|'.
|
||||
* </p>
|
||||
* <p>
|
||||
* Example: the name "AUERBACH" is encoded as both
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>097400</li>
|
||||
* <li>097500</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Thus the result will be "097400|097500".
|
||||
* </p>
|
||||
*
|
||||
* @param source
|
||||
* A String object to encode
|
||||
* @return A string containing a set of DM Soundex codes corresponding to the String supplied
|
||||
* @throws IllegalArgumentException
|
||||
* if a character is not mapped
|
||||
*/
|
||||
public String soundex(final String source) {
|
||||
final String[] branches = soundex(source, true);
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
int index = 0;
|
||||
for (final String branch : branches) {
|
||||
sb.append(branch);
|
||||
if (++index < branches.length) {
|
||||
sb.append('|');
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the actual DM Soundex algorithm on the input string.
|
||||
*
|
||||
* @param source
|
||||
* A String object to encode
|
||||
* @param branching
|
||||
* If branching shall be performed
|
||||
* @return A string array containing all DM Soundex codes corresponding to the String supplied depending on the
|
||||
* selected branching mode
|
||||
*/
|
||||
private String[] soundex(final String source, final boolean branching) {
|
||||
if (source == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final String input = cleanup(source);
|
||||
|
||||
final Set<Branch> currentBranches = new LinkedHashSet<Branch>();
|
||||
currentBranches.add(new Branch());
|
||||
|
||||
char lastChar = '\0';
|
||||
for (int index = 0; index < input.length(); index++) {
|
||||
final char ch = input.charAt(index);
|
||||
|
||||
// ignore whitespace inside a name
|
||||
if (Character.isWhitespace(ch)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final String inputContext = input.substring(index);
|
||||
final List<Rule> rules = RULES.get(ch);
|
||||
if (rules == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
|
||||
@SuppressWarnings("unchecked")
|
||||
final List<Branch> nextBranches = branching ? new ArrayList<Branch>() : Collections.EMPTY_LIST;
|
||||
|
||||
for (final Rule rule : rules) {
|
||||
if (rule.matches(inputContext)) {
|
||||
if (branching) {
|
||||
nextBranches.clear();
|
||||
}
|
||||
final String[] replacements = rule.getReplacements(inputContext, lastChar == '\0');
|
||||
final boolean branchingRequired = replacements.length > 1 && branching;
|
||||
|
||||
for (final Branch branch : currentBranches) {
|
||||
for (final String nextReplacement : replacements) {
|
||||
// if we have multiple replacements, always create a new branch
|
||||
final Branch nextBranch = branchingRequired ? branch.createBranch() : branch;
|
||||
|
||||
// special rule: occurrences of mn or nm are treated differently
|
||||
final boolean force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm');
|
||||
|
||||
nextBranch.processNextReplacement(nextReplacement, force);
|
||||
|
||||
if (branching) {
|
||||
nextBranches.add(nextBranch);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (branching) {
|
||||
currentBranches.clear();
|
||||
currentBranches.addAll(nextBranches);
|
||||
}
|
||||
index += rule.getPatternLength() - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
lastChar = ch;
|
||||
}
|
||||
|
||||
final String[] result = new String[currentBranches.size()];
|
||||
int index = 0;
|
||||
for (final Branch branch : currentBranches) {
|
||||
branch.finish();
|
||||
result[index++] = branch.toString();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,426 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Match Rating Approach Phonetic Algorithm Developed by <CITE>Western Airlines</CITE> in 1977.
|
||||
*
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @see <a href="http://en.wikipedia.org/wiki/Match_rating_approach">Wikipedia - Match Rating Approach</a>
|
||||
* @since 1.8
|
||||
*/
|
||||
public class MatchRatingApproachEncoder implements StringEncoder {
|
||||
|
||||
private static final String SPACE = " ";
|
||||
|
||||
private static final String EMPTY = "";
|
||||
|
||||
/**
|
||||
* Constants used mainly for the min rating value.
|
||||
*/
|
||||
private static final int ONE = 1, TWO = 2, THREE = 3, FOUR = 4, FIVE = 5, SIX = 6, SEVEN = 7, EIGHT = 8,
|
||||
ELEVEN = 11, TWELVE = 12;
|
||||
|
||||
/**
|
||||
* The plain letter equivalent of the accented letters.
|
||||
*/
|
||||
private static final String PLAIN_ASCII = "AaEeIiOoUu" + // grave
|
||||
"AaEeIiOoUuYy" + // acute
|
||||
"AaEeIiOoUuYy" + // circumflex
|
||||
"AaOoNn" + // tilde
|
||||
"AaEeIiOoUuYy" + // umlaut
|
||||
"Aa" + // ring
|
||||
"Cc" + // cedilla
|
||||
"OoUu"; // double acute
|
||||
|
||||
/**
|
||||
* Unicode characters corresponding to various accented letters. For example: \u00DA is U acute etc...
|
||||
*/
|
||||
private static final String UNICODE = "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9" +
|
||||
"\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD" +
|
||||
"\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177" +
|
||||
"\u00C3\u00E3\u00D5\u00F5\u00D1\u00F1" +
|
||||
"\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF" +
|
||||
"\u00C5\u00E5" + "\u00C7\u00E7" + "\u0150\u0151\u0170\u0171";
|
||||
|
||||
private static final String[] DOUBLE_CONSONANT =
|
||||
new String[] { "BB", "CC", "DD", "FF", "GG", "HH", "JJ", "KK", "LL", "MM", "NN", "PP", "QQ", "RR", "SS",
|
||||
"TT", "VV", "WW", "XX", "YY", "ZZ" };
|
||||
|
||||
/**
|
||||
* Cleans up a name: 1. Upper-cases everything 2. Removes some common punctuation 3. Removes accents 4. Removes any
|
||||
* spaces.
|
||||
*
|
||||
* <h2>API Usage</h2>
|
||||
* <p>
|
||||
* Consider this method private, it is package protected for unit testing only.
|
||||
* </p>
|
||||
*
|
||||
* @param name
|
||||
* The name to be cleaned
|
||||
* @return The cleaned name
|
||||
*/
|
||||
String cleanName(final String name) {
|
||||
String upperName = name.toUpperCase(Locale.ENGLISH);
|
||||
|
||||
final String[] charsToTrim = { "\\-", "[&]", "\\'", "\\.", "[\\,]" };
|
||||
for (final String str : charsToTrim) {
|
||||
upperName = upperName.replaceAll(str, EMPTY);
|
||||
}
|
||||
|
||||
upperName = removeAccents(upperName);
|
||||
upperName = upperName.replaceAll("\\s+", EMPTY);
|
||||
|
||||
return upperName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an Object using the Match Rating Approach algorithm. Method is here to satisfy the requirements of the
|
||||
* Encoder interface Throws an EncoderException if input object is not of type java.lang.String.
|
||||
*
|
||||
* @param pObject
|
||||
* Object to encode
|
||||
* @return An object (or type java.lang.String) containing the Match Rating Approach code which corresponds to the
|
||||
* String supplied.
|
||||
* @throws EncoderException
|
||||
* if the parameter supplied is not of type java.lang.String
|
||||
*/
|
||||
@Override
|
||||
public final Object encode(final Object pObject) throws EncoderException {
|
||||
if (!(pObject instanceof String)) {
|
||||
throw new EncoderException(
|
||||
"Parameter supplied to Match Rating Approach encoder is not of type java.lang.String");
|
||||
}
|
||||
return encode((String) pObject);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a String using the Match Rating Approach (MRA) algorithm.
|
||||
*
|
||||
* @param name
|
||||
* String object to encode
|
||||
* @return The MRA code corresponding to the String supplied
|
||||
*/
|
||||
@Override
|
||||
public final String encode(String name) {
|
||||
// Bulletproof for trivial input - NINO
|
||||
if (name == null || EMPTY.equalsIgnoreCase(name) || SPACE.equalsIgnoreCase(name) || name.length() == 1) {
|
||||
return EMPTY;
|
||||
}
|
||||
|
||||
// Preprocessing
|
||||
name = cleanName(name);
|
||||
|
||||
// BEGIN: Actual encoding part of the algorithm...
|
||||
// 1. Delete all vowels unless the vowel begins the word
|
||||
name = removeVowels(name);
|
||||
|
||||
// 2. Remove second consonant from any double consonant
|
||||
name = removeDoubleConsonants(name);
|
||||
|
||||
// 3. Reduce codex to 6 letters by joining the first 3 and last 3 letters
|
||||
name = getFirst3Last3(name);
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first and last 3 letters of a name (if > 6 characters) Else just returns the name.
|
||||
*
|
||||
* <h2>API Usage</h2>
|
||||
* <p>
|
||||
* Consider this method private, it is package protected for unit testing only.
|
||||
* </p>
|
||||
*
|
||||
* @param name
|
||||
* The string to get the substrings from
|
||||
* @return Annexed first and last 3 letters of input word.
|
||||
*/
|
||||
String getFirst3Last3(final String name) {
|
||||
final int nameLength = name.length();
|
||||
|
||||
if (nameLength > SIX) {
|
||||
final String firstThree = name.substring(0, THREE);
|
||||
final String lastThree = name.substring(nameLength - THREE, nameLength);
|
||||
return firstThree + lastThree;
|
||||
} else {
|
||||
return name;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtains the min rating of the length sum of the 2 names. In essence the larger the sum length the smaller the
|
||||
* min rating. Values strictly from documentation.
|
||||
*
|
||||
* <h2>API Usage</h2>
|
||||
* <p>
|
||||
* Consider this method private, it is package protected for unit testing only.
|
||||
* </p>
|
||||
*
|
||||
* @param sumLength
|
||||
* The length of 2 strings sent down
|
||||
* @return The min rating value
|
||||
*/
|
||||
int getMinRating(final int sumLength) {
|
||||
int minRating = 0;
|
||||
|
||||
if (sumLength <= FOUR) {
|
||||
minRating = FIVE;
|
||||
} else if (sumLength >= FIVE && sumLength <= SEVEN) {
|
||||
minRating = FOUR;
|
||||
} else if (sumLength >= EIGHT && sumLength <= ELEVEN) {
|
||||
minRating = THREE;
|
||||
} else if (sumLength == TWELVE) {
|
||||
minRating = TWO;
|
||||
} else {
|
||||
minRating = ONE; // docs said little here.
|
||||
}
|
||||
|
||||
return minRating;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if two names are homophonous via Match Rating Approach (MRA) algorithm. It should be noted that the
|
||||
* strings are cleaned in the same way as {@link #encode(String)}.
|
||||
*
|
||||
* @param name1
|
||||
* First of the 2 strings (names) to compare
|
||||
* @param name2
|
||||
* Second of the 2 names to compare
|
||||
* @return <code>true</code> if the encodings are identical <code>false</code> otherwise.
|
||||
*/
|
||||
public boolean isEncodeEquals(String name1, String name2) {
|
||||
// Bulletproof for trivial input - NINO
|
||||
if (name1 == null || EMPTY.equalsIgnoreCase(name1) || SPACE.equalsIgnoreCase(name1)) {
|
||||
return false;
|
||||
} else if (name2 == null || EMPTY.equalsIgnoreCase(name2) || SPACE.equalsIgnoreCase(name2)) {
|
||||
return false;
|
||||
} else if (name1.length() == 1 || name2.length() == 1) {
|
||||
return false;
|
||||
} else if (name1.equalsIgnoreCase(name2)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Preprocessing
|
||||
name1 = cleanName(name1);
|
||||
name2 = cleanName(name2);
|
||||
|
||||
// Actual MRA Algorithm
|
||||
|
||||
// 1. Remove vowels
|
||||
name1 = removeVowels(name1);
|
||||
name2 = removeVowels(name2);
|
||||
|
||||
// 2. Remove double consonants
|
||||
name1 = removeDoubleConsonants(name1);
|
||||
name2 = removeDoubleConsonants(name2);
|
||||
|
||||
// 3. Reduce down to 3 letters
|
||||
name1 = getFirst3Last3(name1);
|
||||
name2 = getFirst3Last3(name2);
|
||||
|
||||
// 4. Check for length difference - if 3 or greater then no similarity
|
||||
// comparison is done
|
||||
if (Math.abs(name1.length() - name2.length()) >= THREE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 5. Obtain the minimum rating value by calculating the length sum of the
|
||||
// encoded Strings and sending it down.
|
||||
final int sumLength = Math.abs(name1.length() + name2.length());
|
||||
int minRating = 0;
|
||||
minRating = getMinRating(sumLength);
|
||||
|
||||
// 6. Process the encoded Strings from left to right and remove any
|
||||
// identical characters found from both Strings respectively.
|
||||
final int count = leftToRightThenRightToLeftProcessing(name1, name2);
|
||||
|
||||
// 7. Each PNI item that has a similarity rating equal to or greater than
|
||||
// the min is considered to be a good candidate match
|
||||
return count >= minRating;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if a letter is a vowel.
|
||||
*
|
||||
* <h2>API Usage</h2>
|
||||
* <p>
|
||||
* Consider this method private, it is package protected for unit testing only.
|
||||
* </p>
|
||||
*
|
||||
* @param letter
|
||||
* The letter under investiagtion
|
||||
* @return True if a vowel, else false
|
||||
*/
|
||||
boolean isVowel(final String letter) {
|
||||
return letter.equalsIgnoreCase("E") || letter.equalsIgnoreCase("A") || letter.equalsIgnoreCase("O") ||
|
||||
letter.equalsIgnoreCase("I") || letter.equalsIgnoreCase("U");
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes the names from left to right (first) then right to left removing identical letters in same positions.
|
||||
* Then subtracts the longer string that remains from 6 and returns this.
|
||||
*
|
||||
* <h2>API Usage</h2>
|
||||
* <p>
|
||||
* Consider this method private, it is package protected for unit testing only.
|
||||
* </p>
|
||||
*
|
||||
* @param name1
|
||||
* name2
|
||||
* @return
|
||||
*/
|
||||
int leftToRightThenRightToLeftProcessing(final String name1, final String name2) {
|
||||
final char[] name1Char = name1.toCharArray();
|
||||
final char[] name2Char = name2.toCharArray();
|
||||
|
||||
final int name1Size = name1.length() - 1;
|
||||
final int name2Size = name2.length() - 1;
|
||||
|
||||
String name1LtRStart = EMPTY;
|
||||
String name1LtREnd = EMPTY;
|
||||
|
||||
String name2RtLStart = EMPTY;
|
||||
String name2RtLEnd = EMPTY;
|
||||
|
||||
for (int i = 0; i < name1Char.length; i++) {
|
||||
if (i > name2Size) {
|
||||
break;
|
||||
}
|
||||
|
||||
name1LtRStart = name1.substring(i, i + 1);
|
||||
name1LtREnd = name1.substring(name1Size - i, name1Size - i + 1);
|
||||
|
||||
name2RtLStart = name2.substring(i, i + 1);
|
||||
name2RtLEnd = name2.substring(name2Size - i, name2Size - i + 1);
|
||||
|
||||
// Left to right...
|
||||
if (name1LtRStart.equals(name2RtLStart)) {
|
||||
name1Char[i] = ' ';
|
||||
name2Char[i] = ' ';
|
||||
}
|
||||
|
||||
// Right to left...
|
||||
if (name1LtREnd.equals(name2RtLEnd)) {
|
||||
name1Char[name1Size - i] = ' ';
|
||||
name2Char[name2Size - i] = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
// Char arrays -> string & remove extraneous space
|
||||
final String strA = new String(name1Char).replaceAll("\\s+", EMPTY);
|
||||
final String strB = new String(name2Char).replaceAll("\\s+", EMPTY);
|
||||
|
||||
// Final bit - subtract longest string from 6 and return this int value
|
||||
if (strA.length() > strB.length()) {
|
||||
return Math.abs(SIX - strA.length());
|
||||
} else {
|
||||
return Math.abs(SIX - strB.length());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes accented letters and replaces with non-accented ascii equivalent Case is preserved.
|
||||
* http://www.codecodex.com/wiki/Remove_accent_from_letters_%28ex_.%C3%A9_to_e%29
|
||||
*
|
||||
* @param accentedWord
|
||||
* The word that may have accents in it.
|
||||
* @return De-accented word
|
||||
*/
|
||||
String removeAccents(final String accentedWord) {
|
||||
if (accentedWord == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
final int n = accentedWord.length();
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
final char c = accentedWord.charAt(i);
|
||||
final int pos = UNICODE.indexOf(c);
|
||||
if (pos > -1) {
|
||||
sb.append(PLAIN_ASCII.charAt(pos));
|
||||
} else {
|
||||
sb.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces any double consonant pair with the single letter equivalent.
|
||||
*
|
||||
* <h2>API Usage</h2>
|
||||
* <p>
|
||||
* Consider this method private, it is package protected for unit testing only.
|
||||
* </p>
|
||||
*
|
||||
* @param name
|
||||
* String to have double consonants removed
|
||||
* @return Single consonant word
|
||||
*/
|
||||
String removeDoubleConsonants(final String name) {
|
||||
String replacedName = name.toUpperCase();
|
||||
for (final String dc : DOUBLE_CONSONANT) {
|
||||
if (replacedName.contains(dc)) {
|
||||
final String singleLetter = dc.substring(0, 1);
|
||||
replacedName = replacedName.replace(dc, singleLetter);
|
||||
}
|
||||
}
|
||||
return replacedName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes all vowels unless the vowel begins the word.
|
||||
*
|
||||
* <h2>API Usage</h2>
|
||||
* <p>
|
||||
* Consider this method private, it is package protected for unit testing only.
|
||||
* </p>
|
||||
*
|
||||
* @param name
|
||||
* The name to have vowels removed
|
||||
* @return De-voweled word
|
||||
*/
|
||||
String removeVowels(String name) {
|
||||
// Extract first letter
|
||||
final String firstLetter = name.substring(0, 1);
|
||||
|
||||
name = name.replaceAll("A", EMPTY);
|
||||
name = name.replaceAll("E", EMPTY);
|
||||
name = name.replaceAll("I", EMPTY);
|
||||
name = name.replaceAll("O", EMPTY);
|
||||
name = name.replaceAll("U", EMPTY);
|
||||
|
||||
name = name.replaceAll("\\s{2,}\\b", SPACE);
|
||||
|
||||
// return isVowel(firstLetter) ? (firstLetter + name) : name;
|
||||
if (isVowel(firstLetter)) {
|
||||
return firstLetter + name;
|
||||
} else {
|
||||
return name;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,430 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Encodes a string into a Metaphone value.
|
||||
* <p>
|
||||
* Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
|
||||
* Permission given by <CITE>wbrogden</CITE> for code to be used anywhere.
|
||||
* <p>
|
||||
* <CITE>Hanging on the Metaphone</CITE> by <CITE>Lawrence Philips</CITE> in <CITE>Computer Language of Dec. 1990,
|
||||
* p 39.</CITE>
|
||||
* <p>
|
||||
* Note, that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li><a href="http://search.cpan.org/~mschwern/Text-Metaphone-1.96/Metaphone.pm">Text:Metaphone-1.96</a>
|
||||
* (broken link 4/30/2013) </li>
|
||||
* <li><a href="https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm">Text:Metaphone-1.96</a>
|
||||
* (link checked 4/30/2013) </li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* They have had undocumented changes from the originally published algorithm.
|
||||
* For more information, see <a href="https://issues.apache.org/jira/browse/CODEC-57">CODEC-57</a>.
|
||||
* <p>
|
||||
* This class is conditionally thread-safe.
|
||||
* The instance field {@link #maxCodeLen} is mutable {@link #setMaxCodeLen(int)}
|
||||
* but is not volatile, and accesses are not synchronized.
|
||||
* If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization
|
||||
* is used to ensure safe publication of the value between threads, and must not invoke {@link #setMaxCodeLen(int)}
|
||||
* after initial setup.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Metaphone implements StringEncoder {
|
||||
|
||||
/**
|
||||
* The five vowels of the English language
|
||||
*/
|
||||
private static final String VOWELS = "AEIOU";
|
||||
|
||||
/**
|
||||
* Variable used in Metaphone algorithm
|
||||
*/
|
||||
private static final String FRONTV = "EIY";
|
||||
|
||||
/**
|
||||
* Variable used in Metaphone algorithm
|
||||
*/
|
||||
private static final String VARSON = "CSPTG";
|
||||
|
||||
/**
|
||||
* The max code length for metaphone is 4
|
||||
*/
|
||||
private int maxCodeLen = 4;
|
||||
|
||||
/**
|
||||
* Creates an instance of the Metaphone encoder
|
||||
*/
|
||||
public Metaphone() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the metaphone value of a String. This is similar to the
|
||||
* soundex algorithm, but better at finding similar sounding words.
|
||||
* All input is converted to upper case.
|
||||
* Limitations: Input format is expected to be a single ASCII word
|
||||
* with only characters in the A - Z range, no punctuation or numbers.
|
||||
*
|
||||
* @param txt String to find the metaphone code for
|
||||
* @return A metaphone code corresponding to the String supplied
|
||||
*/
|
||||
public String metaphone(final String txt) {
|
||||
boolean hard = false;
|
||||
int txtLength;
|
||||
if (txt == null || (txtLength = txt.length()) == 0) {
|
||||
return "";
|
||||
}
|
||||
// single character is itself
|
||||
if (txtLength == 1) {
|
||||
return txt.toUpperCase(java.util.Locale.ENGLISH);
|
||||
}
|
||||
|
||||
final char[] inwd = txt.toUpperCase(java.util.Locale.ENGLISH).toCharArray();
|
||||
|
||||
final StringBuilder local = new StringBuilder(40); // manipulate
|
||||
final StringBuilder code = new StringBuilder(10); // output
|
||||
// handle initial 2 characters exceptions
|
||||
switch(inwd[0]) {
|
||||
case 'K':
|
||||
case 'G':
|
||||
case 'P': /* looking for KN, etc*/
|
||||
if (inwd[1] == 'N') {
|
||||
local.append(inwd, 1, inwd.length - 1);
|
||||
} else {
|
||||
local.append(inwd);
|
||||
}
|
||||
break;
|
||||
case 'A': /* looking for AE */
|
||||
if (inwd[1] == 'E') {
|
||||
local.append(inwd, 1, inwd.length - 1);
|
||||
} else {
|
||||
local.append(inwd);
|
||||
}
|
||||
break;
|
||||
case 'W': /* looking for WR or WH */
|
||||
if (inwd[1] == 'R') { // WR -> R
|
||||
local.append(inwd, 1, inwd.length - 1);
|
||||
break;
|
||||
}
|
||||
if (inwd[1] == 'H') {
|
||||
local.append(inwd, 1, inwd.length - 1);
|
||||
local.setCharAt(0, 'W'); // WH -> W
|
||||
} else {
|
||||
local.append(inwd);
|
||||
}
|
||||
break;
|
||||
case 'X': /* initial X becomes S */
|
||||
inwd[0] = 'S';
|
||||
local.append(inwd);
|
||||
break;
|
||||
default:
|
||||
local.append(inwd);
|
||||
} // now local has working string with initials fixed
|
||||
|
||||
final int wdsz = local.length();
|
||||
int n = 0;
|
||||
|
||||
while (code.length() < this.getMaxCodeLen() &&
|
||||
n < wdsz ) { // max code size of 4 works well
|
||||
final char symb = local.charAt(n);
|
||||
// remove duplicate letters except C
|
||||
if (symb != 'C' && isPreviousChar( local, n, symb ) ) {
|
||||
n++;
|
||||
} else { // not dup
|
||||
switch(symb) {
|
||||
case 'A':
|
||||
case 'E':
|
||||
case 'I':
|
||||
case 'O':
|
||||
case 'U':
|
||||
if (n == 0) {
|
||||
code.append(symb);
|
||||
}
|
||||
break; // only use vowel if leading char
|
||||
case 'B':
|
||||
if ( isPreviousChar(local, n, 'M') &&
|
||||
isLastChar(wdsz, n) ) { // B is silent if word ends in MB
|
||||
break;
|
||||
}
|
||||
code.append(symb);
|
||||
break;
|
||||
case 'C': // lots of C special cases
|
||||
/* discard if SCI, SCE or SCY */
|
||||
if ( isPreviousChar(local, n, 'S') &&
|
||||
!isLastChar(wdsz, n) &&
|
||||
FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) {
|
||||
break;
|
||||
}
|
||||
if (regionMatch(local, n, "CIA")) { // "CIA" -> X
|
||||
code.append('X');
|
||||
break;
|
||||
}
|
||||
if (!isLastChar(wdsz, n) &&
|
||||
FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
|
||||
code.append('S');
|
||||
break; // CI,CE,CY -> S
|
||||
}
|
||||
if (isPreviousChar(local, n, 'S') &&
|
||||
isNextChar(local, n, 'H') ) { // SCH->sk
|
||||
code.append('K');
|
||||
break;
|
||||
}
|
||||
if (isNextChar(local, n, 'H')) { // detect CH
|
||||
if (n == 0 &&
|
||||
wdsz >= 3 &&
|
||||
isVowel(local,2) ) { // CH consonant -> K consonant
|
||||
code.append('K');
|
||||
} else {
|
||||
code.append('X'); // CHvowel -> X
|
||||
}
|
||||
} else {
|
||||
code.append('K');
|
||||
}
|
||||
break;
|
||||
case 'D':
|
||||
if (!isLastChar(wdsz, n + 1) &&
|
||||
isNextChar(local, n, 'G') &&
|
||||
FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
|
||||
code.append('J'); n += 2;
|
||||
} else {
|
||||
code.append('T');
|
||||
}
|
||||
break;
|
||||
case 'G': // GH silent at end or before consonant
|
||||
if (isLastChar(wdsz, n + 1) &&
|
||||
isNextChar(local, n, 'H')) {
|
||||
break;
|
||||
}
|
||||
if (!isLastChar(wdsz, n + 1) &&
|
||||
isNextChar(local,n,'H') &&
|
||||
!isVowel(local,n+2)) {
|
||||
break;
|
||||
}
|
||||
if (n > 0 &&
|
||||
( regionMatch(local, n, "GN") ||
|
||||
regionMatch(local, n, "GNED") ) ) {
|
||||
break; // silent G
|
||||
}
|
||||
if (isPreviousChar(local, n, 'G')) {
|
||||
// NOTE: Given that duplicated chars are removed, I don't see how this can ever be true
|
||||
hard = true;
|
||||
} else {
|
||||
hard = false;
|
||||
}
|
||||
if (!isLastChar(wdsz, n) &&
|
||||
FRONTV.indexOf(local.charAt(n + 1)) >= 0 &&
|
||||
!hard) {
|
||||
code.append('J');
|
||||
} else {
|
||||
code.append('K');
|
||||
}
|
||||
break;
|
||||
case 'H':
|
||||
if (isLastChar(wdsz, n)) {
|
||||
break; // terminal H
|
||||
}
|
||||
if (n > 0 &&
|
||||
VARSON.indexOf(local.charAt(n - 1)) >= 0) {
|
||||
break;
|
||||
}
|
||||
if (isVowel(local,n+1)) {
|
||||
code.append('H'); // Hvowel
|
||||
}
|
||||
break;
|
||||
case 'F':
|
||||
case 'J':
|
||||
case 'L':
|
||||
case 'M':
|
||||
case 'N':
|
||||
case 'R':
|
||||
code.append(symb);
|
||||
break;
|
||||
case 'K':
|
||||
if (n > 0) { // not initial
|
||||
if (!isPreviousChar(local, n, 'C')) {
|
||||
code.append(symb);
|
||||
}
|
||||
} else {
|
||||
code.append(symb); // initial K
|
||||
}
|
||||
break;
|
||||
case 'P':
|
||||
if (isNextChar(local,n,'H')) {
|
||||
// PH -> F
|
||||
code.append('F');
|
||||
} else {
|
||||
code.append(symb);
|
||||
}
|
||||
break;
|
||||
case 'Q':
|
||||
code.append('K');
|
||||
break;
|
||||
case 'S':
|
||||
if (regionMatch(local,n,"SH") ||
|
||||
regionMatch(local,n,"SIO") ||
|
||||
regionMatch(local,n,"SIA")) {
|
||||
code.append('X');
|
||||
} else {
|
||||
code.append('S');
|
||||
}
|
||||
break;
|
||||
case 'T':
|
||||
if (regionMatch(local,n,"TIA") ||
|
||||
regionMatch(local,n,"TIO")) {
|
||||
code.append('X');
|
||||
break;
|
||||
}
|
||||
if (regionMatch(local,n,"TCH")) {
|
||||
// Silent if in "TCH"
|
||||
break;
|
||||
}
|
||||
// substitute numeral 0 for TH (resembles theta after all)
|
||||
if (regionMatch(local,n,"TH")) {
|
||||
code.append('0');
|
||||
} else {
|
||||
code.append('T');
|
||||
}
|
||||
break;
|
||||
case 'V':
|
||||
code.append('F'); break;
|
||||
case 'W':
|
||||
case 'Y': // silent if not followed by vowel
|
||||
if (!isLastChar(wdsz,n) &&
|
||||
isVowel(local,n+1)) {
|
||||
code.append(symb);
|
||||
}
|
||||
break;
|
||||
case 'X':
|
||||
code.append('K');
|
||||
code.append('S');
|
||||
break;
|
||||
case 'Z':
|
||||
code.append('S');
|
||||
break;
|
||||
default:
|
||||
// do nothing
|
||||
break;
|
||||
} // end switch
|
||||
n++;
|
||||
} // end else from symb != 'C'
|
||||
if (code.length() > this.getMaxCodeLen()) {
|
||||
code.setLength(this.getMaxCodeLen());
|
||||
}
|
||||
}
|
||||
return code.toString();
|
||||
}
|
||||
|
||||
private boolean isVowel(final StringBuilder string, final int index) {
|
||||
return VOWELS.indexOf(string.charAt(index)) >= 0;
|
||||
}
|
||||
|
||||
private boolean isPreviousChar(final StringBuilder string, final int index, final char c) {
|
||||
boolean matches = false;
|
||||
if( index > 0 &&
|
||||
index < string.length() ) {
|
||||
matches = string.charAt(index - 1) == c;
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
private boolean isNextChar(final StringBuilder string, final int index, final char c) {
|
||||
boolean matches = false;
|
||||
if( index >= 0 &&
|
||||
index < string.length() - 1 ) {
|
||||
matches = string.charAt(index + 1) == c;
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
private boolean regionMatch(final StringBuilder string, final int index, final String test) {
|
||||
boolean matches = false;
|
||||
if( index >= 0 &&
|
||||
index + test.length() - 1 < string.length() ) {
|
||||
final String substring = string.substring( index, index + test.length());
|
||||
matches = substring.equals( test );
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
private boolean isLastChar(final int wdsz, final int n) {
|
||||
return n + 1 == wdsz;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Encodes an Object using the metaphone algorithm. This method
|
||||
* is provided in order to satisfy the requirements of the
|
||||
* Encoder interface, and will throw an EncoderException if the
|
||||
* supplied object is not of type java.lang.String.
|
||||
*
|
||||
* @param obj Object to encode
|
||||
* @return An object (or type java.lang.String) containing the
|
||||
* metaphone code which corresponds to the String supplied.
|
||||
* @throws EncoderException if the parameter supplied is not
|
||||
* of type java.lang.String
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (!(obj instanceof String)) {
|
||||
throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String");
|
||||
}
|
||||
return metaphone((String) obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a String using the Metaphone algorithm.
|
||||
*
|
||||
* @param str String object to encode
|
||||
* @return The metaphone code corresponding to the String supplied
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String str) {
|
||||
return metaphone(str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests is the metaphones of two strings are identical.
|
||||
*
|
||||
* @param str1 First of two strings to compare
|
||||
* @param str2 Second of two strings to compare
|
||||
* @return <code>true</code> if the metaphones of these strings are identical,
|
||||
* <code>false</code> otherwise.
|
||||
*/
|
||||
public boolean isMetaphoneEqual(final String str1, final String str2) {
|
||||
return metaphone(str1).equals(metaphone(str2));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maxCodeLen.
|
||||
* @return int
|
||||
*/
|
||||
public int getMaxCodeLen() { return this.maxCodeLen; }
|
||||
|
||||
/**
|
||||
* Sets the maxCodeLen.
|
||||
* @param maxCodeLen The maxCodeLen to set
|
||||
*/
|
||||
public void setMaxCodeLen(final int maxCodeLen) { this.maxCodeLen = maxCodeLen; }
|
||||
|
||||
}
|
|
@ -0,0 +1,319 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Encodes a string into a NYSIIS value. NYSIIS is an encoding used to relate similar names, but can also be used as a
|
||||
* general purpose scheme to find words with similar phonemes.
|
||||
* <p>
|
||||
* NYSIIS features an accuracy increase of 2.7% over the traditional Soundex algorithm.
|
||||
* <p>
|
||||
* Algorithm description:
|
||||
* <pre>
|
||||
* 1. Transcode first characters of name
|
||||
* 1a. MAC -> MCC
|
||||
* 1b. KN -> NN
|
||||
* 1c. K -> C
|
||||
* 1d. PH -> FF
|
||||
* 1e. PF -> FF
|
||||
* 1f. SCH -> SSS
|
||||
* 2. Transcode last characters of name
|
||||
* 2a. EE, IE -> Y
|
||||
* 2b. DT,RT,RD,NT,ND -> D
|
||||
* 3. First character of key = first character of name
|
||||
* 4. Transcode remaining characters by following these rules, incrementing by one character each time
|
||||
* 4a. EV -> AF else A,E,I,O,U -> A
|
||||
* 4b. Q -> G
|
||||
* 4c. Z -> S
|
||||
* 4d. M -> N
|
||||
* 4e. KN -> N else K -> C
|
||||
* 4f. SCH -> SSS
|
||||
* 4g. PH -> FF
|
||||
* 4h. H -> If previous or next is nonvowel, previous
|
||||
* 4i. W -> If previous is vowel, previous
|
||||
* 4j. Add current to key if current != last key character
|
||||
* 5. If last character is S, remove it
|
||||
* 6. If last characters are AY, replace with Y
|
||||
* 7. If last character is A, remove it
|
||||
* 8. Collapse all strings of repeated characters
|
||||
* 9. Add original first character of name as first character of key
|
||||
* </pre>
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @see <a href="http://en.wikipedia.org/wiki/NYSIIS">NYSIIS on Wikipedia</a>
|
||||
* @see <a href="http://www.dropby.com/NYSIIS.html">NYSIIS on dropby.com</a>
|
||||
* @see Soundex
|
||||
* @since 1.7
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Nysiis implements StringEncoder {
|
||||
|
||||
private static final char[] CHARS_A = new char[] { 'A' };
|
||||
private static final char[] CHARS_AF = new char[] { 'A', 'F' };
|
||||
private static final char[] CHARS_C = new char[] { 'C' };
|
||||
private static final char[] CHARS_FF = new char[] { 'F', 'F' };
|
||||
private static final char[] CHARS_G = new char[] { 'G' };
|
||||
private static final char[] CHARS_N = new char[] { 'N' };
|
||||
private static final char[] CHARS_NN = new char[] { 'N', 'N' };
|
||||
private static final char[] CHARS_S = new char[] { 'S' };
|
||||
private static final char[] CHARS_SSS = new char[] { 'S', 'S', 'S' };
|
||||
|
||||
private static final Pattern PAT_MAC = Pattern.compile("^MAC");
|
||||
private static final Pattern PAT_KN = Pattern.compile("^KN");
|
||||
private static final Pattern PAT_K = Pattern.compile("^K");
|
||||
private static final Pattern PAT_PH_PF = Pattern.compile("^(PH|PF)");
|
||||
private static final Pattern PAT_SCH = Pattern.compile("^SCH");
|
||||
private static final Pattern PAT_EE_IE = Pattern.compile("(EE|IE)$");
|
||||
private static final Pattern PAT_DT_ETC = Pattern.compile("(DT|RT|RD|NT|ND)$");
|
||||
|
||||
private static final char SPACE = ' ';
|
||||
private static final int TRUE_LENGTH = 6;
|
||||
|
||||
/**
|
||||
* Tests if the given character is a vowel.
|
||||
*
|
||||
* @param c
|
||||
* the character to test
|
||||
* @return <code>true</code> if the character is a vowel, <code>false</code> otherwise
|
||||
*/
|
||||
private static boolean isVowel(final char c) {
|
||||
return c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U';
|
||||
}
|
||||
|
||||
/**
|
||||
* Transcodes the remaining parts of the String. The method operates on a sliding window, looking at 4 characters at
|
||||
* a time: [i-1, i, i+1, i+2].
|
||||
*
|
||||
* @param prev
|
||||
* the previous character
|
||||
* @param curr
|
||||
* the current character
|
||||
* @param next
|
||||
* the next character
|
||||
* @param aNext
|
||||
* the after next character
|
||||
* @return a transcoded array of characters, starting from the current position
|
||||
*/
|
||||
private static char[] transcodeRemaining(final char prev, final char curr, final char next, final char aNext) {
|
||||
// 1. EV -> AF
|
||||
if (curr == 'E' && next == 'V') {
|
||||
return CHARS_AF;
|
||||
}
|
||||
|
||||
// A, E, I, O, U -> A
|
||||
if (isVowel(curr)) {
|
||||
return CHARS_A;
|
||||
}
|
||||
|
||||
// 2. Q -> G, Z -> S, M -> N
|
||||
if (curr == 'Q') {
|
||||
return CHARS_G;
|
||||
} else if (curr == 'Z') {
|
||||
return CHARS_S;
|
||||
} else if (curr == 'M') {
|
||||
return CHARS_N;
|
||||
}
|
||||
|
||||
// 3. KN -> NN else K -> C
|
||||
if (curr == 'K') {
|
||||
if (next == 'N') {
|
||||
return CHARS_NN;
|
||||
} else {
|
||||
return CHARS_C;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. SCH -> SSS
|
||||
if (curr == 'S' && next == 'C' && aNext == 'H') {
|
||||
return CHARS_SSS;
|
||||
}
|
||||
|
||||
// PH -> FF
|
||||
if (curr == 'P' && next == 'H') {
|
||||
return CHARS_FF;
|
||||
}
|
||||
|
||||
// 5. H -> If previous or next is a non vowel, previous.
|
||||
if (curr == 'H' && (!isVowel(prev) || !isVowel(next))) {
|
||||
return new char[] { prev };
|
||||
}
|
||||
|
||||
// 6. W -> If previous is vowel, previous.
|
||||
if (curr == 'W' && isVowel(prev)) {
|
||||
return new char[] { prev };
|
||||
}
|
||||
|
||||
return new char[] { curr };
|
||||
}
|
||||
|
||||
/** Indicates the strict mode. */
|
||||
private final boolean strict;
|
||||
|
||||
/**
 * Creates a {@link Nysiis} encoder in strict mode (the original form of the algorithm, documented as
 * limiting encoded strings to a maximum length of 6). Equivalent to {@code new Nysiis(true)}.
 */
public Nysiis() {
    this(true);
}
|
||||
|
||||
/**
 * Creates a {@link Nysiis} encoder with the given strict mode. The documented meaning of the flag is:
 *
 * <ul>
 * <li><code>true</code>: encoded strings have a maximum length of 6</li>
 * <li><code>false</code>: encoded strings may have arbitrary length</li>
 * </ul>
 *
 * @param strict
 *            the strict mode, later reported by {@link #isStrict()}
 */
public Nysiis(final boolean strict) {
    this.strict = strict;
}
|
||||
|
||||
/**
|
||||
* Encodes an Object using the NYSIIS algorithm. This method is provided in order to satisfy the requirements of the
|
||||
* Encoder interface, and will throw an {@link EncoderException} if the supplied object is not of type
|
||||
* {@link String}.
|
||||
*
|
||||
* @param obj
|
||||
* Object to encode
|
||||
* @return An object (or a {@link String}) containing the NYSIIS code which corresponds to the given String.
|
||||
* @throws EncoderException
|
||||
* if the parameter supplied is not of a {@link String}
|
||||
* @throws IllegalArgumentException
|
||||
* if a character is not mapped
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (!(obj instanceof String)) {
|
||||
throw new EncoderException("Parameter supplied to Nysiis encode is not of type java.lang.String");
|
||||
}
|
||||
return this.nysiis((String) obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a String using the NYSIIS algorithm.
|
||||
*
|
||||
* @param str
|
||||
* A String object to encode
|
||||
* @return A Nysiis code corresponding to the String supplied
|
||||
* @throws IllegalArgumentException
|
||||
* if a character is not mapped
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String str) {
|
||||
return this.nysiis(str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates the strict mode for this {@link Nysiis} encoder.
|
||||
*
|
||||
* @return <code>true</code> if the encoder is configured for strict mode, <code>false</code> otherwise
|
||||
*/
|
||||
public boolean isStrict() {
|
||||
return this.strict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the NYSIIS code for a given String object.
|
||||
*
|
||||
* @param str
|
||||
* String to encode using the NYSIIS algorithm
|
||||
* @return A NYSIIS code for the String supplied
|
||||
*/
|
||||
public String nysiis(String str) {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Use the same clean rules as Soundex
|
||||
str = SoundexUtils.clean(str);
|
||||
|
||||
if (str.length() == 0) {
|
||||
return str;
|
||||
}
|
||||
|
||||
// Translate first characters of name:
|
||||
// MAC -> MCC, KN -> NN, K -> C, PH | PF -> FF, SCH -> SSS
|
||||
str = PAT_MAC.matcher(str).replaceFirst("MCC");
|
||||
str = PAT_KN.matcher(str).replaceFirst("NN");
|
||||
str = PAT_K.matcher(str).replaceFirst("C");
|
||||
str = PAT_PH_PF.matcher(str).replaceFirst("FF");
|
||||
str = PAT_SCH.matcher(str).replaceFirst("SSS");
|
||||
|
||||
// Translate last characters of name:
|
||||
// EE -> Y, IE -> Y, DT | RT | RD | NT | ND -> D
|
||||
str = PAT_EE_IE.matcher(str).replaceFirst("Y");
|
||||
str = PAT_DT_ETC.matcher(str).replaceFirst("D");
|
||||
|
||||
// First character of key = first character of name.
|
||||
final StringBuilder key = new StringBuilder(str.length());
|
||||
key.append(str.charAt(0));
|
||||
|
||||
// Transcode remaining characters, incrementing by one character each time
|
||||
final char[] chars = str.toCharArray();
|
||||
final int len = chars.length;
|
||||
|
||||
for (int i = 1; i < len; i++) {
|
||||
final char next = i < len - 1 ? chars[i + 1] : SPACE;
|
||||
final char aNext = i < len - 2 ? chars[i + 2] : SPACE;
|
||||
final char[] transcoded = transcodeRemaining(chars[i - 1], chars[i], next, aNext);
|
||||
System.arraycopy(transcoded, 0, chars, i, transcoded.length);
|
||||
|
||||
// only append the current char to the key if it is different from the last one
|
||||
if (chars[i] != chars[i - 1]) {
|
||||
key.append(chars[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (key.length() > 1) {
|
||||
char lastChar = key.charAt(key.length() - 1);
|
||||
|
||||
// If last character is S, remove it.
|
||||
if (lastChar == 'S') {
|
||||
key.deleteCharAt(key.length() - 1);
|
||||
lastChar = key.charAt(key.length() - 1);
|
||||
}
|
||||
|
||||
if (key.length() > 2) {
|
||||
final char last2Char = key.charAt(key.length() - 2);
|
||||
// If last characters are AY, replace with Y.
|
||||
if (last2Char == 'A' && lastChar == 'Y') {
|
||||
key.deleteCharAt(key.length() - 2);
|
||||
}
|
||||
}
|
||||
|
||||
// If last character is A, remove it.
|
||||
if (lastChar == 'A') {
|
||||
key.deleteCharAt(key.length() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
final String string = key.toString();
|
||||
return this.isStrict() ? string.substring(0, Math.min(TRUE_LENGTH, string.length())) : string;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,205 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Encodes a string into a Refined Soundex value. A refined soundex code is
|
||||
* optimized for spell checking words. Soundex method originally developed by
|
||||
* <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>.
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public class RefinedSoundex implements StringEncoder {
|
||||
|
||||
/**
|
||||
* @since 1.4
|
||||
*/
|
||||
public static final String US_ENGLISH_MAPPING_STRING = "01360240043788015936020505";
|
||||
|
||||
/**
|
||||
* RefinedSoundex is *refined* for a number of reasons one being that the
|
||||
* mappings have been altered. This implementation contains default
|
||||
* mappings for US English.
|
||||
*/
|
||||
private static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray();
|
||||
|
||||
/**
|
||||
* Every letter of the alphabet is "mapped" to a numerical value. This char
|
||||
* array holds the values to which each letter is mapped. This
|
||||
* implementation contains a default map for US_ENGLISH
|
||||
*/
|
||||
private final char[] soundexMapping;
|
||||
|
||||
/**
|
||||
* This static variable contains an instance of the RefinedSoundex using
|
||||
* the US_ENGLISH mapping.
|
||||
*/
|
||||
public static final RefinedSoundex US_ENGLISH = new RefinedSoundex();
|
||||
|
||||
/**
|
||||
* Creates an instance of the RefinedSoundex object using the default US
|
||||
* English mapping.
|
||||
*/
|
||||
public RefinedSoundex() {
|
||||
this.soundexMapping = US_ENGLISH_MAPPING;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a refined soundex instance using a custom mapping. This
|
||||
* constructor can be used to customize the mapping, and/or possibly
|
||||
* provide an internationalized mapping for a non-Western character set.
|
||||
*
|
||||
* @param mapping
|
||||
* Mapping array to use when finding the corresponding code for
|
||||
* a given character
|
||||
*/
|
||||
public RefinedSoundex(final char[] mapping) {
|
||||
this.soundexMapping = new char[mapping.length];
|
||||
System.arraycopy(mapping, 0, this.soundexMapping, 0, mapping.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a refined Soundex instance using a custom mapping. This constructor can be used to customize the mapping,
|
||||
* and/or possibly provide an internationalized mapping for a non-Western character set.
|
||||
*
|
||||
* @param mapping
|
||||
* Mapping string to use when finding the corresponding code for a given character
|
||||
* @since 1.4
|
||||
*/
|
||||
public RefinedSoundex(final String mapping) {
|
||||
this.soundexMapping = mapping.toCharArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of characters in the two encoded Strings that are the
|
||||
* same. This return value ranges from 0 to the length of the shortest
|
||||
* encoded String: 0 indicates little or no similarity, and 4 out of 4 (for
|
||||
* example) indicates strong similarity or identical values. For refined
|
||||
* Soundex, the return value can be greater than 4.
|
||||
*
|
||||
* @param s1
|
||||
* A String that will be encoded and compared.
|
||||
* @param s2
|
||||
* A String that will be encoded and compared.
|
||||
* @return The number of characters in the two encoded Strings that are the
|
||||
* same from 0 to to the length of the shortest encoded String.
|
||||
*
|
||||
* @see SoundexUtils#difference(StringEncoder,String,String)
|
||||
* @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
|
||||
* MS T-SQL DIFFERENCE</a>
|
||||
*
|
||||
* @throws EncoderException
|
||||
* if an error occurs encoding one of the strings
|
||||
* @since 1.3
|
||||
*/
|
||||
public int difference(final String s1, final String s2) throws EncoderException {
|
||||
return SoundexUtils.difference(this, s1, s2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an Object using the refined soundex algorithm. This method is
|
||||
* provided in order to satisfy the requirements of the Encoder interface,
|
||||
* and will throw an EncoderException if the supplied object is not of type
|
||||
* java.lang.String.
|
||||
*
|
||||
* @param obj
|
||||
* Object to encode
|
||||
* @return An object (or type java.lang.String) containing the refined
|
||||
* soundex code which corresponds to the String supplied.
|
||||
* @throws EncoderException
|
||||
* if the parameter supplied is not of type java.lang.String
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (!(obj instanceof String)) {
|
||||
throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String");
|
||||
}
|
||||
return soundex((String) obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a String using the refined soundex algorithm.
|
||||
*
|
||||
* @param str
|
||||
* A String object to encode
|
||||
* @return A Soundex code corresponding to the String supplied
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String str) {
|
||||
return soundex(str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the mapping code for a given character. The mapping codes are
|
||||
* maintained in an internal char array named soundexMapping, and the
|
||||
* default values of these mappings are US English.
|
||||
*
|
||||
* @param c
|
||||
* char to get mapping for
|
||||
* @return A character (really a numeral) to return for the given char
|
||||
*/
|
||||
char getMappingCode(final char c) {
|
||||
if (!Character.isLetter(c)) {
|
||||
return 0;
|
||||
}
|
||||
return this.soundexMapping[Character.toUpperCase(c) - 'A'];
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the Refined Soundex code for a given String object.
|
||||
*
|
||||
* @param str
|
||||
* String to encode using the Refined Soundex algorithm
|
||||
* @return A soundex code for the String supplied
|
||||
*/
|
||||
public String soundex(String str) {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
str = SoundexUtils.clean(str);
|
||||
if (str.length() == 0) {
|
||||
return str;
|
||||
}
|
||||
|
||||
final StringBuilder sBuf = new StringBuilder();
|
||||
sBuf.append(str.charAt(0));
|
||||
|
||||
char last, current;
|
||||
last = '*';
|
||||
|
||||
for (int i = 0; i < str.length(); i++) {
|
||||
|
||||
current = getMappingCode(str.charAt(i));
|
||||
if (current == last) {
|
||||
continue;
|
||||
} else if (current != 0) {
|
||||
sBuf.append(current);
|
||||
}
|
||||
|
||||
last = current;
|
||||
|
||||
}
|
||||
|
||||
return sBuf.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,254 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names, but can also be used as a
|
||||
* general purpose scheme to find word with similar phonemes.
|
||||
*
|
||||
* This class is thread-safe.
|
||||
* Although not strictly immutable, the {@link #maxLength} field is not actually used.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Soundex implements StringEncoder {
|
||||
|
||||
/**
|
||||
* This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position
|
||||
* means do not encode.
|
||||
* <p>
|
||||
* (This constant is provided as both an implementation convenience and to allow Javadoc to pick
|
||||
* up the value for the constant values page.)
|
||||
* </p>
|
||||
*
|
||||
* @see #US_ENGLISH_MAPPING
|
||||
*/
|
||||
public static final String US_ENGLISH_MAPPING_STRING = "0123012#02245501262301#202";
|
||||
|
||||
/**
|
||||
* This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position
|
||||
* means do not encode.
|
||||
*
|
||||
* @see Soundex#Soundex(char[])
|
||||
*/
|
||||
private static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray();
|
||||
|
||||
/**
|
||||
* An instance of Soundex using the US_ENGLISH_MAPPING mapping.
|
||||
*
|
||||
* @see #US_ENGLISH_MAPPING
|
||||
*/
|
||||
public static final Soundex US_ENGLISH = new Soundex();
|
||||
|
||||
/**
|
||||
* The maximum length of a Soundex code - Soundex codes are only four characters by definition.
|
||||
*
|
||||
* @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
private int maxLength = 4;
|
||||
|
||||
/**
|
||||
* Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each
|
||||
* letter is mapped. This implementation contains a default map for US_ENGLISH
|
||||
*/
|
||||
private final char[] soundexMapping;
|
||||
|
||||
/**
|
||||
* Creates an instance using US_ENGLISH_MAPPING
|
||||
*
|
||||
* @see Soundex#Soundex(char[])
|
||||
* @see Soundex#US_ENGLISH_MAPPING
|
||||
*/
|
||||
public Soundex() {
|
||||
this.soundexMapping = US_ENGLISH_MAPPING;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a soundex instance using the given mapping. This constructor can be used to provide an internationalized
|
||||
* mapping for a non-Western character set.
|
||||
*
|
||||
* Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each
|
||||
* letter is mapped. This implementation contains a default map for US_ENGLISH
|
||||
*
|
||||
* @param mapping
|
||||
* Mapping array to use when finding the corresponding code for a given character
|
||||
*/
|
||||
public Soundex(final char[] mapping) {
|
||||
this.soundexMapping = new char[mapping.length];
|
||||
System.arraycopy(mapping, 0, this.soundexMapping, 0, mapping.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a refined soundex instance using a custom mapping. This constructor can be used to customize the mapping,
|
||||
* and/or possibly provide an internationalized mapping for a non-Western character set.
|
||||
*
|
||||
* @param mapping
|
||||
* Mapping string to use when finding the corresponding code for a given character
|
||||
* @since 1.4
|
||||
*/
|
||||
public Soundex(final String mapping) {
|
||||
this.soundexMapping = mapping.toCharArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the Strings and returns the number of characters in the two encoded Strings that are the same. This
|
||||
* return value ranges from 0 through 4: 0 indicates little or no similarity, and 4 indicates strong similarity or
|
||||
* identical values.
|
||||
*
|
||||
* @param s1
|
||||
* A String that will be encoded and compared.
|
||||
* @param s2
|
||||
* A String that will be encoded and compared.
|
||||
* @return The number of characters in the two encoded Strings that are the same from 0 to 4.
|
||||
*
|
||||
* @see SoundexUtils#difference(StringEncoder,String,String)
|
||||
* @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> MS
|
||||
* T-SQL DIFFERENCE </a>
|
||||
*
|
||||
* @throws EncoderException
|
||||
* if an error occurs encoding one of the strings
|
||||
* @since 1.3
|
||||
*/
|
||||
public int difference(final String s1, final String s2) throws EncoderException {
|
||||
return SoundexUtils.difference(this, s1, s2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an Object using the soundex algorithm. This method is provided in order to satisfy the requirements of
|
||||
* the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String.
|
||||
*
|
||||
* @param obj
|
||||
* Object to encode
|
||||
* @return An object (or type java.lang.String) containing the soundex code which corresponds to the String
|
||||
* supplied.
|
||||
* @throws EncoderException
|
||||
* if the parameter supplied is not of type java.lang.String
|
||||
* @throws IllegalArgumentException
|
||||
* if a character is not mapped
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (!(obj instanceof String)) {
|
||||
throw new EncoderException("Parameter supplied to Soundex encode is not of type java.lang.String");
|
||||
}
|
||||
return soundex((String) obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a String using the soundex algorithm.
|
||||
*
|
||||
* @param str
|
||||
* A String object to encode
|
||||
* @return A Soundex code corresponding to the String supplied
|
||||
* @throws IllegalArgumentException
|
||||
* if a character is not mapped
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String str) {
|
||||
return soundex(str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maxLength. Standard Soundex
|
||||
*
|
||||
* @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
|
||||
* @return int
|
||||
*/
|
||||
@Deprecated
|
||||
public int getMaxLength() {
|
||||
return this.maxLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the soundex mapping.
|
||||
*
|
||||
* @return soundexMapping.
|
||||
*/
|
||||
private char[] getSoundexMapping() {
|
||||
return this.soundexMapping;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps the given upper-case character to its Soundex code.
|
||||
*
|
||||
* @param ch
|
||||
* An upper-case character.
|
||||
* @return A Soundex code.
|
||||
* @throws IllegalArgumentException
|
||||
* Thrown if <code>ch</code> is not mapped.
|
||||
*/
|
||||
private char map(final char ch) {
|
||||
final int index = ch - 'A';
|
||||
if (index < 0 || index >= this.getSoundexMapping().length) {
|
||||
throw new IllegalArgumentException("The character is not mapped: " + ch);
|
||||
}
|
||||
return this.getSoundexMapping()[index];
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maxLength.
|
||||
*
|
||||
* @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
|
||||
* @param maxLength
|
||||
* The maxLength to set
|
||||
*/
|
||||
@Deprecated
|
||||
public void setMaxLength(final int maxLength) {
|
||||
this.maxLength = maxLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the Soundex code for a given String object.
|
||||
*
|
||||
* @param str
|
||||
* String to encode using the Soundex algorithm
|
||||
* @return A soundex code for the String supplied
|
||||
* @throws IllegalArgumentException
|
||||
* if a character is not mapped
|
||||
*/
|
||||
public String soundex(String str) {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
str = SoundexUtils.clean(str);
|
||||
if (str.length() == 0) {
|
||||
return str;
|
||||
}
|
||||
final char out[] = {'0', '0', '0', '0'};
|
||||
char last, mapped;
|
||||
int incount = 1, count = 1;
|
||||
out[0] = str.charAt(0);
|
||||
// map() throws IllegalArgumentException
|
||||
last = this.map(str.charAt(0));
|
||||
while (incount < str.length() && count < out.length) {
|
||||
mapped = this.map(str.charAt(incount++));
|
||||
if (mapped == '0') {
|
||||
last = mapped;
|
||||
} else if (mapped != '#' && mapped != last) {
|
||||
out[count++] = mapped;
|
||||
last = mapped;
|
||||
}
|
||||
}
|
||||
return new String(out);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,124 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Utility methods for {@link Soundex} and {@link RefinedSoundex} classes.
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since 1.3
|
||||
*/
|
||||
final class SoundexUtils {
|
||||
|
||||
/**
|
||||
* Cleans up the input string before Soundex processing by only returning
|
||||
* upper case letters.
|
||||
*
|
||||
* @param str
|
||||
* The String to clean.
|
||||
* @return A clean String.
|
||||
*/
|
||||
static String clean(final String str) {
|
||||
if (str == null || str.length() == 0) {
|
||||
return str;
|
||||
}
|
||||
final int len = str.length();
|
||||
final char[] chars = new char[len];
|
||||
int count = 0;
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (Character.isLetter(str.charAt(i))) {
|
||||
chars[count++] = str.charAt(i);
|
||||
}
|
||||
}
|
||||
if (count == len) {
|
||||
return str.toUpperCase(java.util.Locale.ENGLISH);
|
||||
}
|
||||
return new String(chars, 0, count).toUpperCase(java.util.Locale.ENGLISH);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the Strings and returns the number of characters in the two
|
||||
* encoded Strings that are the same.
|
||||
* <ul>
|
||||
* <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
|
||||
* little or no similarity, and 4 indicates strong similarity or identical
|
||||
* values.</li>
|
||||
* <li>For refined Soundex, the return value can be greater than 4.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param encoder
|
||||
* The encoder to use to encode the Strings.
|
||||
* @param s1
|
||||
* A String that will be encoded and compared.
|
||||
* @param s2
|
||||
* A String that will be encoded and compared.
|
||||
* @return The number of characters in the two Soundex encoded Strings that
|
||||
* are the same.
|
||||
*
|
||||
* @see #differenceEncoded(String,String)
|
||||
* @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
|
||||
* MS T-SQL DIFFERENCE</a>
|
||||
*
|
||||
* @throws EncoderException
|
||||
* if an error occurs encoding one of the strings
|
||||
*/
|
||||
static int difference(final StringEncoder encoder, final String s1, final String s2) throws EncoderException {
|
||||
return differenceEncoded(encoder.encode(s1), encoder.encode(s2));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of characters in the two Soundex encoded Strings that
|
||||
* are the same.
|
||||
* <ul>
|
||||
* <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
|
||||
* little or no similarity, and 4 indicates strong similarity or identical
|
||||
* values.</li>
|
||||
* <li>For refined Soundex, the return value can be greater than 4.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param es1
|
||||
* An encoded String.
|
||||
* @param es2
|
||||
* An encoded String.
|
||||
* @return The number of characters in the two Soundex encoded Strings that
|
||||
* are the same.
|
||||
*
|
||||
* @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
|
||||
* MS T-SQL DIFFERENCE</a>
|
||||
*/
|
||||
static int differenceEncoded(final String es1, final String es2) {
|
||||
|
||||
if (es1 == null || es2 == null) {
|
||||
return 0;
|
||||
}
|
||||
final int lengthToMatch = Math.min(es1.length(), es2.length());
|
||||
int diff = 0;
|
||||
for (int i = 0; i < lengthToMatch; i++) {
|
||||
if (es1.charAt(i) == es2.charAt(i)) {
|
||||
diff++;
|
||||
}
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,181 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language.bm;
|
||||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Encodes strings into their Beider-Morse phonetic encoding.
|
||||
* <p>
|
||||
* Beider-Morse phonetic encodings are optimised for family names. However, they may be useful for a wide range of
|
||||
* words.
|
||||
* <p>
|
||||
* This encoder is intentionally mutable to allow dynamic configuration through bean properties. As such, it is mutable,
|
||||
* and may not be thread-safe. If you require a guaranteed thread-safe encoding then use {@link PhoneticEngine}
|
||||
* directly.
|
||||
* <p>
|
||||
* <b>Encoding overview</b>
|
||||
* <p>
|
||||
* Beider-Morse phonetic encoding is a multi-step process. Firstly, a table of rules is consulted to guess what
|
||||
* language the word comes from. For example, if it ends in "<code>ault</code>" then it infers that the word is French.
|
||||
* Next, the word is translated into a phonetic representation using a language-specific phonetics table. Some runs of
|
||||
* letters can be pronounced in multiple ways, and a single run of letters may be potentially broken up into phonemes at
|
||||
* different places, so this stage results in a set of possible language-specific phonetic representations. Lastly, this
|
||||
* language-specific phonetic representation is processed by a table of rules that re-writes it phonetically taking into
|
||||
* account systematic pronunciation differences between languages, to move it towards a pan-indo-european phonetic
|
||||
* representation. Again, sometimes there are multiple ways this could be done and sometimes things that can be
|
||||
* pronounced in several ways in the source language have only one way to represent them in this average phonetic
|
||||
* language, so the result is again a set of phonetic spellings.
|
||||
* <p>
|
||||
* Some names are treated as having multiple parts. This can be due to two things. Firstly, they may be hyphenated. In
|
||||
* this case, each individual hyphenated word is encoded, and then these are combined end-to-end for the final encoding.
|
||||
* Secondly, some names have standard prefixes, for example, "<code>Mac/Mc</code>" in Scottish (English) names. As
|
||||
* sometimes it is ambiguous whether the prefix is intended or is an accident of the spelling, the word is encoded once
|
||||
* with the prefix and once without it. The resulting encoding contains one and then the other result.
|
||||
* <p>
|
||||
* <b>Encoding format</b>
|
||||
* <p>
|
||||
* Individual phonetic spellings of an input word are represented in upper- and lower-case roman characters. Where there
|
||||
* are multiple possible phonetic representations, these are joined with a pipe (<code>|</code>) character. If multiple
|
||||
* hyphenated words were found, or if the word may contain a name prefix, each encoded word is placed in parentheses and
|
||||
* these blocks are then joined with hyphens. For example, "<code>d'ortley</code>" has a possible prefix. The form
|
||||
* without prefix encodes to "<code>ortlaj|ortlej</code>", while the form with prefix encodes to "
|
||||
* <code>dortlaj|dortlej</code>". Thus, the full, combined encoding is "<code>(ortlaj|ortlej)-(dortlaj|dortlej)</code>".
|
||||
* <p>
|
||||
* The encoded forms are often quite a bit longer than the input strings. This is because a single input may have many
|
||||
* potential phonetic interpretations. For example, "<code>Renault</code>" encodes to "
|
||||
* <code>rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult</code>". The <code>APPROX</code> rules will tend to produce larger
|
||||
* encodings as they consider a wider range of possible, approximate phonetic interpretations of the original word.
|
||||
* Down-stream applications may wish to further process the encoding for indexing or lookup purposes, for example, by
|
||||
* splitting on pipe (<code>|</code>) and indexing under each of these alternatives.
|
||||
* <p>
|
||||
* <b>Note</b>: this version of the Beider-Morse encoding is equivalent with v3.4 of the reference implementation.
|
||||
*
|
||||
* @see <a href="http://stevemorse.org/phonetics/bmpm.htm">Beider-Morse Phonetic Matching</a>
|
||||
* @see <a href="http://stevemorse.org/phoneticinfo.htm">Reference implementation</a>
|
||||
*
|
||||
* @since 1.6
|
||||
* @version $Id$
|
||||
*/
|
||||
public class BeiderMorseEncoder implements StringEncoder {
|
||||
// Implementation note: This class is a spring-friendly facade to PhoneticEngine. It allows read/write configuration
|
||||
// of an immutable PhoneticEngine instance that will be delegated to for the actual encoding.
|
||||
|
||||
// a cached object
|
||||
private PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
|
||||
|
||||
@Override
|
||||
public Object encode(final Object source) throws EncoderException {
|
||||
if (!(source instanceof String)) {
|
||||
throw new EncoderException("BeiderMorseEncoder encode parameter is not of type String");
|
||||
}
|
||||
return encode((String) source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String encode(final String source) throws EncoderException {
|
||||
if (source == null) {
|
||||
return null;
|
||||
}
|
||||
return this.engine.encode(source);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name type currently in operation.
|
||||
*
|
||||
* @return the NameType currently being used
|
||||
*/
|
||||
public NameType getNameType() {
|
||||
return this.engine.getNameType();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the rule type currently in operation.
|
||||
*
|
||||
* @return the RuleType currently being used
|
||||
*/
|
||||
public RuleType getRuleType() {
|
||||
return this.engine.getRuleType();
|
||||
}
|
||||
|
||||
/**
|
||||
* Discovers if multiple possible encodings are concatenated.
|
||||
*
|
||||
* @return true if multiple encodings are concatenated, false if just the first one is returned
|
||||
*/
|
||||
public boolean isConcat() {
|
||||
return this.engine.isConcat();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets how multiple possible phonetic encodings are combined.
|
||||
*
|
||||
* @param concat
|
||||
* true if multiple encodings are to be combined with a '|', false if just the first one is
|
||||
* to be considered
|
||||
*/
|
||||
public void setConcat(final boolean concat) {
|
||||
this.engine = new PhoneticEngine(this.engine.getNameType(),
|
||||
this.engine.getRuleType(),
|
||||
concat,
|
||||
this.engine.getMaxPhonemes());
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the type of name. Use {@link NameType#GENERIC} unless you specifically want phonetic encodings
|
||||
* optimized for Ashkenazi or Sephardic Jewish family names.
|
||||
*
|
||||
* @param nameType
|
||||
* the NameType in use
|
||||
*/
|
||||
public void setNameType(final NameType nameType) {
|
||||
this.engine = new PhoneticEngine(nameType,
|
||||
this.engine.getRuleType(),
|
||||
this.engine.isConcat(),
|
||||
this.engine.getMaxPhonemes());
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the rule type to apply. This will widen or narrow the range of phonetic encodings considered.
|
||||
*
|
||||
* @param ruleType
|
||||
* {@link RuleType#APPROX} or {@link RuleType#EXACT} for approximate or exact phonetic matches
|
||||
*/
|
||||
public void setRuleType(final RuleType ruleType) {
|
||||
this.engine = new PhoneticEngine(this.engine.getNameType(),
|
||||
ruleType,
|
||||
this.engine.isConcat(),
|
||||
this.engine.getMaxPhonemes());
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of maximum of phonemes that shall be considered by the engine.
|
||||
*
|
||||
* @param maxPhonemes
|
||||
* the maximum number of phonemes returned by the engine
|
||||
* @since 1.7
|
||||
*/
|
||||
public void setMaxPhonemes(final int maxPhonemes) {
|
||||
this.engine = new PhoneticEngine(this.engine.getNameType(),
|
||||
this.engine.getRuleType(),
|
||||
this.engine.isConcat(),
|
||||
maxPhonemes);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,231 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language.bm;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Language guessing utility.
|
||||
* <p>
|
||||
* This class encapsulates rules used to guess the possible languages that a word originates from. This is
|
||||
* done by reference to a whole series of rules distributed in resource files.
|
||||
* <p>
|
||||
* Instances of this class are typically managed through the static factory method instance().
|
||||
* Unless you are developing your own language guessing rules, you will not need to interact with this class directly.
|
||||
* <p>
|
||||
* This class is intended to be immutable and thread-safe.
|
||||
* <p>
|
||||
* <b>Lang resources</b>
|
||||
* <p>
|
||||
* Language guessing rules are typically loaded from resource files. These are UTF-8 encoded text files.
|
||||
* They are systematically named following the pattern:
|
||||
* <blockquote>org/apache/commons/codec/language/bm/lang.txt</blockquote>
|
||||
* The format of these resources is the following:
|
||||
* <ul>
|
||||
* <li><b>Rules:</b> whitespace separated strings.
|
||||
* There should be 3 columns to each row, and these will be interpreted as:
|
||||
* <ol>
|
||||
* <li>pattern: a regular expression.</li>
|
||||
* <li>languages: a '+'-separated list of languages.</li>
|
||||
* <li>acceptOnMatch: 'true' or 'false' indicating if a match rules in or rules out the language.</li>
|
||||
* </ol>
|
||||
* </li>
|
||||
* <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be
|
||||
* discarded as a comment.</li>
|
||||
* <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode.
|
||||
* This will skip all content until a line ending in '*' and '/' is found.</li>
|
||||
* <li><b>Blank lines:</b> All blank lines will be skipped.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Port of lang.php
|
||||
*
|
||||
* @since 1.6
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Lang {
|
||||
// Implementation note: This class is divided into two sections. The first part is a static factory interface that
|
||||
// exposes the LANGUAGE_RULES_RN resource as a Lang instance. The second part is the Lang instance methods that
|
||||
// encapsulate a particular language-guessing rule table and the language guessing itself.
|
||||
//
|
||||
// It may make sense in the future to expose the private constructor to allow power users to build custom language-
|
||||
// guessing rules, perhaps by marking it protected and allowing sub-classing. However, the vast majority of users
|
||||
// should be strongly encouraged to use the static factory <code>instance</code> method to get their Lang instances.
|
||||
|
||||
private static final class LangRule {
|
||||
private final boolean acceptOnMatch;
|
||||
private final Set<String> languages;
|
||||
private final Pattern pattern;
|
||||
|
||||
private LangRule(final Pattern pattern, final Set<String> languages, final boolean acceptOnMatch) {
|
||||
this.pattern = pattern;
|
||||
this.languages = languages;
|
||||
this.acceptOnMatch = acceptOnMatch;
|
||||
}
|
||||
|
||||
public boolean matches(final String txt) {
|
||||
return this.pattern.matcher(txt).find();
|
||||
}
|
||||
}
|
||||
|
||||
private static final Map<NameType, Lang> Langs = new EnumMap<NameType, Lang>(NameType.class);
|
||||
|
||||
private static final String LANGUAGE_RULES_RN = "org/apache/commons/codec/language/bm/%s_lang.txt";
|
||||
|
||||
static {
|
||||
for (final NameType s : NameType.values()) {
|
||||
Langs.put(s, loadFromResource(String.format(LANGUAGE_RULES_RN, s.getName()), Languages.getInstance(s)));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a Lang instance for one of the supported NameTypes.
|
||||
*
|
||||
* @param nameType
|
||||
* the NameType to look up
|
||||
* @return a Lang encapsulating the language guessing rules for that name type
|
||||
*/
|
||||
public static Lang instance(final NameType nameType) {
|
||||
return Langs.get(nameType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads language rules from a resource.
|
||||
* <p>
|
||||
* In normal use, you will obtain instances of Lang through the {@link #instance(NameType)} method.
|
||||
* You will only need to call this yourself if you are developing custom language mapping rules.
|
||||
*
|
||||
* @param languageRulesResourceName
|
||||
* the fully-qualified resource name to load
|
||||
* @param languages
|
||||
* the languages that these rules will support
|
||||
* @return a Lang encapsulating the loaded language-guessing rules.
|
||||
*/
|
||||
public static Lang loadFromResource(final String languageRulesResourceName, final Languages languages) {
|
||||
final List<LangRule> rules = new ArrayList<LangRule>();
|
||||
final InputStream lRulesIS = Lang.class.getClassLoader().getResourceAsStream(languageRulesResourceName);
|
||||
|
||||
if (lRulesIS == null) {
|
||||
throw new IllegalStateException("Unable to resolve required resource:" + LANGUAGE_RULES_RN);
|
||||
}
|
||||
|
||||
final Scanner scanner = new Scanner(lRulesIS, ResourceConstants.ENCODING);
|
||||
try {
|
||||
boolean inExtendedComment = false;
|
||||
while (scanner.hasNextLine()) {
|
||||
final String rawLine = scanner.nextLine();
|
||||
String line = rawLine;
|
||||
if (inExtendedComment) {
|
||||
// check for closing comment marker, otherwise discard doc comment line
|
||||
if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
|
||||
inExtendedComment = false;
|
||||
}
|
||||
} else {
|
||||
if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
|
||||
inExtendedComment = true;
|
||||
} else {
|
||||
// discard comments
|
||||
final int cmtI = line.indexOf(ResourceConstants.CMT);
|
||||
if (cmtI >= 0) {
|
||||
line = line.substring(0, cmtI);
|
||||
}
|
||||
|
||||
// trim leading-trailing whitespace
|
||||
line = line.trim();
|
||||
|
||||
if (line.length() == 0) {
|
||||
continue; // empty lines can be safely skipped
|
||||
}
|
||||
|
||||
// split it up
|
||||
final String[] parts = line.split("\\s+");
|
||||
|
||||
if (parts.length != 3) {
|
||||
throw new IllegalArgumentException("Malformed line '" + rawLine +
|
||||
"' in language resource '" + languageRulesResourceName + "'");
|
||||
}
|
||||
|
||||
final Pattern pattern = Pattern.compile(parts[0]);
|
||||
final String[] langs = parts[1].split("\\+");
|
||||
final boolean accept = parts[2].equals("true");
|
||||
|
||||
rules.add(new LangRule(pattern, new HashSet<String>(Arrays.asList(langs)), accept));
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
scanner.close();
|
||||
}
|
||||
return new Lang(rules, languages);
|
||||
}
|
||||
|
||||
private final Languages languages;
|
||||
private final List<LangRule> rules;
|
||||
|
||||
private Lang(final List<LangRule> rules, final Languages languages) {
|
||||
this.rules = Collections.unmodifiableList(rules);
|
||||
this.languages = languages;
|
||||
}
|
||||
|
||||
/**
|
||||
* Guesses the language of a word.
|
||||
*
|
||||
* @param text
|
||||
* the word
|
||||
* @return the language that the word originates from or {@link Languages#ANY} if there was no unique match
|
||||
*/
|
||||
public String guessLanguage(final String text) {
|
||||
final Languages.LanguageSet ls = guessLanguages(text);
|
||||
return ls.isSingleton() ? ls.getAny() : Languages.ANY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Guesses the languages of a word.
|
||||
*
|
||||
* @param input
|
||||
* the word
|
||||
* @return a Set of Strings of language names that are potential matches for the input word
|
||||
*/
|
||||
public Languages.LanguageSet guessLanguages(final String input) {
|
||||
final String text = input.toLowerCase(Locale.ENGLISH);
|
||||
|
||||
final Set<String> langs = new HashSet<String>(this.languages.getLanguages());
|
||||
for (final LangRule rule : this.rules) {
|
||||
if (rule.matches(text)) {
|
||||
if (rule.acceptOnMatch) {
|
||||
langs.retainAll(rule.languages);
|
||||
} else {
|
||||
langs.removeAll(rule.languages);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final Languages.LanguageSet ls = Languages.LanguageSet.from(langs);
|
||||
return ls.equals(Languages.NO_LANGUAGES) ? Languages.ANY_LANGUAGE : ls;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,295 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language.bm;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Scanner;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Language codes.
|
||||
* <p>
|
||||
* Language codes are typically loaded from resource files. These are UTF-8 encoded text files. They are
|
||||
* systematically named following the pattern:
|
||||
* <blockquote>org/apache/commons/codec/language/bm/${{@link NameType#getName()} languages.txt</blockquote>
|
||||
* <p>
|
||||
* The format of these resources is the following:
|
||||
* <ul>
|
||||
* <li><b>Language:</b> a single string containing no whitespace</li>
|
||||
* <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be
|
||||
* discarded as a comment.</li>
|
||||
* <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode.
|
||||
* This will skip all content until a line ending in '*' and '/' is found.</li>
|
||||
* <li><b>Blank lines:</b> All blank lines will be skipped.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Ported from language.php
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @since 1.6
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Languages {
|
||||
// Implementation note: This class is divided into two sections. The first part is a static factory interface that
|
||||
// exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in NameType.* as a list of supported
|
||||
// languages, and a second part that provides instance methods for accessing this set for supported languages.
|
||||
|
||||
/**
|
||||
* A set of languages.
|
||||
*/
|
||||
public static abstract class LanguageSet {
|
||||
|
||||
public static LanguageSet from(final Set<String> langs) {
|
||||
return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs);
|
||||
}
|
||||
|
||||
public abstract boolean contains(String language);
|
||||
|
||||
public abstract String getAny();
|
||||
|
||||
public abstract boolean isEmpty();
|
||||
|
||||
public abstract boolean isSingleton();
|
||||
|
||||
public abstract LanguageSet restrictTo(LanguageSet other);
|
||||
|
||||
abstract LanguageSet merge(LanguageSet other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Some languages, explicitly enumerated.
|
||||
*/
|
||||
public static final class SomeLanguages extends LanguageSet {
|
||||
private final Set<String> languages;
|
||||
|
||||
private SomeLanguages(final Set<String> languages) {
|
||||
this.languages = Collections.unmodifiableSet(languages);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(final String language) {
|
||||
return this.languages.contains(language);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAny() {
|
||||
return this.languages.iterator().next();
|
||||
}
|
||||
|
||||
public Set<String> getLanguages() {
|
||||
return this.languages;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return this.languages.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSingleton() {
|
||||
return this.languages.size() == 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LanguageSet restrictTo(final LanguageSet other) {
|
||||
if (other == NO_LANGUAGES) {
|
||||
return other;
|
||||
} else if (other == ANY_LANGUAGE) {
|
||||
return this;
|
||||
} else {
|
||||
final SomeLanguages sl = (SomeLanguages) other;
|
||||
final Set<String> ls = new HashSet<String>(Math.min(languages.size(), sl.languages.size()));
|
||||
for (String lang : languages) {
|
||||
if (sl.languages.contains(lang)) {
|
||||
ls.add(lang);
|
||||
}
|
||||
}
|
||||
return from(ls);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public LanguageSet merge(final LanguageSet other) {
|
||||
if (other == NO_LANGUAGES) {
|
||||
return this;
|
||||
} else if (other == ANY_LANGUAGE) {
|
||||
return other;
|
||||
} else {
|
||||
final SomeLanguages sl = (SomeLanguages) other;
|
||||
final Set<String> ls = new HashSet<String>(languages);
|
||||
for (String lang : sl.languages) {
|
||||
ls.add(lang);
|
||||
}
|
||||
return from(ls);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Languages(" + languages.toString() + ")";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static final String ANY = "any";
|
||||
|
||||
private static final Map<NameType, Languages> LANGUAGES = new EnumMap<NameType, Languages>(NameType.class);
|
||||
|
||||
static {
|
||||
for (final NameType s : NameType.values()) {
|
||||
LANGUAGES.put(s, getInstance(langResourceName(s)));
|
||||
}
|
||||
}
|
||||
|
||||
public static Languages getInstance(final NameType nameType) {
|
||||
return LANGUAGES.get(nameType);
|
||||
}
|
||||
|
||||
public static Languages getInstance(final String languagesResourceName) {
|
||||
// read languages list
|
||||
final Set<String> ls = new HashSet<String>();
|
||||
final InputStream langIS = Languages.class.getClassLoader().getResourceAsStream(languagesResourceName);
|
||||
|
||||
if (langIS == null) {
|
||||
throw new IllegalArgumentException("Unable to resolve required resource: " + languagesResourceName);
|
||||
}
|
||||
|
||||
final Scanner lsScanner = new Scanner(langIS, ResourceConstants.ENCODING);
|
||||
try {
|
||||
boolean inExtendedComment = false;
|
||||
while (lsScanner.hasNextLine()) {
|
||||
final String line = lsScanner.nextLine().trim();
|
||||
if (inExtendedComment) {
|
||||
if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
|
||||
inExtendedComment = false;
|
||||
}
|
||||
} else {
|
||||
if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
|
||||
inExtendedComment = true;
|
||||
} else if (line.length() > 0) {
|
||||
ls.add(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
lsScanner.close();
|
||||
}
|
||||
|
||||
return new Languages(Collections.unmodifiableSet(ls));
|
||||
}
|
||||
|
||||
private static String langResourceName(final NameType nameType) {
|
||||
return String.format("org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName());
|
||||
}
|
||||
|
||||
private final Set<String> languages;
|
||||
|
||||
/**
|
||||
* No languages at all.
|
||||
*/
|
||||
public static final LanguageSet NO_LANGUAGES = new LanguageSet() {
|
||||
@Override
|
||||
public boolean contains(final String language) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAny() {
|
||||
throw new NoSuchElementException("Can't fetch any language from the empty language set.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSingleton() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LanguageSet restrictTo(final LanguageSet other) {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LanguageSet merge(final LanguageSet other) {
|
||||
return other;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NO_LANGUAGES";
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Any/all languages.
|
||||
*/
|
||||
public static final LanguageSet ANY_LANGUAGE = new LanguageSet() {
|
||||
@Override
|
||||
public boolean contains(final String language) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAny() {
|
||||
throw new NoSuchElementException("Can't fetch any language from the any language set.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSingleton() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LanguageSet restrictTo(final LanguageSet other) {
|
||||
return other;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LanguageSet merge(final LanguageSet other) {
|
||||
return other;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ANY_LANGUAGE";
|
||||
}
|
||||
};
|
||||
|
||||
private Languages(final Set<String> languages) {
|
||||
this.languages = languages;
|
||||
}
|
||||
|
||||
public Set<String> getLanguages() {
|
||||
return this.languages;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language.bm;
|
||||
|
||||
/**
 * The kinds of names the Beider-Morse rules can be tuned for. Pick {@link #GENERIC} unless you are matching
 * particular family names: <code>GENERIC</code> should work reasonably well for non-name words, whereas the
 * other name types are specifically tuned to family names and may not work well at all for general text.
 *
 * @since 1.6
 * @version $Id$
 */
public enum NameType {

    /** Family names of Ashkenazi origin. */
    ASHKENAZI("ash"),

    /** General-purpose names and words. */
    GENERIC("gen"),

    /** Family names of Sephardic origin. */
    SEPHARDIC("sep");

    /** Short identifier of this name type. */
    private final String shortName;

    NameType(final String shortName) {
        this.shortName = shortName;
    }

    /**
     * Returns the abbreviated identifier of this name type.
     *
     * @return the short string this constant was declared with
     */
    public String getName() {
        return shortName;
    }
}
|
|
@ -0,0 +1,529 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language.bm;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.commons.codec.language.bm.Languages.LanguageSet;
|
||||
import org.apache.commons.codec.language.bm.Rule.Phoneme;
|
||||
|
||||
/**
|
||||
* Converts words into potential phonetic representations.
|
||||
* <p>
|
||||
* This is a two-stage process. Firstly, the word is converted into a phonetic representation that takes
|
||||
* into account the likely source language. Next, this phonetic representation is converted into a
|
||||
* pan-European 'average' representation, allowing comparison between different versions of essentially
|
||||
* the same word from different languages.
|
||||
* <p>
|
||||
* This class is intentionally immutable and thread-safe.
|
||||
* If you wish to alter the settings for a PhoneticEngine, you
|
||||
* must make a new one with the updated settings.
|
||||
* <p>
|
||||
* Ported from phoneticengine.php
|
||||
*
|
||||
* @since 1.6
|
||||
* @version $Id$
|
||||
*/
|
||||
public class PhoneticEngine {
|
||||
|
||||
/**
|
||||
* Utility for manipulating a set of phonemes as they are being built up. Not intended for use outside
|
||||
* this package, and probably not outside the {@link PhoneticEngine} class.
|
||||
*
|
||||
* @since 1.6
|
||||
*/
|
||||
static final class PhonemeBuilder {
|
||||
|
||||
/**
|
||||
* An empty builder where all phonemes must come from some set of languages. This will contain a single
|
||||
* phoneme of zero characters. This can then be appended to. This should be the only way to create a new
|
||||
* phoneme from scratch.
|
||||
*
|
||||
* @param languages the set of languages
|
||||
* @return a new, empty phoneme builder
|
||||
*/
|
||||
public static PhonemeBuilder empty(final Languages.LanguageSet languages) {
|
||||
return new PhonemeBuilder(new Rule.Phoneme("", languages));
|
||||
}
|
||||
|
||||
private final Set<Rule.Phoneme> phonemes;
|
||||
|
||||
private PhonemeBuilder(final Rule.Phoneme phoneme) {
|
||||
this.phonemes = new LinkedHashSet<Rule.Phoneme>();
|
||||
this.phonemes.add(phoneme);
|
||||
}
|
||||
|
||||
private PhonemeBuilder(final Set<Rule.Phoneme> phonemes) {
|
||||
this.phonemes = phonemes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new phoneme builder containing all phonemes in this one extended by <code>str</code>.
|
||||
*
|
||||
* @param str the characters to append to the phonemes
|
||||
*/
|
||||
public void append(final CharSequence str) {
|
||||
for (final Rule.Phoneme ph : this.phonemes) {
|
||||
ph.append(str);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the given phoneme expression to all phonemes in this phoneme builder.
|
||||
* <p>
|
||||
* This will lengthen phonemes that have compatible language sets to the expression, and drop those that are
|
||||
* incompatible.
|
||||
*
|
||||
* @param phonemeExpr the expression to apply
|
||||
* @param maxPhonemes the maximum number of phonemes to build up
|
||||
*/
|
||||
public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
|
||||
final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<Rule.Phoneme>(maxPhonemes);
|
||||
|
||||
EXPR: for (final Rule.Phoneme left : this.phonemes) {
|
||||
for (final Rule.Phoneme right : phonemeExpr.getPhonemes()) {
|
||||
final LanguageSet languages = left.getLanguages().restrictTo(right.getLanguages());
|
||||
if (!languages.isEmpty()) {
|
||||
final Rule.Phoneme join = new Phoneme(left, right, languages);
|
||||
if (newPhonemes.size() < maxPhonemes) {
|
||||
newPhonemes.add(join);
|
||||
if (newPhonemes.size() >= maxPhonemes) {
|
||||
break EXPR;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.phonemes.clear();
|
||||
this.phonemes.addAll(newPhonemes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets underlying phoneme set. Please don't mutate.
|
||||
*
|
||||
* @return the phoneme set
|
||||
*/
|
||||
public Set<Rule.Phoneme> getPhonemes() {
|
||||
return this.phonemes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stringifies the phoneme set. This produces a single string of the strings of each phoneme,
|
||||
* joined with a pipe. This is explicitly provided in place of toString as it is a potentially
|
||||
* expensive operation, which should be avoided when debugging.
|
||||
*
|
||||
* @return the stringified phoneme set
|
||||
*/
|
||||
public String makeString() {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (final Rule.Phoneme ph : this.phonemes) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append("|");
|
||||
}
|
||||
sb.append(ph.getPhonemeText());
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A function closure capturing the application of a list of rules to an input sequence at a particular offset.
|
||||
* After invocation, the values <code>i</code> and <code>found</code> are updated. <code>i</code> points to the
|
||||
* index of the next char in <code>input</code> that must be processed next (the input up to that index having been
|
||||
* processed already), and <code>found</code> indicates if a matching rule was found or not. In the case where a
|
||||
* matching rule was found, <code>phonemeBuilder</code> is replaced with a new builder containing the phonemes
|
||||
* updated by the matching rule.
|
||||
*
|
||||
* Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads
|
||||
* as it is constructed as needed by the calling methods.
|
||||
* @since 1.6
|
||||
*/
|
||||
private static final class RulesApplication {
|
||||
private final Map<String, List<Rule>> finalRules;
|
||||
private final CharSequence input;
|
||||
|
||||
private PhonemeBuilder phonemeBuilder;
|
||||
private int i;
|
||||
private final int maxPhonemes;
|
||||
private boolean found;
|
||||
|
||||
public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input,
|
||||
final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) {
|
||||
if (finalRules == null) {
|
||||
throw new NullPointerException("The finalRules argument must not be null");
|
||||
}
|
||||
this.finalRules = finalRules;
|
||||
this.phonemeBuilder = phonemeBuilder;
|
||||
this.input = input;
|
||||
this.i = i;
|
||||
this.maxPhonemes = maxPhonemes;
|
||||
}
|
||||
|
||||
public int getI() {
|
||||
return this.i;
|
||||
}
|
||||
|
||||
public PhonemeBuilder getPhonemeBuilder() {
|
||||
return this.phonemeBuilder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Invokes the rules. Loops over the rules list, stopping at the first one that has a matching context
|
||||
* and pattern. Then applies this rule to the phoneme builder to produce updated phonemes. If there was no
|
||||
* match, <code>i</code> is advanced one and the character is silently dropped from the phonetic spelling.
|
||||
*
|
||||
* @return <code>this</code>
|
||||
*/
|
||||
public RulesApplication invoke() {
|
||||
this.found = false;
|
||||
int patternLength = 1;
|
||||
final List<Rule> rules = this.finalRules.get(input.subSequence(i, i+patternLength));
|
||||
if (rules != null) {
|
||||
for (final Rule rule : rules) {
|
||||
final String pattern = rule.getPattern();
|
||||
patternLength = pattern.length();
|
||||
if (rule.patternAndContextMatches(this.input, this.i)) {
|
||||
this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
|
||||
this.found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!this.found) {
|
||||
patternLength = 1;
|
||||
}
|
||||
|
||||
this.i += patternLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean isFound() {
|
||||
return this.found;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Prefix words known for each name type. <code>encode</code> uses them to split off a leading prefix
 * (GENERIC) or to drop prefix words from the word list (SEPHARDIC, ASHKENAZI).
 */
private static final Map<NameType, Set<String>> NAME_PREFIXES = new EnumMap<NameType, Set<String>>(NameType.class);

static {
    final Set<String> ashkenazi = new HashSet<String>(
            Arrays.asList("bar", "ben", "da", "de", "van", "von"));
    final Set<String> sephardic = new HashSet<String>(
            Arrays.asList("al", "el", "da", "dal", "de", "del", "dela", "de la",
                          "della", "des", "di", "do", "dos", "du", "van", "von"));
    final Set<String> generic = new HashSet<String>(
            Arrays.asList("da", "dal", "de", "del", "dela", "de la", "della",
                          "des", "di", "do", "dos", "du", "van", "von"));

    // Each set is published read-only.
    NAME_PREFIXES.put(NameType.ASHKENAZI, Collections.unmodifiableSet(ashkenazi));
    NAME_PREFIXES.put(NameType.SEPHARDIC, Collections.unmodifiableSet(sephardic));
    NAME_PREFIXES.put(NameType.GENERIC, Collections.unmodifiableSet(generic));
}
|
||||
|
||||
/**
|
||||
* Joins some strings with an internal separator.
|
||||
* @param strings Strings to join
|
||||
* @param sep String to separate them with
|
||||
* @return a single String consisting of each element of <code>strings</code> interleaved by <code>sep</code>
|
||||
*/
|
||||
private static String join(final Iterable<String> strings, final String sep) {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
final Iterator<String> si = strings.iterator();
|
||||
if (si.hasNext()) {
|
||||
sb.append(si.next());
|
||||
}
|
||||
while (si.hasNext()) {
|
||||
sb.append(sep).append(si.next());
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/** Phoneme limit applied when the three-argument constructor is used. */
private static final int DEFAULT_MAX_PHONEMES = 20;
|
||||
|
||||
/** Language guesser, obtained via <code>Lang.instance(nameType)</code> in the constructor. */
private final Lang lang;
|
||||
|
||||
/** The name type passed to the constructor. */
private final NameType nameType;
|
||||
|
||||
/** The rule type passed to the constructor; never <code>RuleType.RULES</code>. */
private final RuleType ruleType;
|
||||
|
||||
/** The concat flag passed to the constructor; when set, <code>encode</code> re-joins the words with spaces. */
private final boolean concat;
|
||||
|
||||
/** Limit handed to every rule application as its <code>maxPhonemes</code> argument. */
private final int maxPhonemes;
|
||||
|
||||
/**
 * Generates a new, fully-configured phonetic engine that uses the default phoneme limit
 * (<code>DEFAULT_MAX_PHONEMES</code>). Delegates to the four-argument constructor.
 *
 * @param nameType
 *            the type of names it will use
 * @param ruleType
 *            the type of rules it will apply; must not be <code>RuleType.RULES</code>
 * @param concat
 *            if it will concatenate multiple encodings
 * @throws IllegalArgumentException
 *             if <code>ruleType</code> is <code>RuleType.RULES</code>
 */
public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat) {
|
||||
this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES);
|
||||
}
|
||||
|
||||
/**
 * Creates a phonetic engine with an explicit phoneme limit.
 *
 * @param nameType
 *            the type of names the engine works on; also selects the language guesser
 * @param ruleType
 *            the type of final rules to apply; <code>RuleType.RULES</code> is rejected
 * @param concat
 *            if it will concatenate multiple encodings
 * @param maxPhonemes
 *            the maximum number of phonemes that will be handled
 * @throws IllegalArgumentException
 *             if <code>ruleType</code> is <code>RuleType.RULES</code>
 * @since 1.7
 */
public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat,
                      final int maxPhonemes) {
    if (RuleType.RULES == ruleType) {
        throw new IllegalArgumentException("ruleType must not be " + RuleType.RULES);
    }
    this.maxPhonemes = maxPhonemes;
    this.concat = concat;
    this.ruleType = ruleType;
    this.nameType = nameType;
    this.lang = Lang.instance(nameType);
}
|
||||
|
||||
/**
|
||||
* Applies the final rules to convert from a language-specific phonetic representation to a
|
||||
* language-independent representation.
|
||||
*
|
||||
* @param phonemeBuilder the current phonemes
|
||||
* @param finalRules the final rules to apply
|
||||
* @return the resulting phonemes
|
||||
*/
|
||||
private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder,
|
||||
final Map<String, List<Rule>> finalRules) {
|
||||
if (finalRules == null) {
|
||||
throw new NullPointerException("finalRules can not be null");
|
||||
}
|
||||
if (finalRules.isEmpty()) {
|
||||
return phonemeBuilder;
|
||||
}
|
||||
|
||||
final Map<Rule.Phoneme, Rule.Phoneme> phonemes =
|
||||
new TreeMap<Rule.Phoneme, Rule.Phoneme>(Rule.Phoneme.COMPARATOR);
|
||||
|
||||
for (final Rule.Phoneme phoneme : phonemeBuilder.getPhonemes()) {
|
||||
PhonemeBuilder subBuilder = PhonemeBuilder.empty(phoneme.getLanguages());
|
||||
final String phonemeText = phoneme.getPhonemeText().toString();
|
||||
|
||||
for (int i = 0; i < phonemeText.length();) {
|
||||
final RulesApplication rulesApplication =
|
||||
new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).invoke();
|
||||
final boolean found = rulesApplication.isFound();
|
||||
subBuilder = rulesApplication.getPhonemeBuilder();
|
||||
|
||||
if (!found) {
|
||||
// not found, appending as-is
|
||||
subBuilder.append(phonemeText.subSequence(i, i + 1));
|
||||
}
|
||||
|
||||
i = rulesApplication.getI();
|
||||
}
|
||||
|
||||
// the phonemes map orders the phonemes only based on their text, but ignores the language set
|
||||
// when adding new phonemes, check for equal phonemes and merge their language set, otherwise
|
||||
// phonemes with the same text but different language set get lost
|
||||
for (final Rule.Phoneme newPhoneme : subBuilder.getPhonemes()) {
|
||||
if (phonemes.containsKey(newPhoneme)) {
|
||||
final Rule.Phoneme oldPhoneme = phonemes.remove(newPhoneme);
|
||||
final Rule.Phoneme mergedPhoneme = oldPhoneme.mergeWithLanguage(newPhoneme.getLanguages());
|
||||
phonemes.put(mergedPhoneme, mergedPhoneme);
|
||||
} else {
|
||||
phonemes.put(newPhoneme, newPhoneme);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new PhonemeBuilder(phonemes.keySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string to its phonetic representation.
|
||||
*
|
||||
* @param input
|
||||
* the String to encode
|
||||
* @return the encoding of the input
|
||||
*/
|
||||
public String encode(final String input) {
|
||||
final Languages.LanguageSet languageSet = this.lang.guessLanguages(input);
|
||||
return encode(input, languageSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an input string into an output phonetic representation, given a set of possible origin languages.
|
||||
*
|
||||
* @param input
|
||||
* String to phoneticise; a String with dashes or spaces separating each word
|
||||
* @param languageSet
|
||||
* set of possible origin languages
|
||||
* @return a phonetic representation of the input; a String containing '-'-separated phonetic representations of the
|
||||
* input
|
||||
*/
|
||||
public String encode(String input, final Languages.LanguageSet languageSet) {
|
||||
final Map<String, List<Rule>> rules = Rule.getInstanceMap(this.nameType, RuleType.RULES, languageSet);
|
||||
// rules common across many (all) languages
|
||||
final Map<String, List<Rule>> finalRules1 = Rule.getInstanceMap(this.nameType, this.ruleType, "common");
|
||||
// rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
|
||||
final Map<String, List<Rule>> finalRules2 = Rule.getInstanceMap(this.nameType, this.ruleType, languageSet);
|
||||
|
||||
// tidy the input
|
||||
// lower case is a locale-dependent operation
|
||||
input = input.toLowerCase(Locale.ENGLISH).replace('-', ' ').trim();
|
||||
|
||||
if (this.nameType == NameType.GENERIC) {
|
||||
if (input.length() >= 2 && input.substring(0, 2).equals("d'")) { // check for d'
|
||||
final String remainder = input.substring(2);
|
||||
final String combined = "d" + remainder;
|
||||
return "(" + encode(remainder) + ")-(" + encode(combined) + ")";
|
||||
}
|
||||
for (final String l : NAME_PREFIXES.get(this.nameType)) {
|
||||
// handle generic prefixes
|
||||
if (input.startsWith(l + " ")) {
|
||||
// check for any prefix in the words list
|
||||
final String remainder = input.substring(l.length() + 1); // input without the prefix
|
||||
final String combined = l + remainder; // input with prefix without space
|
||||
return "(" + encode(remainder) + ")-(" + encode(combined) + ")";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final List<String> words = Arrays.asList(input.split("\\s+"));
|
||||
final List<String> words2 = new ArrayList<String>();
|
||||
|
||||
// special-case handling of word prefixes based upon the name type
|
||||
switch (this.nameType) {
|
||||
case SEPHARDIC:
|
||||
for (final String aWord : words) {
|
||||
final String[] parts = aWord.split("'");
|
||||
final String lastPart = parts[parts.length - 1];
|
||||
words2.add(lastPart);
|
||||
}
|
||||
words2.removeAll(NAME_PREFIXES.get(this.nameType));
|
||||
break;
|
||||
case ASHKENAZI:
|
||||
words2.addAll(words);
|
||||
words2.removeAll(NAME_PREFIXES.get(this.nameType));
|
||||
break;
|
||||
case GENERIC:
|
||||
words2.addAll(words);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unreachable case: " + this.nameType);
|
||||
}
|
||||
|
||||
if (this.concat) {
|
||||
// concat mode enabled
|
||||
input = join(words2, " ");
|
||||
} else if (words2.size() == 1) {
|
||||
// not a multi-word name
|
||||
input = words.iterator().next();
|
||||
} else {
|
||||
// encode each word in a multi-word name separately (normally used for approx matches)
|
||||
final StringBuilder result = new StringBuilder();
|
||||
for (final String word : words2) {
|
||||
result.append("-").append(encode(word));
|
||||
}
|
||||
// return the result without the leading "-"
|
||||
return result.substring(1);
|
||||
}
|
||||
|
||||
PhonemeBuilder phonemeBuilder = PhonemeBuilder.empty(languageSet);
|
||||
|
||||
// loop over each char in the input - we will handle the increment manually
|
||||
for (int i = 0; i < input.length();) {
|
||||
final RulesApplication rulesApplication =
|
||||
new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).invoke();
|
||||
i = rulesApplication.getI();
|
||||
phonemeBuilder = rulesApplication.getPhonemeBuilder();
|
||||
}
|
||||
|
||||
// Apply the general rules
|
||||
phonemeBuilder = applyFinalRules(phonemeBuilder, finalRules1);
|
||||
// Apply the language-specific rules
|
||||
phonemeBuilder = applyFinalRules(phonemeBuilder, finalRules2);
|
||||
|
||||
return phonemeBuilder.makeString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Lang language guessing rules being used.
|
||||
*
|
||||
* @return the Lang in use
|
||||
*/
|
||||
public Lang getLang() {
|
||||
return this.lang;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the NameType being used.
|
||||
*
|
||||
* @return the NameType in use
|
||||
*/
|
||||
public NameType getNameType() {
|
||||
return this.nameType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the RuleType being used.
|
||||
*
|
||||
* @return the RuleType in use
|
||||
*/
|
||||
public RuleType getRuleType() {
|
||||
return this.ruleType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets if multiple phonetic encodings are concatenated or if just the first one is kept.
|
||||
*
|
||||
* @return true if multiple phonetic encodings are returned, false if just the first is
|
||||
*/
|
||||
public boolean isConcat() {
|
||||
return this.concat;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the maximum number of phonemes the engine will calculate for a given input.
|
||||
*
|
||||
* @return the maximum number of phonemes
|
||||
* @since 1.7
|
||||
*/
|
||||
public int getMaxPhonemes() {
|
||||
return this.maxPhonemes;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language.bm;
|
||||
|
||||
import org.apache.commons.codec.CharEncoding;
|
||||
|
||||
/**
|
||||
* Constants used to process resource files.
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*
|
||||
* @since 1.6
|
||||
* @version $Id$
|
||||
*/
|
||||
class ResourceConstants {
|
||||
|
||||
static final String CMT = "//";
|
||||
static final String ENCODING = CharEncoding.UTF_8;
|
||||
static final String EXT_CMT_END = "*/";
|
||||
static final String EXT_CMT_START = "/*";
|
||||
|
||||
}
|
|
@ -0,0 +1,720 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language.bm;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.EnumMap;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.codec.language.bm.Languages.LanguageSet;
|
||||
|
||||
/**
|
||||
* A phoneme rule.
|
||||
* <p>
|
||||
* Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
|
||||
* and a logical flag indicating if all languages must be in play. A rule matches if:
|
||||
* <ul>
|
||||
* <li>the pattern matches at the current position</li>
|
||||
* <li>the string up until the beginning of the pattern matches the left context</li>
|
||||
* <li>the string from the end of the pattern matches the right context</li>
|
||||
* <li>logical is ALL and all languages are in scope; or</li>
|
||||
* <li>logical is any other value and at least one language is in scope</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
|
||||
* to explicitly construct their own.
|
||||
* <p>
|
||||
* Rules are immutable and thread-safe.
|
||||
* <p>
|
||||
* <b>Rules resources</b>
|
||||
* <p>
|
||||
* Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
|
||||
* named following the pattern:
|
||||
* <blockquote>org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote>
|
||||
* <p>
|
||||
* The format of these resources is the following:
|
||||
* <ul>
|
||||
* <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
|
||||
* will be interpreted as:
|
||||
* <ol>
|
||||
* <li>pattern</li>
|
||||
* <li>left context</li>
|
||||
* <li>right context</li>
|
||||
* <li>phoneme</li>
|
||||
* </ol>
|
||||
* </li>
|
||||
* <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded
|
||||
* as a comment.</li>
|
||||
* <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. This will skip
|
||||
* all content until a line ending in '*' and '/' is found.</li>
|
||||
* <li><b>Blank lines:</b> All blank lines will be skipped.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since 1.6
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Rule {
|
||||
|
||||
public static final class Phoneme implements PhonemeExpr {
|
||||
public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() {
|
||||
@Override
|
||||
public int compare(final Phoneme o1, final Phoneme o2) {
|
||||
for (int i = 0; i < o1.phonemeText.length(); i++) {
|
||||
if (i >= o2.phonemeText.length()) {
|
||||
return +1;
|
||||
}
|
||||
final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i);
|
||||
if (c != 0) {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
if (o1.phonemeText.length() < o2.phonemeText.length()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
private final StringBuilder phonemeText;
|
||||
private final Languages.LanguageSet languages;
|
||||
|
||||
public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) {
|
||||
this.phonemeText = new StringBuilder(phonemeText);
|
||||
this.languages = languages;
|
||||
}
|
||||
|
||||
public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) {
|
||||
this(phonemeLeft.phonemeText, phonemeLeft.languages);
|
||||
this.phonemeText.append(phonemeRight.phonemeText);
|
||||
}
|
||||
|
||||
public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) {
|
||||
this(phonemeLeft.phonemeText, languages);
|
||||
this.phonemeText.append(phonemeRight.phonemeText);
|
||||
}
|
||||
|
||||
public Phoneme append(final CharSequence str) {
|
||||
this.phonemeText.append(str);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Languages.LanguageSet getLanguages() {
|
||||
return this.languages;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<Phoneme> getPhonemes() {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
|
||||
public CharSequence getPhonemeText() {
|
||||
return this.phonemeText;
|
||||
}
|
||||
|
||||
/**
|
||||
* Deprecated since 1.9.
|
||||
*
|
||||
* @param right the Phoneme to join
|
||||
* @return a new Phoneme
|
||||
* @deprecated since 1.9
|
||||
*/
|
||||
@Deprecated
|
||||
public Phoneme join(final Phoneme right) {
|
||||
return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(),
|
||||
this.languages.restrictTo(right.languages));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new Phoneme with the same text but a union of its
|
||||
* current language set and the given one.
|
||||
*
|
||||
* @param lang the language set to merge
|
||||
* @return a new Phoneme
|
||||
*/
|
||||
public Phoneme mergeWithLanguage(final LanguageSet lang) {
|
||||
return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return phonemeText.toString() + "[" + languages + "]";
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Something that can supply phonemes: implemented in this file by a single {@link Phoneme} and by
 * {@link PhonemeList}.
 */
public interface PhonemeExpr {
|
||||
/**
 * @return the phonemes this expression stands for
 */
Iterable<Phoneme> getPhonemes();
|
||||
}
|
||||
|
||||
public static final class PhonemeList implements PhonemeExpr {
|
||||
private final List<Phoneme> phonemes;
|
||||
|
||||
public PhonemeList(final List<Phoneme> phonemes) {
|
||||
this.phonemes = phonemes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Phoneme> getPhonemes() {
|
||||
return this.phonemes;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations.
 */
public interface RPattern {
|
||||
/**
 * Tests an input against this pattern.
 *
 * @param input the character sequence to test
 * @return true if the input matches
 */
boolean isMatch(CharSequence input);
|
||||
}
|
||||
|
||||
/** An {@link RPattern} that reports a match for every input. */
public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() {
|
||||
@Override
|
||||
public boolean isMatch(final CharSequence input) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/** The literal "ALL". NOTE(review): presumably the 'logical' flag value named in the class comment; its use is not visible here. */
public static final String ALL = "ALL";
|
||||
|
||||
/** A double-quote character as a string. NOTE(review): presumably used when stripping quotes from rule columns; confirm. */
private static final String DOUBLE_QUOTE = "\"";
|
||||
|
||||
/** Directive that starts an include line in a rules resource. */
private static final String HASH_INCLUDE = "#include";
|
||||
|
||||
/**
 * All parsed rules, indexed by name type, then rule type, then language name (or "common"), then the first
 * character of the rule pattern. Filled once by the static initializer below; the two outer levels are
 * wrapped as unmodifiable maps.
 */
private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES =
        new EnumMap<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>>(NameType.class);

static {
    for (final NameType s : NameType.values()) {
        final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts =
                new EnumMap<RuleType, Map<String, Map<String, List<Rule>>>>(RuleType.class);

        for (final RuleType rt : RuleType.values()) {
            final Map<String, Map<String, List<Rule>>> rs = new HashMap<String, Map<String, List<Rule>>>();

            final Languages ls = Languages.getInstance(s);
            for (final String l : ls.getLanguages()) {
                final Scanner scanner = createScanner(s, rt, l);
                try {
                    rs.put(l, parseRules(scanner, createResourceName(s, rt, l)));
                } catch (final IllegalStateException e) {
                    throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e);
                } finally {
                    // release the resource stream once the file has been parsed (or parsing failed)
                    scanner.close();
                }
            }
            // every rule type except RULES also has a language-independent "common" resource
            if (!rt.equals(RuleType.RULES)) {
                final Scanner scanner = createScanner(s, rt, "common");
                try {
                    rs.put("common", parseRules(scanner, createResourceName(s, rt, "common")));
                } finally {
                    scanner.close();
                }
            }

            rts.put(rt, Collections.unmodifiableMap(rs));
        }

        RULES.put(s, Collections.unmodifiableMap(rts));
    }
}
|
||||
|
||||
private static boolean contains(final CharSequence chars, final char input) {
|
||||
for (int i = 0; i < chars.length(); i++) {
|
||||
if (chars.charAt(i) == input) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) {
|
||||
return String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt",
|
||||
nameType.getName(), rt.getName(), lang);
|
||||
}
|
||||
|
||||
private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) {
|
||||
final String resName = createResourceName(nameType, rt, lang);
|
||||
final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName);
|
||||
|
||||
if (rulesIS == null) {
|
||||
throw new IllegalArgumentException("Unable to load resource: " + resName);
|
||||
}
|
||||
|
||||
return new Scanner(rulesIS, ResourceConstants.ENCODING);
|
||||
}
|
||||
|
||||
private static Scanner createScanner(final String lang) {
|
||||
final String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang);
|
||||
final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName);
|
||||
|
||||
if (rulesIS == null) {
|
||||
throw new IllegalArgumentException("Unable to load resource: " + resName);
|
||||
}
|
||||
|
||||
return new Scanner(rulesIS, ResourceConstants.ENCODING);
|
||||
}
|
||||
|
||||
private static boolean endsWith(final CharSequence input, final CharSequence suffix) {
|
||||
if (suffix.length() > input.length()) {
|
||||
return false;
|
||||
}
|
||||
for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) {
|
||||
if (input.charAt(i) != suffix.charAt(j)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets rules for a combination of name type, rule type and languages.
|
||||
*
|
||||
* @param nameType
|
||||
* the NameType to consider
|
||||
* @param rt
|
||||
* the RuleType to consider
|
||||
* @param langs
|
||||
* the set of languages to consider
|
||||
* @return a list of Rules that apply
|
||||
*/
|
||||
public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
|
||||
final Languages.LanguageSet langs) {
|
||||
final Map<String, List<Rule>> ruleMap = getInstanceMap(nameType, rt, langs);
|
||||
final List<Rule> allRules = new ArrayList<Rule>();
|
||||
for (final List<Rule> rules : ruleMap.values()) {
|
||||
allRules.addAll(rules);
|
||||
}
|
||||
return allRules;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets rules for a combination of name type, rule type and a single language.
|
||||
*
|
||||
* @param nameType
|
||||
* the NameType to consider
|
||||
* @param rt
|
||||
* the RuleType to consider
|
||||
* @param lang
|
||||
* the language to consider
|
||||
* @return a list of Rules that apply
|
||||
*/
|
||||
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
|
||||
return getInstance(nameType, rt, LanguageSet.from(new HashSet<String>(Arrays.asList(lang))));
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets rules for a combination of name type, rule type and languages.
|
||||
*
|
||||
* @param nameType
|
||||
* the NameType to consider
|
||||
* @param rt
|
||||
* the RuleType to consider
|
||||
* @param langs
|
||||
* the set of languages to consider
|
||||
* @return a map containing all Rules that apply, grouped by the first character of the rule pattern
|
||||
* @since 1.9
|
||||
*/
|
||||
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
|
||||
final Languages.LanguageSet langs) {
|
||||
return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) :
|
||||
getInstanceMap(nameType, rt, Languages.ANY);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets rules for a combination of name type, rule type and a single language.
|
||||
*
|
||||
* @param nameType
|
||||
* the NameType to consider
|
||||
* @param rt
|
||||
* the RuleType to consider
|
||||
* @param lang
|
||||
* the language to consider
|
||||
* @return a map containing all Rules that apply, grouped by the first character of the rule pattern
|
||||
* @since 1.9
|
||||
*/
|
||||
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
|
||||
final String lang) {
|
||||
final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang);
|
||||
|
||||
if (rules == null) {
|
||||
throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.",
|
||||
nameType.getName(), rt.getName(), lang));
|
||||
}
|
||||
|
||||
return rules;
|
||||
}
|
||||
|
||||
private static Phoneme parsePhoneme(final String ph) {
|
||||
final int open = ph.indexOf("[");
|
||||
if (open >= 0) {
|
||||
if (!ph.endsWith("]")) {
|
||||
throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'");
|
||||
}
|
||||
final String before = ph.substring(0, open);
|
||||
final String in = ph.substring(open + 1, ph.length() - 1);
|
||||
final Set<String> langs = new HashSet<String>(Arrays.asList(in.split("[+]")));
|
||||
|
||||
return new Phoneme(before, Languages.LanguageSet.from(langs));
|
||||
} else {
|
||||
return new Phoneme(ph, Languages.ANY_LANGUAGE);
|
||||
}
|
||||
}
|
||||
|
||||
private static PhonemeExpr parsePhonemeExpr(final String ph) {
|
||||
if (ph.startsWith("(")) { // we have a bracketed list of options
|
||||
if (!ph.endsWith(")")) {
|
||||
throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'");
|
||||
}
|
||||
|
||||
final List<Phoneme> phs = new ArrayList<Phoneme>();
|
||||
final String body = ph.substring(1, ph.length() - 1);
|
||||
for (final String part : body.split("[|]")) {
|
||||
phs.add(parsePhoneme(part));
|
||||
}
|
||||
if (body.startsWith("|") || body.endsWith("|")) {
|
||||
phs.add(new Phoneme("", Languages.ANY_LANGUAGE));
|
||||
}
|
||||
|
||||
return new PhonemeList(phs);
|
||||
} else {
|
||||
return parsePhoneme(ph);
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) {
|
||||
final Map<String, List<Rule>> lines = new HashMap<String, List<Rule>>();
|
||||
int currentLine = 0;
|
||||
|
||||
boolean inMultilineComment = false;
|
||||
while (scanner.hasNextLine()) {
|
||||
currentLine++;
|
||||
final String rawLine = scanner.nextLine();
|
||||
String line = rawLine;
|
||||
|
||||
if (inMultilineComment) {
|
||||
if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
|
||||
inMultilineComment = false;
|
||||
}
|
||||
} else {
|
||||
if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
|
||||
inMultilineComment = true;
|
||||
} else {
|
||||
// discard comments
|
||||
final int cmtI = line.indexOf(ResourceConstants.CMT);
|
||||
if (cmtI >= 0) {
|
||||
line = line.substring(0, cmtI);
|
||||
}
|
||||
|
||||
// trim leading-trailing whitespace
|
||||
line = line.trim();
|
||||
|
||||
if (line.length() == 0) {
|
||||
continue; // empty lines can be safely skipped
|
||||
}
|
||||
|
||||
if (line.startsWith(HASH_INCLUDE)) {
|
||||
// include statement
|
||||
final String incl = line.substring(HASH_INCLUDE.length()).trim();
|
||||
if (incl.contains(" ")) {
|
||||
throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " +
|
||||
location);
|
||||
} else {
|
||||
lines.putAll(parseRules(createScanner(incl), location + "->" + incl));
|
||||
}
|
||||
} else {
|
||||
// rule
|
||||
final String[] parts = line.split("\\s+");
|
||||
if (parts.length != 4) {
|
||||
throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
|
||||
" parts: " + rawLine + " in " + location);
|
||||
} else {
|
||||
try {
|
||||
final String pat = stripQuotes(parts[0]);
|
||||
final String lCon = stripQuotes(parts[1]);
|
||||
final String rCon = stripQuotes(parts[2]);
|
||||
final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
|
||||
final int cLine = currentLine;
|
||||
final Rule r = new Rule(pat, lCon, rCon, ph) {
|
||||
private final int myLine = cLine;
|
||||
private final String loc = location;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
sb.append("Rule");
|
||||
sb.append("{line=").append(myLine);
|
||||
sb.append(", loc='").append(loc).append('\'');
|
||||
sb.append(", pat='").append(pat).append('\'');
|
||||
sb.append(", lcon='").append(lCon).append('\'');
|
||||
sb.append(", rcon='").append(rCon).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
};
|
||||
final String patternKey = r.pattern.substring(0,1);
|
||||
List<Rule> rules = lines.get(patternKey);
|
||||
if (rules == null) {
|
||||
rules = new ArrayList<Rule>();
|
||||
lines.put(patternKey, rules);
|
||||
}
|
||||
rules.add(r);
|
||||
} catch (final IllegalArgumentException e) {
|
||||
throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " +
|
||||
location, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case.
|
||||
*
|
||||
* @param regex
|
||||
* the regular expression to compile
|
||||
* @return an RPattern that will match this regex
|
||||
*/
|
||||
private static RPattern pattern(final String regex) {
|
||||
final boolean startsWith = regex.startsWith("^");
|
||||
final boolean endsWith = regex.endsWith("$");
|
||||
final String content = regex.substring(startsWith ? 1 : 0, endsWith ? regex.length() - 1 : regex.length());
|
||||
final boolean boxes = content.contains("[");
|
||||
|
||||
if (!boxes) {
|
||||
if (startsWith && endsWith) {
|
||||
// exact match
|
||||
if (content.length() == 0) {
|
||||
// empty
|
||||
return new RPattern() {
|
||||
@Override
|
||||
public boolean isMatch(final CharSequence input) {
|
||||
return input.length() == 0;
|
||||
}
|
||||
};
|
||||
} else {
|
||||
return new RPattern() {
|
||||
@Override
|
||||
public boolean isMatch(final CharSequence input) {
|
||||
return input.equals(content);
|
||||
}
|
||||
};
|
||||
}
|
||||
} else if ((startsWith || endsWith) && content.length() == 0) {
|
||||
// matches every string
|
||||
return ALL_STRINGS_RMATCHER;
|
||||
} else if (startsWith) {
|
||||
// matches from start
|
||||
return new RPattern() {
|
||||
@Override
|
||||
public boolean isMatch(final CharSequence input) {
|
||||
return startsWith(input, content);
|
||||
}
|
||||
};
|
||||
} else if (endsWith) {
|
||||
// matches from start
|
||||
return new RPattern() {
|
||||
@Override
|
||||
public boolean isMatch(final CharSequence input) {
|
||||
return endsWith(input, content);
|
||||
}
|
||||
};
|
||||
}
|
||||
} else {
|
||||
final boolean startsWithBox = content.startsWith("[");
|
||||
final boolean endsWithBox = content.endsWith("]");
|
||||
|
||||
if (startsWithBox && endsWithBox) {
|
||||
String boxContent = content.substring(1, content.length() - 1);
|
||||
if (!boxContent.contains("[")) {
|
||||
// box containing alternatives
|
||||
final boolean negate = boxContent.startsWith("^");
|
||||
if (negate) {
|
||||
boxContent = boxContent.substring(1);
|
||||
}
|
||||
final String bContent = boxContent;
|
||||
final boolean shouldMatch = !negate;
|
||||
|
||||
if (startsWith && endsWith) {
|
||||
// exact match
|
||||
return new RPattern() {
|
||||
@Override
|
||||
public boolean isMatch(final CharSequence input) {
|
||||
return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch;
|
||||
}
|
||||
};
|
||||
} else if (startsWith) {
|
||||
// first char
|
||||
return new RPattern() {
|
||||
@Override
|
||||
public boolean isMatch(final CharSequence input) {
|
||||
return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch;
|
||||
}
|
||||
};
|
||||
} else if (endsWith) {
|
||||
// last char
|
||||
return new RPattern() {
|
||||
@Override
|
||||
public boolean isMatch(final CharSequence input) {
|
||||
return input.length() > 0 &&
|
||||
contains(bContent, input.charAt(input.length() - 1)) == shouldMatch;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new RPattern() {
|
||||
Pattern pattern = Pattern.compile(regex);
|
||||
|
||||
@Override
|
||||
public boolean isMatch(final CharSequence input) {
|
||||
final Matcher matcher = pattern.matcher(input);
|
||||
return matcher.find();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static boolean startsWith(final CharSequence input, final CharSequence prefix) {
|
||||
if (prefix.length() > input.length()) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < prefix.length(); i++) {
|
||||
if (input.charAt(i) != prefix.charAt(i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private static String stripQuotes(String str) {
|
||||
if (str.startsWith(DOUBLE_QUOTE)) {
|
||||
str = str.substring(1);
|
||||
}
|
||||
|
||||
if (str.endsWith(DOUBLE_QUOTE)) {
|
||||
str = str.substring(0, str.length() - 1);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
/** Matcher applied to the input that precedes the pattern. */
private final RPattern lContext;

/** Literal text this rule looks for. */
private final String pattern;

/** Phoneme expression associated with this rule. */
private final PhonemeExpr phoneme;

/** Matcher applied to the input that follows the pattern. */
private final RPattern rContext;

/**
 * Creates a new rule.
 * <p>
 * The left context is compiled with a {@code $} appended and the right context with a {@code ^} prepended
 * before being passed to {@link #pattern(String)}.
 *
 * @param pattern
 *            the pattern
 * @param lContext
 *            the left context
 * @param rContext
 *            the right context
 * @param phoneme
 *            the resulting phoneme
 */
public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) {
    // compile left before right, as before, so any compilation failure surfaces in the same order
    final RPattern leftMatcher = pattern(lContext + "$");
    final RPattern rightMatcher = pattern("^" + rContext);

    this.pattern = pattern;
    this.phoneme = phoneme;
    this.lContext = leftMatcher;
    this.rContext = rightMatcher;
}
|
||||
|
||||
/**
|
||||
* Gets the left context. This is a regular expression that must match to the left of the pattern.
|
||||
*
|
||||
* @return the left context Pattern
|
||||
*/
|
||||
public RPattern getLContext() {
|
||||
return this.lContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the pattern. This is a string-literal that must exactly match.
|
||||
*
|
||||
* @return the pattern
|
||||
*/
|
||||
public String getPattern() {
|
||||
return this.pattern;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match.
|
||||
*
|
||||
* @return the phoneme
|
||||
*/
|
||||
public PhonemeExpr getPhoneme() {
|
||||
return this.phoneme;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the right context. This is a regular expression that must match to the right of the pattern.
|
||||
*
|
||||
* @return the right context Pattern
|
||||
*/
|
||||
public RPattern getRContext() {
|
||||
return this.rContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decides if the pattern and context match the input starting at a position. It is a match if the
|
||||
* <code>lContext</code> matches <code>input</code> up to <code>i</code>, <code>pattern</code> matches at i and
|
||||
* <code>rContext</code> matches from the end of the match of <code>pattern</code> to the end of <code>input</code>.
|
||||
*
|
||||
* @param input
|
||||
* the input String
|
||||
* @param i
|
||||
* the int position within the input
|
||||
* @return true if the pattern and left/right context match, false otherwise
|
||||
*/
|
||||
public boolean patternAndContextMatches(final CharSequence input, final int i) {
|
||||
if (i < 0) {
|
||||
throw new IndexOutOfBoundsException("Can not match pattern at negative indexes");
|
||||
}
|
||||
|
||||
final int patternLength = this.pattern.length();
|
||||
final int ipl = i + patternLength;
|
||||
|
||||
if (ipl > input.length()) {
|
||||
// not enough room for the pattern to match
|
||||
return false;
|
||||
}
|
||||
|
||||
// evaluate the pattern, left context and right context
|
||||
// fail early if any of the evaluations is not successful
|
||||
if (!input.subSequence(i, ipl).equals(this.pattern)) {
|
||||
return false;
|
||||
} else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) {
|
||||
return false;
|
||||
}
|
||||
return this.lContext.isMatch(input.subSequence(0, i));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.language.bm;
|
||||
|
||||
/**
 * The kinds of rule, each carrying a lower-case name.
 *
 * @since 1.6
 * @version $Id$
 */
public enum RuleType {

    /** Approximate rules; these produce the largest number of phonetic interpretations. */
    APPROX("approx"),

    /** Exact rules; these produce a minimal number of phonetic interpretations. */
    EXACT("exact"),

    /** Internal use only. Callers should pick {@link #APPROX} or {@link #EXACT} instead. */
    RULES("rules");

    /** The name handed to the constructor for this constant. */
    private final String ruleName;

    RuleType(final String ruleName) {
        this.ruleName = ruleName;
    }

    /**
     * Returns the name of this rule type.
     *
     * @return the rule name.
     */
    public String getName() {
        return ruleName;
    }

}
|
|
@ -0,0 +1,21 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<body>
|
||||
Implementation details of the Beider-Morse codec.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,21 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<body>
|
||||
Language and phonetic encoders.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,251 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.net;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.commons.codec.Charsets;
|
||||
import org.apache.commons.codec.DecoderException;
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringDecoder;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
|
||||
/**
|
||||
* Identical to the Base64 encoding defined by <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>
|
||||
* and allows a character set to be specified.
|
||||
* <p>
|
||||
* <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
|
||||
* text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message
|
||||
* handling software.
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
|
||||
* Header Extensions for Non-ASCII Text</a>
|
||||
*
|
||||
* @since 1.3
|
||||
* @version $Id$
|
||||
*/
|
||||
public class BCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
|
||||
/**
|
||||
* The default charset used for string decoding and encoding.
|
||||
*/
|
||||
private final Charset charset;
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
*/
|
||||
public BCodec() {
|
||||
this(Charsets.UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor which allows for the selection of a default charset
|
||||
*
|
||||
* @param charset
|
||||
* the default string charset to use.
|
||||
*
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @since 1.7
|
||||
*/
|
||||
public BCodec(final Charset charset) {
|
||||
this.charset = charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor which allows for the selection of a default charset
|
||||
*
|
||||
* @param charsetName
|
||||
* the default charset to use.
|
||||
* @throws java.nio.charset.UnsupportedCharsetException
|
||||
* If the named charset is unavailable
|
||||
* @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
*/
|
||||
public BCodec(final String charsetName) {
|
||||
this(Charset.forName(charsetName));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getEncoding() {
|
||||
return "B";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected byte[] doEncoding(final byte[] bytes) {
|
||||
if (bytes == null) {
|
||||
return null;
|
||||
}
|
||||
return Base64.encodeBase64(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected byte[] doDecoding(final byte[] bytes) {
|
||||
if (bytes == null) {
|
||||
return null;
|
||||
}
|
||||
return Base64.decodeBase64(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its Base64 form using the specified charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param value
|
||||
* string to convert to Base64 form
|
||||
* @param charset
|
||||
* the charset for <code>value</code>
|
||||
* @return Base64 string
|
||||
* @throws EncoderException
|
||||
* thrown if a failure condition is encountered during the encoding process.
|
||||
* @since 1.7
|
||||
*/
|
||||
public String encode(final String value, final Charset charset) throws EncoderException {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
return encodeText(value, charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its Base64 form using the specified charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param value
|
||||
* string to convert to Base64 form
|
||||
* @param charset
|
||||
* the charset for <code>value</code>
|
||||
* @return Base64 string
|
||||
* @throws EncoderException
|
||||
* thrown if a failure condition is encountered during the encoding process.
|
||||
*/
|
||||
public String encode(final String value, final String charset) throws EncoderException {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return this.encodeText(value, charset);
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw new EncoderException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its Base64 form using the default charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param value
|
||||
* string to convert to Base64 form
|
||||
* @return Base64 string
|
||||
* @throws EncoderException
|
||||
* thrown if a failure condition is encountered during the encoding process.
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String value) throws EncoderException {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
return encode(value, this.getCharset());
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a Base64 string into its original form. Escaped characters are converted back to their original
|
||||
* representation.
|
||||
*
|
||||
* @param value
|
||||
* Base64 string to convert into its original form
|
||||
* @return original string
|
||||
* @throws DecoderException
|
||||
* A decoder exception is thrown if a failure condition is encountered during the decode process.
|
||||
*/
|
||||
@Override
|
||||
public String decode(final String value) throws DecoderException {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return this.decodeText(value);
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw new DecoderException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an object into its Base64 form using the default charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param value
|
||||
* object to convert to Base64 form
|
||||
* @return Base64 object
|
||||
* @throws EncoderException
|
||||
* thrown if a failure condition is encountered during the encoding process.
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object value) throws EncoderException {
|
||||
if (value == null) {
|
||||
return null;
|
||||
} else if (value instanceof String) {
|
||||
return encode((String) value);
|
||||
} else {
|
||||
throw new EncoderException("Objects of type " +
|
||||
value.getClass().getName() +
|
||||
" cannot be encoded using BCodec");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a Base64 object into its original form. Escaped characters are converted back to their original
|
||||
* representation.
|
||||
*
|
||||
* @param value
|
||||
* Base64 object to convert into its original form
|
||||
* @return original object
|
||||
* @throws DecoderException
|
||||
* Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered
|
||||
* during the decode process.
|
||||
*/
|
||||
@Override
|
||||
public Object decode(final Object value) throws DecoderException {
|
||||
if (value == null) {
|
||||
return null;
|
||||
} else if (value instanceof String) {
|
||||
return decode((String) value);
|
||||
} else {
|
||||
throw new DecoderException("Objects of type " +
|
||||
value.getClass().getName() +
|
||||
" cannot be decoded using BCodec");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the default charset name used for string decoding and encoding.
|
||||
*
|
||||
* @return the default charset name
|
||||
* @since 1.7
|
||||
*/
|
||||
public Charset getCharset() {
|
||||
return this.charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the default charset name used for string decoding and encoding.
|
||||
*
|
||||
* @return the default charset name
|
||||
*/
|
||||
public String getDefaultCharset() {
|
||||
return this.charset.name();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,358 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.net;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.commons.codec.Charsets;
|
||||
import org.apache.commons.codec.DecoderException;
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringDecoder;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
|
||||
/**
|
||||
* Similar to the Quoted-Printable content-transfer-encoding defined in
|
||||
* <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII
|
||||
* characters to be decipherable on an ASCII terminal without decoding.
|
||||
* <p>
|
||||
* <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
|
||||
* text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message
|
||||
* handling software.
|
||||
* <p>
|
||||
* This class is conditionally thread-safe.
|
||||
* The instance field {@link #encodeBlanks} is mutable {@link #setEncodeBlanks(boolean)}
|
||||
* but is not volatile, and accesses are not synchronised.
|
||||
* If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronisation
|
||||
* is used to ensure safe publication of the value between threads, and must not invoke
|
||||
* {@link #setEncodeBlanks(boolean)} after initial setup.
|
||||
*
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
|
||||
* Header Extensions for Non-ASCII Text</a>
|
||||
*
|
||||
* @since 1.3
|
||||
* @version $Id$
|
||||
*/
|
||||
public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
|
||||
/**
|
||||
* The default charset used for string decoding and encoding.
|
||||
*/
|
||||
private final Charset charset;
|
||||
|
||||
/**
|
||||
* BitSet of printable characters as defined in RFC 1522.
|
||||
*/
|
||||
private static final BitSet PRINTABLE_CHARS = new BitSet(256);
|
||||
// Static initializer for printable chars collection
|
||||
static {
|
||||
// alpha characters
|
||||
PRINTABLE_CHARS.set(' ');
|
||||
PRINTABLE_CHARS.set('!');
|
||||
PRINTABLE_CHARS.set('"');
|
||||
PRINTABLE_CHARS.set('#');
|
||||
PRINTABLE_CHARS.set('$');
|
||||
PRINTABLE_CHARS.set('%');
|
||||
PRINTABLE_CHARS.set('&');
|
||||
PRINTABLE_CHARS.set('\'');
|
||||
PRINTABLE_CHARS.set('(');
|
||||
PRINTABLE_CHARS.set(')');
|
||||
PRINTABLE_CHARS.set('*');
|
||||
PRINTABLE_CHARS.set('+');
|
||||
PRINTABLE_CHARS.set(',');
|
||||
PRINTABLE_CHARS.set('-');
|
||||
PRINTABLE_CHARS.set('.');
|
||||
PRINTABLE_CHARS.set('/');
|
||||
for (int i = '0'; i <= '9'; i++) {
|
||||
PRINTABLE_CHARS.set(i);
|
||||
}
|
||||
PRINTABLE_CHARS.set(':');
|
||||
PRINTABLE_CHARS.set(';');
|
||||
PRINTABLE_CHARS.set('<');
|
||||
PRINTABLE_CHARS.set('>');
|
||||
PRINTABLE_CHARS.set('@');
|
||||
for (int i = 'A'; i <= 'Z'; i++) {
|
||||
PRINTABLE_CHARS.set(i);
|
||||
}
|
||||
PRINTABLE_CHARS.set('[');
|
||||
PRINTABLE_CHARS.set('\\');
|
||||
PRINTABLE_CHARS.set(']');
|
||||
PRINTABLE_CHARS.set('^');
|
||||
PRINTABLE_CHARS.set('`');
|
||||
for (int i = 'a'; i <= 'z'; i++) {
|
||||
PRINTABLE_CHARS.set(i);
|
||||
}
|
||||
PRINTABLE_CHARS.set('{');
|
||||
PRINTABLE_CHARS.set('|');
|
||||
PRINTABLE_CHARS.set('}');
|
||||
PRINTABLE_CHARS.set('~');
|
||||
}
|
||||
|
||||
private static final byte BLANK = 32;
|
||||
|
||||
private static final byte UNDERSCORE = 95;
|
||||
|
||||
private boolean encodeBlanks = false;
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
*/
|
||||
public QCodec() {
|
||||
this(Charsets.UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor which allows for the selection of a default charset.
|
||||
*
|
||||
* @param charset
|
||||
* the default string charset to use.
|
||||
*
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
* @since 1.7
|
||||
*/
|
||||
public QCodec(final Charset charset) {
|
||||
super();
|
||||
this.charset = charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor which allows for the selection of a default charset.
|
||||
*
|
||||
* @param charsetName
|
||||
* the charset to use.
|
||||
* @throws java.nio.charset.UnsupportedCharsetException
|
||||
* If the named charset is unavailable
|
||||
* @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
*/
|
||||
public QCodec(final String charsetName) {
|
||||
this(Charset.forName(charsetName));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getEncoding() {
|
||||
return "Q";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected byte[] doEncoding(final byte[] bytes) {
|
||||
if (bytes == null) {
|
||||
return null;
|
||||
}
|
||||
final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
|
||||
if (this.encodeBlanks) {
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
if (data[i] == BLANK) {
|
||||
data[i] = UNDERSCORE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected byte[] doDecoding(final byte[] bytes) throws DecoderException {
|
||||
if (bytes == null) {
|
||||
return null;
|
||||
}
|
||||
boolean hasUnderscores = false;
|
||||
for (final byte b : bytes) {
|
||||
if (b == UNDERSCORE) {
|
||||
hasUnderscores = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hasUnderscores) {
|
||||
final byte[] tmp = new byte[bytes.length];
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
final byte b = bytes[i];
|
||||
if (b != UNDERSCORE) {
|
||||
tmp[i] = b;
|
||||
} else {
|
||||
tmp[i] = BLANK;
|
||||
}
|
||||
}
|
||||
return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
|
||||
}
|
||||
return QuotedPrintableCodec.decodeQuotedPrintable(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param str
|
||||
* string to convert to quoted-printable form
|
||||
* @param charset
|
||||
* the charset for str
|
||||
* @return quoted-printable string
|
||||
* @throws EncoderException
|
||||
* thrown if a failure condition is encountered during the encoding process.
|
||||
* @since 1.7
|
||||
*/
|
||||
public String encode(final String str, final Charset charset) throws EncoderException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
return encodeText(str, charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param str
|
||||
* string to convert to quoted-printable form
|
||||
* @param charset
|
||||
* the charset for str
|
||||
* @return quoted-printable string
|
||||
* @throws EncoderException
|
||||
* thrown if a failure condition is encountered during the encoding process.
|
||||
*/
|
||||
public String encode(final String str, final String charset) throws EncoderException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return encodeText(str, charset);
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw new EncoderException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its quoted-printable form using the default charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param str
|
||||
* string to convert to quoted-printable form
|
||||
* @return quoted-printable string
|
||||
* @throws EncoderException
|
||||
* thrown if a failure condition is encountered during the encoding process.
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String str) throws EncoderException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
return encode(str, getCharset());
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original
|
||||
* representation.
|
||||
*
|
||||
* @param str
|
||||
* quoted-printable string to convert into its original form
|
||||
* @return original string
|
||||
* @throws DecoderException
|
||||
* A decoder exception is thrown if a failure condition is encountered during the decode process.
|
||||
*/
|
||||
@Override
|
||||
public String decode(final String str) throws DecoderException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return decodeText(str);
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw new DecoderException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an object into its quoted-printable form using the default charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param obj
|
||||
* object to convert to quoted-printable form
|
||||
* @return quoted-printable object
|
||||
* @throws EncoderException
|
||||
* thrown if a failure condition is encountered during the encoding process.
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (obj == null) {
|
||||
return null;
|
||||
} else if (obj instanceof String) {
|
||||
return encode((String) obj);
|
||||
} else {
|
||||
throw new EncoderException("Objects of type " +
|
||||
obj.getClass().getName() +
|
||||
" cannot be encoded using Q codec");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
|
||||
* representation.
|
||||
*
|
||||
* @param obj
|
||||
* quoted-printable object to convert into its original form
|
||||
* @return original object
|
||||
* @throws DecoderException
|
||||
* Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered
|
||||
* during the decode process.
|
||||
*/
|
||||
@Override
|
||||
public Object decode(final Object obj) throws DecoderException {
|
||||
if (obj == null) {
|
||||
return null;
|
||||
} else if (obj instanceof String) {
|
||||
return decode((String) obj);
|
||||
} else {
|
||||
throw new DecoderException("Objects of type " +
|
||||
obj.getClass().getName() +
|
||||
" cannot be decoded using Q codec");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the default charset name used for string decoding and encoding.
|
||||
*
|
||||
* @return the default charset name
|
||||
* @since 1.7
|
||||
*/
|
||||
public Charset getCharset() {
|
||||
return this.charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the default charset name used for string decoding and encoding.
|
||||
*
|
||||
* @return the default charset name
|
||||
*/
|
||||
public String getDefaultCharset() {
|
||||
return this.charset.name();
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests if optional transformation of SPACE characters is to be used
|
||||
*
|
||||
* @return <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
|
||||
*/
|
||||
public boolean isEncodeBlanks() {
|
||||
return this.encodeBlanks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines whether optional transformation of SPACE characters is to be used
|
||||
*
|
||||
* @param b
|
||||
* <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
|
||||
*/
|
||||
public void setEncodeBlanks(final boolean b) {
|
||||
this.encodeBlanks = b;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,602 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.net;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.commons.codec.BinaryDecoder;
|
||||
import org.apache.commons.codec.BinaryEncoder;
|
||||
import org.apache.commons.codec.Charsets;
|
||||
import org.apache.commons.codec.DecoderException;
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringDecoder;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
import org.apache.commons.codec.binary.StringUtils;
|
||||
|
||||
/**
|
||||
* Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
|
||||
* <p>
|
||||
* The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
|
||||
* printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
|
||||
* unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
|
||||
* data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
|
||||
* to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
|
||||
* gateway.
|
||||
* <p>
|
||||
* Note:
|
||||
* <p>
|
||||
* Depending on the selected {@code strict} parameter, this class will implement a different set of rules of the
|
||||
* quoted-printable spec:
|
||||
* <ul>
|
||||
* <li>{@code strict=false}: only rules #1 and #2 are implemented
|
||||
* <li>{@code strict=true}: all rules #1 through #5 are implemented
|
||||
* </ul>
|
||||
* Originally, this class only supported the non-strict mode, but the codec in this partial form could already be used
|
||||
* for certain applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance
|
||||
* Q codec. The strict mode has been added in 1.10.
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
|
||||
* Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
|
||||
*
|
||||
* @since 1.3
|
||||
* @version $Id$
|
||||
*/
|
||||
public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
|
||||
/**
|
||||
* The default charset used for string decoding and encoding.
|
||||
*/
|
||||
private final Charset charset;
|
||||
|
||||
/**
|
||||
* Indicates whether soft line breaks shall be used during encoding (rule #3-5).
|
||||
*/
|
||||
private final boolean strict;
|
||||
|
||||
/**
|
||||
* BitSet of printable characters as defined in RFC 1521.
|
||||
*/
|
||||
private static final BitSet PRINTABLE_CHARS = new BitSet(256);
|
||||
|
||||
private static final byte ESCAPE_CHAR = '=';
|
||||
|
||||
private static final byte TAB = 9;
|
||||
|
||||
private static final byte SPACE = 32;
|
||||
|
||||
private static final byte CR = 13;
|
||||
|
||||
private static final byte LF = 10;
|
||||
|
||||
/**
|
||||
* Safe line length for quoted printable encoded text.
|
||||
*/
|
||||
private static final int SAFE_LENGTH = 73;
|
||||
|
||||
// Static initializer for printable chars collection
|
||||
static {
|
||||
// alpha characters
|
||||
for (int i = 33; i <= 60; i++) {
|
||||
PRINTABLE_CHARS.set(i);
|
||||
}
|
||||
for (int i = 62; i <= 126; i++) {
|
||||
PRINTABLE_CHARS.set(i);
|
||||
}
|
||||
PRINTABLE_CHARS.set(TAB);
|
||||
PRINTABLE_CHARS.set(SPACE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor, assumes default charset of {@link Charsets#UTF_8}
|
||||
*/
|
||||
public QuotedPrintableCodec() {
|
||||
this(Charsets.UTF_8, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor which allows for the selection of the strict mode.
|
||||
*
|
||||
* @param strict
|
||||
* if {@code true}, soft line breaks will be used
|
||||
* @since 1.10
|
||||
*/
|
||||
public QuotedPrintableCodec(final boolean strict) {
|
||||
this(Charsets.UTF_8, strict);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor which allows for the selection of a default charset.
|
||||
*
|
||||
* @param charset
|
||||
* the default string charset to use.
|
||||
* @since 1.7
|
||||
*/
|
||||
public QuotedPrintableCodec(final Charset charset) {
|
||||
this(charset, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor which allows for the selection of a default charset and strict mode.
|
||||
*
|
||||
* @param charset
|
||||
* the default string charset to use.
|
||||
* @param strict
|
||||
* if {@code true}, soft line breaks will be used
|
||||
* @since 1.10
|
||||
*/
|
||||
public QuotedPrintableCodec(final Charset charset, final boolean strict) {
|
||||
this.charset = charset;
|
||||
this.strict = strict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor which allows for the selection of a default charset.
|
||||
*
|
||||
* @param charsetName
|
||||
* the default string charset to use.
|
||||
* @throws UnsupportedCharsetException
|
||||
* If no support for the named charset is available
|
||||
* in this instance of the Java virtual machine
|
||||
* @throws IllegalArgumentException
|
||||
* If the given charsetName is null
|
||||
* @throws IllegalCharsetNameException
|
||||
* If the given charset name is illegal
|
||||
*
|
||||
* @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
|
||||
*/
|
||||
public QuotedPrintableCodec(final String charsetName)
|
||||
throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException {
|
||||
this(Charset.forName(charsetName), false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes byte into its quoted-printable representation.
|
||||
*
|
||||
* @param b
|
||||
* byte to encode
|
||||
* @param buffer
|
||||
* the buffer to write to
|
||||
* @return The number of bytes written to the <code>buffer</code>
|
||||
*/
|
||||
private static final int encodeQuotedPrintable(final int b, final ByteArrayOutputStream buffer) {
|
||||
buffer.write(ESCAPE_CHAR);
|
||||
final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
|
||||
final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
|
||||
buffer.write(hex1);
|
||||
buffer.write(hex2);
|
||||
return 3;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the byte at position <code>index</code> of the byte array and
|
||||
* make sure it is unsigned.
|
||||
*
|
||||
* @param index
|
||||
* position in the array
|
||||
* @param bytes
|
||||
* the byte array
|
||||
* @return the unsigned octet at position <code>index</code> from the array
|
||||
*/
|
||||
private static int getUnsignedOctet(final int index, final byte[] bytes) {
|
||||
int b = bytes[index];
|
||||
if (b < 0) {
|
||||
b = 256 + b;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a byte to the buffer.
|
||||
*
|
||||
* @param b
|
||||
* byte to write
|
||||
* @param encode
|
||||
* indicates whether the octet shall be encoded
|
||||
* @param buffer
|
||||
* the buffer to write to
|
||||
* @return the number of bytes that have been written to the buffer
|
||||
*/
|
||||
private static int encodeByte(final int b, final boolean encode,
|
||||
final ByteArrayOutputStream buffer) {
|
||||
if (encode) {
|
||||
return encodeQuotedPrintable(b, buffer);
|
||||
} else {
|
||||
buffer.write(b);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether the given byte is whitespace.
|
||||
*
|
||||
* @param b
|
||||
* byte to be checked
|
||||
* @return <code>true</code> if the byte is either a space or tab character
|
||||
*/
|
||||
private static boolean isWhitespace(final int b) {
|
||||
return b == SPACE || b == TAB;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
|
||||
* <p>
|
||||
* This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
|
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text.
|
||||
*
|
||||
* @param printable
|
||||
* bitset of characters deemed quoted-printable
|
||||
* @param bytes
|
||||
* array of bytes to be encoded
|
||||
* @return array of bytes containing quoted-printable data
|
||||
*/
|
||||
public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes) {
|
||||
return encodeQuotedPrintable(printable, bytes, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
|
||||
* <p>
|
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
|
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
|
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text.
|
||||
*
|
||||
* @param printable
|
||||
* bitset of characters deemed quoted-printable
|
||||
* @param bytes
|
||||
* array of bytes to be encoded
|
||||
* @param strict
|
||||
* if {@code true} the full ruleset is used, otherwise only rule #1 and rule #2
|
||||
* @return array of bytes containing quoted-printable data
|
||||
* @since 1.10
|
||||
*/
|
||||
public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes, boolean strict) {
|
||||
if (bytes == null) {
|
||||
return null;
|
||||
}
|
||||
if (printable == null) {
|
||||
printable = PRINTABLE_CHARS;
|
||||
}
|
||||
final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
|
||||
|
||||
if (strict) {
|
||||
int pos = 1;
|
||||
// encode up to buffer.length - 3, the last three octets will be treated
|
||||
// separately for simplification of note #3
|
||||
for (int i = 0; i < bytes.length - 3; i++) {
|
||||
int b = getUnsignedOctet(i, bytes);
|
||||
if (pos < SAFE_LENGTH) {
|
||||
// up to this length it is safe to add any byte, encoded or not
|
||||
pos += encodeByte(b, !printable.get(b), buffer);
|
||||
} else {
|
||||
// rule #3: whitespace at the end of a line *must* be encoded
|
||||
encodeByte(b, !printable.get(b) || isWhitespace(b), buffer);
|
||||
|
||||
// rule #5: soft line break
|
||||
buffer.write(ESCAPE_CHAR);
|
||||
buffer.write(CR);
|
||||
buffer.write(LF);
|
||||
pos = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// rule #3: whitespace at the end of a line *must* be encoded
|
||||
// if we would do a soft break line after this octet, encode whitespace
|
||||
int b = getUnsignedOctet(bytes.length - 3, bytes);
|
||||
boolean encode = !printable.get(b) || (isWhitespace(b) && pos > SAFE_LENGTH - 5);
|
||||
pos += encodeByte(b, encode, buffer);
|
||||
|
||||
// note #3: '=' *must not* be the ultimate or penultimate character
|
||||
// simplification: if < 6 bytes left, do a soft line break as we may need
|
||||
// exactly 6 bytes space for the last 2 bytes
|
||||
if (pos > SAFE_LENGTH - 2) {
|
||||
buffer.write(ESCAPE_CHAR);
|
||||
buffer.write(CR);
|
||||
buffer.write(LF);
|
||||
}
|
||||
for (int i = bytes.length - 2; i < bytes.length; i++) {
|
||||
b = getUnsignedOctet(i, bytes);
|
||||
// rule #3: trailing whitespace shall be encoded
|
||||
encode = !printable.get(b) || (i > bytes.length - 2 && isWhitespace(b));
|
||||
encodeByte(b, encode, buffer);
|
||||
}
|
||||
} else {
|
||||
for (final byte c : bytes) {
|
||||
int b = c;
|
||||
if (b < 0) {
|
||||
b = 256 + b;
|
||||
}
|
||||
if (printable.get(b)) {
|
||||
buffer.write(b);
|
||||
} else {
|
||||
encodeQuotedPrintable(b, buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
return buffer.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
|
||||
* back to their original representation.
|
||||
* <p>
|
||||
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as
|
||||
* defined in RFC 1521.
|
||||
*
|
||||
* @param bytes
|
||||
* array of quoted-printable characters
|
||||
* @return array of original bytes
|
||||
* @throws DecoderException
|
||||
* Thrown if quoted-printable decoding is unsuccessful
|
||||
*/
|
||||
public static final byte[] decodeQuotedPrintable(final byte[] bytes) throws DecoderException {
|
||||
if (bytes == null) {
|
||||
return null;
|
||||
}
|
||||
final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
final int b = bytes[i];
|
||||
if (b == ESCAPE_CHAR) {
|
||||
try {
|
||||
// if the next octet is a CR we have found a soft line break
|
||||
if (bytes[++i] == CR) {
|
||||
continue;
|
||||
}
|
||||
final int u = Utils.digit16(bytes[i]);
|
||||
final int l = Utils.digit16(bytes[++i]);
|
||||
buffer.write((char) ((u << 4) + l));
|
||||
} catch (final ArrayIndexOutOfBoundsException e) {
|
||||
throw new DecoderException("Invalid quoted-printable encoding", e);
|
||||
}
|
||||
} else if (b != CR && b != LF) {
|
||||
// every other octet is appended except for CR & LF
|
||||
buffer.write(b);
|
||||
}
|
||||
}
|
||||
return buffer.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
|
||||
* <p>
|
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
|
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
|
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text.
|
||||
*
|
||||
* @param bytes
|
||||
* array of bytes to be encoded
|
||||
* @return array of bytes containing quoted-printable data
|
||||
*/
|
||||
@Override
|
||||
public byte[] encode(final byte[] bytes) {
|
||||
return encodeQuotedPrintable(PRINTABLE_CHARS, bytes, strict);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
|
||||
* back to their original representation.
|
||||
* <p>
|
||||
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as
|
||||
* defined in RFC 1521.
|
||||
*
|
||||
* @param bytes
|
||||
* array of quoted-printable characters
|
||||
* @return array of original bytes
|
||||
* @throws DecoderException
|
||||
* Thrown if quoted-printable decoding is unsuccessful
|
||||
*/
|
||||
@Override
|
||||
public byte[] decode(final byte[] bytes) throws DecoderException {
|
||||
return decodeQuotedPrintable(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
|
||||
* <p>
|
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
|
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
|
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text.
|
||||
*
|
||||
* @param str
|
||||
* string to convert to quoted-printable form
|
||||
* @return quoted-printable string
|
||||
* @throws EncoderException
|
||||
* Thrown if quoted-printable encoding is unsuccessful
|
||||
*
|
||||
* @see #getCharset()
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String str) throws EncoderException {
|
||||
return this.encode(str, getCharset());
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
|
||||
* are converted back to their original representation.
|
||||
*
|
||||
* @param str
|
||||
* quoted-printable string to convert into its original form
|
||||
* @param charset
|
||||
* the original string charset
|
||||
* @return original string
|
||||
* @throws DecoderException
|
||||
* Thrown if quoted-printable decoding is unsuccessful
|
||||
* @since 1.7
|
||||
*/
|
||||
public String decode(final String str, final Charset charset) throws DecoderException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
|
||||
* are converted back to their original representation.
|
||||
*
|
||||
* @param str
|
||||
* quoted-printable string to convert into its original form
|
||||
* @param charset
|
||||
* the original string charset
|
||||
* @return original string
|
||||
* @throws DecoderException
|
||||
* Thrown if quoted-printable decoding is unsuccessful
|
||||
* @throws UnsupportedEncodingException
|
||||
* Thrown if charset is not supported
|
||||
*/
|
||||
public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
|
||||
* converted back to their original representation.
|
||||
*
|
||||
* @param str
|
||||
* quoted-printable string to convert into its original form
|
||||
* @return original string
|
||||
* @throws DecoderException
|
||||
* Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported.
|
||||
* @see #getCharset()
|
||||
*/
|
||||
@Override
|
||||
public String decode(final String str) throws DecoderException {
|
||||
return this.decode(str, this.getCharset());
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
|
||||
*
|
||||
* @param obj
|
||||
* string to convert to a quoted-printable form
|
||||
* @return quoted-printable object
|
||||
* @throws EncoderException
|
||||
* Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
|
||||
* unsuccessful
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (obj == null) {
|
||||
return null;
|
||||
} else if (obj instanceof byte[]) {
|
||||
return encode((byte[]) obj);
|
||||
} else if (obj instanceof String) {
|
||||
return encode((String) obj);
|
||||
} else {
|
||||
throw new EncoderException("Objects of type " +
|
||||
obj.getClass().getName() +
|
||||
" cannot be quoted-printable encoded");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
|
||||
* representation.
|
||||
*
|
||||
* @param obj
|
||||
* quoted-printable object to convert into its original form
|
||||
* @return original object
|
||||
* @throws DecoderException
|
||||
* Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
|
||||
* condition is encountered during the decode process.
|
||||
*/
|
||||
@Override
|
||||
public Object decode(final Object obj) throws DecoderException {
|
||||
if (obj == null) {
|
||||
return null;
|
||||
} else if (obj instanceof byte[]) {
|
||||
return decode((byte[]) obj);
|
||||
} else if (obj instanceof String) {
|
||||
return decode((String) obj);
|
||||
} else {
|
||||
throw new DecoderException("Objects of type " +
|
||||
obj.getClass().getName() +
|
||||
" cannot be quoted-printable decoded");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the default charset name used for string decoding and encoding.
|
||||
*
|
||||
* @return the default charset name
|
||||
* @since 1.7
|
||||
*/
|
||||
public Charset getCharset() {
|
||||
return this.charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the default charset name used for string decoding and encoding.
|
||||
*
|
||||
* @return the default charset name
|
||||
*/
|
||||
public String getDefaultCharset() {
|
||||
return this.charset.name();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
|
||||
* <p>
|
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
|
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
|
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text.
|
||||
*
|
||||
* @param str
|
||||
* string to convert to quoted-printable form
|
||||
* @param charset
|
||||
* the charset for str
|
||||
* @return quoted-printable string
|
||||
* @since 1.7
|
||||
*/
|
||||
public String encode(final String str, final Charset charset) {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
|
||||
* <p>
|
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
|
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
|
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text.
|
||||
*
|
||||
* @param str
|
||||
* string to convert to quoted-printable form
|
||||
* @param charset
|
||||
* the charset for str
|
||||
* @return quoted-printable string
|
||||
* @throws UnsupportedEncodingException
|
||||
* Thrown if the charset is not supported
|
||||
*/
|
||||
public String encode(final String str, final String charset) throws UnsupportedEncodingException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,186 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.net;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.commons.codec.DecoderException;
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.binary.StringUtils;
|
||||
|
||||
/**
|
||||
* Implements methods common to all codecs defined in RFC 1522.
|
||||
* <p>
|
||||
* <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
|
||||
* encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
|
||||
* is unlikely to confuse existing message handling software.
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
|
||||
* Message Header Extensions for Non-ASCII Text</a>
|
||||
*
|
||||
* @since 1.3
|
||||
* @version $Id$
|
||||
*/
|
||||
abstract class RFC1522Codec {
|
||||
|
||||
/** Separator. */
|
||||
protected static final char SEP = '?';
|
||||
|
||||
/** Prefix. */
|
||||
protected static final String POSTFIX = "?=";
|
||||
|
||||
/** Postfix. */
|
||||
protected static final String PREFIX = "=?";
|
||||
|
||||
/**
|
||||
* Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
|
||||
* <p>
|
||||
* This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
|
||||
* {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
|
||||
*
|
||||
* @param text
|
||||
* a string to encode
|
||||
* @param charset
|
||||
* a charset to be used
|
||||
* @return RFC 1522 compliant "encoded-word"
|
||||
* @throws EncoderException
|
||||
* thrown if there is an error condition during the Encoding process.
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
*/
|
||||
protected String encodeText(final String text, final Charset charset) throws EncoderException {
|
||||
if (text == null) {
|
||||
return null;
|
||||
}
|
||||
final StringBuilder buffer = new StringBuilder();
|
||||
buffer.append(PREFIX);
|
||||
buffer.append(charset);
|
||||
buffer.append(SEP);
|
||||
buffer.append(this.getEncoding());
|
||||
buffer.append(SEP);
|
||||
final byte [] rawData = this.doEncoding(text.getBytes(charset));
|
||||
buffer.append(StringUtils.newStringUsAscii(rawData));
|
||||
buffer.append(POSTFIX);
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
|
||||
* <p>
|
||||
* This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
|
||||
* {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
|
||||
*
|
||||
* @param text
|
||||
* a string to encode
|
||||
* @param charsetName
|
||||
* the charset to use
|
||||
* @return RFC 1522 compliant "encoded-word"
|
||||
* @throws EncoderException
|
||||
* thrown if there is an error condition during the Encoding process.
|
||||
* @throws UnsupportedEncodingException
|
||||
* if charset is not available
|
||||
*
|
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
|
||||
*/
|
||||
protected String encodeText(final String text, final String charsetName)
|
||||
throws EncoderException, UnsupportedEncodingException {
|
||||
if (text == null) {
|
||||
return null;
|
||||
}
|
||||
return this.encodeText(text, Charset.forName(charsetName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies an RFC 1522 compliant decoding scheme to the given string of text.
|
||||
* <p>
|
||||
* This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
|
||||
* {@link #doEncoding(byte [])} method of a concrete class to perform the specific decoding.
|
||||
*
|
||||
* @param text
|
||||
* a string to decode
|
||||
* @return A new decoded String or <code>null</code> if the input is <code>null</code>.
|
||||
* @throws DecoderException
|
||||
* thrown if there is an error condition during the decoding process.
|
||||
* @throws UnsupportedEncodingException
|
||||
* thrown if charset specified in the "encoded-word" header is not supported
|
||||
*/
|
||||
protected String decodeText(final String text)
|
||||
throws DecoderException, UnsupportedEncodingException {
|
||||
if (text == null) {
|
||||
return null;
|
||||
}
|
||||
if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
|
||||
throw new DecoderException("RFC 1522 violation: malformed encoded content");
|
||||
}
|
||||
final int terminator = text.length() - 2;
|
||||
int from = 2;
|
||||
int to = text.indexOf(SEP, from);
|
||||
if (to == terminator) {
|
||||
throw new DecoderException("RFC 1522 violation: charset token not found");
|
||||
}
|
||||
final String charset = text.substring(from, to);
|
||||
if (charset.equals("")) {
|
||||
throw new DecoderException("RFC 1522 violation: charset not specified");
|
||||
}
|
||||
from = to + 1;
|
||||
to = text.indexOf(SEP, from);
|
||||
if (to == terminator) {
|
||||
throw new DecoderException("RFC 1522 violation: encoding token not found");
|
||||
}
|
||||
final String encoding = text.substring(from, to);
|
||||
if (!getEncoding().equalsIgnoreCase(encoding)) {
|
||||
throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
|
||||
}
|
||||
from = to + 1;
|
||||
to = text.indexOf(SEP, from);
|
||||
byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
|
||||
data = doDecoding(data);
|
||||
return new String(data, charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the codec name (referred to as encoding in the RFC 1522).
|
||||
*
|
||||
* @return name of the codec
|
||||
*/
|
||||
protected abstract String getEncoding();
|
||||
|
||||
/**
|
||||
* Encodes an array of bytes using the defined encoding scheme.
|
||||
*
|
||||
* @param bytes
|
||||
* Data to be encoded
|
||||
* @return A byte array containing the encoded data
|
||||
* @throws EncoderException
|
||||
* thrown if the Encoder encounters a failure condition during the encoding process.
|
||||
*/
|
||||
protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
|
||||
|
||||
/**
|
||||
* Decodes an array of bytes using the defined encoding scheme.
|
||||
*
|
||||
* @param bytes
|
||||
* Data to be decoded
|
||||
* @return a byte array that contains decoded data
|
||||
* @throws DecoderException
|
||||
* A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
|
||||
*/
|
||||
protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
|
||||
}
|
|
@ -0,0 +1,368 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.net;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.commons.codec.BinaryDecoder;
|
||||
import org.apache.commons.codec.BinaryEncoder;
|
||||
import org.apache.commons.codec.CharEncoding;
|
||||
import org.apache.commons.codec.DecoderException;
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringDecoder;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
import org.apache.commons.codec.binary.StringUtils;
|
||||
|
||||
/**
|
||||
* Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
|
||||
* <p>
|
||||
* This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and
|
||||
* {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below
|
||||
* 1.4 rely on the platform's default charset encoding.
|
||||
* <p>
|
||||
* This class is immutable and thread-safe.
|
||||
*
|
||||
* @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a>
|
||||
* of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a>
|
||||
*
|
||||
* @since 1.2
|
||||
* @version $Id$
|
||||
*/
|
||||
public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
|
||||
|
||||
/**
|
||||
* Radix used in encoding and decoding.
|
||||
*/
|
||||
static final int RADIX = 16;
|
||||
|
||||
/**
|
||||
* The default charset used for string decoding and encoding.
|
||||
*
|
||||
* @deprecated TODO: This field will be changed to a private final Charset in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
protected String charset;
|
||||
|
||||
/**
|
||||
* Release 1.5 made this field final.
|
||||
*/
|
||||
protected static final byte ESCAPE_CHAR = '%';
|
||||
/**
|
||||
* BitSet of www-form-url safe characters.
|
||||
*/
|
||||
protected static final BitSet WWW_FORM_URL = new BitSet(256);
|
||||
|
||||
// Static initializer for www_form_url
|
||||
static {
|
||||
// alpha characters
|
||||
for (int i = 'a'; i <= 'z'; i++) {
|
||||
WWW_FORM_URL.set(i);
|
||||
}
|
||||
for (int i = 'A'; i <= 'Z'; i++) {
|
||||
WWW_FORM_URL.set(i);
|
||||
}
|
||||
// numeric characters
|
||||
for (int i = '0'; i <= '9'; i++) {
|
||||
WWW_FORM_URL.set(i);
|
||||
}
|
||||
// special chars
|
||||
WWW_FORM_URL.set('-');
|
||||
WWW_FORM_URL.set('_');
|
||||
WWW_FORM_URL.set('.');
|
||||
WWW_FORM_URL.set('*');
|
||||
// blank to be replaced with +
|
||||
WWW_FORM_URL.set(' ');
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
*/
|
||||
public URLCodec() {
|
||||
this(CharEncoding.UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor which allows for the selection of a default charset.
|
||||
*
|
||||
* @param charset the default string charset to use.
|
||||
*/
|
||||
public URLCodec(final String charset) {
|
||||
super();
|
||||
this.charset = charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
|
||||
*
|
||||
* @param urlsafe
|
||||
* bitset of characters deemed URL safe
|
||||
* @param bytes
|
||||
* array of bytes to convert to URL safe characters
|
||||
* @return array of bytes containing URL safe characters
|
||||
*/
|
||||
public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) {
|
||||
if (bytes == null) {
|
||||
return null;
|
||||
}
|
||||
if (urlsafe == null) {
|
||||
urlsafe = WWW_FORM_URL;
|
||||
}
|
||||
|
||||
final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
|
||||
for (final byte c : bytes) {
|
||||
int b = c;
|
||||
if (b < 0) {
|
||||
b = 256 + b;
|
||||
}
|
||||
if (urlsafe.get(b)) {
|
||||
if (b == ' ') {
|
||||
b = '+';
|
||||
}
|
||||
buffer.write(b);
|
||||
} else {
|
||||
buffer.write(ESCAPE_CHAR);
|
||||
final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
|
||||
final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
|
||||
buffer.write(hex1);
|
||||
buffer.write(hex2);
|
||||
}
|
||||
}
|
||||
return buffer.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
|
||||
* back to their original representation.
|
||||
*
|
||||
* @param bytes
|
||||
* array of URL safe characters
|
||||
* @return array of original bytes
|
||||
* @throws DecoderException
|
||||
* Thrown if URL decoding is unsuccessful
|
||||
*/
|
||||
public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException {
|
||||
if (bytes == null) {
|
||||
return null;
|
||||
}
|
||||
final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
final int b = bytes[i];
|
||||
if (b == '+') {
|
||||
buffer.write(' ');
|
||||
} else if (b == ESCAPE_CHAR) {
|
||||
try {
|
||||
final int u = Utils.digit16(bytes[++i]);
|
||||
final int l = Utils.digit16(bytes[++i]);
|
||||
buffer.write((char) ((u << 4) + l));
|
||||
} catch (final ArrayIndexOutOfBoundsException e) {
|
||||
throw new DecoderException("Invalid URL encoding: ", e);
|
||||
}
|
||||
} else {
|
||||
buffer.write(b);
|
||||
}
|
||||
}
|
||||
return buffer.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
|
||||
*
|
||||
* @param bytes
|
||||
* array of bytes to convert to URL safe characters
|
||||
* @return array of bytes containing URL safe characters
|
||||
*/
|
||||
@Override
|
||||
public byte[] encode(final byte[] bytes) {
|
||||
return encodeUrl(WWW_FORM_URL, bytes);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
|
||||
* back to their original representation.
|
||||
*
|
||||
* @param bytes
|
||||
* array of URL safe characters
|
||||
* @return array of original bytes
|
||||
* @throws DecoderException
|
||||
* Thrown if URL decoding is unsuccessful
|
||||
*/
|
||||
@Override
|
||||
public byte[] decode(final byte[] bytes) throws DecoderException {
|
||||
return decodeUrl(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param str
|
||||
* string to convert to a URL safe form
|
||||
* @param charset
|
||||
* the charset for str
|
||||
* @return URL safe string
|
||||
* @throws UnsupportedEncodingException
|
||||
* Thrown if charset is not supported
|
||||
*/
|
||||
public String encode(final String str, final String charset) throws UnsupportedEncodingException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped.
|
||||
*
|
||||
* @param str
|
||||
* string to convert to a URL safe form
|
||||
* @return URL safe string
|
||||
* @throws EncoderException
|
||||
* Thrown if URL encoding is unsuccessful
|
||||
*
|
||||
* @see #getDefaultCharset()
|
||||
*/
|
||||
@Override
|
||||
public String encode(final String str) throws EncoderException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return encode(str, getDefaultCharset());
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw new EncoderException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted
|
||||
* back to their original representation.
|
||||
*
|
||||
* @param str
|
||||
* URL safe string to convert into its original form
|
||||
* @param charset
|
||||
* the original string charset
|
||||
* @return original string
|
||||
* @throws DecoderException
|
||||
* Thrown if URL decoding is unsuccessful
|
||||
* @throws UnsupportedEncodingException
|
||||
* Thrown if charset is not supported
|
||||
*/
|
||||
public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a URL safe string into its original form using the default string charset. Escaped characters are
|
||||
* converted back to their original representation.
|
||||
*
|
||||
* @param str
|
||||
* URL safe string to convert into its original form
|
||||
* @return original string
|
||||
* @throws DecoderException
|
||||
* Thrown if URL decoding is unsuccessful
|
||||
* @see #getDefaultCharset()
|
||||
*/
|
||||
@Override
|
||||
public String decode(final String str) throws DecoderException {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return decode(str, getDefaultCharset());
|
||||
} catch (final UnsupportedEncodingException e) {
|
||||
throw new DecoderException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes an object into its URL safe form. Unsafe characters are escaped.
|
||||
*
|
||||
* @param obj
|
||||
* string to convert to a URL safe form
|
||||
* @return URL safe object
|
||||
* @throws EncoderException
|
||||
* Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful
|
||||
*/
|
||||
@Override
|
||||
public Object encode(final Object obj) throws EncoderException {
|
||||
if (obj == null) {
|
||||
return null;
|
||||
} else if (obj instanceof byte[]) {
|
||||
return encode((byte[])obj);
|
||||
} else if (obj instanceof String) {
|
||||
return encode((String)obj);
|
||||
} else {
|
||||
throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded");
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a URL safe object into its original form. Escaped characters are converted back to their original
|
||||
* representation.
|
||||
*
|
||||
* @param obj
|
||||
* URL safe object to convert into its original form
|
||||
* @return original object
|
||||
* @throws DecoderException
|
||||
* Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
|
||||
* condition is encountered during the decode process.
|
||||
*/
|
||||
@Override
|
||||
public Object decode(final Object obj) throws DecoderException {
|
||||
if (obj == null) {
|
||||
return null;
|
||||
} else if (obj instanceof byte[]) {
|
||||
return decode((byte[]) obj);
|
||||
} else if (obj instanceof String) {
|
||||
return decode((String) obj);
|
||||
} else {
|
||||
throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded");
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The default charset used for string decoding and encoding.
|
||||
*
|
||||
* @return the default string charset.
|
||||
*/
|
||||
public String getDefaultCharset() {
|
||||
return this.charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* The <code>String</code> encoding used for decoding and encoding.
|
||||
*
|
||||
* @return Returns the encoding.
|
||||
*
|
||||
* @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
public String getEncoding() {
|
||||
return this.charset;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.codec.net;
|
||||
|
||||
import org.apache.commons.codec.DecoderException;
|
||||
|
||||
/**
|
||||
* Utility methods for this package.
|
||||
*
|
||||
* <p>This class is immutable and thread-safe.</p>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since 1.4
|
||||
*/
|
||||
class Utils {
|
||||
|
||||
/**
|
||||
* Returns the numeric value of the character <code>b</code> in radix 16.
|
||||
*
|
||||
* @param b
|
||||
* The byte to be converted.
|
||||
* @return The numeric value represented by the character in radix 16.
|
||||
*
|
||||
* @throws DecoderException
|
||||
* Thrown when the byte is not valid per {@link Character#digit(char,int)}
|
||||
*/
|
||||
static int digit16(final byte b) throws DecoderException {
|
||||
final int i = Character.digit((char) b, URLCodec.RADIX);
|
||||
if (i == -1) {
|
||||
throw new DecoderException("Invalid URL encoding: not a valid digit (radix " + URLCodec.RADIX + "): " + b);
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<body>
|
||||
<p>
|
||||
Network related encoding and decoding.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,29 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!-- $Id$ -->
|
||||
<html>
|
||||
<body>
|
||||
<p>
|
||||
This document is the API specification for the Apache Commons Codec Library, version 1.3.
|
||||
</p>
|
||||
<p>
|
||||
This library requires a JRE version of 1.2.2 or greater.
|
||||
The hypertext links originating from this document point to Sun's version 1.3 API as the 1.2.2 API documentation
|
||||
is no longer on-line.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,100 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
</head>
|
||||
<body>
|
||||
<p>Interfaces and classes used by
|
||||
the various implementations in the sub-packages.</p>
|
||||
|
||||
<p>Definitive implementations of commonly used encoders and decoders.</p>
|
||||
|
||||
<p>Codec currently comprises a modest set of utilities and a
|
||||
simple framework for String encoding and decoding in three categories:
|
||||
Binary Encoders, Language Encoders, and Network Encoders. </p>
|
||||
|
||||
<h4><a name="Common Encoders">Binary Encoders</a></h4>
|
||||
|
||||
<table border="1" width="100%" cellspacing="2" cellpadding="3">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>
|
||||
<a href="binary/Base64.html">
|
||||
org.apache.commons.codec.binary.Base64</a>
|
||||
</td>
|
||||
<td>
|
||||
Provides Base64 content-transfer-encoding as defined in
|
||||
<a href="http://www.ietf.org/rfc/rfc2045.txt"> RFC 2045</a>
|
||||
</td>
|
||||
<td>Production</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<a href="binary/Hex.html">
|
||||
org.apache.commons.codec.binary.Hex</a>
|
||||
</td>
|
||||
<td>
|
||||
Converts an array of bytes into an array of characters
|
||||
representing the hexadecimal values of each byte in order
|
||||
</td>
|
||||
<td>Production</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h4>
|
||||
<a name="Language Encoders">Language Encoders</a>
|
||||
</h4>
|
||||
<p>
|
||||
Codec contains a number of commonly used language and phonetic
|
||||
encoders.
|
||||
</p>
|
||||
<table border="1" width="100%" cellspacing="2" cellpadding="3">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>
|
||||
<a href="#">org.apache.commons.codec.language.Soundex</a>
|
||||
</td>
|
||||
<td>Implementation of the Soundex algorithm.</td>
|
||||
<td>Production</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<a href="#">org.apache.commons.codec.language.Metaphone</a>
|
||||
</td>
|
||||
<td>Implementation of the Metaphone algorithm.</td>
|
||||
<td>Production</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h4><a name="Network_Encoders">Network Encoders</a></h4>
|
||||
<h4> </h4>
|
||||
<p>Codec contains network-related encoders.</p>
|
||||
<table border="1" width="100%" cellspacing="2" cellpadding="3">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>
|
||||
<a href="#">org.apache.commons.codec.net.URLCodec</a>
|
||||
</td>
|
||||
<td>Implements the 'www-form-urlencoded' encoding scheme.</td>
|
||||
<td>Production</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<br>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,594 @@
|
|||
/**
|
||||
* Copyright 2011 The Buzz Media, LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.imgscalr;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.BufferedImageOp;
|
||||
import java.awt.image.ImagingOpException;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.imgscalr.Scalr.Method;
|
||||
import org.imgscalr.Scalr.Mode;
|
||||
import org.imgscalr.Scalr.Rotation;
|
||||
|
||||
/**
|
||||
* Class used to provide the asynchronous versions of all the methods defined in
|
||||
* {@link Scalr} for the purpose of efficiently handling large amounts of image
|
||||
* operations via a select number of processing threads asynchronously.
|
||||
* <p/>
|
||||
* Given that image-scaling operations, especially when working with large
|
||||
* images, can be very hardware-intensive (both CPU and memory), in large-scale
|
||||
* deployments (e.g. a busy web application) it becomes increasingly important
|
||||
* that the scale operations performed by imgscalr be manageable so as not to
|
||||
* fire off too many simultaneous operations that the JVM's heap explodes and
|
||||
* runs out of memory or pegs the CPU on the host machine, starving all other
|
||||
* running processes.
|
||||
* <p/>
|
||||
* Up until now it was left to the caller to implement their own serialization
|
||||
* or limiting logic to handle these use-cases. Given imgscalr's popularity in
|
||||
* web applications it was determined that this requirement be common enough
|
||||
* that it should be integrated directly into the imgscalr library for everyone
|
||||
* to benefit from.
|
||||
* <p/>
|
||||
* Every method in this class wraps the matching methods in the {@link Scalr}
|
||||
* class in new {@link Callable} instances that are submitted to an internal
|
||||
* {@link ExecutorService} for execution at a later date. A {@link Future} is
|
||||
* returned to the caller representing the task that is either currently
|
||||
* performing the scale operation or will at a future date depending on where it
|
||||
* is in the {@link ExecutorService}'s queue. {@link Future#get()} or
|
||||
* {@link Future#get(long, TimeUnit)} can be used to block on the
|
||||
* <code>Future</code>, waiting for the scale operation to complete and return
|
||||
* the resultant {@link BufferedImage} to the caller.
|
||||
* <p/>
|
||||
* This design provides the following features:
|
||||
* <ul>
|
||||
* <li>Non-blocking, asynchronous scale operations that can continue execution
|
||||
* while waiting on the scaled result.</li>
|
||||
* <li>Serialize all scale requests down into a maximum number of
|
||||
* <em>simultaneous</em> scale operations with no additional/complex logic. The
|
||||
* number of simultaneous scale operations is caller-configurable (see
|
||||
* {@link #THREAD_COUNT}) so as best to optimize the host system (e.g. 1 scale
|
||||
* thread per core).</li>
|
||||
* <li>No need to worry about overloading the host system with too many scale
|
||||
* operations, they will simply queue up in this class and execute in-order.</li>
|
||||
* <li>Synchronous/blocking behavior can still be achieved (if desired) by
|
||||
* calling <code>get()</code> or <code>get(long, TimeUnit)</code> immediately on
|
||||
* the returned {@link Future} from any of the methods below.</li>
|
||||
* </ul>
|
||||
* <h3>Performance</h3>
|
||||
* When tuning this class for optimal performance, benchmarking your particular
|
||||
* hardware is the best approach. For some rough guidelines though, there are
|
||||
* two resources you want to watch closely:
|
||||
* <ol>
|
||||
* <li>JVM Heap Memory (Assume physical machine memory is always sufficiently
|
||||
* large)</li>
|
||||
* <li># of CPU Cores</li>
|
||||
* </ol>
|
||||
* You never want to allocate more scaling threads than you have CPU cores and
|
||||
* on a sufficiently busy host where some of the cores may be busy running a
|
||||
* database or a web server, you will want to allocate even fewer scaling
|
||||
* threads.
|
||||
* <p/>
|
||||
* So as a maximum you would never want more scaling threads than CPU cores in
|
||||
* any situation and less so on a busy server.
|
||||
* <p/>
|
||||
* If you allocate more threads than you have available CPU cores, your scaling
|
||||
* operations will slow down as the CPU will spend a considerable amount of time
|
||||
* context-switching between threads on the same core trying to finish all the
|
||||
* tasks in parallel. You might still be tempted to do this because of the I/O
|
||||
* delay some threads will encounter reading images off disk, but when you do
|
||||
* your own benchmarking you'll likely find (as I did) that the actual disk I/O
|
||||
* necessary to pull the image data off disk is a much smaller portion of the
|
||||
* execution time than the actual scaling operations.
|
||||
* <p/>
|
||||
* If you are executing on a storage medium that is unexpectedly slow and I/O is
|
||||
* a considerable portion of the scaling operation (e.g. S3 or EBS volumes),
|
||||
* feel free to try using more threads than CPU cores to see if that helps; but
|
||||
* in most normal cases, it will only slow down all other parallel scaling
|
||||
* operations.
|
||||
* <p/>
|
||||
* As for memory, every time an image is scaled it is decoded into a
|
||||
* {@link BufferedImage} and stored in the JVM Heap space (decoded image
|
||||
* instances are always larger than the source images on-disk). For larger
|
||||
* images, that can use up quite a bit of memory. You will need to benchmark
|
||||
* your particular use-cases on your hardware to get an idea of where the sweet
|
||||
* spot is for this; if you are operating within tight memory bounds, you may
|
||||
* want to limit simultaneous scaling operations to 1 or 2 regardless of the
|
||||
* number of cores just to avoid having too many {@link BufferedImage} instances
|
||||
* in JVM Heap space at the same time.
|
||||
* <p/>
|
||||
* These are rough metrics and behaviors to give you an idea of how best to tune
|
||||
* this class for your deployment, but nothing can replace writing a small
|
||||
* Java class that scales a handful of images in a number of different ways and
|
||||
* testing that directly on your deployment hardware.
|
||||
* <h3>Resource Overhead</h3>
|
||||
* The {@link ExecutorService} utilized by this class won't be initialized until
|
||||
* one of the operation methods is called, at which point the
|
||||
* <code>service</code> will be instantiated for the first time and operation
|
||||
* queued up.
|
||||
* <p/>
|
||||
* More specifically, if you have no need for asynchronous image processing
|
||||
* offered by this class, you don't need to worry about wasted resources or
|
||||
* hanging/idle threads as they will never be created if you never use this
|
||||
* class.
|
||||
* <h3>Cleaning up Service Threads</h3>
|
||||
* By default the {@link Thread}s created by the internal
|
||||
* {@link ThreadPoolExecutor} do not run in <code>daemon</code> mode; which
|
||||
* means they will block the host VM from exiting until they are explicitly shut
|
||||
* down in a client application; in a server application the container will shut
|
||||
* down the pool forcibly.
|
||||
* <p/>
|
||||
* If you have used the {@link AsyncScalr} class and are trying to shut down a
|
||||
* client application, you will need to call {@link #getService()} then
|
||||
* {@link ExecutorService#shutdown()} or {@link ExecutorService#shutdownNow()}
|
||||
* to have the threads terminated; you may also want to look at the
|
||||
* {@link ExecutorService#awaitTermination(long, TimeUnit)} method if you'd like
|
||||
* to more closely monitor the shutting down process (and finalization of
|
||||
* pending scale operations).
|
||||
* <h3>Reusing Shutdown AsyncScalr</h3>
|
||||
* If you have previously called <code>shutdown</code> on the underlying service
|
||||
* utilized by this class, subsequent calls to any of the operations this class
|
||||
* provides will invoke the internal {@link #checkService()} method which will
|
||||
* replace the terminated underlying {@link ExecutorService} with a new one via
|
||||
* the {@link #createService()} method.
|
||||
* <h3>Custom Implementations</h3>
|
||||
* If a subclass wants to customize the {@link ExecutorService} or
|
||||
* {@link ThreadFactory} used under the covers, this can be done by overriding
|
||||
* the {@link #createService()} method which is invoked by this class anytime a
|
||||
* new {@link ExecutorService} is needed.
|
||||
* <p/>
|
||||
* By default the {@link #createService()} method delegates to the
|
||||
* {@link #createService(ThreadFactory)} method with a new instance of
|
||||
* {@link DefaultThreadFactory}. Either of these methods can be overridden and
|
||||
* customized easily if desired.
|
||||
* <p/>
|
||||
* <strong>TIP</strong>: A common customization to this class is to make the
|
||||
* {@link Thread}s generated by the underlying factory more server-friendly, in
|
||||
* which case the caller would want to use an instance of the
|
||||
* {@link ServerThreadFactory} when creating the new {@link ExecutorService}.
|
||||
* <p/>
|
||||
* This can be done in one line by overriding {@link #createService()} and
|
||||
* returning the result of:
|
||||
* <code>return createService(new ServerThreadFactory());</code>
|
||||
* <p/>
|
||||
* By default this class uses a {@link ThreadPoolExecutor} internally to handle
|
||||
* execution of queued image operations. If a different type of
|
||||
* {@link ExecutorService} is desired, again, simply overriding the
|
||||
* {@link #createService()} method of choice is the right way to do that.
|
||||
*
|
||||
* @author Riyad Kalla (software@thebuzzmedia.com)
|
||||
* @since 3.2
|
||||
*/
|
||||
@SuppressWarnings("javadoc")
|
||||
public class AsyncScalr {
|
||||
/**
 * Name of the system property read to decide how many threads the default
 * underlying {@link ExecutorService} uses to process async image
 * operations.
 * <p/>
 * Value is "<code>imgscalr.async.threadCount</code>".
 */
public static final String THREAD_COUNT_PROPERTY_NAME = "imgscalr.async.threadCount";

/**
 * Number of threads the internal {@link ExecutorService} will use to
 * simultaneously execute scale requests.
 * <p/>
 * The value is read once, when this class is initialized, from the
 * <code>imgscalr.async.threadCount</code> system property (see
 * {@link #THREAD_COUNT_PROPERTY_NAME}), which must hold a valid integer
 * value &gt; 0.
 * <p/>
 * Default value is <code>2</code>.
 */
public static final int THREAD_COUNT = Integer.getInteger(
        THREAD_COUNT_PROPERTY_NAME, 2);

// Verifies the THREAD_COUNT value obtained from the system property and
// fails class initialization when it is smaller than 1.
static {
    if (THREAD_COUNT < 1) {
        final String message = "System property '"
                + THREAD_COUNT_PROPERTY_NAME + "' set THREAD_COUNT to "
                + THREAD_COUNT + ", but THREAD_COUNT must be > 0.";
        throw new RuntimeException(message);
    }
}

/**
 * The {@link ExecutorService} that the operation methods of this class
 * submit their tasks to; exposed to callers through {@link #getService()}.
 */
protected static ExecutorService service;
|
||||
|
||||
/**
|
||||
* Used to get access to the internal {@link ExecutorService} used by this
|
||||
* class to process scale operations.
|
||||
* <p/>
|
||||
* <strong>NOTE</strong>: You will need to explicitly shutdown any service
|
||||
* currently set on this class before the host JVM exits.
|
||||
* <p/>
|
||||
* You can call {@link ExecutorService#shutdown()} to wait for all scaling
|
||||
* operations to complete first or call
|
||||
* {@link ExecutorService#shutdownNow()} to kill any in-process operations
|
||||
* and purge all pending operations before exiting.
|
||||
* <p/>
|
||||
* Additionally you can use
|
||||
* {@link ExecutorService#awaitTermination(long, TimeUnit)} after issuing a
|
||||
* shutdown command to try and wait until the service has finished all
|
||||
* tasks.
|
||||
*
|
||||
* @return the current {@link ExecutorService} used by this class to process
|
||||
* scale operations.
|
||||
*/
|
||||
public static ExecutorService getService() {
|
||||
return service;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#apply(BufferedImage, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> apply(final BufferedImage src,
|
||||
final BufferedImageOp... ops) throws IllegalArgumentException,
|
||||
ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.apply(src, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#crop(BufferedImage, int, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> crop(final BufferedImage src,
|
||||
final int width, final int height, final BufferedImageOp... ops)
|
||||
throws IllegalArgumentException, ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.crop(src, width, height, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#crop(BufferedImage, int, int, int, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> crop(final BufferedImage src,
|
||||
final int x, final int y, final int width, final int height,
|
||||
final BufferedImageOp... ops) throws IllegalArgumentException,
|
||||
ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.crop(src, x, y, width, height, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#pad(BufferedImage, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> pad(final BufferedImage src,
|
||||
final int padding, final BufferedImageOp... ops)
|
||||
throws IllegalArgumentException, ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.pad(src, padding, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#pad(BufferedImage, int, Color, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> pad(final BufferedImage src,
|
||||
final int padding, final Color color, final BufferedImageOp... ops)
|
||||
throws IllegalArgumentException, ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.pad(src, padding, color, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#resize(BufferedImage, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> resize(final BufferedImage src,
|
||||
final int targetSize, final BufferedImageOp... ops)
|
||||
throws IllegalArgumentException, ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.resize(src, targetSize, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#resize(BufferedImage, Method, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> resize(final BufferedImage src,
|
||||
final Method scalingMethod, final int targetSize,
|
||||
final BufferedImageOp... ops) throws IllegalArgumentException,
|
||||
ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.resize(src, scalingMethod, targetSize, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#resize(BufferedImage, Mode, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> resize(final BufferedImage src,
|
||||
final Mode resizeMode, final int targetSize,
|
||||
final BufferedImageOp... ops) throws IllegalArgumentException,
|
||||
ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.resize(src, resizeMode, targetSize, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#resize(BufferedImage, Method, Mode, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> resize(final BufferedImage src,
|
||||
final Method scalingMethod, final Mode resizeMode,
|
||||
final int targetSize, final BufferedImageOp... ops)
|
||||
throws IllegalArgumentException, ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.resize(src, scalingMethod, resizeMode, targetSize,
|
||||
ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#resize(BufferedImage, int, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> resize(final BufferedImage src,
|
||||
final int targetWidth, final int targetHeight,
|
||||
final BufferedImageOp... ops) throws IllegalArgumentException,
|
||||
ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.resize(src, targetWidth, targetHeight, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#resize(BufferedImage, Method, int, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> resize(final BufferedImage src,
|
||||
final Method scalingMethod, final int targetWidth,
|
||||
final int targetHeight, final BufferedImageOp... ops) {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.resize(src, scalingMethod, targetWidth,
|
||||
targetHeight, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#resize(BufferedImage, Mode, int, int, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> resize(final BufferedImage src,
|
||||
final Mode resizeMode, final int targetWidth,
|
||||
final int targetHeight, final BufferedImageOp... ops)
|
||||
throws IllegalArgumentException, ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.resize(src, resizeMode, targetWidth, targetHeight,
|
||||
ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#resize(BufferedImage, Method, Mode, int, int,
|
||||
* BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> resize(final BufferedImage src,
|
||||
final Method scalingMethod, final Mode resizeMode,
|
||||
final int targetWidth, final int targetHeight,
|
||||
final BufferedImageOp... ops) throws IllegalArgumentException,
|
||||
ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.resize(src, scalingMethod, resizeMode,
|
||||
targetWidth, targetHeight, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Scalr#rotate(BufferedImage, Rotation, BufferedImageOp...)
|
||||
*/
|
||||
public static Future<BufferedImage> rotate(final BufferedImage src,
|
||||
final Rotation rotation, final BufferedImageOp... ops)
|
||||
throws IllegalArgumentException, ImagingOpException {
|
||||
checkService();
|
||||
|
||||
return service.submit(new Callable<BufferedImage>() {
|
||||
public BufferedImage call() throws Exception {
|
||||
return Scalr.rotate(src, rotation, ops);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
protected static ExecutorService createService() {
|
||||
return createService(new DefaultThreadFactory());
|
||||
}
|
||||
|
||||
protected static ExecutorService createService(ThreadFactory factory)
|
||||
throws IllegalArgumentException {
|
||||
if (factory == null)
|
||||
throw new IllegalArgumentException("factory cannot be null");
|
||||
|
||||
return Executors.newFixedThreadPool(THREAD_COUNT, factory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to verify that the underlying <code>service</code> points at an
|
||||
* active {@link ExecutorService} instance that can be used by this class.
|
||||
* <p/>
|
||||
* If <code>service</code> is <code>null</code>, has been shutdown or
|
||||
* terminated then this method will replace it with a new
|
||||
* {@link ExecutorService} by calling the {@link #createService()} method
|
||||
* and assigning the returned value to <code>service</code>.
|
||||
* <p/>
|
||||
* Any subclass that wants to customize the {@link ExecutorService} or
|
||||
* {@link ThreadFactory} used internally by this class should override the
|
||||
* {@link #createService()}.
|
||||
*/
|
||||
protected static void checkService() {
|
||||
if (service == null || service.isShutdown() || service.isTerminated()) {
|
||||
/*
|
||||
* If service was shutdown or terminated, assigning a new value will
|
||||
* free the reference to the instance, allowing it to be GC'ed when
|
||||
* it is done shutting down (assuming it hadn't already).
|
||||
*/
|
||||
service = createService();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default {@link ThreadFactory} used by the internal
|
||||
* {@link ExecutorService} to create execution {@link Thread}s for image
|
||||
* scaling.
|
||||
* <p/>
|
||||
* More or less a copy of the hidden class backing the
|
||||
* {@link Executors#defaultThreadFactory()} method, but exposed here to make
|
||||
* it easier for implementors to extend and customize.
|
||||
*
|
||||
* @author Doug Lea
|
||||
* @author Riyad Kalla (software@thebuzzmedia.com)
|
||||
* @since 4.0
|
||||
*/
|
||||
protected static class DefaultThreadFactory implements ThreadFactory {
|
||||
protected static final AtomicInteger poolNumber = new AtomicInteger(1);
|
||||
|
||||
protected final ThreadGroup group;
|
||||
protected final AtomicInteger threadNumber = new AtomicInteger(1);
|
||||
protected final String namePrefix;
|
||||
|
||||
DefaultThreadFactory() {
|
||||
SecurityManager manager = System.getSecurityManager();
|
||||
|
||||
/*
|
||||
* Determine the group that threads created by this factory will be
|
||||
* in.
|
||||
*/
|
||||
group = (manager == null ? Thread.currentThread().getThreadGroup()
|
||||
: manager.getThreadGroup());
|
||||
|
||||
/*
|
||||
* Define a common name prefix for the threads created by this
|
||||
* factory.
|
||||
*/
|
||||
namePrefix = "pool-" + poolNumber.getAndIncrement() + "-thread-";
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to create a {@link Thread} capable of executing the given
|
||||
* {@link Runnable}.
|
||||
* <p/>
|
||||
* Thread created by this factory are utilized by the parent
|
||||
* {@link ExecutorService} when processing queued up scale operations.
|
||||
*/
|
||||
public Thread newThread(Runnable r) {
|
||||
/*
|
||||
* Create a new thread in our specified group with a meaningful
|
||||
* thread name so it is easy to identify.
|
||||
*/
|
||||
Thread thread = new Thread(group, r, namePrefix
|
||||
+ threadNumber.getAndIncrement(), 0);
|
||||
|
||||
// Configure thread according to class or subclass
|
||||
thread.setDaemon(false);
|
||||
thread.setPriority(Thread.NORM_PRIORITY);
|
||||
|
||||
return thread;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An extension of the {@link DefaultThreadFactory} class that makes two
|
||||
* changes to the execution {@link Thread}s it generates:
|
||||
* <ol>
|
||||
* <li>Threads are set to be daemon threads instead of user threads.</li>
|
||||
* <li>Threads execute with a priority of {@link Thread#MIN_PRIORITY} to
|
||||
* make them more compatible with server environment deployments.</li>
|
||||
* </ol>
|
||||
* This class is provided as a convenience for subclasses to use if they
|
||||
* want this (common) customization to the {@link Thread}s used internally
|
||||
* by {@link AsyncScalr} to process images, but don't want to have to write
|
||||
* the implementation.
|
||||
*
|
||||
* @author Riyad Kalla (software@thebuzzmedia.com)
|
||||
* @since 4.0
|
||||
*/
|
||||
protected static class ServerThreadFactory extends DefaultThreadFactory {
|
||||
/**
|
||||
* Overridden to set <code>daemon</code> property to <code>true</code>
|
||||
* and decrease the priority of the new thread to
|
||||
* {@link Thread#MIN_PRIORITY} before returning it.
|
||||
*/
|
||||
@Override
|
||||
public Thread newThread(Runnable r) {
|
||||
Thread thread = super.newThread(r);
|
||||
|
||||
thread.setDaemon(true);
|
||||
thread.setPriority(Thread.MIN_PRIORITY);
|
||||
|
||||
return thread;
|
||||
}
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -33,6 +33,7 @@ import org.objectweb.asm.tree.ClassNode;
|
|||
import the.bytecode.club.bytecodeviewer.api.ClassNodeLoader;
|
||||
import the.bytecode.club.bytecodeviewer.gui.ClassViewer;
|
||||
import the.bytecode.club.bytecodeviewer.gui.FileNavigationPane;
|
||||
import the.bytecode.club.bytecodeviewer.gui.BootScreen;
|
||||
import the.bytecode.club.bytecodeviewer.gui.MainViewerGUI;
|
||||
import the.bytecode.club.bytecodeviewer.gui.RunOptions;
|
||||
import the.bytecode.club.bytecodeviewer.gui.SearchingPane;
|
||||
|
@ -67,7 +68,6 @@ import the.bytecode.club.bytecodeviewer.plugin.PluginManager;
|
|||
* TODO:
|
||||
*
|
||||
* 3.0.0: (RETIREMENT PARTY, WOHOOO)
|
||||
* maybe just do AMS5 then obfuscate the dex2jar shit.
|
||||
* Add obfuscation:
|
||||
* - Add integer boxing and other obfuscation methods contra implemented
|
||||
* - Insert unadded/debug opcodes to try to fuck up decompilers
|
||||
|
@ -91,11 +91,13 @@ import the.bytecode.club.bytecodeviewer.plugin.PluginManager;
|
|||
* refresh appears under panes that are non refreshable
|
||||
* make ez-injection plugin console show all sys.out calls
|
||||
* edit then save issues?
|
||||
*
|
||||
* Search open doesnt append .class
|
||||
* Search open doesnt append .class to tab name
|
||||
*
|
||||
* -----2.9.7-----:
|
||||
* 07/02/2015 - Added adjustable font size.
|
||||
* 07/05/2015 - Started working on the new Boot Screen.
|
||||
* 07/06/2015 - Moved the font size to be under the view menu.
|
||||
* 07/06/2015 - Fixed a bug with plugins not being able to grab the currently viewed class.
|
||||
*
|
||||
* @author Konloch
|
||||
*
|
||||
|
@ -123,6 +125,7 @@ public class BytecodeViewer {
|
|||
private static String pluginsName = getBCVDirectory() + fs + "recentplugins.bcv";
|
||||
public static String settingsName = getBCVDirectory() + fs + "settings.bcv";
|
||||
public static String tempDirectory = getBCVDirectory() + fs + "bcv_temp" + fs;
|
||||
public static String libsDirectory = getBCVDirectory() + fs + "libs" + fs;
|
||||
public static String krakatauWorkingDirectory = getBCVDirectory() + fs + "krakatau_" + krakatauVersion + fs + "Krakatau-master";
|
||||
private static ArrayList<String> recentFiles = DiskReader.loadArrayList(filesName, false);
|
||||
private static ArrayList<String> recentPlugins = DiskReader.loadArrayList(pluginsName, false);
|
||||
|
@ -132,11 +135,12 @@ public class BytecodeViewer {
|
|||
public static ArrayList<Process> krakatau = new ArrayList<Process>();
|
||||
public static Refactorer refactorer = new Refactorer();
|
||||
public static boolean pingback = false;
|
||||
public static boolean deleteForiegnLibraries = true;
|
||||
|
||||
/**
|
||||
* The version checker thread
|
||||
*/
|
||||
private static Thread versionChecker = new Thread() {
|
||||
public static Thread versionChecker = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
|
@ -294,6 +298,39 @@ public class BytecodeViewer {
|
|||
}
|
||||
};
|
||||
|
||||
public static Thread PingBack = new Thread() {
|
||||
public void run() {
|
||||
try {
|
||||
new HTTPRequest(new URL("https://bytecodeviewer.com/add.php")).read();
|
||||
} catch(Exception e) {
|
||||
//ignore
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
public static void pingback() {
|
||||
JOptionPane pane = new JOptionPane(
|
||||
"Would you like to 'pingback' to https://bytecodeviewer.com to be counted in the global users for BCV?");
|
||||
Object[] options = new String[] { "Yes", "No" };
|
||||
pane.setOptions(options);
|
||||
JDialog dialog = pane.createDialog(BytecodeViewer.viewer,
|
||||
"Bytecode Viewer - Optional Pingback");
|
||||
dialog.setVisible(true);
|
||||
Object obj = pane.getValue();
|
||||
int result = -1;
|
||||
for (int k = 0; k < options.length; k++)
|
||||
if (options[k].equals(obj))
|
||||
result = k;
|
||||
|
||||
if (result == 0) {
|
||||
try {
|
||||
PingBack.start();
|
||||
} catch (Exception e) {
|
||||
new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Grab the byte array from the loaded Class object
|
||||
* @param clazz
|
||||
|
@ -317,6 +354,15 @@ public class BytecodeViewer {
|
|||
*/
|
||||
public static void main(String[] args) {
|
||||
System.setSecurityManager(sm);
|
||||
try {
|
||||
UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
|
||||
} catch (Exception e) {
|
||||
new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
|
||||
}
|
||||
new BootScreen().DO_FIRST_BOOT(args);
|
||||
}
|
||||
|
||||
public static void BOOT(String[] args) {
|
||||
checkKrakatau();
|
||||
System.out.println("https://the.bytecode.club - Created by @Konloch - Bytecode Viewer " + version);
|
||||
cleanup();
|
||||
|
@ -329,18 +375,13 @@ public class BytecodeViewer {
|
|||
cleanup();
|
||||
}
|
||||
});
|
||||
try {
|
||||
UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
|
||||
} catch (Exception e) {
|
||||
new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
|
||||
}
|
||||
|
||||
viewer = new MainViewerGUI();
|
||||
Settings.loadGUI();
|
||||
resetRecentFilesMenu();
|
||||
|
||||
if (viewer.chckbxmntmNewCheckItem_12.isSelected()) // start only if selected
|
||||
versionChecker.start();
|
||||
/*if (viewer.chckbxmntmNewCheckItem_12.isSelected()) // start only if selected
|
||||
versionChecker.start();*/
|
||||
|
||||
viewer.setVisible(true);
|
||||
System.out.println("Start up took " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
|
||||
|
@ -350,33 +391,10 @@ public class BytecodeViewer {
|
|||
openFiles(new File[] { new File(s) }, true);
|
||||
}
|
||||
|
||||
if(!pingback) {
|
||||
/*if(!pingback) {
|
||||
pingback = true;
|
||||
pingback();
|
||||
}
|
||||
}
|
||||
|
||||
public static void pingback() {
|
||||
JOptionPane pane = new JOptionPane(
|
||||
"Would you like to 'pingback' to https://bytecodeviewer.com to be counted in the global users for BCV?");
|
||||
Object[] options = new String[] { "Yes", "No" };
|
||||
pane.setOptions(options);
|
||||
JDialog dialog = pane.createDialog(BytecodeViewer.viewer,
|
||||
"Bytecode Viewer - Optional Pingback");
|
||||
dialog.setVisible(true);
|
||||
Object obj = pane.getValue();
|
||||
int result = -1;
|
||||
for (int k = 0; k < options.length; k++)
|
||||
if (options[k].equals(obj))
|
||||
result = k;
|
||||
|
||||
if (result == 0) {
|
||||
try {
|
||||
new HTTPRequest(new URL("https://bytecodeviewer.com/add.php")).read();
|
||||
} catch (Exception e) {
|
||||
new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
|
||||
}
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -569,7 +587,8 @@ public class BytecodeViewer {
|
|||
public static void openFiles(final File[] files, boolean recentFiles) {
|
||||
if(recentFiles)
|
||||
for (File f : files)
|
||||
BytecodeViewer.addRecentFile(f);
|
||||
if(f.exists())
|
||||
BytecodeViewer.addRecentFile(f);
|
||||
|
||||
BytecodeViewer.viewer.setIcon(true);
|
||||
update = true;
|
||||
|
|
|
@ -77,6 +77,8 @@ public class JarUtils {
|
|||
* @throws IOException
|
||||
*/
|
||||
public static void loadResources(final File zipFile) throws IOException {
|
||||
if(!zipFile.exists())
|
||||
return; //just ignore
|
||||
ZipInputStream jis = new ZipInputStream(new FileInputStream(zipFile));
|
||||
ZipEntry entry;
|
||||
while ((entry = jis.getNextEntry()) != null) {
|
||||
|
|
|
@ -189,6 +189,7 @@ public class Settings {
|
|||
DiskWriter.writeNewLine(BytecodeViewer.settingsName, String.valueOf(BytecodeViewer.viewer.panel2JDGUI_E.isSelected()), false);
|
||||
DiskWriter.writeNewLine(BytecodeViewer.settingsName, String.valueOf(BytecodeViewer.viewer.panel3JDGUI_E.isSelected()), false);
|
||||
DiskWriter.writeNewLine(BytecodeViewer.settingsName, String.valueOf(BytecodeViewer.viewer.fontSpinner.getValue()), false);
|
||||
DiskWriter.writeNewLine(BytecodeViewer.settingsName, String.valueOf(BytecodeViewer.deleteForiegnLibraries), false);
|
||||
} catch(Exception e) {
|
||||
new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
|
||||
}
|
||||
|
@ -376,6 +377,7 @@ public class Settings {
|
|||
BytecodeViewer.viewer.panel2JDGUI_E.setSelected(Boolean.parseBoolean(DiskReader.loadString(BytecodeViewer.settingsName, 110, false)));
|
||||
BytecodeViewer.viewer.panel3JDGUI_E.setSelected(Boolean.parseBoolean(DiskReader.loadString(BytecodeViewer.settingsName, 111, false)));
|
||||
BytecodeViewer.viewer.fontSpinner.setValue(Integer.parseInt(DiskReader.loadString(BytecodeViewer.settingsName, 112, false)));
|
||||
BytecodeViewer.deleteForiegnLibraries = Boolean.parseBoolean(DiskReader.loadString(BytecodeViewer.settingsName, 113, false));
|
||||
} catch(Exception e) {
|
||||
//ignore because errors are expected, first start up and outdated settings.
|
||||
//e.printStackTrace();
|
||||
|
|
|
@ -34,7 +34,7 @@ public class AboutWindow extends JFrame {
|
|||
getContentPane().add(txtrBytecodeViewerIs, "name_140466526081695");txtrBytecodeViewerIs.setEnabled(false);
|
||||
this.setResizable(false);
|
||||
this.setLocationRelativeTo(null);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setVisible(boolean b) {
|
||||
|
|
|
@ -0,0 +1,281 @@
|
|||
package the.bytecode.club.bytecodeviewer.gui;
|
||||
|
||||
import javax.swing.JEditorPane;
|
||||
import javax.swing.JFrame;
|
||||
|
||||
import java.awt.Dimension;
|
||||
import java.awt.GridBagLayout;
|
||||
|
||||
import javax.swing.JProgressBar;
|
||||
|
||||
import java.awt.GridBagConstraints;
|
||||
|
||||
import javax.swing.JScrollPane;
|
||||
|
||||
import java.awt.Insets;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.lang.reflect.Method;
|
||||
import java.net.URL;
|
||||
import java.net.URLClassLoader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Enumeration;
|
||||
import java.util.List;
|
||||
import java.util.jar.JarEntry;
|
||||
import java.util.jar.JarFile;
|
||||
|
||||
import javax.swing.text.html.HTMLEditorKit;
|
||||
|
||||
import the.bytecode.club.bytecodeviewer.BytecodeViewer;
|
||||
import the.bytecode.club.bytecodeviewer.Resources;
|
||||
import me.konloch.kontainer.io.HTTPRequest;
|
||||
|
||||
/**
|
||||
* First boot, will automatically connect to BytecodeViewer for PingBack
|
||||
* It'll Check BCV version
|
||||
* then it'll download repos from the library
|
||||
* After it's completed and compared MD5 hashes, it simply dynamically loads all jars in /libs/ folder of BCV
|
||||
* While all of this is happening, it'll show the HOW-TO guide for BCV
|
||||
*
|
||||
* Download Failed? Corrupt Jar? Append -clean to BCV startup
|
||||
*
|
||||
* @author Konloch
|
||||
*
|
||||
*/
|
||||
|
||||
public class BootScreen extends JFrame {
|
||||
|
||||
private static final long serialVersionUID = -1098467609722393444L;
|
||||
|
||||
private static boolean FIRST_BOOT = false;
|
||||
|
||||
private JProgressBar progressBar = new JProgressBar();
|
||||
|
||||
public BootScreen() {
|
||||
setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
|
||||
this.setIconImages(Resources.iconList);
|
||||
setSize(new Dimension(600, 800));
|
||||
setTitle("Bytecode Viewer Boot Screen - Starting Up");
|
||||
GridBagLayout gridBagLayout = new GridBagLayout();
|
||||
gridBagLayout.columnWidths = new int[]{0, 0};
|
||||
gridBagLayout.rowHeights = new int[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
gridBagLayout.columnWeights = new double[]{1.0, Double.MIN_VALUE};
|
||||
gridBagLayout.rowWeights = new double[]{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, Double.MIN_VALUE};
|
||||
getContentPane().setLayout(gridBagLayout);
|
||||
|
||||
JScrollPane scrollPane = new JScrollPane();
|
||||
GridBagConstraints gbc_scrollPane = new GridBagConstraints();
|
||||
gbc_scrollPane.gridheight = 24;
|
||||
gbc_scrollPane.insets = new Insets(0, 0, 5, 0);
|
||||
gbc_scrollPane.fill = GridBagConstraints.BOTH;
|
||||
gbc_scrollPane.gridx = 0;
|
||||
gbc_scrollPane.gridy = 0;
|
||||
getContentPane().add(scrollPane, gbc_scrollPane);
|
||||
|
||||
JEditorPane editorPane = new JEditorPane();
|
||||
editorPane.setEditorKit(new HTMLEditorKit());
|
||||
|
||||
editorPane.setText("http://www.icesoft.org/java/home.jsf");
|
||||
|
||||
scrollPane.setViewportView(editorPane);
|
||||
|
||||
GridBagConstraints gbc_progressBar = new GridBagConstraints();
|
||||
gbc_progressBar.fill = GridBagConstraints.HORIZONTAL;
|
||||
gbc_progressBar.gridx = 0;
|
||||
gbc_progressBar.gridy = 24;
|
||||
getContentPane().add(progressBar, gbc_progressBar);
|
||||
this.setLocationRelativeTo(null);
|
||||
}
|
||||
|
||||
public void DO_FIRST_BOOT(String args[]) {
|
||||
this.setVisible(true);
|
||||
if(FIRST_BOOT)
|
||||
return;
|
||||
|
||||
FIRST_BOOT = true;
|
||||
boolean foundAtleastOne = false;
|
||||
|
||||
|
||||
setTitle("Bytecode Viewer Boot Screen - Checking Libraries...");
|
||||
|
||||
try {
|
||||
int completedCheck = 0;
|
||||
List<String> urlList = new ArrayList<String>();
|
||||
HTTPRequest req = new HTTPRequest(new URL("https://github.com/Konloch/bytecode-viewer/tree/master/libs"));
|
||||
for(String s : req.read())
|
||||
if(s.contains("href=\"/Konloch/bytecode-viewer/blob/master/libs/")) {
|
||||
urlList.add("https://github.com"+s.split("<a href=")[1].split("\"")[1]);
|
||||
foundAtleastOne = true;
|
||||
}
|
||||
|
||||
if(!foundAtleastOne) {
|
||||
new the.bytecode.club.bytecodeviewer.api.ExceptionUI("Bytecode Viewer ran into an issue, for some reason github is not returning what we're expecting. Please try rebooting, if this issue persists please contact @Konloch.");
|
||||
return;
|
||||
}
|
||||
|
||||
File libsDirectory = new File(BytecodeViewer.libsDirectory);
|
||||
if(args.length >= 1)
|
||||
if(args[0].equalsIgnoreCase("-clean"))
|
||||
libsDirectory.delete();
|
||||
|
||||
if(!libsDirectory.exists())
|
||||
libsDirectory.mkdir();
|
||||
|
||||
List<String> libsList = new ArrayList<String>();
|
||||
List<String> libsFileList = new ArrayList<String>();
|
||||
for(File f : libsDirectory.listFiles()) {
|
||||
libsList.add(f.getName());
|
||||
libsFileList.add(f.getAbsolutePath());
|
||||
}
|
||||
|
||||
progressBar.setMaximum(urlList.size());
|
||||
|
||||
for(String s : urlList) {
|
||||
String fileName = s.substring("https://github.com/Konloch/bytecode-viewer/blob/master/libs/".length(), s.length());
|
||||
if(!libsList.contains(fileName)) {
|
||||
setTitle("Bytecode Viewer Boot Screen - Downloading " + fileName);
|
||||
boolean passed = false;
|
||||
while(!passed) {
|
||||
InputStream is = null;
|
||||
FileOutputStream fos = null;
|
||||
try {
|
||||
is = new URL("https://github.com/Konloch/bytecode-viewer/raw/master/libs/" + fileName).openConnection().getInputStream();
|
||||
fos = new FileOutputStream(BytecodeViewer.libsDirectory + BytecodeViewer.fs + fileName);
|
||||
System.out.println("Downloading from "+s);
|
||||
byte[] buffer = new byte[8192];
|
||||
int len;
|
||||
int downloaded = 0;
|
||||
boolean flag = false;
|
||||
while ((len = is.read(buffer)) > 0) {
|
||||
fos.write(buffer, 0, len);
|
||||
fos.flush();
|
||||
downloaded += 8192;
|
||||
int mbs = downloaded / 1048576;
|
||||
if(mbs % 5 == 0 && mbs != 0) {
|
||||
if(!flag)
|
||||
System.out.println("Downloaded " + mbs + "MBs so far");
|
||||
flag = true;
|
||||
} else
|
||||
flag = false;
|
||||
}
|
||||
libsFileList.add(BytecodeViewer.libsDirectory + BytecodeViewer.fs + fileName);
|
||||
} finally {
|
||||
try {
|
||||
if (is != null) {
|
||||
is.close();
|
||||
}
|
||||
} finally {
|
||||
if (fos != null) {
|
||||
fos.flush();
|
||||
}
|
||||
if (fos != null) {
|
||||
fos.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println("Download finished!");
|
||||
passed = true;
|
||||
}
|
||||
}
|
||||
completedCheck++;
|
||||
progressBar.setValue(completedCheck);
|
||||
}
|
||||
|
||||
if(BytecodeViewer.deleteForiegnLibraries) {
|
||||
setTitle("Bytecode Viewer Boot Screen - Checking & Deleting Foriegn/Outdated Libraries...");
|
||||
for(String s : libsFileList) {
|
||||
File f = new File(s);
|
||||
boolean delete = true;
|
||||
for(String urlS : urlList) {
|
||||
String fileName = urlS.substring("https://github.com/Konloch/bytecode-viewer/blob/master/libs/".length(), urlS.length());
|
||||
if(fileName.equals(f.getName())) {
|
||||
delete = false;
|
||||
}
|
||||
}
|
||||
if(delete) {
|
||||
f.delete();
|
||||
System.out.println("Detected & Deleted Foriegn/Outdated Jar/File: " + f.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setTitle("Bytecode Viewer Boot Screen - Loading Libraries...");
|
||||
|
||||
for(String s : libsFileList ) {
|
||||
if(s.endsWith(".jar")) {
|
||||
File f = new File(s);
|
||||
setTitle("Bytecode Viewer Boot Screen - Loading Library " + f.getName());
|
||||
System.out.println(f.getName());
|
||||
|
||||
JarFile jarFile = new JarFile(s);
|
||||
Enumeration<JarEntry> e = jarFile.entries();
|
||||
ClassPathHack.addFile(f);
|
||||
while (e.hasMoreElements()) {
|
||||
JarEntry je = (JarEntry) e.nextElement();
|
||||
if(je.isDirectory() || !je.getName().endsWith(".class")){
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
String className = je.getName().substring(0,je.getName().length()-6);
|
||||
className = className.replace('/', '.');
|
||||
ClassLoader.getSystemClassLoader().loadClass(className);
|
||||
} catch(java.lang.VerifyError | java.lang.ExceptionInInitializerError | java.lang.IncompatibleClassChangeError | java.lang.NoClassDefFoundError | Exception e2) {
|
||||
//ignore
|
||||
}
|
||||
}
|
||||
jarFile.close();
|
||||
}
|
||||
}
|
||||
setTitle("Bytecode Viewer Boot Screen - Booting!");
|
||||
|
||||
} catch(Exception e) {
|
||||
StringWriter sw = new StringWriter();
|
||||
e.printStackTrace(new PrintWriter(sw));
|
||||
e.printStackTrace();
|
||||
new the.bytecode.club.bytecodeviewer.api.ExceptionUI("Bytecode Viewer ran into an error while booting, trying to force it anyways."+ BytecodeViewer.nl+ BytecodeViewer.nl+
|
||||
"Please ensure you have an active internet connection and restart BCV. If this presists please visit http://github.com/Konloch/Bytecode-Viewer or http://bytecodeviewer.com"+ BytecodeViewer.nl + BytecodeViewer.nl + sw.toString());
|
||||
}
|
||||
|
||||
setTitle("Bytecode Viewer Boot Screen - Finished");
|
||||
|
||||
BytecodeViewer.BOOT(args);
|
||||
|
||||
if(BytecodeViewer.pingback) {
|
||||
BytecodeViewer.PingBack.start();
|
||||
BytecodeViewer.pingback = true;
|
||||
}
|
||||
|
||||
if(BytecodeViewer.viewer.chckbxmntmNewCheckItem_12.isSelected())
|
||||
BytecodeViewer.versionChecker.start();
|
||||
|
||||
this.setVisible(false);
|
||||
}
|
||||
|
||||
public static class ClassPathHack {
|
||||
private static final Class<?>[] parameters = new Class[] {URL.class};
|
||||
|
||||
public static void addFile(File f) throws IOException {
|
||||
// f.toURL is deprecated
|
||||
addURL(f.toURI().toURL());
|
||||
}
|
||||
|
||||
protected static void addURL(URL u) throws IOException {
|
||||
URLClassLoader sysloader = (URLClassLoader) ClassLoader.getSystemClassLoader();
|
||||
Class<?> sysclass = URLClassLoader.class;
|
||||
|
||||
try {
|
||||
Method method = sysclass.getDeclaredMethod("addURL", parameters);
|
||||
method.setAccessible(true);
|
||||
method.invoke(sysloader, u);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -222,7 +222,6 @@ public class ClassViewer extends Viewer {
|
|||
private static final long serialVersionUID = -8650495368920680024L;
|
||||
ArrayList<MethodData> lnData = new ArrayList<MethodData>();
|
||||
String name;
|
||||
public ClassNode cn;
|
||||
JSplitPane sp;
|
||||
JSplitPane sp2;
|
||||
public JPanel panel1Search = new JPanel(new BorderLayout());
|
||||
|
|
|
@ -1293,13 +1293,6 @@ public class MainViewerGUI extends JFrame implements FileChangeNotifier {
|
|||
mnSettings.add(decodeAPKResources);
|
||||
|
||||
mnSettings.add(separator_36);
|
||||
|
||||
mnSettings.add(mnFontSize);
|
||||
fontSpinner.setModel(new SpinnerNumberModel(new Integer(12), new Integer(1), null, new Integer(1)));
|
||||
|
||||
mnFontSize.add(fontSpinner);
|
||||
|
||||
mnSettings.add(separator_13);
|
||||
mntmSetPythonDirectory.addActionListener(new ActionListener() {
|
||||
@Override
|
||||
public void actionPerformed(ActionEvent arg0) {
|
||||
|
@ -1758,6 +1751,13 @@ public class MainViewerGUI extends JFrame implements FileChangeNotifier {
|
|||
panelGroup3.add(panel3Smali);
|
||||
panelGroup3.add(panel3Bytecode);
|
||||
panelGroup3.add(panel3Hexcode);
|
||||
mnNewMenu_6.add(separator_13);
|
||||
fontSpinner.setPreferredSize(new Dimension(42, 20));
|
||||
fontSpinner.setSize(new Dimension(42, 20));
|
||||
fontSpinner.setModel(new SpinnerNumberModel(new Integer(12), new Integer(1), null, new Integer(1)));
|
||||
mnNewMenu_6.add(mnFontSize);
|
||||
|
||||
mnFontSize.add(fontSpinner);
|
||||
|
||||
|
||||
panelGroup1.setSelected(panel1Proc.getModel(), true);//my one true love
|
||||
|
|
|
@ -17,6 +17,7 @@ import the.bytecode.club.bytecodeviewer.api.Plugin;
|
|||
import the.bytecode.club.bytecodeviewer.plugin.PluginLaunchStrategy;
|
||||
|
||||
/**
|
||||
* @author Konloch
|
||||
* @author Bibl (don't ban me pls)
|
||||
* @created 1 Jun 2015
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue