some changes

This commit is contained in:
Konloch 2015-07-06 08:07:29 -06:00
parent 6cb3116b81
commit 98e7e333bb
78 changed files with 19872 additions and 46 deletions

Binary file not shown.

View File

@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
/**
 * Contract for decoders whose input and output are both byte arrays.
 *
 * @version $Id$
 */
public interface BinaryDecoder extends Decoder {

    /**
     * Decodes the supplied byte array and returns the decoded content as a byte array.
     *
     * @param source
     *            the bytes to decode; expected to have been produced by the matching encoder
     * @return a byte array holding the decoded content
     * @throws DecoderException
     *             if the decoder encounters a failure condition while decoding
     */
    byte[] decode(byte[] source) throws DecoderException;
}

View File

@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
/**
 * Contract for encoders whose input and output are both byte arrays.
 *
 * @version $Id$
 */
public interface BinaryEncoder extends Encoder {

    /**
     * Encodes the supplied byte array and returns the encoded data as a byte array.
     *
     * @param source
     *            the data to encode
     * @return a byte array containing the encoded data
     * @throws EncoderException
     *             if the encoder encounters a failure condition while encoding
     */
    byte[] encode(byte[] source) throws EncoderException;
}

View File

@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
/**
 * Names of the character encodings that every implementation of the Java platform must support.
 *
 * From the Java documentation <a
 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
 * release documentation for your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <ul>
 * <li><code>US-ASCII</code><br>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</li>
 * </ul>
 *
 * This perhaps would best belong in the [lang] project. Even if a similar interface is defined in [lang], it is not
 * foreseen that [codec] would be made to depend on [lang].
 *
 * <p>
 * The class declares only {@code public static final String} constants and holds no mutable state.
 * </p>
 *
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 1.4
 * @version $Id$
 */
public class CharEncoding {

    // The constants are declared in the same order as the list in the class documentation.

    /**
     * Name of seven-bit ASCII, also known as ISO646-US or the Basic Latin block of the Unicode character set.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String US_ASCII = "US-ASCII";

    /**
     * Name of ISO Latin Alphabet No. 1, also known as ISO-LATIN-1.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String ISO_8859_1 = "ISO-8859-1";

    /**
     * Name of the eight-bit Unicode Transformation Format.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_8 = "UTF-8";

    /**
     * Name of the sixteen-bit Unicode Transformation Format with big-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16BE = "UTF-16BE";

    /**
     * Name of the sixteen-bit Unicode Transformation Format with little-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16LE = "UTF-16LE";

    /**
     * Name of the sixteen-bit Unicode Transformation Format whose byte order is given by a mandatory initial
     * byte-order mark (either order accepted on input, big-endian used on output).
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16 = "UTF-16";
}

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
import java.nio.charset.Charset;
/**
 * Charsets required of every implementation of the Java platform.
 *
 * From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 * charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
 * release documentation for your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <ul>
 * <li><code>US-ASCII</code><br>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</li>
 * </ul>
 *
 * This perhaps would best belong in the Commons Lang project. Even if a similar class is defined in Commons Lang, it is
 * not foreseen that Commons Codec would be made to depend on Commons Lang.
 *
 * <p>
 * The class declares only static helper methods and {@code static final} {@link Charset} constants; it holds no
 * mutable state of its own.
 * </p>
 *
 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 1.7
 * @version $Id$
 */
public class Charsets {

    //
    // This class should only contain Charset instances for required encodings. This guarantees that it will load
    // correctly and without delay on all Java platforms.
    //

    /**
     * Returns the given Charset, or the platform default Charset if the given Charset is {@code null}.
     *
     * @param charset
     *            a charset, or {@code null}
     * @return {@code charset} when it is non-null, otherwise {@link Charset#defaultCharset()}
     */
    public static Charset toCharset(final Charset charset) {
        return charset == null ? Charset.defaultCharset() : charset;
    }

    /**
     * Returns the Charset with the given name, or the platform default Charset if the name is {@code null}.
     *
     * @param charset
     *            the name of the requested charset, may be {@code null}
     * @return the result of {@link Charset#forName(String)} for a non-null name, otherwise
     *         {@link Charset#defaultCharset()}
     * @throws java.nio.charset.UnsupportedCharsetException
     *             if the named charset is unavailable
     */
    public static Charset toCharset(final String charset) {
        return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
    }

    /**
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets#ISO_8859_1} instead
     */
    @Deprecated
    public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1);

    /**
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets#US_ASCII} instead
     */
    @Deprecated
    public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII);

    /**
     * Sixteen-bit Unicode Transformation Format, with the byte order specified by a mandatory initial byte-order mark
     * (either order accepted on input, big-endian used on output).
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets#UTF_16} instead
     */
    @Deprecated
    public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16);

    /**
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets#UTF_16BE} instead
     */
    @Deprecated
    public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE);

    /**
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets#UTF_16LE} instead
     */
    @Deprecated
    public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE);

    /**
     * Eight-bit Unicode Transformation Format.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets#UTF_8} instead
     */
    @Deprecated
    public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8);
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
/**
 * The most general abstraction for decoders.
 * <p>
 * This is the sister interface of {@link Encoder}. All Decoders implement this common generic interface, which
 * allows a caller to hand a generic Object to any Decoder implementation in the codec package.
 * <p>
 * One of the two interfaces at the center of the codec package.
 *
 * @version $Id$
 */
public interface Decoder {

    /**
     * Decodes an "encoded" Object and returns a "decoded" Object. Implementations will try to cast the Object
     * parameter to the specific type they expect; if a {@link ClassCastException} occurs, this method is expected
     * to throw a DecoderException.
     *
     * @param source
     *            the object to decode
     * @return a "decoded" object
     * @throws DecoderException
     *             may be thrown for any number of reasons; typical candidates are a {@code null} argument or an
     *             argument that cannot be cast to the type required by the specific decoder
     */
    Object decode(Object source) throws DecoderException;
}

View File

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
/**
 * Signals a failure condition during the decoding process. A {@link Decoder} throws this exception when it runs
 * into a decoding-specific problem such as invalid data or characters outside of the expected range.
 *
 * @version $Id$
 */
public class DecoderException extends Exception {

    /**
     * Declares the Serial Version Uid.
     *
     * @see <a href="http://c2.com/cgi/wiki?AlwaysDeclareSerialVersionUid">Always Declare Serial Version Uid</a>
     */
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception whose detail message is <code>null</code>. The cause is left uninitialized and may be
     * set later through {@link #initCause}.
     *
     * @since 1.4
     */
    public DecoderException() {
        // The no-argument superclass constructor is invoked implicitly.
    }

    /**
     * Creates an exception with the given detail message. The cause is left uninitialized and may be set later
     * through {@link #initCause}.
     *
     * @param message
     *            the detail message, retrievable later via {@link #getMessage()}
     */
    public DecoderException(final String message) {
        super(message);
    }

    /**
     * Creates an exception with the given detail message and cause.
     * <p>
     * The detail message of <code>cause</code> is <em>not</em> automatically merged into this exception's detail
     * message.
     *
     * @param message
     *            the detail message, retrievable later via {@link #getMessage()}
     * @param cause
     *            the cause, retrievable later via {@link #getCause()}; <code>null</code> is permitted and means
     *            the cause is nonexistent or unknown
     * @since 1.4
     */
    public DecoderException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with the given cause and a detail message of
     * <code>(cause==null ? null : cause.toString())</code>, which typically contains the class and detail message
     * of <code>cause</code>. Useful for exceptions that are little more than wrappers around other throwables.
     *
     * @param cause
     *            the cause, retrievable later via {@link #getCause()}; <code>null</code> is permitted and means
     *            the cause is nonexistent or unknown
     * @since 1.4
     */
    public DecoderException(final Throwable cause) {
        super(cause);
    }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
/**
 * The most general abstraction for encoders.
 * <p>
 * This is the sister interface of {@link Decoder}. Every Encoder implementation provides this common generic
 * interface, which allows a caller to hand a generic Object to any Encoder implementation in the codec package.
 *
 * @version $Id$
 */
public interface Encoder {

    /**
     * Encodes an Object and returns the encoded content as an Object. Depending on the implementation used, the
     * Objects involved may simply be <code>byte[]</code> or <code>String</code>s.
     *
     * @param source
     *            the object to encode
     * @return an "encoded" Object
     * @throws EncoderException
     *             if the encoder experiences a failure condition during the encoding process
     */
    Object encode(Object source) throws EncoderException;
}

View File

@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
/**
 * Signals a failure condition during the encoding process. An {@link Encoder} throws this exception when it runs
 * into an encoding-specific problem such as invalid data, inability to calculate a checksum, or characters outside
 * of the expected range.
 *
 * @version $Id$
 */
public class EncoderException extends Exception {

    /**
     * Declares the Serial Version Uid.
     *
     * @see <a href="http://c2.com/cgi/wiki?AlwaysDeclareSerialVersionUid">Always Declare Serial Version Uid</a>
     */
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception whose detail message is <code>null</code>. The cause is left uninitialized and may be
     * set later through {@link #initCause}.
     *
     * @since 1.4
     */
    public EncoderException() {
        // The no-argument superclass constructor is invoked implicitly.
    }

    /**
     * Creates an exception with the given detail message. The cause is left uninitialized and may be set later
     * through {@link #initCause}.
     *
     * @param message
     *            a useful message relating to the encoder-specific error
     */
    public EncoderException(final String message) {
        super(message);
    }

    /**
     * Creates an exception with the given detail message and cause.
     *
     * <p>
     * The detail message of <code>cause</code> is <em>not</em> automatically merged into this exception's detail
     * message.
     * </p>
     *
     * @param message
     *            the detail message, retrievable later via {@link #getMessage()}
     * @param cause
     *            the cause, retrievable later via {@link #getCause()}; <code>null</code> is permitted and means
     *            the cause is nonexistent or unknown
     * @since 1.4
     */
    public EncoderException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with the given cause and a detail message of
     * <code>(cause==null ? null : cause.toString())</code>, which typically contains the class and detail message
     * of <code>cause</code>. Useful for exceptions that are little more than wrappers around other throwables.
     *
     * @param cause
     *            the cause, retrievable later via {@link #getCause()}; <code>null</code> is permitted and means
     *            the cause is nonexistent or unknown
     * @since 1.4
     */
    public EncoderException(final Throwable cause) {
        super(cause);
    }
}

View File

@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
/**
 * Defines common decoding methods for String decoders.
 *
 * @version $Id$
 */
public interface StringDecoder extends Decoder {

    /**
     * Decodes a String and returns the decoded result as a String.
     *
     * @param source
     *            the String to decode
     * @return the decoded String
     * @throws DecoderException
     *             thrown if there is an error condition during the decoding process.
     */
    String decode(String source) throws DecoderException;
}

View File

@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
/**
 * Contract for encoders whose input and output are both Strings.
 *
 * @version $Id$
 */
public interface StringEncoder extends Encoder {

    /**
     * Encodes the supplied String and returns the encoded result as a String.
     *
     * @param source
     *            the String to encode
     * @return the encoded String
     * @throws EncoderException
     *             if an error condition arises during the encoding process
     */
    String encode(String source) throws EncoderException;
}

View File

@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec;
import java.util.Comparator;
/**
 * Orders values by comparing their encoded forms, as produced by a {@link StringEncoder}, rather than the values
 * themselves. Typical use is sorting Strings by a phonetic encoding scheme such as Soundex or Metaphone.
 *
 * <p>Instances hold a single final reference to the encoder and no other state.</p>
 *
 * @version $Id$
 */
@SuppressWarnings("rawtypes")
// TODO ought to implement Comparator<String> but that's not possible whilst maintaining binary compatibility.
public class StringEncoderComparator implements Comparator {

    /**
     * Encoder applied to both operands before they are compared.
     */
    private final StringEncoder stringEncoder;

    /**
     * Constructs an instance that has no encoder.
     *
     * @deprecated Creating an instance without a {@link StringEncoder} leads to a {@link NullPointerException}. Will be
     *             removed in 2.0.
     */
    @Deprecated
    public StringEncoderComparator() {
        // Leaves the encoder null; any later call to compare() will break.
        this(null);
    }

    /**
     * Constructs an instance that compares using the given algorithm.
     *
     * @param stringEncoder
     *            the StringEncoder used for comparisons.
     */
    public StringEncoderComparator(final StringEncoder stringEncoder) {
        this.stringEncoder = stringEncoder;
    }

    /**
     * Encodes both arguments with the StringEncoder this comparator was created with and compares the two encoded
     * results, rather than the arguments themselves.
     *
     * If an {@link EncoderException} is raised while encoding either argument, <code>0</code> is returned.
     *
     * @param o1
     *            the object to compare
     * @param o2
     *            the object to compare to
     * @return the Comparable.compareTo() return code or 0 if an encoding error was caught.
     * @see Comparable
     */
    @Override
    public int compare(final Object o1, final Object o2) {
        try {
            // The casts may fail with a ClassCastException if encode() returns something that is not Comparable;
            // that exception is deliberately not caught here.
            @SuppressWarnings("unchecked")
            final Comparable<Comparable<?>> encodedLeft = (Comparable<Comparable<?>>) this.stringEncoder.encode(o1);
            final Comparable<?> encodedRight = (Comparable<?>) this.stringEncoder.encode(o2);
            return encodedLeft.compareTo(encodedRight);
        } catch (final EncoderException ignored) {
            // Documented contract: an encoding failure makes the two operands compare as equal.
            return 0;
        }
    }
}

View File

@ -0,0 +1,539 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
/**
* Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
*
* <p>
* The class can be parameterized in the following manner with various constructors:
* </p>
* <ul>
* <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
* <li>Line length: Default 76. Line lengths that aren't multiples of 8 will still essentially end up being multiples of
* 8 in the encoded data.</li>
* <li>Line separator: Default is CRLF ("\r\n")</li>
* </ul>
* <p>
* This class operates directly on byte streams, and not character streams.
* </p>
* <p>
* This class is thread-safe.
* </p>
*
* @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
*
* @since 1.5
* @version $Id$
*/
public class Base32 extends BaseNCodec {
/**
* BASE32 characters are 5 bits in length.
* They are formed by taking a block of five octets to form a 40-bit string,
* which is converted into eight BASE32 characters.
*/
private static final int BITS_PER_ENCODED_BYTE = 5;
private static final int BYTES_PER_ENCODED_BLOCK = 8;
private static final int BYTES_PER_UNENCODED_BLOCK = 5;
/**
* Chunk separator per RFC 2045 section 2.1.
*
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
*/
private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};
/**
* This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified
* in Table 3 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the Base32
* alphabet but fall within the bounds of the array are translated to -1.
*/
private static final byte[] DECODE_TABLE = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
-1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a P-Z
};
/**
* This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet"
* equivalents as specified in Table 3 of RFC 4648.
*/
private static final byte[] ENCODE_TABLE = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'2', '3', '4', '5', '6', '7',
};
/**
* This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as
* specified in Table 4 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the
* Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
*/
private static final byte[] HEX_DECODE_TABLE = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
25, 26, 27, 28, 29, 30, 31, // 50-57 P-V
};
/**
* This array is a lookup table that translates 5-bit positive integer index values into their
* "Base32 Hex Alphabet" equivalents as specified in Table 4 of RFC 4648.
*/
private static final byte[] HEX_ENCODE_TABLE = {
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
};
/** Mask used to extract 5 bits, used when encoding Base32 bytes */
private static final int MASK_5BITS = 0x1f;
// The static final fields above are used for the original static byte[] methods on Base32.
// The private member fields below are used with the new streaming approach, which requires
// some state be preserved between calls of encode() and decode().
/**
* Place holder for the bytes we're dealing with for our based logic.
* Bitwise operations store and extract the encoding or decoding from this variable.
*/
/**
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
* <code>decodeSize = {@link #BYTES_PER_ENCODED_BLOCK} - 1 + lineSeparator.length;</code>
*/
private final int decodeSize;
/**
* Decode table to use.
*/
private final byte[] decodeTable;
/**
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
* <code>encodeSize = {@link #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;</code>
*/
private final int encodeSize;
/**
* Encode table to use.
*/
private final byte[] encodeTable;
/**
* Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
*/
private final byte[] lineSeparator;
/**
* Creates a Base32 codec used for decoding and encoding.
* <p>
* When encoding the line length is 0 (no chunking).
* </p>
*
*/
public Base32() {
this(false);
}
/**
* Creates a Base32 codec used for decoding and encoding.
* <p>
* When encoding the line length is 0 (no chunking).
* </p>
* @param pad byte used as padding byte.
*/
public Base32(final byte pad) {
this(false, pad);
}
/**
* Creates a Base32 codec used for decoding and encoding.
* <p>
* When encoding the line length is 0 (no chunking).
* </p>
* @param useHex if {@code true} then use Base32 Hex alphabet
*/
public Base32(final boolean useHex) {
this(0, null, useHex, PAD_DEFAULT);
}
/**
* Creates a Base32 codec used for decoding and encoding.
* <p>
* When encoding the line length is 0 (no chunking).
* </p>
* @param useHex if {@code true} then use Base32 Hex alphabet
* @param pad byte used as padding byte.
*/
public Base32(final boolean useHex, final byte pad) {
this(0, null, useHex, pad);
}
/**
* Creates a Base32 codec used for decoding and encoding.
* <p>
* When encoding the line length is given in the constructor, the line separator is CRLF.
* </p>
*
* @param lineLength
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
* 8). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
* decoding.
*/
public Base32(final int lineLength) {
this(lineLength, CHUNK_SEPARATOR);
}
/**
* Creates a Base32 codec used for decoding and encoding.
* <p>
* When encoding the line length and line separator are given in the constructor.
* </p>
* <p>
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
* </p>
*
* @param lineLength
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
* 8). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
* decoding.
* @param lineSeparator
* Each line of encoded data will end with this sequence of bytes.
* @throws IllegalArgumentException
* The provided lineSeparator included some Base32 characters. That's not going to work!
*/
public Base32(final int lineLength, final byte[] lineSeparator) {
this(lineLength, lineSeparator, false, PAD_DEFAULT);
}
/**
* Creates a Base32 / Base32 Hex codec used for decoding and encoding.
* <p>
* When encoding the line length and line separator are given in the constructor.
* </p>
* <p>
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
* </p>
*
* @param lineLength
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
* 8). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
* decoding.
* @param lineSeparator
* Each line of encoded data will end with this sequence of bytes.
* @param useHex
* if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
* @throws IllegalArgumentException
* The provided lineSeparator included some Base32 characters. That's not going to work! Or the
* lineLength &gt; 0 and lineSeparator is null.
*/
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
}
/**
* Creates a Base32 / Base32 Hex codec used for decoding and encoding.
* <p>
* When encoding the line length and line separator are given in the constructor.
* </p>
* <p>
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
* </p>
*
* @param lineLength
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
* 8). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
* decoding.
* @param lineSeparator
* Each line of encoded data will end with this sequence of bytes.
* @param useHex
* if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
* @param pad byte used as padding byte.
* @throws IllegalArgumentException
* The provided lineSeparator included some Base32 characters. That's not going to work! Or the
* lineLength &gt; 0 and lineSeparator is null.
*/
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte pad) {
super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength,
lineSeparator == null ? 0 : lineSeparator.length, pad);
if (useHex) {
this.encodeTable = HEX_ENCODE_TABLE;
this.decodeTable = HEX_DECODE_TABLE;
} else {
this.encodeTable = ENCODE_TABLE;
this.decodeTable = DECODE_TABLE;
}
if (lineLength > 0) {
if (lineSeparator == null) {
throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null");
}
// Must be done after initializing the tables
if (containsAlphabetOrPad(lineSeparator)) {
final String sep = StringUtils.newStringUtf8(lineSeparator);
throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
}
this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
this.lineSeparator = new byte[lineSeparator.length];
System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
} else {
this.encodeSize = BYTES_PER_ENCODED_BLOCK;
this.lineSeparator = null;
}
this.decodeSize = this.encodeSize - 1;
if (isInAlphabet(pad) || isWhiteSpace(pad)) {
throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
}
}
/**
* <p>
* Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
* with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
* call is not necessary when decoding, but it doesn't hurt, either.
* </p>
* <p>
* Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
* silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
* garbage-out philosophy: it will not check the provided data for validity.
* </p>
*
* @param in
* byte[] array of ascii data to Base32 decode.
* @param inPos
* Position to start reading data from.
* @param inAvail
* Amount of bytes available from input for encoding.
* @param context the context to be used
*
* Output is written to {@link Context#buffer} as 8-bit octets, using {@link Context#pos} as the buffer position
*/
@Override
void decode(final byte[] in, int inPos, final int inAvail, final Context context) {
// package protected for access from I/O streams
if (context.eof) {
return;
}
if (inAvail < 0) {
context.eof = true;
}
for (int i = 0; i < inAvail; i++) {
final byte b = in[inPos++];
if (b == pad) {
// We're done.
context.eof = true;
break;
} else {
final byte[] buffer = ensureBufferSize(decodeSize, context);
if (b >= 0 && b < this.decodeTable.length) {
final int result = this.decodeTable[b];
if (result >= 0) {
context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
// collect decoded bytes
context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
if (context.modulus == 0) { // we can output the 5 bytes
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 32) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
}
}
}
}
}
// Two forms of EOF as far as Base32 decoder is concerned: actual
// EOF (-1) and first time '=' character is encountered in stream.
// This approach makes the '=' padding characters completely optional.
if (context.eof && context.modulus >= 2) { // if modulus < 2, nothing to do
final byte[] buffer = ensureBufferSize(decodeSize, context);
// we ignore partial bytes, i.e. only multiples of 8 count
switch (context.modulus) {
case 2 : // 10 bits, drop 2 and output one byte
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 2) & MASK_8BITS);
break;
case 3 : // 15 bits, drop 7 and output 1 byte
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 7) & MASK_8BITS);
break;
case 4 : // 20 bits = 2*8 + 4
context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
break;
case 5 : // 25bits = 3*8 + 1
context.lbitWorkArea = context.lbitWorkArea >> 1;
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
break;
case 6 : // 30bits = 3*8 + 6
context.lbitWorkArea = context.lbitWorkArea >> 6;
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
break;
case 7 : // 35 = 4*8 +3
context.lbitWorkArea = context.lbitWorkArea >> 3;
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
break;
default:
// modulus can be 0-7, and we excluded 0,1 already
throw new IllegalStateException("Impossible modulus "+context.modulus);
}
}
}
/**
* <p>
* Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
* the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last
* remaining bytes (if not multiple of 5).
* </p>
*
* @param in
* byte[] array of binary data to Base32 encode.
* @param inPos
* Position to start reading data from.
* @param inAvail
* Amount of bytes available from input for encoding.
* @param context the context to be used
*/
@Override
void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
// package protected for access from I/O streams
if (context.eof) {
return;
}
// inAvail < 0 is how we're informed of EOF in the underlying data we're
// encoding.
if (inAvail < 0) {
context.eof = true;
if (0 == context.modulus && lineLength == 0) {
return; // no leftovers to process and not using chunking
}
final byte[] buffer = ensureBufferSize(encodeSize, context);
final int savedPos = context.pos;
switch (context.modulus) { // % 5
case 0 :
break;
case 1 : // Only 1 octet; take top 5 bits then remainder
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
break;
case 2 : // 2 octets = 16 bits to use
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
break;
case 3 : // 3 octets = 24 bits to use
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
break;
case 4 : // 4 octets = 32 bits to use
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3
buffer[context.pos++] = pad;
break;
default:
throw new IllegalStateException("Impossible modulus "+context.modulus);
}
context.currentLinePos += context.pos - savedPos; // keep track of current line position
// if currentPos == 0 we are at the start of a line, so don't add CRLF
if (lineLength > 0 && context.currentLinePos > 0){ // add chunk separator if required
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
context.pos += lineSeparator.length;
}
} else {
for (int i = 0; i < inAvail; i++) {
final byte[] buffer = ensureBufferSize(encodeSize, context);
context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
int b = in[inPos++];
if (b < 0) {
b += 256;
}
context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
if (0 == context.modulus) { // we have enough bytes to create our output
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 35) & MASK_5BITS];
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 30) & MASK_5BITS];
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 25) & MASK_5BITS];
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 20) & MASK_5BITS];
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 15) & MASK_5BITS];
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 10) & MASK_5BITS];
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 5) & MASK_5BITS];
buffer[context.pos++] = encodeTable[(int)context.lbitWorkArea & MASK_5BITS];
context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
if (lineLength > 0 && lineLength <= context.currentLinePos) {
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
context.pos += lineSeparator.length;
context.currentLinePos = 0;
}
}
}
}
}
/**
* Returns whether or not the {@code octet} is in the Base32 alphabet.
*
* @param octet
* The value to test
* @return {@code true} if the value is defined in the the Base32 alphabet {@code false} otherwise.
*/
@Override
public boolean isInAlphabet(final byte octet) {
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
}
}

View File

@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import java.io.InputStream;
/**
* Provides Base32 encoding and decoding in a streaming fashion (unlimited size). The constructors that take no
* lineLength build their codec with {@code new Base32(false)}, which configures a line length of 0 (encoded output
* is not divided into lines); use the constructor taking lineLength and lineSeparator to chunk encoded output.
* <p>
* The default behaviour of the Base32InputStream is to DECODE, whereas the default behaviour of the Base32OutputStream
* is to ENCODE, but this behaviour can be overridden by using a different constructor.
* </p>
* <p>
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
* </p>
*
* @version $Id$
* @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
* @since 1.5
*/
public class Base32InputStream extends BaseNCodecInputStream {
/**
* Creates a Base32InputStream such that all data read is Base32-decoded from the original provided InputStream.
* Equivalent to {@code Base32InputStream(in, false)}.
*
* @param in
* InputStream to wrap.
*/
public Base32InputStream(final InputStream in) {
this(in, false);
}
/**
* Creates a Base32InputStream such that all data read is either Base32-encoded or Base32-decoded from the original
* provided InputStream. The codec uses the standard Base32 alphabet and no line chunking.
*
* @param in
* InputStream to wrap.
* @param doEncode
* true if we should encode all data read from us, false if we should decode.
*/
public Base32InputStream(final InputStream in, final boolean doEncode) {
super(in, new Base32(false), doEncode);
}
/**
* Creates a Base32InputStream such that all data read is either Base32-encoded or Base32-decoded from the original
* provided InputStream. The codec is created with {@code new Base32(lineLength, lineSeparator)}.
*
* @param in
* InputStream to wrap.
* @param doEncode
* true if we should encode all data read from us, false if we should decode.
* @param lineLength
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
* nearest multiple of 8). If lineLength &lt;= 0, the encoded data is not divided into lines. If doEncode
* is false, lineLength is ignored.
* @param lineSeparator
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
* If lineLength &lt;= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.
* @throws IllegalArgumentException
* thrown by the Base32 constructor if lineLength &gt; 0 and lineSeparator is null or contains Base32
* characters.
*/
public Base32InputStream(final InputStream in, final boolean doEncode,
final int lineLength, final byte[] lineSeparator) {
super(in, new Base32(lineLength, lineSeparator), doEncode);
}
}

View File

@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import java.io.OutputStream;
/**
* Provides Base32 encoding and decoding in a streaming fashion (unlimited size). The constructors that take no
* lineLength build their codec with {@code new Base32(false)}, which configures a line length of 0 (encoded output
* is not divided into lines); use the constructor taking lineLength and lineSeparator to chunk encoded output.
* <p>
* The default behaviour of the Base32OutputStream is to ENCODE, whereas the default behaviour of the Base32InputStream
* is to DECODE. But this behaviour can be overridden by using a different constructor.
* </p>
* <p>
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
* </p>
* <p>
* <b>Note:</b> It is mandatory to close the stream after the last byte has been written to it, otherwise the
* final padding will be omitted and the resulting data will be incomplete/inconsistent.
* </p>
*
* @version $Id$
* @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
* @since 1.5
*/
public class Base32OutputStream extends BaseNCodecOutputStream {
/**
* Creates a Base32OutputStream such that all data written is Base32-encoded to the original provided OutputStream.
* Equivalent to {@code Base32OutputStream(out, true)}.
*
* @param out
* OutputStream to wrap.
*/
public Base32OutputStream(final OutputStream out) {
this(out, true);
}
/**
* Creates a Base32OutputStream such that all data written is either Base32-encoded or Base32-decoded to the
* original provided OutputStream. The codec uses the standard Base32 alphabet and no line chunking.
*
* @param out
* OutputStream to wrap.
* @param doEncode
* true if we should encode all data written to us, false if we should decode.
*/
public Base32OutputStream(final OutputStream out, final boolean doEncode) {
super(out, new Base32(false), doEncode);
}
/**
* Creates a Base32OutputStream such that all data written is either Base32-encoded or Base32-decoded to the
* original provided OutputStream. The codec is created with {@code new Base32(lineLength, lineSeparator)}.
*
* @param out
* OutputStream to wrap.
* @param doEncode
* true if we should encode all data written to us, false if we should decode.
* @param lineLength
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
* nearest multiple of 8). If lineLength &lt;= 0, the encoded data is not divided into lines. If doEncode
* is false, lineLength is ignored.
* @param lineSeparator
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
* If lineLength &lt;= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.
* @throws IllegalArgumentException
* thrown by the Base32 constructor if lineLength &gt; 0 and lineSeparator is null or contains Base32
* characters.
*/
public Base32OutputStream(final OutputStream out, final boolean doEncode,
final int lineLength, final byte[] lineSeparator) {
super(out, new Base32(lineLength, lineSeparator), doEncode);
}
}

View File

@ -0,0 +1,786 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import java.math.BigInteger;
/**
* Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
*
* <p>
* This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
* Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
* </p>
* <p>
* The class can be parameterized in the following manner with various constructors:
* </p>
* <ul>
* <li>URL-safe mode: Default off.</li>
* <li>Line length: Default 76. Line length that aren't multiples of 4 will still essentially end up being multiples of
* 4 in the encoded data.
* <li>Line separator: Default is CRLF ("\r\n")</li>
* </ul>
* <p>
* The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
* </p>
* <p>
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
* encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
* UTF-8, etc).
* </p>
* <p>
* This class is thread-safe.
* </p>
*
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
* @since 1.0
* @version $Id$
*/
public class Base64 extends BaseNCodec {
/**
* BASE64 characters are 6 bits in length.
* They are formed by taking a block of 3 octets to form a 24-bit string,
* which is converted into 4 BASE64 characters.
*/
private static final int BITS_PER_ENCODED_BYTE = 6;
private static final int BYTES_PER_UNENCODED_BLOCK = 3;
private static final int BYTES_PER_ENCODED_BLOCK = 4;
/**
* Chunk separator per RFC 2045 section 2.1.
*
* <p>
* N.B. The next major release may break compatibility and make this field private.
* </p>
*
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
*/
static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};
/**
* This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet"
* equivalents as specified in Table 1 of RFC 2045.
*
* Thanks to "commons" project in ws.apache.org for this code.
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
*/
private static final byte[] STANDARD_ENCODE_TABLE = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
/**
* This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
* changed to - and _ to make the encoded Base64 results more URL-SAFE.
* This table is only used when the Base64's mode is set to URL-SAFE.
*/
private static final byte[] URL_SAFE_ENCODE_TABLE = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
};
/**
* This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
* in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
* alphabet but fall within the bounds of the array are translated to -1.
*
* Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
* URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
*
* Thanks to "commons" project in ws.apache.org for this code.
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
*/
private static final byte[] DECODE_TABLE = {
// Indexed by ASCII code; -1 marks a byte that belongs to neither alphabet.
// '+' and '-' both map to 62, '/' and '_' both map to 63, so one table decodes both variants.
// The table stops at 0x7a ('z'); callers bounds-check against DECODE_TABLE.length before indexing.
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z
};
/** Mask used to extract 6 bits (Base64 uses 6-bit fields), used when encoding */
private static final int MASK_6BITS = 0x3f;
// The static final fields above are used for the original static byte[] methods on Base64.
// The private member fields below are used with the new streaming approach, which requires
// some state be preserved between calls of encode() and decode().
/**
* Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
* to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
* between the two modes.
*/
private final byte[] encodeTable;
// Only one decode table currently; keep for consistency with Base32 code
private final byte[] decodeTable = DECODE_TABLE;
/**
* Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0; the constructor leaves it
* <code>null</code> when chunking is disabled.
*/
private final byte[] lineSeparator;
/**
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
* Set by the constructor to <code>encodeSize - 1</code>.
*/
private final int decodeSize;
/**
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
* Set by the constructor to <code>BYTES_PER_ENCODED_BLOCK + lineSeparator.length</code> when chunking is enabled,
* and to <code>BYTES_PER_ENCODED_BLOCK</code> otherwise.
*/
private final int encodeSize;
/**
* Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
* <p>
* When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
* Equivalent to <code>new Base64(0)</code>.
* </p>
*
* <p>
* When decoding all variants are supported.
* </p>
*/
public Base64() {
this(0);
}
/**
* Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
* <p>
* When encoding the line length is 76 (MIME_CHUNK_SIZE), the line separator is CRLF (CHUNK_SEPARATOR), and the
* encoding table is URL_SAFE_ENCODE_TABLE if <code>urlSafe</code> is <code>true</code>, STANDARD_ENCODE_TABLE
* otherwise.
* </p>
*
* <p>
* When decoding all variants are supported.
* </p>
*
* @param urlSafe
* if <code>true</code>, URL-safe encoding is used. In most cases this should be set to
* <code>false</code>.
* @since 1.4
*/
public Base64(final boolean urlSafe) {
this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
}
/**
* Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
* <p>
* When encoding the line length is given in the constructor, the line separator is CRLF (CHUNK_SEPARATOR), and the
* encoding table is STANDARD_ENCODE_TABLE.
* </p>
* <p>
* Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
* </p>
* <p>
* When decoding all variants are supported.
* </p>
*
* @param lineLength
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
* 4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
* decoding.
* @since 1.4
*/
public Base64(final int lineLength) {
this(lineLength, CHUNK_SEPARATOR);
}
/**
* Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
* <p>
* When encoding the line length and line separator are given in the constructor, and the encoding table is
* STANDARD_ENCODE_TABLE.
* </p>
* <p>
* Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
* </p>
* <p>
* When decoding all variants are supported.
* </p>
*
* @param lineLength
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
* 4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
* decoding.
* @param lineSeparator
* Each line of encoded data will end with this sequence of bytes. A <code>null</code> separator
* disables chunking.
* @throws IllegalArgumentException
* Thrown when the provided lineSeparator included some base64 characters.
* @since 1.4
*/
public Base64(final int lineLength, final byte[] lineSeparator) {
this(lineLength, lineSeparator, false);
}
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in the requested mode.
 * <p>
 * When encoding, the line length and line separator are the ones given here, and the encoding table is
 * URL_SAFE_ENCODE_TABLE when {@code urlSafe} is {@code true}, STANDARD_ENCODE_TABLE otherwise.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
 *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
 *            decoding.
 * @param lineSeparator
 *            Each line of encoded data will end with this sequence of bytes. The array is copied, so later
 *            changes by the caller have no effect. A {@code null} separator disables chunking.
 * @param urlSafe
 *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
 *            operations. Decoding seamlessly handles both modes.
 *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
 * @throws IllegalArgumentException
 *             if the provided lineSeparator contains base64 characters; this is checked even when
 *             lineLength &lt;= 0.
 * @since 1.4
 */
public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
    super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
            lineLength,
            lineSeparator == null ? 0 : lineSeparator.length);
    // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
    // @see test case Base64Test.testConstructors()
    if (lineSeparator != null && containsAlphabetOrPad(lineSeparator)) {
        final String sep = StringUtils.newStringUtf8(lineSeparator);
        throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
    }

    // A null separator (or a non-positive line length) means no chunking rather than an IAE.
    final boolean chunked = lineSeparator != null && lineLength > 0;
    if (chunked) {
        // Defensive copy so the caller cannot alter the separator afterwards.
        this.lineSeparator = lineSeparator.clone();
        this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
    } else {
        this.lineSeparator = null;
        this.encodeSize = BYTES_PER_ENCODED_BLOCK;
    }
    this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
    this.decodeSize = this.encodeSize - 1;
}
/**
* Returns our current encode mode. True if we're URL-SAFE, false otherwise.
* <p>
* The mode is detected by checking whether this codec's encode table is the URL_SAFE_ENCODE_TABLE instance.
* </p>
*
* @return true if we're in URL-SAFE mode, false otherwise.
* @since 1.4
*/
public boolean isUrlSafe() {
return this.encodeTable == URL_SAFE_ENCODE_TABLE;
}
/**
* <p>
* Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
* the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
* remaining bytes (if not multiple of 3).
* </p>
* <p>
* Encoded bytes are appended to the context's buffer. Input bytes that do not yet fill a 3-byte block are kept in
* <code>context.ibitWorkArea</code> / <code>context.modulus</code> until the next call. Once EOF has been
* signalled, further calls with the same context return immediately.
* </p>
* <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p>
* <p>
* Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
* </p>
*
* @param in
* byte[] array of binary data to base64 encode.
* @param inPos
* Position to start reading data from.
* @param inAvail
* Amount of bytes available from input for encoding; a negative value signals EOF.
* @param context
* the context to be used
*/
@Override
void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
if (context.eof) {
return;
}
// inAvail < 0 is how we're informed of EOF in the underlying data we're
// encoding.
if (inAvail < 0) {
context.eof = true;
if (0 == context.modulus && lineLength == 0) {
return; // no leftovers to process and not using chunking
}
final byte[] buffer = ensureBufferSize(encodeSize, context);
final int savedPos = context.pos;
switch (context.modulus) { // 0-2 leftover input bytes
case 0 : // nothing to do here
break;
case 1 : // 8 bits = 6 + 2
// top 6 bits:
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS];
// remaining 2:
buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS];
// URL-SAFE skips the padding to further reduce size.
if (encodeTable == STANDARD_ENCODE_TABLE) {
buffer[context.pos++] = pad;
buffer[context.pos++] = pad;
}
break;
case 2 : // 16 bits = 6 + 6 + 4
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
// URL-SAFE skips the padding to further reduce size.
if (encodeTable == STANDARD_ENCODE_TABLE) {
buffer[context.pos++] = pad;
}
break;
default:
throw new IllegalStateException("Impossible modulus "+context.modulus);
}
context.currentLinePos += context.pos - savedPos; // keep track of current line position
// if currentLinePos == 0 we are at the start of a line, so don't add the line separator
if (lineLength > 0 && context.currentLinePos > 0) {
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
context.pos += lineSeparator.length;
}
} else {
for (int i = 0; i < inAvail; i++) {
final byte[] buffer = ensureBufferSize(encodeSize, context);
context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
// widen the signed byte to its unsigned value (0-255) before accumulating
int b = in[inPos++];
if (b < 0) {
b += 256;
}
context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
// line is full: emit the separator and start a new line
if (lineLength > 0 && lineLength <= context.currentLinePos) {
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
context.pos += lineSeparator.length;
context.currentLinePos = 0;
}
}
}
}
}
/**
* <p>
* Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
* with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
* call is not necessary when decoding, but it doesn't hurt, either.
* </p>
* <p>
* Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
* silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
* garbage-out philosophy: it will not check the provided data for validity.
* </p>
* <p>
* The first pad byte encountered is treated as EOF: the remaining input of that call is not examined and later
* calls with the same context return immediately.
* </p>
* <p>
* Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
* </p>
*
* @param in
* byte[] array of ascii data to base64 decode.
* @param inPos
* Position to start reading data from.
* @param inAvail
* Amount of bytes available from input for decoding; a negative value signals EOF.
* @param context
* the context to be used
*/
@Override
void decode(final byte[] in, int inPos, final int inAvail, final Context context) {
if (context.eof) {
return;
}
if (inAvail < 0) {
context.eof = true;
}
// when inAvail is negative this loop does not execute and only the EOF handling below runs
for (int i = 0; i < inAvail; i++) {
final byte[] buffer = ensureBufferSize(decodeSize, context);
final byte b = in[inPos++];
if (b == pad) {
// We're done.
context.eof = true;
break;
} else {
// bytes outside the table, or mapped to a negative value, are skipped silently
if (b >= 0 && b < DECODE_TABLE.length) {
final int result = DECODE_TABLE[b];
if (result >= 0) {
context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
// a full encoded block has been accumulated: emit its three decoded bytes
if (context.modulus == 0) {
buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
}
}
}
}
}
// Two forms of EOF as far as base64 decoder is concerned: actual
// EOF (-1) and first time '=' character is encountered in stream.
// This approach makes the '=' padding characters completely optional.
if (context.eof && context.modulus != 0) {
final byte[] buffer = ensureBufferSize(decodeSize, context);
// We have some spare bits remaining
// Output all whole multiples of 8 bits and ignore the rest
switch (context.modulus) {
// case 0 : // impossible, as excluded above
case 1 : // 6 bits - ignore entirely
// TODO not currently tested; perhaps it is impossible?
break;
case 2 : // 12 bits = 8 + 4
context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
break;
case 3 : // 18 bits = 8 + 8 + 2
context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
break;
default:
throw new IllegalStateException("Impossible modulus "+context.modulus);
}
}
}
/**
* Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
* method treats whitespace as valid. Simply delegates to {@link #isBase64(byte[])}.
*
* @param arrayOctet
* byte array to test
* @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
* <code>false</code>, otherwise
* @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
*/
@Deprecated
public static boolean isArrayByteBase64(final byte[] arrayOctet) {
return isBase64(arrayOctet);
}
/**
 * Returns whether or not the <code>octet</code> is in the base 64 alphabet.
 * <p>
 * The pad byte is accepted as well, as are the characters of both the standard and the URL-safe alphabet.
 * </p>
 *
 * @param octet
 *            The value to test
 * @return <code>true</code> if the value is the pad byte or is defined in the base 64 alphabet,
 *         <code>false</code> otherwise.
 * @since 1.4
 */
public static boolean isBase64(final byte octet) {
    if (octet == PAD_DEFAULT) {
        return true;
    }
    // Negative bytes and bytes beyond the end of the table cannot be looked up.
    if (octet < 0 || octet >= DECODE_TABLE.length) {
        return false;
    }
    return DECODE_TABLE[octet] != -1;
}
/**
* Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
* method treats whitespace as valid. The String is converted to its UTF-8 bytes and checked with
* {@link #isBase64(byte[])}.
*
* @param base64
* String to test
* @return <code>true</code> if all characters in the String are valid characters in the Base64 alphabet or if
* the String is empty; <code>false</code>, otherwise
* @since 1.5
*/
public static boolean isBase64(final String base64) {
return isBase64(StringUtils.getBytesUtf8(base64));
}
/**
 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
 * method treats whitespace as valid.
 *
 * @param arrayOctet
 *            byte array to test
 * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
 *         <code>false</code>, otherwise
 * @since 1.5
 */
public static boolean isBase64(final byte[] arrayOctet) {
    for (final byte octet : arrayOctet) {
        final boolean acceptable = isBase64(octet) || isWhiteSpace(octet);
        if (!acceptable) {
            return false; // first offending byte decides the result
        }
    }
    return true;
}
/**
* Encodes binary data using the base64 algorithm but does not chunk the output.
* Equivalent to <code>encodeBase64(binaryData, false)</code>.
*
* @param binaryData
* binary data to encode
* @return byte[] containing Base64 characters in their UTF-8 representation.
*/
public static byte[] encodeBase64(final byte[] binaryData) {
return encodeBase64(binaryData, false);
}
/**
* Encodes binary data using the base64 algorithm but does not chunk the output, and returns the result as a
* String built from the encoded bytes interpreted as UTF-8.
*
* NOTE: We changed the behaviour of this method from multi-line chunking (commons-codec-1.4) to
* single-line non-chunking (commons-codec-1.5).
*
* @param binaryData
* binary data to encode
* @return String containing Base64 characters.
* @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not).
*/
public static String encodeBase64String(final byte[] binaryData) {
return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
}
/**
* Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
* url-safe variation emits - and _ instead of + and / characters.
* <b>Note: no padding is added.</b>
* Equivalent to <code>encodeBase64(binaryData, false, true)</code>.
*
* @param binaryData
* binary data to encode
* @return byte[] containing Base64 characters in their UTF-8 representation.
* @since 1.4
*/
public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
return encodeBase64(binaryData, false, true);
}
/**
* Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output, and
* returns the result as a String built from the encoded bytes interpreted as UTF-8. The url-safe variation emits
* - and _ instead of + and / characters.
* <b>Note: no padding is added.</b>
*
* @param binaryData
* binary data to encode
* @return String containing Base64 characters
* @since 1.4
*/
public static String encodeBase64URLSafeString(final byte[] binaryData) {
return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
}
/**
* Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks.
* Equivalent to <code>encodeBase64(binaryData, true)</code>.
*
* @param binaryData
* binary data to encode
* @return Base64 characters chunked in 76 character blocks
*/
public static byte[] encodeBase64Chunked(final byte[] binaryData) {
return encodeBase64(binaryData, true);
}
/**
* Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
* The standard (URL-unsafe) alphabet is used.
*
* @param binaryData
* Array containing binary data to encode.
* @param isChunked
* if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
* @return Base64-encoded data.
* @throws IllegalArgumentException
* Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
*/
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
return encodeBase64(binaryData, isChunked, false);
}
/**
* Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
* The result size is limited to {@link Integer#MAX_VALUE}.
*
* @param binaryData
* Array containing binary data to encode.
* @param isChunked
* if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
* @param urlSafe
* if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
* <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
* @return Base64-encoded data.
* @throws IllegalArgumentException
* Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
* @since 1.4
*/
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
}
/**
 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
 * <p>
 * A {@code null} or empty input is returned unchanged.
 * </p>
 *
 * @param binaryData
 *            Array containing binary data to encode.
 * @param isChunked
 *            if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
 * @param urlSafe
 *            if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
 *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
 * @param maxResultSize
 *            The maximum result size to accept.
 * @return Base64-encoded data.
 * @throws IllegalArgumentException
 *             Thrown when the input array needs an output array bigger than maxResultSize
 * @since 1.4
 */
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
                                  final boolean urlSafe, final int maxResultSize) {
    if (binaryData == null || binaryData.length == 0) {
        return binaryData;
    }

    // Use a codec instance so the length estimate and the actual encoding apply the same rounding.
    final Base64 codec;
    if (isChunked) {
        codec = new Base64(urlSafe);
    } else {
        codec = new Base64(0, CHUNK_SEPARATOR, urlSafe);
    }

    final long encodedLength = codec.getEncodedLength(binaryData);
    if (encodedLength <= maxResultSize) {
        return codec.encode(binaryData);
    }
    throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
            encodedLength +
            ") than the specified maximum size of " +
            maxResultSize);
}
/**
* Decodes a Base64 String into octets, using a default-constructed codec.
* <p>
* <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
* </p>
*
* @param base64String
* String containing Base64 data
* @return Array containing decoded data.
* @since 1.4
*/
public static byte[] decodeBase64(final String base64String) {
return new Base64().decode(base64String);
}
/**
* Decodes Base64 data into octets, using a default-constructed codec.
* <p>
* <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
* </p>
*
* @param base64Data
* Byte array containing Base64 data
* @return Array containing decoded data.
*/
public static byte[] decodeBase64(final byte[] base64Data) {
return new Base64().decode(base64Data);
}
// Implementation of the Encoder Interface
// Implementation of integer encoding used for crypto
/**
* Decodes a base64-encoded integer according to crypto standards such as W3C's XML-Signature.
* <p>
* The decoded bytes are interpreted as the magnitude of a positive number (signum 1).
* </p>
*
* @param pArray
* a byte array containing base64 character data
* @return A BigInteger
* @since 1.4
*/
public static BigInteger decodeInteger(final byte[] pArray) {
return new BigInteger(1, decodeBase64(pArray));
}
/**
 * Encodes a BigInteger as base64 according to crypto standards such as W3C's XML-Signature.
 * <p>
 * The integer is first reduced to its byte representation without sign bit and then encoded unchunked.
 * </p>
 *
 * @param bigInt
 *            a BigInteger
 * @return A byte array containing base64 character data
 * @throws NullPointerException
 *             if null is passed in
 * @since 1.4
 */
public static byte[] encodeInteger(final BigInteger bigInt) {
    if (null == bigInt) {
        throw new NullPointerException("encodeInteger called with null parameter");
    }
    final byte[] magnitude = toIntegerBytes(bigInt);
    return encodeBase64(magnitude, false);
}
/**
 * Returns a byte-array representation of a <code>BigInteger</code> without sign bit.
 * <p>
 * When the bit length is not a multiple of 8, the two's-complement array from
 * {@link BigInteger#toByteArray()} is returned as is. When it is a multiple of 8, that array carries one extra
 * leading sign byte, which is dropped.
 * </p>
 *
 * @param bigInt
 *            <code>BigInteger</code> to be converted
 * @return a byte array representation of the BigInteger parameter
 */
static byte[] toIntegerBytes(final BigInteger bigInt) {
    final int bits = bigInt.bitLength();
    // number of whole bytes needed to hold 'bits' bits
    final int byteCount = (bits + 7) >> 3;
    final byte[] raw = bigInt.toByteArray();
    final boolean byteAligned = (bits % 8) == 0;

    if (!byteAligned && (bits / 8) + 1 == byteCount) {
        return raw;
    }

    // if bigInt is exactly byte-aligned, skip the leading sign byte while copying
    final int skip = byteAligned ? 1 : 0;
    final int count = raw.length - skip;
    final byte[] magnitude = new byte[byteCount];
    // right-align the copied bytes so any gap is left as leading zero bytes, as per spec
    System.arraycopy(raw, skip, magnitude, byteCount - count, count);
    return magnitude;
}
/**
 * Returns whether or not the <code>octet</code> is in the Base64 alphabet.
 * <p>
 * Unlike the static {@code isBase64(byte)}, the pad byte is not treated specially here; only the decode table is
 * consulted.
 * </p>
 *
 * @param octet
 *            The value to test
 * @return <code>true</code> if the value is defined in the Base64 alphabet, <code>false</code> otherwise.
 */
@Override
protected boolean isInAlphabet(final byte octet) {
    // Negative bytes and bytes beyond the end of the table cannot be looked up.
    if (octet < 0 || octet >= decodeTable.length) {
        return false;
    }
    return decodeTable[octet] != -1;
}
}

View File

@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import java.io.InputStream;
/**
* Provides Base64 encoding and decoding in a streaming fashion (unlimited size). When encoding the default lineLength
* is 76 characters and the default lineEnding is CRLF, but these can be overridden by using the appropriate
* constructor.
* <p>
* The default behaviour of the Base64InputStream is to DECODE, whereas the default behaviour of the Base64OutputStream
* is to ENCODE, but this behaviour can be overridden by using a different constructor.
* </p>
* <p>
* This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
* Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
* </p>
* <p>
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
* </p>
*
* @version $Id$
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
* @since 1.4
*/
public class Base64InputStream extends BaseNCodecInputStream {
/**
* Creates a Base64InputStream such that all data read is Base64-decoded from the original provided InputStream.
* Equivalent to <code>new Base64InputStream(in, false)</code>.
*
* @param in
* InputStream to wrap.
*/
public Base64InputStream(final InputStream in) {
this(in, false);
}
/**
* Creates a Base64InputStream such that all data read is either Base64-encoded or Base64-decoded from the original
* provided InputStream. The underlying codec is created with <code>new Base64(false)</code>, i.e. it is not
* URL-safe.
*
* @param in
* InputStream to wrap.
* @param doEncode
* true if we should encode all data read from us, false if we should decode.
*/
public Base64InputStream(final InputStream in, final boolean doEncode) {
super(in, new Base64(false), doEncode);
}
/**
* Creates a Base64InputStream such that all data read is either Base64-encoded or Base64-decoded from the original
* provided InputStream.
*
* @param in
* InputStream to wrap.
* @param doEncode
* true if we should encode all data read from us, false if we should decode.
* @param lineLength
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
* nearest multiple of 4). If lineLength &lt;= 0, the encoded data is not divided into lines. If doEncode
* is false, lineLength is ignored.
* @param lineSeparator
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
* If lineLength &lt;= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.
*/
public Base64InputStream(final InputStream in, final boolean doEncode,
final int lineLength, final byte[] lineSeparator) {
super(in, new Base64(lineLength, lineSeparator), doEncode);
}
}

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import java.io.OutputStream;
/**
* Provides Base64 encoding and decoding in a streaming fashion (unlimited size). When encoding the default lineLength
* is 76 characters and the default lineEnding is CRLF, but these can be overridden by using the appropriate
* constructor.
* <p>
* The default behaviour of the Base64OutputStream is to ENCODE, whereas the default behaviour of the Base64InputStream
* is to DECODE. But this behaviour can be overridden by using a different constructor.
* </p>
* <p>
* This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
* Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
* </p>
* <p>
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
* </p>
* <p>
* <b>Note:</b> It is mandatory to close the stream after the last byte has been written to it, otherwise the
* final padding will be omitted and the resulting data will be incomplete/inconsistent.
* </p>
*
* @version $Id$
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
* @since 1.4
*/
public class Base64OutputStream extends BaseNCodecOutputStream {
/**
* Creates a Base64OutputStream such that all data written is Base64-encoded to the original provided OutputStream.
* Equivalent to <code>new Base64OutputStream(out, true)</code>.
*
* @param out
* OutputStream to wrap.
*/
public Base64OutputStream(final OutputStream out) {
this(out, true);
}
/**
* Creates a Base64OutputStream such that all data written is either Base64-encoded or Base64-decoded to the
* original provided OutputStream. The underlying codec is created with <code>new Base64(false)</code>, i.e. it is
* not URL-safe.
*
* @param out
* OutputStream to wrap.
* @param doEncode
* true if we should encode all data written to us, false if we should decode.
*/
public Base64OutputStream(final OutputStream out, final boolean doEncode) {
super(out,new Base64(false), doEncode);
}
/**
* Creates a Base64OutputStream such that all data written is either Base64-encoded or Base64-decoded to the
* original provided OutputStream.
*
* @param out
* OutputStream to wrap.
* @param doEncode
* true if we should encode all data written to us, false if we should decode.
* @param lineLength
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
* nearest multiple of 4). If lineLength &lt;= 0, the encoded data is not divided into lines. If doEncode
* is false, lineLength is ignored.
* @param lineSeparator
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
* If lineLength &lt;= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.
*/
public Base64OutputStream(final OutputStream out, final boolean doEncode,
final int lineLength, final byte[] lineSeparator) {
super(out, new Base64(lineLength, lineSeparator), doEncode);
}
}

View File

@ -0,0 +1,525 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import java.util.Arrays;
import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
/**
* Abstract superclass for Base-N encoders and decoders.
*
* <p>
* This class is thread-safe.
* </p>
*
* @version $Id$
*/
public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
/**
* Holds the per-operation state of an encode or decode run so that the codec classes themselves can be thread-safe.
*
* This class is not itself thread-safe; each thread must allocate its own copy.
*
* @since 1.7
*/
static class Context {
/**
* Place holder for the bytes we're dealing with for our base logic.
* Bitwise operations store and extract the encoding or decoding from this variable.
* This is the int-sized work area.
*/
int ibitWorkArea;
/**
* Place holder for the bytes we're dealing with for our base logic.
* Bitwise operations store and extract the encoding or decoding from this variable.
* This is the long-sized work area (presumably for codecs whose blocks need more than 32 bits - confirm
* against the other codecs).
*/
long lbitWorkArea;
/**
* Buffer for streaming.
*/
byte[] buffer;
/**
* Position where next character should be written in the buffer.
*/
int pos;
/**
* Position where next character should be read from the buffer.
*/
int readPos;
/**
* Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
* and must be thrown away.
*/
boolean eof;
/**
* Variable tracks how many characters have been written to the current line. Only used when encoding. We use
* it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
*/
int currentLinePos;
/**
* Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
* variable helps track that.
*/
int modulus;
/** Creates an empty context; all fields start at their Java default values. */
Context() {
}
/**
* Returns a String useful for debugging (especially within a debugger.)
* It lists the simple class name followed by every field, including the full buffer contents.
*
* @return a String useful for debugging.
*/
@SuppressWarnings("boxing") // OK to ignore boxing here
@Override
public String toString() {
return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
"modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
}
}
/**
 * End-of-file marker. {@code readResults} returns it once a context is exhausted, and it is passed as the
 * length argument of the streaming encode/decode methods to notify the codec that the input has ended.
 *
 * @since 1.7
 */
static final int EOF = -1;
/**
 * MIME chunk size per RFC 2045 section 6.8.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
 * equal signs.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
 */
public static final int MIME_CHUNK_SIZE = 76;
/**
 * PEM chunk size per RFC 1421 section 4.3.2.4.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
 * equal signs.
 * </p>
 *
 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
 */
public static final int PEM_CHUNK_SIZE = 64;
/** Factor by which an existing buffer's length is multiplied when it has to grow. */
private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
/**
 * Defines the default buffer size - currently {@value}
 * - must be large enough for at least one encoded block+separator
 */
private static final int DEFAULT_BUFFER_SIZE = 8192;
/** Mask used to extract 8 bits, used in decoding bytes */
protected static final int MASK_8BITS = 0xff;
/**
 * Byte used to pad output.
 */
protected static final byte PAD_DEFAULT = '='; // Allow static access to default
/**
 * @deprecated Use {@link #pad}. Will be removed in 2.0.
 */
@Deprecated
protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
/** Padding byte used by this codec instance; set once by the constructor. */
protected final byte pad; // instance variable just in case it needs to vary later
/** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
private final int unencodedBlockSize;
/** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
private final int encodedBlockSize;
/**
 * Chunksize for encoding. Not used when decoding.
 * A value of zero or less implies no chunking of the encoded data.
 * Rounded down to nearest multiple of encodedBlockSize.
 */
protected final int lineLength;
/**
 * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
 */
private final int chunkSeparatorLength;
/**
 * Creates a codec that pads its output with {@link #PAD_DEFAULT}.
 * <p>
 * <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}.
 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
 * </p>
 *
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
 * @param chunkSeparatorLength the chunk separator length, if relevant
 */
protected BaseNCodec(
        final int unencodedBlockSize,
        final int encodedBlockSize,
        final int lineLength,
        final int chunkSeparatorLength) {
    // Same as the five-argument form, with the default pad byte filled in.
    this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
}
/**
 * Creates a codec with an explicit padding byte.
 * <p>
 * Chunking is enabled only when both <code>lineLength</code> and <code>chunkSeparatorLength</code> are
 * greater than zero; the effective line length is then <code>lineLength</code> rounded down to the nearest
 * multiple of {@link #encodedBlockSize}. Otherwise the effective line length is zero.
 * </p>
 *
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
 * @param chunkSeparatorLength the chunk separator length, if relevant
 * @param pad byte used as padding byte.
 */
protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
        final int lineLength, final int chunkSeparatorLength, final byte pad) {
    this.unencodedBlockSize = unencodedBlockSize;
    this.encodedBlockSize = encodedBlockSize;
    this.chunkSeparatorLength = chunkSeparatorLength;
    this.pad = pad;
    if (lineLength > 0 && chunkSeparatorLength > 0) {
        // Integer division drops any partial encoded block from the requested line length.
        final int wholeBlocksPerLine = lineLength / encodedBlockSize;
        this.lineLength = wholeBlocksPerLine * encodedBlockSize;
    } else {
        this.lineLength = 0;
    }
}
/**
 * Tells whether the given context currently holds a buffer, i.e. has buffered data for reading.
 *
 * @param context the context to be used
 * @return true if there is data still available for reading.
 */
boolean hasData(final Context context) { // package protected for access from I/O streams
    final byte[] buffered = context.buffer;
    return buffered != null;
}
/**
 * Reports how many buffered bytes are waiting to be read from the given context.
 *
 * @param context the context to be used
 * @return the distance between the write and read positions, or 0 when the context holds no buffer.
 */
int available(final Context context) { // package protected for access from I/O streams
    if (context.buffer == null) {
        return 0;
    }
    return context.pos - context.readPos;
}
/**
 * Gets the default buffer size, which is the size used when a context's buffer is first allocated.
 * Can be overridden.
 *
 * @return {@link #DEFAULT_BUFFER_SIZE}
 */
protected int getDefaultBufferSize() {
return DEFAULT_BUFFER_SIZE;
}
/**
 * Allocates the context's buffer on first use, or otherwise enlarges it by the
 * {@link #DEFAULT_BUFFER_RESIZE_FACTOR}, keeping the existing content.
 *
 * @param context the context to be used
 * @return the context's (new) buffer
 */
private byte[] resizeBuffer(final Context context) {
    final byte[] current = context.buffer;
    if (current != null) {
        // Grow: copy everything already buffered into a larger array.
        final byte[] enlarged = new byte[current.length * DEFAULT_BUFFER_RESIZE_FACTOR];
        System.arraycopy(current, 0, enlarged, 0, current.length);
        context.buffer = enlarged;
        return context.buffer;
    }
    // First allocation: start with the default size and reset both cursors.
    context.buffer = new byte[getDefaultBufferSize()];
    context.pos = 0;
    context.readPos = 0;
    return context.buffer;
}
/**
 * Ensures that the buffer has room for <code>size</code> more bytes after the current write position.
 * <p>
 * A missing buffer is allocated with the default size. A buffer that is too small is enlarged by the
 * {@link #DEFAULT_BUFFER_RESIZE_FACTOR}, or to exactly the required length when a single enlargement
 * would still be too small (for example when <code>size</code> exceeds the default buffer size).
 * Existing content is preserved.
 * </p>
 *
 * @param size minimum spare space required
 * @param context the context to be used
 * @return the buffer, guaranteed to hold at least <code>context.pos + size</code> bytes
 */
protected byte[] ensureBufferSize(final int size, final Context context) {
    if (context.buffer == null) {
        // First use: allocates the default-sized buffer and resets pos/readPos.
        resizeBuffer(context);
    }
    final int required = context.pos + size;
    if (context.buffer.length < required) {
        // Normal growth step; fall back to the exact requirement if that step is not enough.
        int newLength = context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR;
        if (newLength < required) {
            newLength = required;
        }
        final byte[] grown = new byte[newLength];
        System.arraycopy(context.buffer, 0, grown, 0, context.buffer.length);
        context.buffer = grown;
    }
    return context.buffer;
}
/**
 * Copies buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
 * bytes, and reports how many bytes were copied. Once everything buffered has been handed out the context's
 * buffer is released.
 * <p>
 * Package protected for access from I/O streams.
 *
 * @param b
 *            byte[] array to extract the buffered data into.
 * @param bPos
 *            position in byte[] array to start extraction at.
 * @param bAvail
 *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
 * @param context
 *            the context to be used
 * @return The number of bytes successfully extracted into the provided byte[] array. When the context holds
 *         no buffer: {@link #EOF} if the context has reached EOF, otherwise 0.
 */
int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
    if (context.buffer == null) {
        // Nothing buffered: distinguish "finished" from "nothing yet".
        return context.eof ? EOF : 0;
    }
    final int count = Math.min(available(context), bAvail);
    System.arraycopy(context.buffer, context.readPos, b, bPos, count);
    context.readPos += count;
    if (context.readPos >= context.pos) {
        context.buffer = null; // so hasData() will return false, and this method can return -1
    }
    return count;
}
/**
 * Checks if a byte value is whitespace or not.
 * Whitespace is taken to mean: space, tab, CR, LF
 *
 * @param byteToCheck
 *            the byte to check
 * @return true if byte is whitespace, false otherwise
 */
protected static boolean isWhiteSpace(final byte byteToCheck) {
    return byteToCheck == ' '
            || byteToCheck == '\n'
            || byteToCheck == '\r'
            || byteToCheck == '\t';
}
/**
 * Encodes an Object using the Base-N algorithm. This method exists to satisfy the Encoder interface; only
 * byte[] arguments are accepted.
 *
 * @param obj
 *            Object to encode
 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
 * @throws EncoderException
 *             if the parameter supplied is not of type byte[]
 */
@Override
public Object encode(final Object obj) throws EncoderException {
    if (obj instanceof byte[]) {
        return encode((byte[]) obj);
    }
    throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
}
/**
 * Encodes a byte[] containing binary data and returns the Base-N characters as a String.
 * Uses UTF8 encoding.
 *
 * @param pArray
 *            a byte array containing binary data
 * @return A String containing only Base-N character data
 */
public String encodeToString(final byte[] pArray) {
    final byte[] encoded = encode(pArray);
    return StringUtils.newStringUtf8(encoded);
}
/**
 * Encodes a byte[] containing binary data and returns the characters of the appropriate alphabet as a
 * String. Uses UTF8 encoding.
 *
 * @param pArray a byte array containing binary data
 * @return String containing only character data in the appropriate alphabet.
 */
public String encodeAsString(final byte[] pArray) {
    final byte[] encoded = encode(pArray);
    return StringUtils.newStringUtf8(encoded);
}
/**
 * Decodes an Object using the Base-N algorithm. This method exists to satisfy the Decoder interface; only
 * byte[] and String arguments are accepted.
 *
 * @param obj
 *            Object to decode
 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
 *         supplied.
 * @throws DecoderException
 *             if the parameter supplied is neither a byte[] nor a String
 */
@Override
public Object decode(final Object obj) throws DecoderException {
    if (obj instanceof String) {
        return decode((String) obj);
    }
    if (obj instanceof byte[]) {
        return decode((byte[]) obj);
    }
    throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
}
/**
 * Decodes a String containing characters in the Base-N alphabet. The String is converted to its UTF-8
 * bytes, which are then decoded.
 *
 * @param pArray
 *            A String containing Base-N character data
 * @return a byte array containing binary data
 */
public byte[] decode(final String pArray) {
    final byte[] encoded = StringUtils.getBytesUtf8(pArray);
    return decode(encoded);
}
/**
 * Decodes a byte[] containing characters in the Base-N alphabet.
 * A <code>null</code> or empty array is returned unchanged.
 *
 * @param pArray
 *            A byte array containing Base-N character data
 * @return a byte array containing binary data
 */
@Override
public byte[] decode(final byte[] pArray) {
    final boolean nothingToDecode = pArray == null || pArray.length == 0;
    if (nothingToDecode) {
        return pArray;
    }
    final Context context = new Context();
    decode(pArray, 0, pArray.length, context);
    decode(pArray, 0, EOF, context); // Notify decoder of EOF.
    // Everything decoded so far sits in the context; copy it out into a right-sized array.
    final int decodedLength = context.pos;
    final byte[] decoded = new byte[decodedLength];
    readResults(decoded, 0, decodedLength, context);
    return decoded;
}
/**
 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
 * A <code>null</code> or empty array is returned unchanged.
 *
 * @param pArray
 *            a byte array containing binary data
 * @return A byte array containing only the basen alphabetic character data
 */
@Override
public byte[] encode(final byte[] pArray) {
    final boolean nothingToEncode = pArray == null || pArray.length == 0;
    if (nothingToEncode) {
        return pArray;
    }
    final Context context = new Context();
    encode(pArray, 0, pArray.length, context);
    encode(pArray, 0, EOF, context); // Notify encoder of EOF.
    // Copy the unread part of the context's buffer into a right-sized array.
    final int encodedLength = context.pos - context.readPos;
    final byte[] encoded = new byte[encodedLength];
    readResults(encoded, 0, encodedLength, context);
    return encoded;
}
// Streaming encode step, implemented by each concrete codec; results accumulate in the context.
// Callers in this class pass EOF as the length to notify the encoder that the input has ended.
// package protected for access from I/O streams
abstract void encode(byte[] pArray, int i, int length, Context context);
// Streaming decode step, implemented by each concrete codec; results accumulate in the context.
// Callers in this class pass EOF as the length to notify the decoder that the input has ended.
// package protected for access from I/O streams
abstract void decode(byte[] pArray, int i, int length, Context context);
/**
 * Returns whether or not the <code>value</code> is in the current alphabet.
 * Does not allow whitespace or pad.
 *
 * @param value The value to test
 *
 * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
 */
protected abstract boolean isInAlphabet(byte value);
/**
 * Tests a given byte array to see if it contains only valid characters within the alphabet.
 * The method optionally treats whitespace and pad as valid.
 *
 * @param arrayOctet byte array to test
 * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
 *
 * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
 *         <code>false</code>, otherwise
 */
public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
    for (final byte octet : arrayOctet) {
        if (isInAlphabet(octet)) {
            continue;
        }
        // Outside the alphabet: acceptable only as pad/whitespace, and only when the caller allows it.
        if (allowWSPad && (octet == pad || isWhiteSpace(octet))) {
            continue;
        }
        return false;
    }
    return true;
}
/**
 * Tests a given String to see if it contains only valid characters within the alphabet.
 * The method treats whitespace and PAD as valid. The check is made on the String's UTF-8 bytes.
 *
 * @param basen String to test
 * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
 *         the String is empty; <code>false</code>, otherwise
 * @see #isInAlphabet(byte[], boolean)
 */
public boolean isInAlphabet(final String basen) {
    final byte[] octets = StringUtils.getBytesUtf8(basen);
    return isInAlphabet(octets, true);
}
/**
 * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
 *
 * Intended for use in checking line-ending arrays
 *
 * @param arrayOctet
 *            byte array to test; may be <code>null</code>
 * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
 *         (including for a <code>null</code> array)
 */
protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
    if (arrayOctet != null) {
        for (int i = 0; i < arrayOctet.length; i++) {
            final byte candidate = arrayOctet[i];
            if (pad == candidate || isInAlphabet(candidate)) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Calculates the amount of space needed to encode the supplied array.
 * <p>
 * The input length is rounded up to whole unencoded blocks (to allow for padding) and multiplied by the
 * encoded block size. When chunking is in use, one chunk separator is added for every started line.
 * </p>
 *
 * @param pArray byte[] array which will later be encoded
 *
 * @return amount of space needed to encoded the supplied array.
 *         Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
 */
public long getEncodedLength(final byte[] pArray) {
    // Widen to long BEFORE adding the rounding term: for arrays close to the maximum length the int sum
    // "length + unencodedBlockSize - 1" could overflow and produce a negative result.
    final long blocks = ((long) pArray.length + unencodedBlockSize - 1) / unencodedBlockSize;
    long len = blocks * encodedBlockSize;
    if (lineLength > 0) { // We're using chunking
        // Round up to nearest multiple
        len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength;
    }
    return len;
}
}

View File

@ -0,0 +1,211 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import static org.apache.commons.codec.binary.BaseNCodec.EOF;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.codec.binary.BaseNCodec.Context;
/**
 * Abstract superclass for Base-N input streams.
 * <p>
 * Bytes read from the wrapped stream are passed through the supplied {@link BaseNCodec} (encoding or decoding,
 * depending on the constructor flag); callers read the codec's output.
 * </p>
 *
 * @since 1.5
 * @version $Id$
 */
public class BaseNCodecInputStream extends FilterInputStream {

    private final BaseNCodec baseNCodec;

    private final boolean doEncode;

    /** Scratch array backing the single-byte {@link #read()}. */
    private final byte[] singleByte = new byte[1];

    /**
     * Reusable array for raw input pulled from the wrapped stream (4096 bytes when encoding, 8192 when
     * decoding). Allocated once per stream instead of once per refill.
     */
    private final byte[] buf;

    private final Context context = new Context();

    /**
     * Creates a stream that reads from <code>in</code> and runs the data through <code>baseNCodec</code>.
     *
     * @param in the stream to read raw data from
     * @param baseNCodec the codec used to encode or decode the data
     * @param doEncode <code>true</code> to encode the data read, <code>false</code> to decode it
     */
    protected BaseNCodecInputStream(final InputStream in, final BaseNCodec baseNCodec, final boolean doEncode) {
        super(in);
        this.doEncode = doEncode;
        this.baseNCodec = baseNCodec;
        this.buf = new byte[doEncode ? 4096 : 8192];
    }

    /**
     * {@inheritDoc}
     *
     * @return <code>0</code> if the {@link InputStream} has reached <code>EOF</code>,
     *         <code>1</code> otherwise
     * @since 1.7
     */
    @Override
    public int available() throws IOException {
        // Note: the logic is similar to the InflaterInputStream:
        //       as long as we have not reached EOF, indicate that there is more
        //       data available. As we do not know for sure how much data is left,
        //       just return 1 as a safe guess.
        return context.eof ? 0 : 1;
    }

    /**
     * Marks the current position in this input stream.
     * <p>The {@link #mark} method of {@link BaseNCodecInputStream} does nothing.</p>
     *
     * @param readLimit the maximum limit of bytes that can be read before the mark position becomes invalid.
     * @since 1.7
     */
    @Override
    public synchronized void mark(final int readLimit) {
    }

    /**
     * {@inheritDoc}
     *
     * @return always returns <code>false</code>
     */
    @Override
    public boolean markSupported() {
        return false; // not an easy job to support marks
    }

    /**
     * Reads one <code>byte</code> from this input stream.
     *
     * @return the byte as an integer in the range 0 to 255. Returns -1 if EOF has been reached.
     * @throws IOException
     *             if an I/O error occurs.
     */
    @Override
    public int read() throws IOException {
        int r = read(singleByte, 0, 1);
        while (r == 0) {
            r = read(singleByte, 0, 1);
        }
        if (r > 0) {
            // Map the signed byte onto the unsigned range 0..255.
            return singleByte[0] & 0xff;
        }
        return EOF;
    }

    /**
     * Attempts to read <code>len</code> bytes into the specified <code>b</code> array starting at <code>offset</code>
     * from this InputStream.
     *
     * @param b
     *            destination byte array
     * @param offset
     *            where to start writing the bytes
     * @param len
     *            maximum number of bytes to read
     *
     * @return number of bytes read
     * @throws IOException
     *             if an I/O error occurs.
     * @throws NullPointerException
     *             if the byte array parameter is null
     * @throws IndexOutOfBoundsException
     *             if offset, len or buffer size are invalid
     */
    @Override
    public int read(final byte b[], final int offset, final int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        }
        if (offset < 0 || len < 0) {
            throw new IndexOutOfBoundsException();
        }
        // Written as a subtraction so that a huge len cannot overflow "offset + len" and slip through.
        if (offset > b.length || len > b.length - offset) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0;
        }
        int readLen = 0;
        /*
         Rationale for while-loop on (readLen == 0):
         -----
         Base32.readResults() usually returns > 0 or EOF (-1).  In the
         rare case where it returns 0, we just keep trying.

         This is essentially an undocumented contract for InputStream
         implementors that want their code to work properly with
         java.io.InputStreamReader, since the latter hates it when
         InputStream.read(byte[]) returns a zero.  Unfortunately our
         readResults() call must return 0 if a large amount of the data
         being decoded was non-base32, so this while-loop enables proper
         interop with InputStreamReader for that scenario.
         -----
         This is a fix for CODEC-101
        */
        while (readLen == 0) {
            if (!baseNCodec.hasData(context)) {
                // Refill: pull raw bytes and feed them to the codec; c is -1 (EOF) at end of input.
                final int c = in.read(buf);
                if (doEncode) {
                    baseNCodec.encode(buf, 0, c, context);
                } else {
                    baseNCodec.decode(buf, 0, c, context);
                }
            }
            readLen = baseNCodec.readResults(b, offset, len, context);
        }
        return readLen;
    }

    /**
     * Repositions this stream to the position at the time the mark method was last called on this input stream.
     * <p>
     * The {@link #reset} method of {@link BaseNCodecInputStream} does nothing except throw an {@link IOException}.
     *
     * @throws IOException if this method is invoked
     * @since 1.7
     */
    @Override
    public synchronized void reset() throws IOException {
        throw new IOException("mark/reset not supported");
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if the provided skip length is negative
     * @since 1.7
     */
    @Override
    public long skip(final long n) throws IOException {
        if (n < 0) {
            throw new IllegalArgumentException("Negative skip length: " + n);
        }
        // skip in chunks of 512 bytes
        final byte[] b = new byte[512];
        long todo = n;
        while (todo > 0) {
            int len = (int) Math.min(b.length, todo);
            len = this.read(b, 0, len);
            if (len == EOF) {
                break;
            }
            todo -= len;
        }
        return n - todo;
    }
}

View File

@ -0,0 +1,176 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import static org.apache.commons.codec.binary.BaseNCodec.EOF;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.commons.codec.binary.BaseNCodec.Context;
/**
 * Abstract superclass for Base-N output streams.
 * <p>
 * Bytes written to this stream are passed through the supplied {@link BaseNCodec} (encoding or decoding,
 * depending on the constructor flag) and the codec's output is written to the wrapped stream.
 * </p>
 * <p>
 * To write the EOF marker without closing the stream, call {@link #eof()} or use an <a
 * href="https://commons.apache.org/proper/commons-io/">Apache Commons IO</a> <a href=
 * "https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/output/CloseShieldOutputStream.html"
 * >CloseShieldOutputStream</a>.
 * </p>
 *
 * @since 1.5
 * @version $Id$
 */
public class BaseNCodecOutputStream extends FilterOutputStream {

    private final boolean doEncode;

    private final BaseNCodec baseNCodec;

    /** Scratch array backing the single-byte {@link #write(int)}. */
    private final byte[] singleByte = new byte[1];

    private final Context context = new Context();

    /**
     * Creates a stream that runs everything written to it through <code>basedCodec</code> and forwards the
     * result to <code>out</code>.
     *
     * @param out the stream that receives the codec's output
     * @param basedCodec the codec used to encode or decode the data
     * @param doEncode <code>true</code> to encode the data written, <code>false</code> to decode it
     */
    // TODO should this be protected?
    public BaseNCodecOutputStream(final OutputStream out, final BaseNCodec basedCodec, final boolean doEncode) {
        super(out);
        this.baseNCodec = basedCodec;
        this.doEncode = doEncode;
    }

    /**
     * Writes the specified <code>byte</code> to this output stream.
     *
     * @param i
     *            source byte
     * @throws IOException
     *             if an I/O error occurs.
     */
    @Override
    public void write(final int i) throws IOException {
        singleByte[0] = (byte) i;
        write(singleByte, 0, 1);
    }

    /**
     * Writes <code>len</code> bytes from the specified <code>b</code> array starting at <code>offset</code> to this
     * output stream.
     *
     * @param b
     *            source byte array
     * @param offset
     *            where to start reading the bytes
     * @param len
     *            maximum number of bytes to write
     *
     * @throws IOException
     *             if an I/O error occurs.
     * @throws NullPointerException
     *             if the byte array parameter is null
     * @throws IndexOutOfBoundsException
     *             if offset, len or buffer size are invalid
     */
    @Override
    public void write(final byte b[], final int offset, final int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        }
        if (offset < 0 || len < 0) {
            throw new IndexOutOfBoundsException();
        }
        // Written as a subtraction so that a huge len cannot overflow "offset + len" and slip through.
        if (offset > b.length || len > b.length - offset) {
            throw new IndexOutOfBoundsException();
        }
        if (len > 0) {
            if (doEncode) {
                baseNCodec.encode(b, offset, len, context);
            } else {
                baseNCodec.decode(b, offset, len, context);
            }
            // Hand whatever the codec produced to the wrapped stream, without flushing that stream.
            flush(false);
        }
    }

    /**
     * Flushes this output stream and forces any buffered output bytes to be written out to the stream. If propagate is
     * true, the wrapped stream will also be flushed.
     *
     * @param propagate
     *            boolean flag to indicate whether the wrapped OutputStream should also be flushed.
     * @throws IOException
     *             if an I/O error occurs.
     */
    private void flush(final boolean propagate) throws IOException {
        final int avail = baseNCodec.available(context);
        if (avail > 0) {
            final byte[] buf = new byte[avail];
            final int c = baseNCodec.readResults(buf, 0, avail, context);
            if (c > 0) {
                out.write(buf, 0, c);
            }
        }
        if (propagate) {
            out.flush();
        }
    }

    /**
     * Flushes this output stream and forces any buffered output bytes to be written out to the stream.
     *
     * @throws IOException
     *             if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flush(true);
    }

    /**
     * Closes this output stream and releases any system resources associated with the stream.
     * <p>
     * The EOF marker is written and the stream flushed first; the wrapped stream is closed even if either of
     * those steps fails.
     * </p>
     * <p>
     * To write the EOF marker without closing the stream, call {@link #eof()} or use an
     * <a href="https://commons.apache.org/proper/commons-io/">Apache Commons IO</a> <a href=
     * "https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/output/CloseShieldOutputStream.html"
     * >CloseShieldOutputStream</a>.
     * </p>
     *
     * @throws IOException
     *             if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        try {
            eof();
            flush();
        } finally {
            // Always release the wrapped stream, otherwise a failing eof()/flush() would leak it.
            out.close();
        }
    }

    /**
     * Writes EOF.
     *
     * @throws IOException
     *             if an I/O error occurs.
     * @since 1.11
     */
    public void eof() throws IOException {
        // Notify encoder of EOF (-1).
        if (doEncode) {
            baseNCodec.encode(singleByte, 0, EOF, context);
        } else {
            baseNCodec.decode(singleByte, 0, EOF, context);
        }
    }
}

View File

@ -0,0 +1,301 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
/**
* Converts between byte arrays and strings of "0"s and "1"s.
*
* <p>This class is immutable and thread-safe.</p>
*
* TODO: may want to add more bit vector functions like and/or/xor/nand
* TODO: also might be good to generate boolean[] from byte[] et cetera.
*
* @since 1.3
* @version $Id$
*/
public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
/*
* tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
* it.
*/
/** Empty char array. */
private static final char[] EMPTY_CHAR_ARRAY = new char[0];
/** Empty byte array. */
private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
/** Mask for bit 0 of a byte. */
private static final int BIT_0 = 1;
/** Mask for bit 1 of a byte. */
private static final int BIT_1 = 0x02;
/** Mask for bit 2 of a byte. */
private static final int BIT_2 = 0x04;
/** Mask for bit 3 of a byte. */
private static final int BIT_3 = 0x08;
/** Mask for bit 4 of a byte. */
private static final int BIT_4 = 0x10;
/** Mask for bit 5 of a byte. */
private static final int BIT_5 = 0x20;
/** Mask for bit 6 of a byte. */
private static final int BIT_6 = 0x40;
/** Mask for bit 7 of a byte. */
private static final int BIT_7 = 0x80;
private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
/**
* Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
*
* @param raw
* the raw binary data to convert
* @return 0 and 1 ASCII character bytes one for each bit of the argument
* @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
*/
@Override
public byte[] encode(final byte[] raw) {
return toAsciiBytes(raw);
}
/**
* Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
*
* @param raw
* the raw binary data to convert
* @return 0 and 1 ASCII character chars one for each bit of the argument
* @throws EncoderException
* if the argument is not a byte[]
* @see org.apache.commons.codec.Encoder#encode(Object)
*/
@Override
public Object encode(final Object raw) throws EncoderException {
if (!(raw instanceof byte[])) {
throw new EncoderException("argument not a byte array");
}
return toAsciiChars((byte[]) raw);
}
/**
* Decodes a byte array where each byte represents an ASCII '0' or '1'.
*
* @param ascii
* each byte represents an ASCII '0' or '1'
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
* @throws DecoderException
* if argument is not a byte[], char[] or String
* @see org.apache.commons.codec.Decoder#decode(Object)
*/
@Override
public Object decode(final Object ascii) throws DecoderException {
if (ascii == null) {
return EMPTY_BYTE_ARRAY;
}
if (ascii instanceof byte[]) {
return fromAscii((byte[]) ascii);
}
if (ascii instanceof char[]) {
return fromAscii((char[]) ascii);
}
if (ascii instanceof String) {
return fromAscii(((String) ascii).toCharArray());
}
throw new DecoderException("argument not a byte array");
}
/**
* Decodes a byte array where each byte represents an ASCII '0' or '1'.
*
* @param ascii
* each byte represents an ASCII '0' or '1'
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
* @see org.apache.commons.codec.Decoder#decode(Object)
*/
@Override
public byte[] decode(final byte[] ascii) {
return fromAscii(ascii);
}
/**
* Decodes a String where each char of the String represents an ASCII '0' or '1'.
*
* @param ascii
* String of '0' and '1' characters
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
* @see org.apache.commons.codec.Decoder#decode(Object)
*/
public byte[] toByteArray(final String ascii) {
if (ascii == null) {
return EMPTY_BYTE_ARRAY;
}
return fromAscii(ascii.toCharArray());
}
// ------------------------------------------------------------------------
//
// static codec operations
//
// ------------------------------------------------------------------------
/**
* Decodes a char array where each char represents an ASCII '0' or '1'.
*
* @param ascii
* each char represents an ASCII '0' or '1'
* @return the raw encoded binary where each bit corresponds to a char in the char array argument
*/
public static byte[] fromAscii(final char[] ascii) {
if (ascii == null || ascii.length == 0) {
return EMPTY_BYTE_ARRAY;
}
// get length/8 times bytes with 3 bit shifts to the right of the length
final byte[] l_raw = new byte[ascii.length >> 3];
/*
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
* loop.
*/
for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
for (int bits = 0; bits < BITS.length; ++bits) {
if (ascii[jj - bits] == '1') {
l_raw[ii] |= BITS[bits];
}
}
}
return l_raw;
}
/**
* Decodes a byte array where each byte represents an ASCII '0' or '1'.
*
* @param ascii
* each byte represents an ASCII '0' or '1'
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
*/
public static byte[] fromAscii(final byte[] ascii) {
if (isEmpty(ascii)) {
return EMPTY_BYTE_ARRAY;
}
// get length/8 times bytes with 3 bit shifts to the right of the length
final byte[] l_raw = new byte[ascii.length >> 3];
/*
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
* loop.
*/
for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
for (int bits = 0; bits < BITS.length; ++bits) {
if (ascii[jj - bits] == '1') {
l_raw[ii] |= BITS[bits];
}
}
}
return l_raw;
}
/**
* Returns <code>true</code> if the given array is <code>null</code> or empty (size 0.)
*
* @param array
* the source array
* @return <code>true</code> if the given array is <code>null</code> or empty (size 0.)
*/
private static boolean isEmpty(final byte[] array) {
return array == null || array.length == 0;
}
/**
* Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
* char.
*
* @param raw
* the raw binary data to convert
* @return an array of 0 and 1 character bytes for each bit of the argument
* @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
*/
public static byte[] toAsciiBytes(final byte[] raw) {
if (isEmpty(raw)) {
return EMPTY_BYTE_ARRAY;
}
// get 8 times the bytes with 3 bit shifts to the left of the length
final byte[] l_ascii = new byte[raw.length << 3];
/*
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
* loop.
*/
for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
for (int bits = 0; bits < BITS.length; ++bits) {
if ((raw[ii] & BITS[bits]) == 0) {
l_ascii[jj - bits] = '0';
} else {
l_ascii[jj - bits] = '1';
}
}
}
return l_ascii;
}
/**
 * Expands raw binary data into ASCII '0' and '1' characters, eight output characters per input byte.
 * <p>
 * The output is filled from its end towards its start: the group for <code>raw[0]</code> ends at the last index
 * of the result, and inside a group the bit selected by <code>BITS[0]</code> is stored at the highest index.
 * </p>
 *
 * @param raw
 *            the raw binary data to convert, may be <code>null</code> or empty
 * @return an array of '0' and '1' characters for each bit of the argument; the shared empty char array when
 *         <code>raw</code> is <code>null</code> or empty
 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
 */
public static char[] toAsciiChars(final byte[] raw) {
    if (isEmpty(raw)) {
        return EMPTY_CHAR_ARRAY;
    }
    // eight output positions per input byte (length * 8 via a 3-bit left shift)
    final char[] digits = new char[raw.length << 3];
    // highest index of the group currently being written; stepped down by 8 per input byte
    int groupEnd = digits.length - 1;
    for (final byte value : raw) {
        for (int bit = 0; bit < BITS.length; bit++) {
            final boolean set = (value & BITS[bit]) != 0;
            digits[groupEnd - bit] = set ? '1' : '0';
        }
        groupEnd -= 8;
    }
    return digits;
}
/**
 * Renders raw binary data as a String of ASCII '0' and '1' characters.
 * <p>
 * The characters are produced by {@link #toAsciiChars(byte[])} and wrapped in a new String unchanged.
 * </p>
 *
 * @param raw
 *            the raw binary data to convert
 * @return a String of '0' and '1' characters representing the binary data
 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
 */
public static String toAsciiString(final byte[] raw) {
    final char[] digits = toAsciiChars(raw);
    return new String(digits);
}
}

View File

@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
/**
 * <p>
 * Operations on {@link CharSequence} that are <code>null</code> safe.
 * </p>
 * <p>
 * Copied from Apache Commons Lang r1586295 on April 10, 2014 (day of 3.3.2 release).
 * </p>
 *
 * @see CharSequence
 * @since 1.10
 */
public class CharSequenceUtils {
    /**
     * Green implementation of regionMatches.
     * <p>
     * When both arguments are Strings the call is delegated to
     * {@link String#regionMatches(boolean, int, String, int, int)}. For any other CharSequence the region is
     * compared character by character. A region that does not fit inside either sequence (negative start index,
     * or fewer than <code>length</code> characters available from the start index) yields <code>false</code>
     * instead of an {@link IndexOutOfBoundsException}, mirroring the String behaviour.
     * </p>
     *
     * @param cs
     *            the <code>CharSequence</code> to be processed
     * @param ignoreCase
     *            whether or not to be case insensitive
     * @param thisStart
     *            the index to start on the <code>cs</code> CharSequence
     * @param substring
     *            the <code>CharSequence</code> to be looked for
     * @param start
     *            the index to start on the <code>substring</code> CharSequence
     * @param length
     *            character length of the region
     * @return whether the region matched
     */
    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
            final CharSequence substring, final int start, final int length) {
        if (cs instanceof String && substring instanceof String) {
            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
        }
        // An empty (or negative-length) region never touches either sequence and has always matched here.
        if (length <= 0) {
            return true;
        }
        // Reject regions that would run outside either sequence rather than failing inside charAt().
        // Both start indices are known to be non-negative before the subtractions, so they cannot overflow.
        if (thisStart < 0 || start < 0
                || length > cs.length() - thisStart
                || length > substring.length() - start) {
            return false;
        }
        int index1 = thisStart;
        int index2 = start;
        int tmpLen = length;
        while (tmpLen-- > 0) {
            final char c1 = cs.charAt(index1++);
            final char c2 = substring.charAt(index2++);
            if (c1 == c2) {
                continue;
            }
            if (!ignoreCase) {
                return false;
            }
            // The same check as in String.regionMatches():
            if (Character.toUpperCase(c1) != Character.toUpperCase(c2) &&
                    Character.toLowerCase(c1) != Character.toLowerCase(c2)) {
                return false;
            }
        }
        return true;
    }
}

View File

@ -0,0 +1,443 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.Charsets;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
/**
 * Converts hexadecimal Strings. The charset used for certain operation can be set, the default is set in
 * {@link #DEFAULT_CHARSET_NAME}
 *
 * This class is thread-safe.
 *
 * @since 1.1
 * @version $Id$
 */
public class Hex implements BinaryEncoder, BinaryDecoder {
    /**
     * Default charset is {@link Charsets#UTF_8}
     *
     * @since 1.7
     */
    public static final Charset DEFAULT_CHARSET = Charsets.UTF_8;
    /**
     * Default charset name is {@link CharEncoding#UTF_8}
     *
     * @since 1.4
     */
    public static final String DEFAULT_CHARSET_NAME = CharEncoding.UTF_8;
    /**
     * Used to build output as lowercase Hex
     */
    private static final char[] DIGITS_LOWER =
        {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
    /**
     * Used to build output as uppercase Hex
     */
    private static final char[] DIGITS_UPPER =
        {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
    /**
     * Converts an array of characters representing hexadecimal values into an array of bytes of those same values. The
     * returned array will be half the length of the passed array, as it takes two characters to represent any given
     * byte. An exception is thrown if the passed char array has an odd number of elements.
     *
     * @param data
     *            An array of characters containing hexadecimal digits
     * @return A byte array containing binary data decoded from the supplied char array.
     * @throws DecoderException
     *             Thrown if an odd number of characters or an illegal hexadecimal character is supplied
     */
    public static byte[] decodeHex(final char[] data) throws DecoderException {
        final int len = data.length;
        if ((len & 0x01) != 0) {
            throw new DecoderException("Odd number of characters.");
        }
        final byte[] out = new byte[len >> 1];
        // two characters form the hex value.
        for (int i = 0, j = 0; j < len; i++) {
            int f = toDigit(data[j], j) << 4;
            j++;
            f = f | toDigit(data[j], j);
            j++;
            out[i] = (byte) (f & 0xFF);
        }
        return out;
    }
    /**
     * Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
     * The returned array will be double the length of the passed array, as it takes two characters to represent any
     * given byte. Lowercase digits are used.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @return A char[] containing hexadecimal characters
     */
    public static char[] encodeHex(final byte[] data) {
        return encodeHex(data, true);
    }
    /**
     * Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order.
     * Only the bytes between the buffer's position and limit are encoded; the returned array is double that number
     * of bytes long. The buffer's position is not changed. Lowercase digits are used.
     *
     * @param data
     *            a byte buffer to convert to Hex characters
     * @return A char[] containing hexadecimal characters
     * @since 1.11
     */
    public static char[] encodeHex(final ByteBuffer data) {
        return encodeHex(data, true);
    }
    /**
     * Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
     * The returned array will be double the length of the passed array, as it takes two characters to represent any
     * given byte.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @param toLowerCase
     *            <code>true</code> converts to lowercase, <code>false</code> to uppercase
     * @return A char[] containing hexadecimal characters
     * @since 1.4
     */
    public static char[] encodeHex(final byte[] data, final boolean toLowerCase) {
        return encodeHex(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER);
    }
    /**
     * Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order.
     * Only the bytes between the buffer's position and limit are encoded; the returned array is double that number
     * of bytes long. The buffer's position is not changed.
     *
     * @param data
     *            a byte buffer to convert to Hex characters
     * @param toLowerCase
     *            <code>true</code> converts to lowercase, <code>false</code> to uppercase
     * @return A char[] containing hexadecimal characters
     * @since 1.11
     */
    public static char[] encodeHex(final ByteBuffer data, final boolean toLowerCase) {
        return encodeHex(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER);
    }
    /**
     * Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
     * The returned array will be double the length of the passed array, as it takes two characters to represent any
     * given byte.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @param toDigits
     *            the output alphabet; indexed with values 0-15
     * @return A char[] containing hexadecimal characters
     * @since 1.4
     */
    protected static char[] encodeHex(final byte[] data, final char[] toDigits) {
        final int l = data.length;
        final char[] out = new char[l << 1];
        // two characters form the hex value.
        for (int i = 0, j = 0; i < l; i++) {
            out[j++] = toDigits[(0xF0 & data[i]) >>> 4];
            out[j++] = toDigits[0x0F & data[i]];
        }
        return out;
    }
    /**
     * Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order.
     * Only the bytes between the buffer's position and limit are encoded; the returned array is double that number
     * of bytes long. The buffer's position is not changed, and direct or read-only buffers are accepted.
     *
     * @param data
     *            a byte buffer to convert to Hex characters
     * @param toDigits
     *            the output alphabet; indexed with values 0-15
     * @return A char[] containing hexadecimal characters
     * @since 1.11
     */
    protected static char[] encodeHex(final ByteBuffer data, final char[] toDigits) {
        return encodeHex(toByteArray(data), toDigits);
    }
    /**
     * Converts an array of bytes into a String representing the hexadecimal values of each byte in order. The returned
     * String will be double the length of the passed array, as it takes two characters to represent any given byte.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @return A String containing hexadecimal characters
     * @since 1.4
     */
    public static String encodeHexString(final byte[] data) {
        return new String(encodeHex(data));
    }
    /**
     * Converts a byte buffer into a String representing the hexadecimal values of each byte in order. Only the bytes
     * between the buffer's position and limit are encoded; the returned String is double that number of bytes long.
     *
     * @param data
     *            a byte buffer to convert to Hex characters
     * @return A String containing hexadecimal characters
     * @since 1.11
     */
    public static String encodeHexString(final ByteBuffer data) {
        return new String(encodeHex(data));
    }
    /**
     * Copies the bytes between a buffer's position and limit into a new array.
     * <p>
     * The copy is read through a duplicate, so the caller's position, limit and mark are left untouched, and
     * {@link ByteBuffer#array()} is never called, so direct and read-only buffers work and bytes outside the
     * position/limit window of the backing array are not picked up.
     * </p>
     *
     * @param buffer
     *            the buffer to read
     * @return a new array holding exactly the remaining bytes of <code>buffer</code>
     */
    private static byte[] toByteArray(final ByteBuffer buffer) {
        final ByteBuffer view = buffer.duplicate();
        final byte[] bytes = new byte[view.remaining()];
        view.get(bytes);
        return bytes;
    }
    /**
     * Converts a hexadecimal character to an integer.
     *
     * @param ch
     *            A character to convert to an integer digit
     * @param index
     *            The index of the character in the source
     * @return An integer
     * @throws DecoderException
     *             Thrown if ch is an illegal hex character
     */
    protected static int toDigit(final char ch, final int index) throws DecoderException {
        final int digit = Character.digit(ch, 16);
        if (digit == -1) {
            throw new DecoderException("Illegal hexadecimal character " + ch + " at index " + index);
        }
        return digit;
    }
    private final Charset charset;
    /**
     * Creates a new codec with the default charset {@link #DEFAULT_CHARSET}
     */
    public Hex() {
        // use default encoding
        this.charset = DEFAULT_CHARSET;
    }
    /**
     * Creates a new codec with the given Charset.
     *
     * @param charset
     *            the charset.
     * @since 1.7
     */
    public Hex(final Charset charset) {
        this.charset = charset;
    }
    /**
     * Creates a new codec with the given charset name.
     *
     * @param charsetName
     *            the charset name.
     * @throws java.nio.charset.UnsupportedCharsetException
     *             If the named charset is unavailable
     * @since 1.4
     * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
     */
    public Hex(final String charsetName) {
        this(Charset.forName(charsetName));
    }
    /**
     * Converts an array of character bytes representing hexadecimal values into an array of bytes of those same values.
     * The returned array will be half the length of the passed array, as it takes two characters to represent any given
     * byte. An exception is thrown if the passed char array has an odd number of elements.
     *
     * @param array
     *            An array of character bytes containing hexadecimal digits
     * @return A byte array containing binary data decoded from the supplied byte array (representing characters).
     * @throws DecoderException
     *             Thrown if an odd number of characters is supplied to this function
     * @see #decodeHex(char[])
     */
    @Override
    public byte[] decode(final byte[] array) throws DecoderException {
        return decodeHex(new String(array, getCharset()).toCharArray());
    }
    /**
     * Converts a buffer of character bytes representing hexadecimal values into an array of bytes of those same values.
     * Only the bytes between the buffer's position and limit are decoded, and the buffer's position is not changed.
     * The returned array will be half the length of the decoded characters, as it takes two characters to represent
     * any given byte. An exception is thrown if the decoded characters have an odd number of elements.
     *
     * @param buffer
     *            A buffer of character bytes containing hexadecimal digits
     * @return A byte array containing binary data decoded from the supplied buffer (representing characters).
     * @throws DecoderException
     *             Thrown if an odd number of characters is supplied to this function
     * @see #decodeHex(char[])
     * @since 1.11
     */
    public byte[] decode(final ByteBuffer buffer) throws DecoderException {
        return decodeHex(new String(toByteArray(buffer), getCharset()).toCharArray());
    }
    /**
     * Converts a String or an array of character bytes representing hexadecimal values into an array of bytes of those
     * same values. The returned array will be half the length of the passed String or array, as it takes two characters
     * to represent any given byte. An exception is thrown if the passed char array has an odd number of elements.
     *
     * @param object
     *            A String, ByteBuffer, byte[], or char[] containing hexadecimal digits
     * @return A byte array containing binary data decoded from the supplied input (representing characters).
     * @throws DecoderException
     *             Thrown if an odd number of characters is supplied to this function or the object is not a String,
     *             ByteBuffer, byte[] or char[]
     * @see #decodeHex(char[])
     */
    @Override
    public Object decode(final Object object) throws DecoderException {
        if (object instanceof String) {
            // Decode the characters directly rather than re-dispatching through decode(Object).
            return decodeHex(((String) object).toCharArray());
        } else if (object instanceof byte[]) {
            return decode((byte[]) object);
        } else if (object instanceof ByteBuffer) {
            return decode((ByteBuffer) object);
        } else {
            try {
                return decodeHex((char[]) object);
            } catch (final ClassCastException e) {
                throw new DecoderException(e.getMessage(), e);
            }
        }
    }
    /**
     * Converts an array of bytes into an array of bytes for the characters representing the hexadecimal values of each
     * byte in order. The returned array will be double the length of the passed array, as it takes two characters to
     * represent any given byte.
     * <p>
     * The conversion from hexadecimal characters to the returned bytes is performed with the charset named by
     * {@link #getCharset()}.
     * </p>
     *
     * @param array
     *            a byte[] to convert to Hex characters
     * @return A byte[] containing the bytes of the hexadecimal characters
     * @since 1.7 No longer throws IllegalStateException if the charsetName is invalid.
     * @see #encodeHex(byte[])
     */
    @Override
    public byte[] encode(final byte[] array) {
        return encodeHexString(array).getBytes(this.getCharset());
    }
    /**
     * Converts byte buffer into an array of bytes for the characters representing the hexadecimal values of each
     * byte in order. Only the bytes between the buffer's position and limit are encoded, and the buffer's position
     * is not changed.
     * <p>
     * The conversion from hexadecimal characters to the returned bytes is performed with the charset named by
     * {@link #getCharset()}.
     * </p>
     *
     * @param array
     *            a byte buffer to convert to Hex characters
     * @return A byte[] containing the bytes of the hexadecimal characters
     * @see #encodeHex(byte[])
     * @since 1.11
     */
    public byte[] encode(final ByteBuffer array) {
        return encodeHexString(array).getBytes(this.getCharset());
    }
    /**
     * Converts a String or an array of bytes into an array of characters representing the hexadecimal values of each
     * byte in order. The returned array will be double the length of the passed String or array, as it takes two
     * characters to represent any given byte.
     * <p>
     * A String is first converted to bytes with the charset named by {@link #getCharset()}. For a ByteBuffer only
     * the bytes between its position and limit are encoded, and its position is not changed.
     * </p>
     *
     * @param object
     *            a String, ByteBuffer, or byte[] to convert to Hex characters
     * @return A char[] containing hexadecimal characters
     * @throws EncoderException
     *             Thrown if the given object is not a String, ByteBuffer or byte[]
     * @see #encodeHex(byte[])
     */
    @Override
    public Object encode(final Object object) throws EncoderException {
        byte[] byteArray;
        if (object instanceof String) {
            byteArray = ((String) object).getBytes(this.getCharset());
        } else if (object instanceof ByteBuffer) {
            byteArray = toByteArray((ByteBuffer) object);
        } else {
            try {
                byteArray = (byte[]) object;
            } catch (final ClassCastException e) {
                throw new EncoderException(e.getMessage(), e);
            }
        }
        return encodeHex(byteArray);
    }
    /**
     * Gets the charset.
     *
     * @return the charset.
     * @since 1.7
     */
    public Charset getCharset() {
        return this.charset;
    }
    /**
     * Gets the charset name.
     *
     * @return the charset name.
     * @since 1.4
     */
    public String getCharsetName() {
        return this.charset.name();
    }
    /**
     * Returns a string representation of the object, which includes the charset name.
     *
     * @return a string representation of the object.
     */
    @Override
    public String toString() {
        return super.toString() + "[charsetName=" + this.charset + "]";
    }
}

View File

@ -0,0 +1,422 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.Charsets;
/**
* Converts String to and from bytes using the encodings required by the Java specification. These encodings are
* specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
* Standard charsets</a>.
*
* <p>This class is immutable and thread-safe.</p>
*
* @see CharEncoding
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
* @version $Id$
* @since 1.4
*/
public class StringUtils {
/**
 * <p>
 * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
 * </p>
 *
 * <p>
 * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
 * The comparison is case sensitive. Sequences of different length are never equal.
 * </p>
 *
 * <pre>
 * StringUtils.equals(null, null)   = true
 * StringUtils.equals(null, "abc")  = false
 * StringUtils.equals("abc", null)  = false
 * StringUtils.equals("abc", "abc") = true
 * StringUtils.equals("abc", "ABC") = false
 * </pre>
 *
 * <p>
 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
 * </p>
 *
 * @see Object#equals(Object)
 * @param cs1
 *            the first CharSequence, may be <code>null</code>
 * @param cs2
 *            the second CharSequence, may be <code>null</code>
 * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
 * @since 1.10
 */
public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
    if (cs1 == cs2) {
        return true;
    }
    if (cs1 == null || cs2 == null) {
        return false;
    }
    if (cs1 instanceof String && cs2 instanceof String) {
        return cs1.equals(cs2);
    }
    // Compare lengths first: asking regionMatches for the longer of the two lengths would read past the end of
    // the shorter sequence and fail with an IndexOutOfBoundsException instead of reporting "not equal".
    return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length());
}
/**
 * Null-tolerant wrapper around {@link String#getBytes(Charset)}.
 *
 * @param string
 *            The string to encode, may be <code>null</code>
 * @param charset
 *            The {@link Charset} used to encode the <code>String</code>
 * @return the encoded bytes, or <code>null</code> if the input string was <code>null</code>
 */
private static byte[] getBytes(final String string, final Charset charset) {
    return string == null ? null : string.getBytes(charset);
}
/**
 * Encodes a string with {@link String#getBytes(Charset)} and wraps the resulting array in a {@link ByteBuffer}.
 *
 * @param string
 *            The string to encode, may be <code>null</code>
 * @param charset
 *            The {@link Charset} used to encode the <code>String</code>
 * @return a buffer wrapping the encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @since 1.11
 */
private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
    if (string != null) {
        final byte[] encoded = string.getBytes(charset);
        return ByteBuffer.wrap(encoded);
    }
    return null;
}
/**
 * Encodes the given string using the UTF-8 charset and returns the encoded bytes wrapped in a byte buffer.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return a buffer over the encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 * @since 1.11
 */
public static ByteBuffer getByteBufferUtf8(final String string) {
    final Charset utf8 = Charsets.UTF_8;
    return getByteBuffer(string, utf8);
}
/**
 * Encodes the given string into a new byte array using the ISO-8859-1 charset.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesIso8859_1(final String string) {
    final Charset latin1 = Charsets.ISO_8859_1;
    return getBytes(string, latin1);
}
/**
 * Encodes the given string into a new byte array using the named charset.
 * <p>
 * An {@link UnsupportedEncodingException} raised for the charset name is caught and rethrown as an
 * {@link IllegalStateException}; that should never happen for a required charset name. Use this method when the
 * encoding is required to be in the JRE.
 * </p>
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @param charsetName
 *            The name of a required {@link java.nio.charset.Charset}
 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws IllegalStateException
 *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
 *             required charset name.
 * @see CharEncoding
 * @see String#getBytes(String)
 */
public static byte[] getBytesUnchecked(final String string, final String charsetName) {
    byte[] encoded = null;
    if (string != null) {
        try {
            encoded = string.getBytes(charsetName);
        } catch (final UnsupportedEncodingException unsupported) {
            throw StringUtils.newIllegalStateException(charsetName, unsupported);
        }
    }
    return encoded;
}
/**
 * Encodes the given string into a new byte array using the US-ASCII charset.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUsAscii(final String string) {
    final Charset ascii = Charsets.US_ASCII;
    return getBytes(string, ascii);
}
/**
 * Encodes the given string into a new byte array using the UTF-16 charset.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUtf16(final String string) {
    final Charset utf16 = Charsets.UTF_16;
    return getBytes(string, utf16);
}
/**
 * Encodes the given string into a new byte array using the UTF-16BE charset.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUtf16Be(final String string) {
    final Charset utf16BigEndian = Charsets.UTF_16BE;
    return getBytes(string, utf16BigEndian);
}
/**
 * Encodes the given string into a new byte array using the UTF-16LE charset.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUtf16Le(final String string) {
    final Charset utf16LittleEndian = Charsets.UTF_16LE;
    return getBytes(string, utf16LittleEndian);
}
/**
 * Encodes the given string into a new byte array using the UTF-8 charset.
 *
 * @param string
 *            the String to encode, may be <code>null</code>
 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
 * @throws NullPointerException
 *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @see #getBytesUnchecked(String, String)
 */
public static byte[] getBytesUtf8(final String string) {
    final Charset utf8 = Charsets.UTF_8;
    return getBytes(string, utf8);
}
/**
 * Builds the unchecked exception used to report a charset name that the runtime rejected.
 * <p>
 * The message is <code>charsetName + ": " + e</code>; the original exception is also attached as the cause so
 * its stack trace is not lost.
 * </p>
 *
 * @param charsetName
 *            the charset name that was rejected
 * @param e
 *            the exception raised for that name
 * @return a new {@link IllegalStateException} wrapping <code>e</code>
 */
private static IllegalStateException newIllegalStateException(final String charsetName,
        final UnsupportedEncodingException e) {
    return new IllegalStateException(charsetName + ": " + e, e);
}
/**
 * Null-tolerant wrapper around {@link String#String(byte[], Charset)}.
 *
 * @param bytes
 *            The bytes to be decoded into characters, may be <code>null</code>
 * @param charset
 *            The {@link Charset} used to decode the bytes
 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 */
private static String newString(final byte[] bytes, final Charset charset) {
    if (bytes == null) {
        return null;
    }
    return new String(bytes, charset);
}
/**
 * Constructs a new <code>String</code> by decoding the specified array of bytes using the named charset.
 * <p>
 * An {@link UnsupportedEncodingException} raised for the charset name is caught and re-thrown as an
 * {@link IllegalStateException}; that should never happen for a required charset name. Use this method when the
 * encoding is required to be in the JRE.
 * </p>
 *
 * @param bytes
 *            The bytes to be decoded into characters, may be <code>null</code>
 * @param charsetName
 *            The name of a required {@link java.nio.charset.Charset}
 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 * @throws IllegalStateException
 *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
 *             required charset name.
 * @see CharEncoding
 * @see String#String(byte[], String)
 */
public static String newString(final byte[] bytes, final String charsetName) {
    String decoded = null;
    if (bytes != null) {
        try {
            decoded = new String(bytes, charsetName);
        } catch (final UnsupportedEncodingException unsupported) {
            throw StringUtils.newIllegalStateException(charsetName, unsupported);
        }
    }
    return decoded;
}
/**
 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
 *
 * @param bytes
 *            The bytes to be decoded into characters, may be <code>null</code>
 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
 *         <code>null</code> if the input byte array was <code>null</code>.
 * @throws NullPointerException
 *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 */
public static String newStringIso8859_1(final byte[] bytes) {
    // Go through the null-tolerant helper: new String(null, charset) would throw instead of returning null.
    return newString(bytes, Charsets.ISO_8859_1);
}
/**
 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
 *
 * @param bytes
 *            The bytes to be decoded into characters, may be <code>null</code>
 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
 *         or <code>null</code> if the input byte array was <code>null</code>.
 * @throws NullPointerException
 *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
 *             required by the Java platform specification.
 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
 */
public static String newStringUsAscii(final byte[] bytes) {
    // Go through the null-tolerant helper: new String(null, charset) would throw instead of returning null.
    return newString(bytes, Charsets.US_ASCII);
}
/**
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
*
* @param bytes
* The bytes to be decoded into characters
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
* or <code>null</code> if the input byte array was <code>null</code>.
* @throws NullPointerException
* Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
* required by the Java platform specification.
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
*/
public static String newStringUtf16(final byte[] bytes) {
    // Honour the documented contract: a null input yields null rather than the
    // NullPointerException that new String(null, charset) would raise.
    if (bytes == null) {
        return null;
    }
    return new String(bytes, Charsets.UTF_16);
}
/**
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
*
* @param bytes
* The bytes to be decoded into characters
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
* or <code>null</code> if the input byte array was <code>null</code>.
* @throws NullPointerException
* Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
* required by the Java platform specification.
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
*/
public static String newStringUtf16Be(final byte[] bytes) {
    // Honour the documented contract: a null input yields null rather than the
    // NullPointerException that new String(null, charset) would raise.
    if (bytes == null) {
        return null;
    }
    return new String(bytes, Charsets.UTF_16BE);
}
/**
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
*
* @param bytes
* The bytes to be decoded into characters
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
* or <code>null</code> if the input byte array was <code>null</code>.
* @throws NullPointerException
* Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
* required by the Java platform specification.
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
*/
public static String newStringUtf16Le(final byte[] bytes) {
    // Honour the documented contract: a null input yields null rather than the
    // NullPointerException that new String(null, charset) would raise.
    if (bytes == null) {
        return null;
    }
    return new String(bytes, Charsets.UTF_16LE);
}
/**
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
*
* @param bytes
* The bytes to be decoded into characters
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
* or <code>null</code> if the input byte array was <code>null</code>.
* @throws NullPointerException
* Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
* required by the Java platform specification.
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
*/
public static String newStringUtf8(final byte[] bytes) {
    // Hand the bytes to the shared newString helper with the charset fixed to UTF-8.
    final String decoded = newString(bytes, Charsets.UTF_8);
    return decoded;
}
}

View File

@ -0,0 +1,21 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
</body>
</html>

View File

@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.digest;
import java.security.SecureRandom;
import java.util.Random;
/**
* Base64 like method to convert binary bytes into ASCII chars.
*
* TODO: Can Base64 be reused?
*
* <p>
* This class is immutable and thread-safe.
* </p>
*
* @version $Id$
* @since 1.7
*/
class B64 {

    /**
     * Table with characters for Base64 transformation.
     */
    static final String B64T = "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    /**
     * Base64 like conversion of bytes to ASCII chars.
     * <p>
     * The three bytes are packed into one 24-bit word ({@code b2} most significant, {@code b0} least significant)
     * and emitted six bits at a time, starting with the least significant six bits.
     *
     * @param b2
     *            A byte from the result.
     * @param b1
     *            A byte from the result.
     * @param b0
     *            A byte from the result.
     * @param outLen
     *            The number of expected output chars.
     * @param buffer
     *            Where the output chars is appended to.
     */
    static void b64from24bit(final byte b2, final byte b1, final byte b0, final int outLen,
            final StringBuilder buffer) {
        // The bit masking is necessary because the JVM byte type is signed!
        int w = ((b2 << 16) & 0x00ffffff) | ((b1 << 8) & 0x00ffff) | (b0 & 0xff);
        // It's effectively a "for" loop but kept to resemble the original C code.
        int n = outLen;
        while (n-- > 0) {
            buffer.append(B64T.charAt(w & 0x3f));
            w >>= 6;
        }
    }

    /**
     * Generates a string of random chars from the B64T set.
     *
     * @param num
     *            Number of chars to generate; zero or a negative value yields an empty string.
     * @return a string of {@code num} chars, each drawn from {@link #B64T}
     */
    static String getRandomSalt(final int num) {
        // One generator per call instead of one per character; SecureRandom keeps salts unpredictable.
        final SecureRandom random = new SecureRandom();
        final int alphabetSize = B64T.length();
        final StringBuilder saltString = new StringBuilder();
        for (int i = 0; i < num; i++) {
            saltString.append(B64T.charAt(random.nextInt(alphabetSize)));
        }
        return saltString.toString();
    }
}

View File

@ -0,0 +1,151 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.digest;
import org.apache.commons.codec.Charsets;
/**
* GNU libc crypt(3) compatible hash method.
* <p>
* See {@link #crypt(String, String)} for further details.
* <p>
* This class is immutable and thread-safe.
*
* @version $Id$
* @since 1.7
*/
public class Crypt {

    /**
     * Hashes a password in a crypt(3) compatible way using a generated salt.
     * <p>
     * A random salt and the default algorithm (currently SHA-512) are used. See {@link #crypt(String, String)} for
     * details.
     *
     * @param keyBytes
     *            plaintext password
     * @return hash value
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String crypt(final byte[] keyBytes) {
        final String noSalt = null;
        return crypt(keyBytes, noSalt);
    }

    /**
     * Hashes a password in a crypt(3) compatible way, choosing the algorithm from the salt prefix.
     * <p>
     * If no salt is provided, a random salt and the default algorithm (currently SHA-512) will be used. See
     * {@link #crypt(String, String)} for details.
     *
     * @param keyBytes
     *            plaintext password
     * @param salt
     *            salt value
     * @return hash value
     * @throws IllegalArgumentException
     *             if the salt does not match the allowed pattern
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String crypt(final byte[] keyBytes, final String salt) {
        // No salt: fall back to SHA-512 with a salt chosen by the implementation.
        if (salt == null) {
            return Sha2Crypt.sha512Crypt(keyBytes);
        }
        // Otherwise dispatch on the salt's magic prefix, checked in this fixed order.
        if (salt.startsWith(Sha2Crypt.SHA512_PREFIX)) {
            return Sha2Crypt.sha512Crypt(keyBytes, salt);
        }
        if (salt.startsWith(Sha2Crypt.SHA256_PREFIX)) {
            return Sha2Crypt.sha256Crypt(keyBytes, salt);
        }
        if (salt.startsWith(Md5Crypt.MD5_PREFIX)) {
            return Md5Crypt.md5Crypt(keyBytes, salt);
        }
        // Anything without a recognized prefix is handed to the traditional UnixCrypt implementation.
        return UnixCrypt.crypt(keyBytes, salt);
    }

    /**
     * Calculates the digest using the strongest crypt(3) algorithm.
     * <p>
     * A random salt and the default algorithm (currently SHA-512) are used.
     *
     * @see #crypt(String, String)
     * @param key
     *            plaintext password
     * @return hash value
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String crypt(final String key) {
        final String noSalt = null;
        return crypt(key, noSalt);
    }

    /**
     * Hashes a password in a crypt(3) compatible way.
     * <p>
     * The exact algorithm depends on the format of the salt string:
     * <ul>
     * <li>SHA-512 salts start with {@code $6$} and are up to 16 chars long.
     * <li>SHA-256 salts start with {@code $5$} and are up to 16 chars long
     * <li>MD5 salts start with {@code $1$} and are up to 8 chars long
     * <li>DES, the traditional UnixCrypt algorithm is used with only 2 chars
     * <li>Only the first 8 chars of the passwords are used in the DES algorithm!
     * </ul>
     * The magic strings {@code "$apr1$"} and {@code "$2a$"} are not recognized by this method as its output should be
     * identical with that of the libc implementation.
     * <p>
     * The rest of the salt string is drawn from the set {@code [a-zA-Z0-9./]} and is cut at the maximum length or if a
     * {@code "$"} sign is encountered. It is therefore valid to enter a complete hash value as salt to e.g. verify a
     * password with:
     *
     * <pre>
     * storedPwd.equals(crypt(enteredPwd, storedPwd))
     * </pre>
     * <p>
     * The resulting string starts with the marker string ({@code $6$}), continues with the salt value and ends with a
     * {@code "$"} sign followed by the actual hash value. For DES the string only contains the salt and actual hash.
     * Its total length is dependent on the algorithm used:
     * <ul>
     * <li>SHA-512: 106 chars
     * <li>SHA-256: 63 chars
     * <li>MD5: 34 chars
     * <li>DES: 13 chars
     * </ul>
     * <p>
     * Example:
     *
     * <pre>
     *      crypt("secret", "$1$xxxx") =&gt; "$1$xxxx$aMkevjfEIpa35Bh3G4bAc."
     *      crypt("secret", "xx") =&gt; "xxWAum7tHdIUw"
     * </pre>
     * <p>
     * This method comes in a variation that accepts a byte[] array to support input strings that are not encoded in
     * UTF-8 but e.g. in ISO-8859-1 where equal characters result in different byte values.
     *
     * @see "The man page of the libc crypt (3) function."
     * @param key
     *            plaintext password as entered by the user
     * @param salt
     *            salt value
     * @return hash value, i.e. encrypted password including the salt string
     * @throws IllegalArgumentException
     *             if the salt does not match the allowed pattern
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String crypt(final String key, final String salt) {
        // The String form is defined in terms of the UTF-8 bytes of the password.
        final byte[] keyBytes = key.getBytes(Charsets.UTF_8);
        return crypt(keyBytes, salt);
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.digest;
/**
* Standard {@link HmacUtils} algorithm names from the <cite>Java Cryptography Architecture Standard Algorithm Name
* Documentation</cite>.
*
* <p>
* <strong>Note: Not all JCE implementations support all algorithms in this enum.</strong>
* </p>
*
* @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/StandardNames.html">Java Cryptography
* Architecture Standard Algorithm Name Documentation</a>
* @since 1.10
* @version $Id$
*/
public enum HmacAlgorithms {

    /**
     * HmacMD5 Message Authentication Code (MAC) algorithm, as specified in RFC 2104 and RFC 1321.
     * <p>
     * Every implementation of the Java platform is required to support this standard Mac algorithm.
     * </p>
     */
    HMAC_MD5("HmacMD5"),

    /**
     * HmacSHA1 Message Authentication Code (MAC) algorithm, as specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is required to support this standard Mac algorithm.
     * </p>
     */
    HMAC_SHA_1("HmacSHA1"),

    /**
     * HmacSHA256 Message Authentication Code (MAC) algorithm, as specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is required to support this standard Mac algorithm.
     * </p>
     */
    HMAC_SHA_256("HmacSHA256"),

    /**
     * HmacSHA384 Message Authentication Code (MAC) algorithm, as specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm.
     * </p>
     */
    HMAC_SHA_384("HmacSHA384"),

    /**
     * HmacSHA512 Message Authentication Code (MAC) algorithm, as specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm.
     * </p>
     */
    HMAC_SHA_512("HmacSHA512");

    /** Algorithm name string supplied for this constant; returned verbatim by {@link #toString()}. */
    private final String jcaName;

    /**
     * Binds an enum constant to its algorithm name.
     *
     * @param jcaName
     *            the algorithm name string for this constant
     */
    HmacAlgorithms(final String jcaName) {
        this.jcaName = jcaName;
    }

    /**
     * Returns the algorithm name rather than the enum constant name.
     *
     * @see <a
     *      href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/SunProviders.html#SunJCEProvider">Java
     *      Cryptography Architecture Sun Providers Documentation</a>
     * @return The algorithm name ("HmacSHA512" for example)
     */
    @Override
    public String toString() {
        return this.jcaName;
    }
}

View File

@ -0,0 +1,794 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.digest;
import java.io.IOException;
import java.io.InputStream;
import java.security.InvalidKeyException;
import java.security.Key;
import java.security.NoSuchAlgorithmException;
import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.binary.StringUtils;
/**
* Simplifies common {@link javax.crypto.Mac} tasks. This class is immutable and thread-safe.
*
*
* <p>
* <strong>Note: Not all JCE implementations support all algorithms. If not supported, an IllegalArgumentException is
* thrown.</strong>
* </p>
*
* @since 1.10
* @version $Id$
*/
public final class HmacUtils {
// NOTE(review): the consumer of this constant is outside this excerpt; presumably it is the size (in bytes)
// of the chunk buffer used when feeding InputStream data into a Mac. Confirm against updateHmac.
private static final int STREAM_BUFFER_LENGTH = 1024;
/**
* Returns an initialized <code>Mac</code> for the HmacMD5 algorithm.
* <p>
* Every implementation of the Java platform is required to support this standard Mac algorithm.
* </p>
*
* @param key
* The key for the keyed digest (must not be null)
* @return A Mac instance initialized with the given key.
* @see Mac#getInstance(String)
* @see Mac#init(Key)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static Mac getHmacMd5(final byte[] key) {
    // Select the HmacMD5 enum constant and let the generic factory create and key the Mac.
    final HmacAlgorithms md5 = HmacAlgorithms.HMAC_MD5;
    return getInitializedMac(md5, key);
}
/**
* Returns an initialized <code>Mac</code> for the HmacSHA1 algorithm.
* <p>
* Every implementation of the Java platform is required to support this standard Mac algorithm.
* </p>
*
* @param key
* The key for the keyed digest (must not be null)
* @return A Mac instance initialized with the given key.
* @see Mac#getInstance(String)
* @see Mac#init(Key)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static Mac getHmacSha1(final byte[] key) {
    // Select the HmacSHA1 enum constant and let the generic factory create and key the Mac.
    final HmacAlgorithms sha1 = HmacAlgorithms.HMAC_SHA_1;
    return getInitializedMac(sha1, key);
}
/**
* Returns an initialized <code>Mac</code> for the HmacSHA256 algorithm.
* <p>
* Every implementation of the Java platform is required to support this standard Mac algorithm.
* </p>
*
* @param key
* The key for the keyed digest (must not be null)
* @return A Mac instance initialized with the given key.
* @see Mac#getInstance(String)
* @see Mac#init(Key)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static Mac getHmacSha256(final byte[] key) {
    // Select the HmacSHA256 enum constant and let the generic factory create and key the Mac.
    final HmacAlgorithms sha256 = HmacAlgorithms.HMAC_SHA_256;
    return getInitializedMac(sha256, key);
}
/**
* Returns an initialized <code>Mac</code> for the HmacSHA384 algorithm.
* <p>
* Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm.
* </p>
*
* @param key
* The key for the keyed digest (must not be null)
* @return A Mac instance initialized with the given key.
* @see Mac#getInstance(String)
* @see Mac#init(Key)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static Mac getHmacSha384(final byte[] key) {
    // Select the HmacSHA384 enum constant and let the generic factory create and key the Mac.
    final HmacAlgorithms sha384 = HmacAlgorithms.HMAC_SHA_384;
    return getInitializedMac(sha384, key);
}
/**
* Returns an initialized <code>Mac</code> for the HmacSHA512 algorithm.
* <p>
* Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm.
* </p>
*
* @param key
* The key for the keyed digest (must not be null)
* @return A Mac instance initialized with the given key.
* @see Mac#getInstance(String)
* @see Mac#init(Key)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static Mac getHmacSha512(final byte[] key) {
    // Select the HmacSHA512 enum constant and let the generic factory create and key the Mac.
    final HmacAlgorithms sha512 = HmacAlgorithms.HMAC_SHA_512;
    return getInitializedMac(sha512, key);
}
/**
* Returns an initialized <code>Mac</code> for the given <code>algorithm</code>.
*
* @param algorithm
* the name of the algorithm requested. See <a href=
* "http://docs.oracle.com/javase/6/docs/technotes/guides/security/crypto/CryptoSpec.html#AppA" >Appendix
* A in the Java Cryptography Architecture Reference Guide</a> for information about standard algorithm
* names.
* @param key
* The key for the keyed digest (must not be null)
* @return A Mac instance initialized with the given key.
* @see Mac#getInstance(String)
* @see Mac#init(Key)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static Mac getInitializedMac(final HmacAlgorithms algorithm, final byte[] key) {
    // The enum's toString() yields the algorithm name expected by the String-based overload.
    final String algorithmName = algorithm.toString();
    return getInitializedMac(algorithmName, key);
}
/**
* Returns an initialized <code>Mac</code> for the given <code>algorithm</code>.
*
* @param algorithm
* the name of the algorithm requested. See <a href=
* "http://docs.oracle.com/javase/6/docs/technotes/guides/security/crypto/CryptoSpec.html#AppA" >Appendix
* A in the Java Cryptography Architecture Reference Guide</a> for information about standard algorithm
* names.
* @param key
* The key for the keyed digest (must not be null)
* @return A Mac instance initialized with the given key.
* @see Mac#getInstance(String)
* @see Mac#init(Key)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static Mac getInitializedMac(final String algorithm, final byte[] key) {
    // Reject a missing key up front with the documented exception type.
    if (null == key) {
        throw new IllegalArgumentException("Null key");
    }

    final Mac initialized;
    try {
        // The key spec is created first, then the Mac is looked up and keyed.
        final SecretKeySpec secret = new SecretKeySpec(key, algorithm);
        initialized = Mac.getInstance(algorithm);
        initialized.init(secret);
    } catch (final NoSuchAlgorithmException unknownAlgorithm) {
        // Checked JCA failures are surfaced as IllegalArgumentException with the cause preserved.
        throw new IllegalArgumentException(unknownAlgorithm);
    } catch (final InvalidKeyException rejectedKey) {
        throw new IllegalArgumentException(rejectedKey);
    }
    return initialized;
}
// hmacMd5
/**
* Returns a HmacMD5 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacMD5 MAC for the given key and value
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacMd5(final byte[] key, final byte[] valueToDigest) {
    final byte[] digest;
    try {
        final Mac md5Mac = getHmacMd5(key);
        digest = md5Mac.doFinal(valueToDigest);
    } catch (final IllegalStateException unexpected) {
        // Not expected for a Mac that was just obtained; rethrown under the documented exception type.
        throw new IllegalArgumentException(unexpected);
    }
    return digest;
}
/**
* Returns a HmacMD5 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest
* <p>
* The InputStream must not be null and will not be closed
* </p>
* @return HmacMD5 MAC for the given key and value
* @throws IOException
* If an I/O error occurs.
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacMd5(final byte[] key, final InputStream valueToDigest) throws IOException {
    // Obtain the keyed Mac first, pass it with the stream to updateHmac, then finish the computation.
    final Mac md5Mac = getHmacMd5(key);
    return updateHmac(md5Mac, valueToDigest).doFinal();
}
/**
* Returns a HmacMD5 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacMD5 MAC for the given key and value
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacMd5(final String key, final String valueToDigest) {
    // Convert both strings via StringUtils.getBytesUtf8, then reuse the byte[] overload.
    final byte[] keyBytes = StringUtils.getBytesUtf8(key);
    final byte[] valueBytes = StringUtils.getBytesUtf8(valueToDigest);
    return hmacMd5(keyBytes, valueBytes);
}
/**
* Returns a HmacMD5 Message Authentication Code (MAC) as a hex string (lowercase) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacMD5 MAC for the given key and value as a hex string (lowercase)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static String hmacMd5Hex(final byte[] key, final byte[] valueToDigest) {
    // Compute the raw MAC, then render it through Hex.encodeHexString.
    final byte[] rawMac = hmacMd5(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
* Returns a HmacMD5 Message Authentication Code (MAC) as a hex string (lowercase) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest
* <p>
* The InputStream must not be null and will not be closed
* </p>
* @return HmacMD5 MAC for the given key and value as a hex string (lowercase)
* @throws IOException
* If an I/O error occurs.
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static String hmacMd5Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
    // Compute the raw MAC over the stream, then render it through Hex.encodeHexString.
    final byte[] rawMac = hmacMd5(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
* Returns a HmacMD5 Message Authentication Code (MAC) as a hex string (lowercase) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacMD5 MAC for the given key and value as a hex string (lowercase)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static String hmacMd5Hex(final String key, final String valueToDigest) {
    // Compute the raw MAC from the two strings, then render it through Hex.encodeHexString.
    final byte[] rawMac = hmacMd5(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
// hmacSha1
/**
* Returns a HmacSHA1 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacSHA1 MAC for the given key and value
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacSha1(final byte[] key, final byte[] valueToDigest) {
    final byte[] digest;
    try {
        final Mac sha1Mac = getHmacSha1(key);
        digest = sha1Mac.doFinal(valueToDigest);
    } catch (final IllegalStateException unexpected) {
        // Not expected for a Mac that was just obtained; rethrown under the documented exception type.
        throw new IllegalArgumentException(unexpected);
    }
    return digest;
}
/**
* Returns a HmacSHA1 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest
* <p>
* The InputStream must not be null and will not be closed
* </p>
* @return HmacSHA1 MAC for the given key and value
* @throws IOException
* If an I/O error occurs.
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacSha1(final byte[] key, final InputStream valueToDigest) throws IOException {
    // Obtain the keyed Mac first, pass it with the stream to updateHmac, then finish the computation.
    final Mac sha1Mac = getHmacSha1(key);
    return updateHmac(sha1Mac, valueToDigest).doFinal();
}
/**
* Returns a HmacSHA1 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacSHA1 MAC for the given key and value
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacSha1(final String key, final String valueToDigest) {
    // Convert both strings via StringUtils.getBytesUtf8, then reuse the byte[] overload.
    final byte[] keyBytes = StringUtils.getBytesUtf8(key);
    final byte[] valueBytes = StringUtils.getBytesUtf8(valueToDigest);
    return hmacSha1(keyBytes, valueBytes);
}
/**
* Returns a HmacSHA1 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacSHA1 MAC for the given key and value as hex string (lowercase)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static String hmacSha1Hex(final byte[] key, final byte[] valueToDigest) {
    // Compute the raw MAC, then render it through Hex.encodeHexString.
    final byte[] rawMac = hmacSha1(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
* Returns a HmacSHA1 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest
* <p>
* The InputStream must not be null and will not be closed
* </p>
* @return HmacSHA1 MAC for the given key and value as hex string (lowercase)
* @throws IOException
* If an I/O error occurs.
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static String hmacSha1Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
    // Compute the raw MAC over the stream, then render it through Hex.encodeHexString.
    final byte[] rawMac = hmacSha1(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
* Returns a HmacSHA1 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacSHA1 MAC for the given key and value as hex string (lowercase)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static String hmacSha1Hex(final String key, final String valueToDigest) {
    // Compute the raw MAC from the two strings, then render it through Hex.encodeHexString.
    final byte[] rawMac = hmacSha1(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
// hmacSha256
/**
* Returns a HmacSHA256 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacSHA256 MAC for the given key and value
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacSha256(final byte[] key, final byte[] valueToDigest) {
    final byte[] digest;
    try {
        final Mac sha256Mac = getHmacSha256(key);
        digest = sha256Mac.doFinal(valueToDigest);
    } catch (final IllegalStateException unexpected) {
        // Not expected for a Mac that was just obtained; rethrown under the documented exception type.
        throw new IllegalArgumentException(unexpected);
    }
    return digest;
}
/**
* Returns a HmacSHA256 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest
* <p>
* The InputStream must not be null and will not be closed
* </p>
* @return HmacSHA256 MAC for the given key and value
* @throws IOException
* If an I/O error occurs.
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacSha256(final byte[] key, final InputStream valueToDigest) throws IOException {
    // Obtain the keyed Mac first, pass it with the stream to updateHmac, then finish the computation.
    final Mac sha256Mac = getHmacSha256(key);
    return updateHmac(sha256Mac, valueToDigest).doFinal();
}
/**
* Returns a HmacSHA256 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacSHA256 MAC for the given key and value
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacSha256(final String key, final String valueToDigest) {
    // Convert both strings via StringUtils.getBytesUtf8, then reuse the byte[] overload.
    final byte[] keyBytes = StringUtils.getBytesUtf8(key);
    final byte[] valueBytes = StringUtils.getBytesUtf8(valueToDigest);
    return hmacSha256(keyBytes, valueBytes);
}
/**
* Returns a HmacSHA256 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacSHA256 MAC for the given key and value as hex string (lowercase)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static String hmacSha256Hex(final byte[] key, final byte[] valueToDigest) {
    // Compute the raw MAC, then render it through Hex.encodeHexString.
    final byte[] rawMac = hmacSha256(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
* Returns a HmacSHA256 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest
* <p>
* The InputStream must not be null and will not be closed
* </p>
* @return HmacSHA256 MAC for the given key and value as hex string (lowercase)
* @throws IOException
* If an I/O error occurs.
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static String hmacSha256Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
    // Compute the raw MAC over the stream, then render it through Hex.encodeHexString.
    final byte[] rawMac = hmacSha256(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
* Returns a HmacSHA256 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacSHA256 MAC for the given key and value as hex string (lowercase)
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static String hmacSha256Hex(final String key, final String valueToDigest) {
    // Compute the raw MAC from the two strings, then render it through Hex.encodeHexString.
    final byte[] rawMac = hmacSha256(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
// hmacSha384
/**
* Returns a HmacSHA384 Message Authentication Code (MAC) for the given key and value.
*
* @param key
* The key for the keyed digest (must not be null)
* @param valueToDigest
* The value (data) to digest (may be empty or null)
* @return HmacSHA384 MAC for the given key and value
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
*/
public static byte[] hmacSha384(final byte[] key, final byte[] valueToDigest) {
try {
return getHmacSha384(key).doFinal(valueToDigest);
} catch (final IllegalStateException e) {
// cannot happen
throw new IllegalArgumentException(e);
}
}
/**
 * Computes the HmacSHA384 Message Authentication Code (MAC) of the data read from a stream.
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest
 *            <p>
 *            The InputStream must not be null and will not be closed
 *            </p>
 * @return HmacSHA384 MAC for the given key and value
 * @throws IOException
 *             If an I/O error occurs.
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static byte[] hmacSha384(final byte[] key, final InputStream valueToDigest) throws IOException {
    final Mac keyedMac = getHmacSha384(key);
    return updateHmac(keyedMac, valueToDigest).doFinal();
}
/**
 * Computes the HmacSHA384 Message Authentication Code (MAC) of a string value with a string key. Both strings are
 * converted to bytes using UTF-8 before digesting.
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest (may be empty or null)
 * @return HmacSHA384 MAC for the given key and value
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static byte[] hmacSha384(final String key, final String valueToDigest) {
    final byte[] keyBytes = StringUtils.getBytesUtf8(key);
    final byte[] valueBytes = StringUtils.getBytesUtf8(valueToDigest);
    return hmacSha384(keyBytes, valueBytes);
}
/**
 * Computes the HmacSHA384 Message Authentication Code (MAC) of a byte array and returns it as a hex string
 * (lowercase).
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest (may be empty or null)
 * @return HmacSHA384 MAC for the given key and value as hex string (lowercase)
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static String hmacSha384Hex(final byte[] key, final byte[] valueToDigest) {
    final byte[] rawMac = hmacSha384(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
 * Computes the HmacSHA384 Message Authentication Code (MAC) of the data read from a stream and returns it as a
 * hex string (lowercase).
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest
 *            <p>
 *            The InputStream must not be null and will not be closed
 *            </p>
 * @return HmacSHA384 MAC for the given key and value as hex string (lowercase)
 * @throws IOException
 *             If an I/O error occurs.
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static String hmacSha384Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
    final byte[] rawMac = hmacSha384(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
 * Computes the HmacSHA384 Message Authentication Code (MAC) of a string value with a string key and returns it as
 * a hex string (lowercase).
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest (may be empty or null)
 * @return HmacSHA384 MAC for the given key and value as hex string (lowercase)
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static String hmacSha384Hex(final String key, final String valueToDigest) {
    final byte[] rawMac = hmacSha384(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
// hmacSha512
/**
 * Computes the HmacSHA512 Message Authentication Code (MAC) of a byte array.
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest (may be empty or null)
 * @return HmacSHA512 MAC for the given key and value
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static byte[] hmacSha512(final byte[] key, final byte[] valueToDigest) {
    final byte[] rawMac;
    try {
        rawMac = getHmacSha512(key).doFinal(valueToDigest);
    } catch (final IllegalStateException e) {
        // Not expected in practice; surfaced as the documented unchecked exception type.
        throw new IllegalArgumentException(e);
    }
    return rawMac;
}
/**
 * Computes the HmacSHA512 Message Authentication Code (MAC) of the data read from a stream.
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest
 *            <p>
 *            The InputStream must not be null and will not be closed
 *            </p>
 * @return HmacSHA512 MAC for the given key and value
 * @throws IOException
 *             If an I/O error occurs.
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static byte[] hmacSha512(final byte[] key, final InputStream valueToDigest) throws IOException {
    final Mac keyedMac = getHmacSha512(key);
    return updateHmac(keyedMac, valueToDigest).doFinal();
}
/**
 * Computes the HmacSHA512 Message Authentication Code (MAC) of a string value with a string key. Both strings are
 * converted to bytes using UTF-8 before digesting.
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest (may be empty or null)
 * @return HmacSHA512 MAC for the given key and value
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static byte[] hmacSha512(final String key, final String valueToDigest) {
    final byte[] keyBytes = StringUtils.getBytesUtf8(key);
    final byte[] valueBytes = StringUtils.getBytesUtf8(valueToDigest);
    return hmacSha512(keyBytes, valueBytes);
}
/**
 * Computes the HmacSHA512 Message Authentication Code (MAC) of a byte array and returns it as a hex string
 * (lowercase).
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest (may be empty or null)
 * @return HmacSHA512 MAC for the given key and value as hex string (lowercase)
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static String hmacSha512Hex(final byte[] key, final byte[] valueToDigest) {
    final byte[] rawMac = hmacSha512(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
 * Computes the HmacSHA512 Message Authentication Code (MAC) of the data read from a stream and returns it as a
 * hex string (lowercase).
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest
 *            <p>
 *            The InputStream must not be null and will not be closed
 *            </p>
 * @return HmacSHA512 MAC for the given key and value as hex string (lowercase)
 * @throws IOException
 *             If an I/O error occurs.
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static String hmacSha512Hex(final byte[] key, final InputStream valueToDigest) throws IOException {
    final byte[] rawMac = hmacSha512(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
/**
 * Computes the HmacSHA512 Message Authentication Code (MAC) of a string value with a string key and returns it as
 * a hex string (lowercase).
 *
 * @param key
 *            The key for the keyed digest (must not be null)
 * @param valueToDigest
 *            The value (data) to digest (may be empty or null)
 * @return HmacSHA512 MAC for the given key and value as hex string (lowercase)
 * @throws IllegalArgumentException
 *             when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
 */
public static String hmacSha512Hex(final String key, final String valueToDigest) {
    final byte[] rawMac = hmacSha512(key, valueToDigest);
    return Hex.encodeHexString(rawMac);
}
// update
/**
 * Resets the given {@link Mac} and then feeds it the given bytes. Any data previously passed to the Mac is
 * discarded by the reset; the caller obtains the MAC by calling {@code doFinal()} on the returned instance.
 *
 * @param mac
 *            the initialized {@link Mac} to update
 * @param valueToDigest
 *            the value to update the {@link Mac} with (may be null or empty)
 * @return the updated {@link Mac} (the same instance that was passed in)
 * @throws IllegalStateException
 *             if the Mac was not initialized
 * @since 1.x
 */
public static Mac updateHmac(final Mac mac, final byte[] valueToDigest) {
    final Mac target = mac;
    // Start from a clean state so earlier updates cannot leak into this digest.
    target.reset();
    target.update(valueToDigest);
    return target;
}
/**
 * Resets the given {@link Mac} and then feeds it everything that can be read from the given stream, reading in
 * chunks of {@code STREAM_BUFFER_LENGTH} bytes until the stream reports end of input. The caller obtains the MAC
 * by calling {@code doFinal()} on the returned instance.
 *
 * @param mac
 *            the initialized {@link Mac} to update
 * @param valueToDigest
 *            the value to update the {@link Mac} with
 *            <p>
 *            The InputStream must not be null and will not be closed
 *            </p>
 * @return the updated {@link Mac} (the same instance that was passed in)
 * @throws IOException
 *             If an I/O error occurs.
 * @throws IllegalStateException
 *             If the Mac was not initialized
 * @since 1.x
 */
public static Mac updateHmac(final Mac mac, final InputStream valueToDigest) throws IOException {
    mac.reset();
    final byte[] chunk = new byte[STREAM_BUFFER_LENGTH];
    int count;
    // read() returns -1 at end of stream; every other result is a number of bytes to digest.
    while ((count = valueToDigest.read(chunk, 0, STREAM_BUFFER_LENGTH)) > -1) {
        mac.update(chunk, 0, count);
    }
    return mac;
}
/**
 * Resets the given {@link Mac} and then feeds it the UTF-8 bytes of the given string. The caller obtains the MAC
 * by calling {@code doFinal()} on the returned instance.
 *
 * @param mac
 *            the initialized {@link Mac} to update
 * @param valueToDigest
 *            the value to update the {@link Mac} with (may be null or empty)
 * @return the updated {@link Mac} (the same instance that was passed in)
 * @throws IllegalStateException
 *             if the Mac was not initialized
 * @since 1.x
 */
public static Mac updateHmac(final Mac mac, final String valueToDigest) {
    mac.reset();
    final byte[] valueBytes = StringUtils.getBytesUtf8(valueToDigest);
    mac.update(valueBytes);
    return mac;
}
}

View File

@ -0,0 +1,302 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.digest;
import java.security.MessageDigest;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.Charsets;
/**
 * The libc crypt() "$1$" and Apache "$apr1$" MD5-based hash algorithm.
 * <p>
 * Based on the public domain ("beer-ware") C implementation from Poul-Henning Kamp which was found at: <a
 * href="http://www.freebsd.org/cgi/cvsweb.cgi/src/lib/libcrypt/crypt-md5.c?rev=1.1;content-type=text%2Fplain">
 * crypt-md5.c @ freebsd.org</a><br>
 * <p>
 * Source:
 *
 * <pre>
 * $FreeBSD: src/lib/libcrypt/crypt-md5.c,v 1.1 1999/01/21 13:50:09 brandon Exp $
 * </pre>
 * <p>
 * Conversion to Kotlin and from there to Java in 2012.
 * <p>
 * The C style comments are from the original C code, the ones with "//" from the port.
 * <p>
 * This class is immutable and thread-safe.
 * <p>
 * Note that the {@code byte[]} overloads overwrite the supplied key array with zeros before returning.
 *
 * @version $Id$
 * @since 1.7
 */
public class Md5Crypt {

    /** The Identifier of the Apache variant. */
    static final String APR1_PREFIX = "$apr1$";

    /** The number of bytes of the final hash. */
    private static final int BLOCKSIZE = 16;

    /** The Identifier of this crypt() variant. */
    static final String MD5_PREFIX = "$1$";

    /** The number of rounds of the big loop. */
    private static final int ROUNDS = 1000;

    /**
     * Generates an "$apr1$" hash with a random 8 character salt. See {@link #apr1Crypt(String, String)} for
     * details.
     *
     * @param keyBytes
     *            plaintext to hash. Every element of this array is set to 0 before the method returns.
     * @return the hash value
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String apr1Crypt(final byte[] keyBytes) {
        return apr1Crypt(keyBytes, APR1_PREFIX + B64.getRandomSalt(8));
    }

    /**
     * Generates an "$apr1$" hash with the given salt. See {@link #apr1Crypt(String, String)} for details.
     *
     * @param keyBytes
     *            plaintext to hash. Every element of this array is set to 0 before the method returns.
     * @param salt
     *            An APR1 salt. The "$apr1$" prefix is added if it is missing; a random salt is generated if
     *            this is null.
     * @return the hash value
     * @throws IllegalArgumentException
     *             if the salt does not match the allowed pattern
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String apr1Crypt(final byte[] keyBytes, String salt) {
        // to make the md5Crypt regex happy
        if (salt != null && !salt.startsWith(APR1_PREFIX)) {
            salt = APR1_PREFIX + salt;
        }
        return Md5Crypt.md5Crypt(keyBytes, salt, APR1_PREFIX);
    }

    /**
     * Generates an "$apr1$" hash of the UTF-8 bytes of the given string with a random salt. See
     * {@link #apr1Crypt(String, String)} for details.
     *
     * @param keyBytes
     *            plaintext string to hash.
     * @return the hash value
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String apr1Crypt(final String keyBytes) {
        return apr1Crypt(keyBytes.getBytes(Charsets.UTF_8));
    }

    /**
     * Generates an Apache htpasswd compatible "$apr1$" MD5 based hash value.
     * <p>
     * The algorithm is identical to the crypt(3) "$1$" one but produces different outputs due to the different salt
     * prefix.
     *
     * @param keyBytes
     *            plaintext string to hash.
     * @param salt
     *            salt string including the prefix and optionally garbage at the end. Will be generated randomly if
     *            null.
     * @return the hash value
     * @throws IllegalArgumentException
     *             if the salt does not match the allowed pattern
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String apr1Crypt(final String keyBytes, final String salt) {
        return apr1Crypt(keyBytes.getBytes(Charsets.UTF_8), salt);
    }

    /**
     * Generates a libc6 crypt() compatible "$1$" hash value with a random 8 character salt.
     * <p>
     * See {@link Crypt#crypt(String, String)} for details.
     *
     * @param keyBytes
     *            plaintext to hash. Every element of this array is set to 0 before the method returns.
     * @return the hash value
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String md5Crypt(final byte[] keyBytes) {
        return md5Crypt(keyBytes, MD5_PREFIX + B64.getRandomSalt(8));
    }

    /**
     * Generates a libc crypt() compatible "$1$" MD5 based hash value.
     * <p>
     * See {@link Crypt#crypt(String, String)} for details.
     *
     * @param keyBytes
     *            plaintext to hash. Every element of this array is set to 0 before the method returns.
     * @param salt
     *            salt string including the prefix and optionally garbage at the end. Will be generated randomly if
     *            null.
     * @return the hash value
     * @throws IllegalArgumentException
     *             if the salt does not match the allowed pattern
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String md5Crypt(final byte[] keyBytes, final String salt) {
        return md5Crypt(keyBytes, salt, MD5_PREFIX);
    }

    /**
     * Generates a libc6 crypt() "$1$" or Apache htpasswd "$apr1$" hash value.
     * <p>
     * See {@link Crypt#crypt(String, String)} or {@link #apr1Crypt(String, String)} for details.
     *
     * @param keyBytes
     *            plaintext to hash. Every element of this array is set to 0 before the method returns.
     * @param salt
     *            May be null, in which case a random 8 character salt is generated. Otherwise it must start with
     *            {@code prefix} followed by 1 to 8 characters out of {@code [./a-zA-Z0-9]}; anything after that is
     *            ignored.
     * @param prefix
     *            salt prefix. It is matched literally against the start of {@code salt}.
     * @return the hash value
     * @throws IllegalArgumentException
     *             if the salt does not match the allowed pattern
     * @throws RuntimeException
     *             when a {@link java.security.NoSuchAlgorithmException} is caught.
     */
    public static String md5Crypt(final byte[] keyBytes, final String salt, final String prefix) {
        final int keyLen = keyBytes.length;

        // Extract the real salt from the given string which can be a complete hash string.
        String saltString;
        if (salt == null) {
            saltString = B64.getRandomSalt(8);
        } else {
            // Pattern.quote keeps every character of the prefix literal, not just '$'.
            final Pattern p = Pattern.compile("^" + Pattern.quote(prefix) + "([\\.\\/a-zA-Z0-9]{1,8}).*");
            final Matcher m = p.matcher(salt);
            if (!m.find()) {
                throw new IllegalArgumentException("Invalid salt value: " + salt);
            }
            saltString = m.group(1);
        }
        final byte[] saltBytes = saltString.getBytes(Charsets.UTF_8);

        final MessageDigest ctx = DigestUtils.getMd5Digest();

        /*
         * The password first, since that is what is most unknown
         */
        ctx.update(keyBytes);

        /*
         * Then our magic string
         */
        ctx.update(prefix.getBytes(Charsets.UTF_8));

        /*
         * Then the raw salt
         */
        ctx.update(saltBytes);

        /*
         * Then just as many characters of the MD5(pw,salt,pw)
         */
        MessageDigest ctx1 = DigestUtils.getMd5Digest();
        ctx1.update(keyBytes);
        ctx1.update(saltBytes);
        ctx1.update(keyBytes);
        byte[] finalb = ctx1.digest();
        int ii = keyLen;
        while (ii > 0) {
            ctx.update(finalb, 0, Math.min(ii, BLOCKSIZE));
            ii -= BLOCKSIZE;
        }

        /*
         * Don't leave anything around in vm they could use.
         */
        // The zeroing must happen before the next loop: that loop reads the cleared first byte.
        Arrays.fill(finalb, (byte) 0);

        /*
         * Then something really weird...
         */
        // For each bit of the key length only the first byte of the (zeroed) digest or of the key is added.
        ii = keyLen;
        while (ii > 0) {
            if ((ii & 1) == 1) {
                ctx.update(finalb[0]);
            } else {
                ctx.update(keyBytes[0]);
            }
            ii >>= 1;
        }

        /*
         * Now make the output string
         */
        final StringBuilder passwd = new StringBuilder(prefix + saltString + "$");
        finalb = ctx.digest();

        /*
         * and now, just to make sure things don't run too fast On a 60 Mhz Pentium this takes 34 msec, so you would
         * need 30 seconds to build a 1000 entry dictionary...
         */
        for (int i = 0; i < ROUNDS; i++) {
            ctx1 = DigestUtils.getMd5Digest();
            if ((i & 1) != 0) {
                ctx1.update(keyBytes);
            } else {
                ctx1.update(finalb, 0, BLOCKSIZE);
            }

            if (i % 3 != 0) {
                ctx1.update(saltBytes);
            }

            if (i % 7 != 0) {
                ctx1.update(keyBytes);
            }

            if ((i & 1) != 0) {
                ctx1.update(finalb, 0, BLOCKSIZE);
            } else {
                ctx1.update(keyBytes);
            }
            finalb = ctx1.digest();
        }

        // The following was nearly identical to the Sha2Crypt code.
        // Again, the buflen is not really needed.
        // int buflen = MD5_PREFIX.length() - 1 + salt_string.length() + 1 + BLOCKSIZE + 1;
        B64.b64from24bit(finalb[0], finalb[6], finalb[12], 4, passwd);
        B64.b64from24bit(finalb[1], finalb[7], finalb[13], 4, passwd);
        B64.b64from24bit(finalb[2], finalb[8], finalb[14], 4, passwd);
        B64.b64from24bit(finalb[3], finalb[9], finalb[15], 4, passwd);
        B64.b64from24bit(finalb[4], finalb[10], finalb[5], 4, passwd);
        B64.b64from24bit((byte) 0, (byte) 0, finalb[11], 2, passwd);

        /*
         * Don't leave anything around in vm they could use.
         */
        // Is there a better way to do this with the JVM?
        ctx.reset();
        ctx1.reset();
        // This also wipes the caller's key array.
        Arrays.fill(keyBytes, (byte) 0);
        Arrays.fill(saltBytes, (byte) 0);
        Arrays.fill(finalb, (byte) 0);

        return passwd.toString();
    }
}

View File

@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.digest;
import java.security.MessageDigest;
/**
 * Holder for the standard {@link MessageDigest} algorithm names listed in the <cite>Java Cryptography Architecture
 * Standard Algorithm Name Documentation</cite>.
 * <p>
 * This class only exposes string constants and cannot be instantiated; it is immutable and thread-safe.
 * </p>
 * TODO 2.0 This should be an enum.
 *
 * @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/StandardNames.html">Java Cryptography
 *      Architecture Standard Algorithm Name Documentation</a>
 * @since 1.7
 * @version $Id$
 */
public class MessageDigestAlgorithms {

    /** Name of the MD2 message digest algorithm (RFC 1319). */
    public static final String MD2 = "MD2";

    /** Name of the MD5 message digest algorithm (RFC 1321). */
    public static final String MD5 = "MD5";

    /** Name of the SHA-1 hash algorithm (FIPS PUB 180-2). */
    public static final String SHA_1 = "SHA-1";

    /**
     * Name of the SHA-224 hash algorithm (FIPS PUB 180-4).
     * <p>
     * Java 8 only.
     * </p>
     *
     * @since 1.11
     */
    public static final String SHA_224 = "SHA-224";

    /** Name of the SHA-256 hash algorithm (FIPS PUB 180-2). */
    public static final String SHA_256 = "SHA-256";

    /** Name of the SHA-384 hash algorithm (FIPS PUB 180-2). */
    public static final String SHA_384 = "SHA-384";

    /** Name of the SHA-512 hash algorithm (FIPS PUB 180-2). */
    public static final String SHA_512 = "SHA-512";

    /** Not instantiable: this class is a pure constant holder. */
    private MessageDigestAlgorithms() {
        // cannot be instantiated.
    }
}

View File

@ -0,0 +1,545 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.digest;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.Charsets;
/**
* SHA2-based Unix crypt implementation.
* <p>
* Based on the C implementation released into the Public Domain by Ulrich Drepper &lt;drepper@redhat.com&gt;
* http://www.akkadia.org/drepper/SHA-crypt.txt
* <p>
* Conversion to Kotlin and from there to Java in 2012 by Christian Hammers &lt;ch@lathspell.de&gt; and likewise put
* into the Public Domain.
* <p>
* This class is immutable and thread-safe.
*
* @version $Id$
* @since 1.7
*/
public class Sha2Crypt {
/** Default number of rounds, used when the salt carries no "rounds=" specification. */
private static final int ROUNDS_DEFAULT = 5000;
/** Maximum number of rounds; a larger requested value is clamped down to this. */
private static final int ROUNDS_MAX = 999999999;
/** Minimum number of rounds; a smaller requested value is raised to this. */
private static final int ROUNDS_MIN = 1000;
/** Prefix for the optional rounds specification inside a salt string. */
private static final String ROUNDS_PREFIX = "rounds=";
/** The number of bytes the final hash value will have (SHA-256 variant). */
private static final int SHA256_BLOCKSIZE = 32;
/** The prefix that identifies the SHA-256 crypt() variant. */
static final String SHA256_PREFIX = "$5$";
/** The number of bytes the final hash value will have (SHA-512 variant). */
private static final int SHA512_BLOCKSIZE = 64;
/** The prefix that identifies the SHA-512 crypt() variant. */
static final String SHA512_PREFIX = "$6$";
/**
 * The pattern to match valid salt values.
 * <p>
 * Group 1 is the variant digit (5 or 6), group 2 the optional "rounds=N$" part, group 3 the digits of N and
 * group 4 the salt itself (1 to 16 characters out of [./a-zA-Z0-9]). Anything following the salt is accepted
 * and ignored.
 */
private static final Pattern SALT_PATTERN = Pattern
.compile("^\\$([56])\\$(rounds=(\\d+)\\$)?([\\.\\/a-zA-Z0-9]{1,16}).*");
/**
 * Generates a libc crypt() compatible "$5$" hash value, letting the two-argument overload pick a random salt.
 * <p>
 * See {@link Crypt#crypt(String, String)} for details.
 *
 * @param keyBytes
 *            plaintext to hash
 * @return complete hash value
 * @throws RuntimeException
 *             when a {@link java.security.NoSuchAlgorithmException} is caught.
 */
public static String sha256Crypt(final byte[] keyBytes) {
    // A null salt asks sha256Crypt(byte[], String) to generate one.
    final String generatedSalt = null;
    return sha256Crypt(keyBytes, generatedSalt);
}
/**
 * Generates a libc6 crypt() compatible "$5$" hash value.
 * <p>
 * See {@link Crypt#crypt(String, String)} for details.
 *
 * @param keyBytes
 *            plaintext to hash
 * @param salt
 *            salt string. It has to satisfy the class's salt pattern, i.e. start with a "$5$" or "$6$" prefix,
 *            optionally followed by "rounds=N$", then the salt characters. If null, "$5$" plus a random 8
 *            character salt is used.
 * @return complete hash value including salt
 * @throws IllegalArgumentException
 *             if the salt does not match the allowed pattern
 * @throws RuntimeException
 *             when a {@link java.security.NoSuchAlgorithmException} is caught.
 */
public static String sha256Crypt(final byte[] keyBytes, String salt) {
    final String effectiveSalt = (salt != null) ? salt : SHA256_PREFIX + B64.getRandomSalt(8);
    return sha2Crypt(keyBytes, effectiveSalt, SHA256_PREFIX, SHA256_BLOCKSIZE, MessageDigestAlgorithms.SHA_256);
}
/**
* Generates a libc6 crypt() compatible "$5$" or "$6$" SHA2 based hash value.
* <p>
* This is a nearly line by line conversion of the original C function. The numbered comments are from the algorithm
* description, the short C-style ones from the original C code and the ones with "Remark" from me.
* <p>
* See {@link Crypt#crypt(String, String)} for details.
*
* @param keyBytes
* plaintext to hash
* @param salt
* real salt value without prefix or "rounds="
* @param saltPrefix
* either $5$ or $6$
* @param blocksize
* a value that differs between $5$ and $6$
* @param algorithm
* {@link MessageDigest} algorithm identifier string
* @return complete hash value including prefix and salt
* @throws IllegalArgumentException
* if the given salt is <code>null</code> or does not match the allowed pattern
* @throws IllegalArgumentException
* when a {@link NoSuchAlgorithmException} is caught
* @see MessageDigestAlgorithms
*/
private static String sha2Crypt(final byte[] keyBytes, final String salt, final String saltPrefix,
final int blocksize, final String algorithm) {
final int keyLen = keyBytes.length;
// Extracts effective salt and the number of rounds from the given salt.
int rounds = ROUNDS_DEFAULT;
boolean roundsCustom = false;
if (salt == null) {
throw new IllegalArgumentException("Salt must not be null");
}
final Matcher m = SALT_PATTERN.matcher(salt);
if (m == null || !m.find()) {
throw new IllegalArgumentException("Invalid salt value: " + salt);
}
if (m.group(3) != null) {
rounds = Integer.parseInt(m.group(3));
rounds = Math.max(ROUNDS_MIN, Math.min(ROUNDS_MAX, rounds));
roundsCustom = true;
}
final String saltString = m.group(4);
final byte[] saltBytes = saltString.getBytes(Charsets.UTF_8);
final int saltLen = saltBytes.length;
// 1. start digest A
// Prepare for the real work.
MessageDigest ctx = DigestUtils.getDigest(algorithm);
// 2. the password string is added to digest A
/*
* Add the key string.
*/
ctx.update(keyBytes);
// 3. the salt string is added to digest A. This is just the salt string
// itself without the enclosing '$', without the magic salt_prefix $5$ and
// $6$ respectively and without the rounds=<N> specification.
//
// NB: the MD5 algorithm did add the $1$ salt_prefix. This is not deemed
// necessary since it is a constant string and does not add security
// and /possibly/ allows a plain text attack. Since the rounds=<N>
// specification should never be added this would also create an
// inconsistency.
/*
* The last part is the salt string. This must be at most 16 characters and it ends at the first `$' character
* (for compatibility with existing implementations).
*/
ctx.update(saltBytes);
// 4. start digest B
/*
* Compute alternate sha512 sum with input KEY, SALT, and KEY. The final result will be added to the first
* context.
*/
MessageDigest altCtx = DigestUtils.getDigest(algorithm);
// 5. add the password to digest B
/*
* Add key.
*/
altCtx.update(keyBytes);
// 6. add the salt string to digest B
/*
* Add salt.
*/
altCtx.update(saltBytes);
// 7. add the password again to digest B
/*
* Add key again.
*/
altCtx.update(keyBytes);
// 8. finish digest B
/*
* Now get result of this (32 bytes) and add it to the other context.
*/
byte[] altResult = altCtx.digest();
// 9. For each block of 32 or 64 bytes in the password string (excluding
// the terminating NUL in the C representation), add digest B to digest A
/*
* Add for any character in the key one byte of the alternate sum.
*/
/*
* (Remark: the C code comment seems wrong for key length > 32!)
*/
int cnt = keyBytes.length;
while (cnt > blocksize) {
ctx.update(altResult, 0, blocksize);
cnt -= blocksize;
}
// 10. For the remaining N bytes of the password string add the first
// N bytes of digest B to digest A
ctx.update(altResult, 0, cnt);
// 11. For each bit of the binary representation of the length of the
// password string up to and including the highest 1-digit, starting
// from to lowest bit position (numeric value 1):
//
// a) for a 1-digit add digest B to digest A
//
// b) for a 0-digit add the password string
//
// NB: this step differs significantly from the MD5 algorithm. It
// adds more randomness.
/*
* Take the binary representation of the length of the key and for every 1 add the alternate sum, for every 0
* the key.
*/
cnt = keyBytes.length;
while (cnt > 0) {
if ((cnt & 1) != 0) {
ctx.update(altResult, 0, blocksize);
} else {
ctx.update(keyBytes);
}
cnt >>= 1;
}
// 12. finish digest A
/*
* Create intermediate result.
*/
altResult = ctx.digest();
// 13. start digest DP
/*
* Start computation of P byte sequence.
*/
altCtx = DigestUtils.getDigest(algorithm);
// 14. for every byte in the password (excluding the terminating NUL byte
// in the C representation of the string)
//
// add the password to digest DP
/*
* For every character in the password add the entire password.
*/
for (int i = 1; i <= keyLen; i++) {
altCtx.update(keyBytes);
}
// 15. finish digest DP
/*
* Finish the digest.
*/
byte[] tempResult = altCtx.digest();
// 16. produce byte sequence P of the same length as the password where
//
// a) for each block of 32 or 64 bytes of length of the password string
// the entire digest DP is used
//
// b) for the remaining N (up to 31 or 63) bytes use the first N
// bytes of digest DP
/*
* Create byte sequence P.
*/
final byte[] pBytes = new byte[keyLen];
int cp = 0;
while (cp < keyLen - blocksize) {
System.arraycopy(tempResult, 0, pBytes, cp, blocksize);
cp += blocksize;
}
System.arraycopy(tempResult, 0, pBytes, cp, keyLen - cp);
// 17. start digest DS
/*
* Start computation of S byte sequence.
*/
altCtx = DigestUtils.getDigest(algorithm);
// 18. repeast the following 16+A[0] times, where A[0] represents the first
// byte in digest A interpreted as an 8-bit unsigned value
//
// add the salt to digest DS
/*
* For every character in the password add the entire password.
*/
for (int i = 1; i <= 16 + (altResult[0] & 0xff); i++) {
altCtx.update(saltBytes);
}
// 19. finish digest DS
/*
* Finish the digest.
*/
tempResult = altCtx.digest();
// 20. produce byte sequence S of the same length as the salt string where
//
// a) for each block of 32 or 64 bytes of length of the salt string
// the entire digest DS is used
//
// b) for the remaining N (up to 31 or 63) bytes use the first N
// bytes of digest DS
/*
* Create byte sequence S.
*/
// Remark: The salt is limited to 16 chars, how does this make sense?
final byte[] sBytes = new byte[saltLen];
cp = 0;
while (cp < saltLen - blocksize) {
System.arraycopy(tempResult, 0, sBytes, cp, blocksize);
cp += blocksize;
}
System.arraycopy(tempResult, 0, sBytes, cp, saltLen - cp);
// 21. repeat a loop according to the number specified in the rounds=<N>
// specification in the salt (or the default value if none is
// present). Each round is numbered, starting with 0 and up to N-1.
//
// The loop uses a digest as input. In the first round it is the
// digest produced in step 12. In the latter steps it is the digest
// produced in step 21.h. The following text uses the notation
// "digest A/C" to describe this behavior.
/*
* Repeatedly run the collected hash value through sha512 to burn CPU cycles.
*/
for (int i = 0; i <= rounds - 1; i++) {
// a) start digest C
/*
* New context.
*/
ctx = DigestUtils.getDigest(algorithm);
// b) for odd round numbers add the byte sequense P to digest C
// c) for even round numbers add digest A/C
/*
* Add key or last result.
*/
if ((i & 1) != 0) {
ctx.update(pBytes, 0, keyLen);
} else {
ctx.update(altResult, 0, blocksize);
}
// d) for all round numbers not divisible by 3 add the byte sequence S
/*
* Add salt for numbers not divisible by 3.
*/
if (i % 3 != 0) {
ctx.update(sBytes, 0, saltLen);
}
// e) for all round numbers not divisible by 7 add the byte sequence P
/*
* Add key for numbers not divisible by 7.
*/
if (i % 7 != 0) {
ctx.update(pBytes, 0, keyLen);
}
// f) for odd round numbers add digest A/C
// g) for even round numbers add the byte sequence P
/*
* Add key or last result.
*/
if ((i & 1) != 0) {
ctx.update(altResult, 0, blocksize);
} else {
ctx.update(pBytes, 0, keyLen);
}
// h) finish digest C.
/*
* Create intermediate result.
*/
altResult = ctx.digest();
}
// 22. Produce the output string. This is an ASCII string of the maximum
// size specified above, consisting of multiple pieces:
//
// a) the salt salt_prefix, $5$ or $6$ respectively
//
// b) the rounds=<N> specification, if one was present in the input
// salt string. A trailing '$' is added in this case to separate
// the rounds specification from the following text.
//
// c) the salt string truncated to 16 characters
//
// d) a '$' character
/*
* Now we can construct the result string. It consists of three parts.
*/
final StringBuilder buffer = new StringBuilder(saltPrefix);
if (roundsCustom) {
buffer.append(ROUNDS_PREFIX);
buffer.append(rounds);
buffer.append("$");
}
buffer.append(saltString);
buffer.append("$");
// e) the base-64 encoded final C digest. The encoding used is as
// follows:
// [...]
//
// Each group of three bytes from the digest produces four
// characters as output:
//
// 1. character: the six low bits of the first byte
// 2. character: the two high bits of the first byte and the
// four low bits from the second byte
// 3. character: the four high bits from the second byte and
// the two low bits from the third byte
// 4. character: the six high bits from the third byte
//
// The groups of three bytes are as follows (in this sequence).
// These are the indices into the byte array containing the
// digest, starting with index 0. For the last group there are
// not enough bytes left in the digest and the value zero is used
// in its place. This group also produces only three or two
// characters as output for SHA-256 and SHA-512 respectively.
// This was just a safeguard in the C implementation:
// int buflen = salt_prefix.length() - 1 + ROUNDS_PREFIX.length() + 9 + 1 + salt_string.length() + 1 + 86 + 1;
if (blocksize == 32) {
B64.b64from24bit(altResult[0], altResult[10], altResult[20], 4, buffer);
B64.b64from24bit(altResult[21], altResult[1], altResult[11], 4, buffer);
B64.b64from24bit(altResult[12], altResult[22], altResult[2], 4, buffer);
B64.b64from24bit(altResult[3], altResult[13], altResult[23], 4, buffer);
B64.b64from24bit(altResult[24], altResult[4], altResult[14], 4, buffer);
B64.b64from24bit(altResult[15], altResult[25], altResult[5], 4, buffer);
B64.b64from24bit(altResult[6], altResult[16], altResult[26], 4, buffer);
B64.b64from24bit(altResult[27], altResult[7], altResult[17], 4, buffer);
B64.b64from24bit(altResult[18], altResult[28], altResult[8], 4, buffer);
B64.b64from24bit(altResult[9], altResult[19], altResult[29], 4, buffer);
B64.b64from24bit((byte) 0, altResult[31], altResult[30], 3, buffer);
} else {
B64.b64from24bit(altResult[0], altResult[21], altResult[42], 4, buffer);
B64.b64from24bit(altResult[22], altResult[43], altResult[1], 4, buffer);
B64.b64from24bit(altResult[44], altResult[2], altResult[23], 4, buffer);
B64.b64from24bit(altResult[3], altResult[24], altResult[45], 4, buffer);
B64.b64from24bit(altResult[25], altResult[46], altResult[4], 4, buffer);
B64.b64from24bit(altResult[47], altResult[5], altResult[26], 4, buffer);
B64.b64from24bit(altResult[6], altResult[27], altResult[48], 4, buffer);
B64.b64from24bit(altResult[28], altResult[49], altResult[7], 4, buffer);
B64.b64from24bit(altResult[50], altResult[8], altResult[29], 4, buffer);
B64.b64from24bit(altResult[9], altResult[30], altResult[51], 4, buffer);
B64.b64from24bit(altResult[31], altResult[52], altResult[10], 4, buffer);
B64.b64from24bit(altResult[53], altResult[11], altResult[32], 4, buffer);
B64.b64from24bit(altResult[12], altResult[33], altResult[54], 4, buffer);
B64.b64from24bit(altResult[34], altResult[55], altResult[13], 4, buffer);
B64.b64from24bit(altResult[56], altResult[14], altResult[35], 4, buffer);
B64.b64from24bit(altResult[15], altResult[36], altResult[57], 4, buffer);
B64.b64from24bit(altResult[37], altResult[58], altResult[16], 4, buffer);
B64.b64from24bit(altResult[59], altResult[17], altResult[38], 4, buffer);
B64.b64from24bit(altResult[18], altResult[39], altResult[60], 4, buffer);
B64.b64from24bit(altResult[40], altResult[61], altResult[19], 4, buffer);
B64.b64from24bit(altResult[62], altResult[20], altResult[41], 4, buffer);
B64.b64from24bit((byte) 0, (byte) 0, altResult[63], 2, buffer);
}
/*
* Clear the buffer for the intermediate result so that people attaching to processes or reading core dumps
* cannot get any information.
*/
// Is there a better way to do this with the JVM?
Arrays.fill(tempResult, (byte) 0);
Arrays.fill(pBytes, (byte) 0);
Arrays.fill(sBytes, (byte) 0);
ctx.reset();
altCtx.reset();
Arrays.fill(keyBytes, (byte) 0);
Arrays.fill(saltBytes, (byte) 0);
return buffer.toString();
}
/**
 * Computes a libc crypt() compatible "$6$" hash of the given key without a caller-supplied salt.
 * <p>
 * This simply forwards to {@link #sha512Crypt(byte[], String)} with a {@code null} salt, which makes that
 * method generate a random one. See {@link Crypt#crypt(String, String)} for details.
 *
 * @param keyBytes
 *            plaintext to hash
 * @return complete hash value
 * @throws RuntimeException
 *             when a {@link java.security.NoSuchAlgorithmException} is caught.
 */
public static String sha512Crypt(final byte[] keyBytes) {
    final String noSalt = null;
    return sha512Crypt(keyBytes, noSalt);
}
/**
 * Computes a libc6 crypt() compatible "$6$" hash of the given key.
 * <p>
 * See {@link Crypt#crypt(String, String)} for details.
 *
 * @param keyBytes
 *            plaintext to hash
 * @param salt
 *            real salt value without prefix or "rounds="; when {@code null}, a salt is built from the "$6$"
 *            prefix constant followed by {@code B64.getRandomSalt(8)}
 * @return complete hash value including salt
 * @throws IllegalArgumentException
 *             if the salt does not match the allowed pattern
 * @throws RuntimeException
 *             when a {@link java.security.NoSuchAlgorithmException} is caught.
 */
public static String sha512Crypt(final byte[] keyBytes, String salt) {
    // Fall back to a freshly generated salt only when the caller supplied none.
    final String effectiveSalt = (salt != null) ? salt : SHA512_PREFIX + B64.getRandomSalt(8);
    return sha2Crypt(keyBytes, effectiveSalt, SHA512_PREFIX, SHA512_BLOCKSIZE, MessageDigestAlgorithms.SHA_512);
}
}

View File

@ -0,0 +1,413 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.digest;
import java.util.Random;
import org.apache.commons.codec.Charsets;
/**
* Unix crypt(3) algorithm implementation.
* <p>
* This class only implements the traditional 56 bit DES based algorithm. Please use DigestUtils.crypt() for a method
* that distinguishes between all the algorithms supported in the current glibc's crypt().
* <p>
* The Java implementation was taken from the JetSpeed Portal project (see
* org.apache.jetspeed.services.security.ldap.UnixCrypt).
* <p>
* This class is slightly incompatible if the given salt contains characters that are not part of the allowed range
* [a-zA-Z0-9./].
* <p>
* This class is immutable and thread-safe.
*
* @version $Id$
* @since 1.7
*/
public class UnixCrypt {
/**
 * Lookup table indexed by the character code of a salt character; {@code crypt(byte[], String)} reads one
 * entry for each of the first two salt characters.
 */
private static final int CON_SALT[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 0, 0, 0, 0 };
/**
 * Maps a 6-bit value to the character code written into the hash: '.' (46), '/' (47), the digits (48-57),
 * the upper-case letters (65-90) and the lower-case letters (97-122), in that order.
 */
private static final int COV2CHAR[] = { 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102,
103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122 };
/** Characters from which a random two-character salt is drawn when the caller passes no salt. */
private static final char SALT_CHARS[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./"
.toCharArray();
/**
 * One flag per iteration of the loop in {@code desSetKey}: {@code true} rotates the key halves by two bits,
 * {@code false} by one bit.
 */
private static final boolean SHIFT2[] = { false, false, true, true, true, true, true, true, false, true, true,
true, true, true, true, false };
/**
 * Eight lookup tables used only by {@code desSetKey}. Each table is indexed with a 6-bit value assembled from
 * bit groups of the key halves: tables 0-3 from {@code c}, tables 4-7 from {@code d}. The selected entries are
 * ORed together to form the key schedule words.
 */
private static final int SKB[][] = {
{ 0, 16, 0x20000000, 0x20000010, 0x10000, 0x10010, 0x20010000, 0x20010010, 2048, 2064, 0x20000800,
0x20000810, 0x10800, 0x10810, 0x20010800, 0x20010810, 32, 48, 0x20000020, 0x20000030, 0x10020,
0x10030, 0x20010020, 0x20010030, 2080, 2096, 0x20000820, 0x20000830, 0x10820, 0x10830, 0x20010820,
0x20010830, 0x80000, 0x80010, 0x20080000, 0x20080010, 0x90000, 0x90010, 0x20090000, 0x20090010,
0x80800, 0x80810, 0x20080800, 0x20080810, 0x90800, 0x90810, 0x20090800, 0x20090810, 0x80020,
0x80030, 0x20080020, 0x20080030, 0x90020, 0x90030, 0x20090020, 0x20090030, 0x80820, 0x80830,
0x20080820, 0x20080830, 0x90820, 0x90830, 0x20090820, 0x20090830 },
{ 0, 0x2000000, 8192, 0x2002000, 0x200000, 0x2200000, 0x202000, 0x2202000, 4, 0x2000004, 8196, 0x2002004,
0x200004, 0x2200004, 0x202004, 0x2202004, 1024, 0x2000400, 9216, 0x2002400, 0x200400, 0x2200400,
0x202400, 0x2202400, 1028, 0x2000404, 9220, 0x2002404, 0x200404, 0x2200404, 0x202404, 0x2202404,
0x10000000, 0x12000000, 0x10002000, 0x12002000, 0x10200000, 0x12200000, 0x10202000, 0x12202000,
0x10000004, 0x12000004, 0x10002004, 0x12002004, 0x10200004, 0x12200004, 0x10202004, 0x12202004,
0x10000400, 0x12000400, 0x10002400, 0x12002400, 0x10200400, 0x12200400, 0x10202400, 0x12202400,
0x10000404, 0x12000404, 0x10002404, 0x12002404, 0x10200404, 0x12200404, 0x10202404, 0x12202404 },
{ 0, 1, 0x40000, 0x40001, 0x1000000, 0x1000001, 0x1040000, 0x1040001, 2, 3, 0x40002, 0x40003, 0x1000002,
0x1000003, 0x1040002, 0x1040003, 512, 513, 0x40200, 0x40201, 0x1000200, 0x1000201, 0x1040200,
0x1040201, 514, 515, 0x40202, 0x40203, 0x1000202, 0x1000203, 0x1040202, 0x1040203, 0x8000000,
0x8000001, 0x8040000, 0x8040001, 0x9000000, 0x9000001, 0x9040000, 0x9040001, 0x8000002, 0x8000003,
0x8040002, 0x8040003, 0x9000002, 0x9000003, 0x9040002, 0x9040003, 0x8000200, 0x8000201, 0x8040200,
0x8040201, 0x9000200, 0x9000201, 0x9040200, 0x9040201, 0x8000202, 0x8000203, 0x8040202, 0x8040203,
0x9000202, 0x9000203, 0x9040202, 0x9040203 },
{ 0, 0x100000, 256, 0x100100, 8, 0x100008, 264, 0x100108, 4096, 0x101000, 4352, 0x101100, 4104, 0x101008,
4360, 0x101108, 0x4000000, 0x4100000, 0x4000100, 0x4100100, 0x4000008, 0x4100008, 0x4000108,
0x4100108, 0x4001000, 0x4101000, 0x4001100, 0x4101100, 0x4001008, 0x4101008, 0x4001108, 0x4101108,
0x20000, 0x120000, 0x20100, 0x120100, 0x20008, 0x120008, 0x20108, 0x120108, 0x21000, 0x121000,
0x21100, 0x121100, 0x21008, 0x121008, 0x21108, 0x121108, 0x4020000, 0x4120000, 0x4020100,
0x4120100, 0x4020008, 0x4120008, 0x4020108, 0x4120108, 0x4021000, 0x4121000, 0x4021100, 0x4121100,
0x4021008, 0x4121008, 0x4021108, 0x4121108 },
{ 0, 0x10000000, 0x10000, 0x10010000, 4, 0x10000004, 0x10004, 0x10010004, 0x20000000, 0x30000000,
0x20010000, 0x30010000, 0x20000004, 0x30000004, 0x20010004, 0x30010004, 0x100000, 0x10100000,
0x110000, 0x10110000, 0x100004, 0x10100004, 0x110004, 0x10110004, 0x20100000, 0x30100000,
0x20110000, 0x30110000, 0x20100004, 0x30100004, 0x20110004, 0x30110004, 4096, 0x10001000, 0x11000,
0x10011000, 4100, 0x10001004, 0x11004, 0x10011004, 0x20001000, 0x30001000, 0x20011000, 0x30011000,
0x20001004, 0x30001004, 0x20011004, 0x30011004, 0x101000, 0x10101000, 0x111000, 0x10111000,
0x101004, 0x10101004, 0x111004, 0x10111004, 0x20101000, 0x30101000, 0x20111000, 0x30111000,
0x20101004, 0x30101004, 0x20111004, 0x30111004 },
{ 0, 0x8000000, 8, 0x8000008, 1024, 0x8000400, 1032, 0x8000408, 0x20000, 0x8020000, 0x20008, 0x8020008,
0x20400, 0x8020400, 0x20408, 0x8020408, 1, 0x8000001, 9, 0x8000009, 1025, 0x8000401, 1033,
0x8000409, 0x20001, 0x8020001, 0x20009, 0x8020009, 0x20401, 0x8020401, 0x20409, 0x8020409,
0x2000000, 0xa000000, 0x2000008, 0xa000008, 0x2000400, 0xa000400, 0x2000408, 0xa000408, 0x2020000,
0xa020000, 0x2020008, 0xa020008, 0x2020400, 0xa020400, 0x2020408, 0xa020408, 0x2000001, 0xa000001,
0x2000009, 0xa000009, 0x2000401, 0xa000401, 0x2000409, 0xa000409, 0x2020001, 0xa020001, 0x2020009,
0xa020009, 0x2020401, 0xa020401, 0x2020409, 0xa020409 },
{ 0, 256, 0x80000, 0x80100, 0x1000000, 0x1000100, 0x1080000, 0x1080100, 16, 272, 0x80010, 0x80110,
0x1000010, 0x1000110, 0x1080010, 0x1080110, 0x200000, 0x200100, 0x280000, 0x280100, 0x1200000,
0x1200100, 0x1280000, 0x1280100, 0x200010, 0x200110, 0x280010, 0x280110, 0x1200010, 0x1200110,
0x1280010, 0x1280110, 512, 768, 0x80200, 0x80300, 0x1000200, 0x1000300, 0x1080200, 0x1080300, 528,
784, 0x80210, 0x80310, 0x1000210, 0x1000310, 0x1080210, 0x1080310, 0x200200, 0x200300, 0x280200,
0x280300, 0x1200200, 0x1200300, 0x1280200, 0x1280300, 0x200210, 0x200310, 0x280210, 0x280310,
0x1200210, 0x1200310, 0x1280210, 0x1280310 },
{ 0, 0x4000000, 0x40000, 0x4040000, 2, 0x4000002, 0x40002, 0x4040002, 8192, 0x4002000, 0x42000, 0x4042000,
8194, 0x4002002, 0x42002, 0x4042002, 32, 0x4000020, 0x40020, 0x4040020, 34, 0x4000022, 0x40022,
0x4040022, 8224, 0x4002020, 0x42020, 0x4042020, 8226, 0x4002022, 0x42022, 0x4042022, 2048,
0x4000800, 0x40800, 0x4040800, 2050, 0x4000802, 0x40802, 0x4040802, 10240, 0x4002800, 0x42800,
0x4042800, 10242, 0x4002802, 0x42802, 0x4042802, 2080, 0x4000820, 0x40820, 0x4040820, 2082,
0x4000822, 0x40822, 0x4040822, 10272, 0x4002820, 0x42820, 0x4042820, 10274, 0x4002822, 0x42822,
0x4042822 } };
/**
 * Eight lookup tables used only by {@code dEncrypt}. Each table is indexed with a 6-bit field of one of the two
 * intermediate words computed there: the odd-numbered tables from {@code t}, the even-numbered tables from
 * {@code u}. The eight selected entries are ORed together.
 */
private static final int SPTRANS[][] = {
{ 0x820200, 0x20000, 0x80800000, 0x80820200, 0x800000, 0x80020200, 0x80020000, 0x80800000, 0x80020200,
0x820200, 0x820000, 0x80000200, 0x80800200, 0x800000, 0, 0x80020000, 0x20000, 0x80000000,
0x800200, 0x20200, 0x80820200, 0x820000, 0x80000200, 0x800200, 0x80000000, 512, 0x20200,
0x80820000, 512, 0x80800200, 0x80820000, 0, 0, 0x80820200, 0x800200, 0x80020000, 0x820200,
0x20000, 0x80000200, 0x800200, 0x80820000, 512, 0x20200, 0x80800000, 0x80020200, 0x80000000,
0x80800000, 0x820000, 0x80820200, 0x20200, 0x820000, 0x80800200, 0x800000, 0x80000200, 0x80020000,
0, 0x20000, 0x800000, 0x80800200, 0x820200, 0x80000000, 0x80820000, 512, 0x80020200 },
{ 0x10042004, 0, 0x42000, 0x10040000, 0x10000004, 8196, 0x10002000, 0x42000, 8192, 0x10040004, 4,
0x10002000, 0x40004, 0x10042000, 0x10040000, 4, 0x40000, 0x10002004, 0x10040004, 8192, 0x42004,
0x10000000, 0, 0x40004, 0x10002004, 0x42004, 0x10042000, 0x10000004, 0x10000000, 0x40000, 8196,
0x10042004, 0x40004, 0x10042000, 0x10002000, 0x42004, 0x10042004, 0x40004, 0x10000004, 0,
0x10000000, 8196, 0x40000, 0x10040004, 8192, 0x10000000, 0x42004, 0x10002004, 0x10042000, 8192, 0,
0x10000004, 4, 0x10042004, 0x42000, 0x10040000, 0x10040004, 0x40000, 8196, 0x10002000, 0x10002004,
4, 0x10040000, 0x42000 },
{ 0x41000000, 0x1010040, 64, 0x41000040, 0x40010000, 0x1000000, 0x41000040, 0x10040, 0x1000040, 0x10000,
0x1010000, 0x40000000, 0x41010040, 0x40000040, 0x40000000, 0x41010000, 0, 0x40010000, 0x1010040,
64, 0x40000040, 0x41010040, 0x10000, 0x41000000, 0x41010000, 0x1000040, 0x40010040, 0x1010000,
0x10040, 0, 0x1000000, 0x40010040, 0x1010040, 64, 0x40000000, 0x10000, 0x40000040, 0x40010000,
0x1010000, 0x41000040, 0, 0x1010040, 0x10040, 0x41010000, 0x40010000, 0x1000000, 0x41010040,
0x40000000, 0x40010040, 0x41000000, 0x1000000, 0x41010040, 0x10000, 0x1000040, 0x41000040,
0x10040, 0x1000040, 0, 0x41010000, 0x40000040, 0x41000000, 0x40010040, 64, 0x1010000 },
{ 0x100402, 0x4000400, 2, 0x4100402, 0, 0x4100000, 0x4000402, 0x100002, 0x4100400, 0x4000002, 0x4000000,
1026, 0x4000002, 0x100402, 0x100000, 0x4000000, 0x4100002, 0x100400, 1024, 2, 0x100400, 0x4000402,
0x4100000, 1024, 1026, 0, 0x100002, 0x4100400, 0x4000400, 0x4100002, 0x4100402, 0x100000,
0x4100002, 1026, 0x100000, 0x4000002, 0x100400, 0x4000400, 2, 0x4100000, 0x4000402, 0, 1024,
0x100002, 0, 0x4100002, 0x4100400, 1024, 0x4000000, 0x4100402, 0x100402, 0x100000, 0x4100402, 2,
0x4000400, 0x100402, 0x100002, 0x100400, 0x4100000, 0x4000402, 1026, 0x4000000, 0x4000002,
0x4100400 },
{ 0x2000000, 16384, 256, 0x2004108, 0x2004008, 0x2000100, 16648, 0x2004000, 16384, 8, 0x2000008, 16640,
0x2000108, 0x2004008, 0x2004100, 0, 16640, 0x2000000, 16392, 264, 0x2000100, 16648, 0, 0x2000008,
8, 0x2000108, 0x2004108, 16392, 0x2004000, 256, 264, 0x2004100, 0x2004100, 0x2000108, 16392,
0x2004000, 16384, 8, 0x2000008, 0x2000100, 0x2000000, 16640, 0x2004108, 0, 16648, 0x2000000, 256,
16392, 0x2000108, 256, 0, 0x2004108, 0x2004008, 0x2004100, 264, 16384, 16640, 0x2004008,
0x2000100, 264, 8, 16648, 0x2004000, 0x2000008 },
{ 0x20000010, 0x80010, 0, 0x20080800, 0x80010, 2048, 0x20000810, 0x80000, 2064, 0x20080810, 0x80800,
0x20000000, 0x20000800, 0x20000010, 0x20080000, 0x80810, 0x80000, 0x20000810, 0x20080010, 0, 2048,
16, 0x20080800, 0x20080010, 0x20080810, 0x20080000, 0x20000000, 2064, 16, 0x80800, 0x80810,
0x20000800, 2064, 0x20000000, 0x20000800, 0x80810, 0x20080800, 0x80010, 0, 0x20000800, 0x20000000,
2048, 0x20080010, 0x80000, 0x80010, 0x20080810, 0x80800, 16, 0x20080810, 0x80800, 0x80000,
0x20000810, 0x20000010, 0x20080000, 0x80810, 0, 2048, 0x20000010, 0x20000810, 0x20080800,
0x20080000, 2064, 16, 0x20080010 },
{ 4096, 128, 0x400080, 0x400001, 0x401081, 4097, 4224, 0, 0x400000, 0x400081, 129, 0x401000, 1, 0x401080,
0x401000, 129, 0x400081, 4096, 4097, 0x401081, 0, 0x400080, 0x400001, 4224, 0x401001, 4225,
0x401080, 1, 4225, 0x401001, 128, 0x400000, 4225, 0x401000, 0x401001, 129, 4096, 128, 0x400000,
0x401001, 0x400081, 4225, 4224, 0, 128, 0x400001, 1, 0x400080, 0, 0x400081, 0x400080, 4224, 129,
4096, 0x401081, 0x400000, 0x401080, 1, 4097, 0x401081, 0x400001, 0x401080, 0x401000, 4097 },
{ 0x8200020, 0x8208000, 32800, 0, 0x8008000, 0x200020, 0x8200000, 0x8208020, 32, 0x8000000, 0x208000,
32800, 0x208020, 0x8008020, 0x8000020, 0x8200000, 32768, 0x208020, 0x200020, 0x8008000, 0x8208020,
0x8000020, 0, 0x208000, 0x8000000, 0x200000, 0x8008020, 0x8200020, 0x200000, 32768, 0x8208000, 32,
0x200000, 32768, 0x8000020, 0x8208020, 32800, 0x8000000, 0, 0x208000, 0x8200020, 0x8008020,
0x8008000, 0x200020, 0x8208000, 32, 0x200020, 0x8008000, 0x8208020, 0x200000, 0x8200000,
0x8000020, 0x208000, 32800, 0x8008020, 0x8200000, 32, 0x8208000, 0x208020, 0, 0x8000000,
0x8200020, 32768, 0x208020 } };
/**
 * Produces a crypt(3) compatible DES hash of the given bytes.
 * <p>
 * No salt is supplied here; the call is forwarded with a {@code null} salt so that a random one is chosen.
 *
 * @param original
 *            plaintext password
 * @return a 13 character string starting with the salt string
 */
public static String crypt(final byte[] original) {
    final String randomSalt = null;
    return crypt(original, randomSalt);
}
/**
 * Generates a crypt(3) compatible hash using the DES algorithm.
 * <p>
 * Using unspecified characters as salt results in incompatible hash values.
 * <p>
 * Only the first eight bytes of {@code original} and the first two characters of {@code salt} are used.
 *
 * @param original
 *            plaintext password
 * @param salt
 *            a two character string drawn from [a-zA-Z0-9./] or null for a random one
 * @return a 13 character string starting with the salt string
 * @throws IllegalArgumentException
 *             if the salt does not match the allowed pattern
 */
public static String crypt(final byte[] original, String salt) {
    if (salt == null) {
        final Random randomGenerator = new Random();
        final int numSaltChars = SALT_CHARS.length;
        salt = "" + SALT_CHARS[randomGenerator.nextInt(numSaltChars)] +
                SALT_CHARS[randomGenerator.nextInt(numSaltChars)];
    } else if (!salt.matches("^[" + B64.B64T + "]{2,}$")) {
        throw new IllegalArgumentException("Invalid salt value: " + salt);
    }

    // The result is always 13 characters: 2 salt characters followed by 11 encoded characters.
    // A fixed-size array is used so that every index written below is guaranteed to exist.
    final char[] hash = new char[13];
    final char charZero = salt.charAt(0);
    final char charOne = salt.charAt(1);
    hash[0] = charZero;
    hash[1] = charOne;

    final int eSwap0 = CON_SALT[charZero];
    final int eSwap1 = CON_SALT[charOne] << 4;

    // new byte[8] is already zero-filled; copy at most eight password bytes, each shifted left by one bit.
    final byte[] key = new byte[8];
    for (int i = 0; i < key.length && i < original.length; i++) {
        key[i] = (byte) (original[i] << 1);
    }

    final int[] schedule = desSetKey(key);
    final int[] out = body(schedule, eSwap0, eSwap1);

    // Eight result bytes plus one trailing zero byte: 11 groups of 6 bits read 66 bits in total.
    final byte[] b = new byte[9];
    intToFourBytes(out[0], b, 0);
    intToFourBytes(out[1], b, 4);

    int y = 0;   // index of the byte currently being read
    int u = 128; // mask of the bit currently being read, most significant bit first
    for (int i = 2; i < hash.length; i++) {
        int c = 0;
        for (int j = 0; j < 6; j++) {
            c <<= 1;
            if ((b[y] & u) != 0) {
                c |= 0x1;
            }
            u >>>= 1;
            if (u == 0) {
                y++;
                u = 128;
            }
        }
        // Store the character once, after all six bits have been collected.
        hash[i] = (char) COV2CHAR[c];
    }
    return new String(hash);
}
/**
 * Produces a crypt(3) compatible DES hash of the given password.
 * <p>
 * The password is converted to UTF-8 bytes and hashed with a randomly chosen salt.
 *
 * @param original
 *            plaintext password
 * @return a 13 character string starting with the salt string
 */
public static String crypt(final String original) {
    final byte[] utf8 = original.getBytes(Charsets.UTF_8);
    return crypt(utf8);
}
/**
 * Produces a crypt(3) compatible DES hash of the given password with the given salt.
 * <p>
 * The password is converted to UTF-8 bytes before hashing.
 *
 * @param original
 *            plaintext password
 * @param salt
 *            a two character string drawn from [a-zA-Z0-9./] or null for a random one
 * @return a 13 character string starting with the salt string
 * @throws IllegalArgumentException
 *             if the salt does not match the allowed pattern
 */
public static String crypt(final String original, final String salt) {
    final byte[] utf8 = original.getBytes(Charsets.UTF_8);
    return crypt(utf8, salt);
}
/**
 * Runs the core transformation on an all-zero pair of 32-bit halves.
 * <p>
 * Twenty-five passes are made; each pass applies {@code dEncrypt} alternately to the two halves over the whole
 * schedule and then exchanges them. Afterwards the halves are exchanged and rotated right by one bit, and five
 * {@code permOp} steps are applied.
 *
 * @param schedule
 *            the 32-entry key schedule produced by {@code desSetKey}
 * @param eSwap0
 *            first salt-derived mask, passed through to {@code dEncrypt}
 * @param eSwap1
 *            second salt-derived mask, passed through to {@code dEncrypt}
 * @return a two-element array holding the final left half followed by the final right half
 */
private static int[] body(final int schedule[], final int eSwap0, final int eSwap1) {
    int left = 0;
    int right = 0;

    for (int pass = 0; pass < 25; pass++) {
        int idx = 0;
        while (idx < 32) {
            left = dEncrypt(left, right, idx, eSwap0, eSwap1, schedule);
            right = dEncrypt(right, left, idx + 2, eSwap0, eSwap1, schedule);
            idx += 4;
        }
        final int swapped = left;
        left = right;
        right = swapped;
    }

    // Exchange the halves once more, rotating each one right by a single bit.
    final int rotatedLeft = Integer.rotateRight(left, 1);
    final int rotatedRight = Integer.rotateRight(right, 1);
    left = rotatedRight;
    right = rotatedLeft;

    // permOp writes its two outputs into this scratch pair.
    final int[] pair = new int[2];

    permOp(right, left, 1, 0x55555555, pair);
    right = pair[0];
    left = pair[1];

    permOp(left, right, 8, 0x00ff00ff, pair);
    left = pair[0];
    right = pair[1];

    permOp(right, left, 2, 0x33333333, pair);
    right = pair[0];
    left = pair[1];

    permOp(left, right, 16, 0x0000ffff, pair);
    left = pair[0];
    right = pair[1];

    permOp(right, left, 4, 0x0f0f0f0f, pair);
    right = pair[0];
    left = pair[1];

    return new int[] { left, right };
}
/**
 * Converts a signed byte to its unsigned value.
 *
 * @param b
 *            the byte to convert
 * @return the value of {@code b} interpreted as unsigned, in the range 0 to 255
 */
private static int byteToUnsigned(final byte b) {
    // Masking with 0xff discards the sign extension, which equals adding 256 to negative values.
    return b & 0xff;
}
/**
 * Applies one table-driven mixing step to {@code el} and returns the result.
 * <p>
 * Two intermediate words are derived from {@code r}: {@code r ^ (r >>> 16)} is masked with {@code e0} and with
 * {@code e1}, and each masked value is XORed with itself shifted left by 16 bits, with {@code r}, and with one
 * schedule entry. Four 6-bit fields of each word select entries from the eight {@code SPTRANS} tables; the OR of
 * those entries is XORed into {@code el}.
 *
 * @param el
 *            the word to update
 * @param r
 *            the word from which the table indexes are derived
 * @param s
 *            index of the first of two consecutive schedule entries to use
 * @param e0
 *            mask applied to produce the first intermediate word
 * @param e1
 *            mask applied to produce the second intermediate word
 * @param sArr
 *            the key schedule; entries {@code s} and {@code s + 1} are read
 * @return {@code el} XORed with the combined table entries
 */
private static int dEncrypt(int el, final int r, final int s, final int e0, final int e1, final int sArr[]) {
// v = r ^ (r >>> 16): shift binds tighter than XOR.
int v = r ^ r >>> 16;
int u = v & e0;
v &= e1;
// u = u ^ (u << 16) ^ r ^ sArr[s]
u = u ^ u << 16 ^ r ^ sArr[s];
// t = v ^ (v << 16) ^ r ^ sArr[s + 1]
int t = v ^ v << 16 ^ r ^ sArr[s + 1];
// Rotate t right by four bits.
t = t >>> 4 | t << 28;
// Each index is (word >>> k) & 0x3f; odd-numbered tables are indexed from t, even-numbered ones from u.
el ^= SPTRANS[1][t & 0x3f] | SPTRANS[3][t >>> 8 & 0x3f] | SPTRANS[5][t >>> 16 & 0x3f] |
SPTRANS[7][t >>> 24 & 0x3f] | SPTRANS[0][u & 0x3f] | SPTRANS[2][u >>> 8 & 0x3f] |
SPTRANS[4][u >>> 16 & 0x3f] | SPTRANS[6][u >>> 24 & 0x3f];
return el;
}
/**
 * Builds the 32-entry key schedule from an 8-byte key.
 * <p>
 * The key is read as two 32-bit words ({@code c} from bytes 0-3, {@code d} from bytes 4-7), which are permuted
 * with {@code permOp} and {@code hPermOp} and reduced to 28 bits each. Sixteen iterations then rotate both
 * words and combine {@code SKB} table entries into two schedule entries per iteration.
 *
 * @param key
 *            the key bytes; bytes 0 through 7 are read
 * @return a new array of 32 schedule words
 */
private static int[] desSetKey(final byte key[]) {
final int schedule[] = new int[32];
int c = fourBytesToInt(key, 0);
int d = fourBytesToInt(key, 4);
// Scratch pair that receives the two outputs of each permOp call.
final int results[] = new int[2];
permOp(d, c, 4, 0xf0f0f0f, results);
d = results[0];
c = results[1];
// With n = -2 the shift distance inside hPermOp is 16 - (-2) = 18.
c = hPermOp(c, -2, 0xcccc0000);
d = hPermOp(d, -2, 0xcccc0000);
permOp(d, c, 1, 0x55555555, results);
d = results[0];
c = results[1];
permOp(c, d, 8, 0xff00ff, results);
c = results[0];
d = results[1];
permOp(d, c, 1, 0x55555555, results);
d = results[0];
c = results[1];
// Swap the lowest and third bytes of d, keep its second byte, and place the top four bits of c at bits 24-27.
d = (d & 0xff) << 16 | d & 0xff00 | (d & 0xff0000) >>> 16 | (c & 0xf0000000) >>> 4;
// Keep only the low 28 bits of c.
c &= 0xfffffff;
int j = 0;
for (int i = 0; i < 16; i++) {
// Rotate the 28-bit words right by two bits or one bit, as selected by SHIFT2[i].
if (SHIFT2[i]) {
c = c >>> 2 | c << 26;
d = d >>> 2 | d << 26;
} else {
c = c >>> 1 | c << 27;
d = d >>> 1 | d << 27;
}
// Drop the bits shifted above bit 27 so that c and d stay 28 bits wide.
c &= 0xfffffff;
d &= 0xfffffff;
// s combines SKB tables 0-3 indexed by bit groups of c; t combines tables 4-7 indexed by bit groups of d.
int s = SKB[0][c & 0x3f] | SKB[1][c >>> 6 & 0x3 | c >>> 7 & 0x3c] |
SKB[2][c >>> 13 & 0xf | c >>> 14 & 0x30] |
SKB[3][c >>> 20 & 0x1 | c >>> 21 & 0x6 | c >>> 22 & 0x38];
final int t = SKB[4][d & 0x3f] | SKB[5][d >>> 7 & 0x3 | d >>> 8 & 0x3c] | SKB[6][d >>> 15 & 0x3f] |
SKB[7][d >>> 21 & 0xf | d >>> 22 & 0x30];
// First entry of the pair: low 16 bits of s below the low 16 bits of t.
schedule[j++] = (t << 16 | s & 0xffff);
// Second entry: high 16 bits of s below the high 16 bits of t, then rotated left by four bits.
s = s >>> 16 | t & 0xffff0000;
s = s << 4 | s >>> 28;
schedule[j++] = s;
}
return schedule;
}
/**
 * Assembles an int from four consecutive bytes, least significant byte first.
 *
 * @param b
 *            the source array
 * @param offset
 *            index of the byte that becomes the lowest eight bits
 * @return the combined 32-bit value
 */
private static int fourBytesToInt(final byte[] b, int offset) {
    int value = 0;
    // Shifts 0, 8, 16, 24 place successive bytes at increasing significance.
    for (int shift = 0; shift < 32; shift += 8) {
        value |= byteToUnsigned(b[offset++]) << shift;
    }
    return value;
}
/**
 * Exchanges the bits of {@code a} selected by {@code m} with the bits lying {@code 16 - n} positions below them.
 *
 * @param a
 *            the word to permute
 * @param n
 *            offset subtracted from 16 to obtain the shift distance
 * @param m
 *            mask selecting the upper bit of each exchanged pair
 * @return the permuted word
 */
private static int hPermOp(int a, final int n, final int m) {
    final int distance = 16 - n;
    // Bits (under the mask) that differ between a and a shifted left by the distance.
    final int diff = ((a << distance) ^ a) & m;
    return a ^ diff ^ (diff >>> distance);
}
/**
 * Stores an int into four consecutive bytes, least significant byte first.
 *
 * @param iValue
 *            the value to store
 * @param b
 *            the destination array
 * @param offset
 *            index that receives the lowest eight bits; the next three indexes receive the rest
 */
private static void intToFourBytes(final int iValue, final byte[] b, int offset) {
    for (int shift = 0; shift < 32; shift += 8) {
        b[offset++] = (byte) (iValue >>> shift & 0xff);
    }
}
/**
 * Exchanges the bits of {@code b} selected by {@code m} with the bits of {@code a} lying {@code n} positions
 * higher, and writes both updated words to {@code results}.
 *
 * @param a
 *            first word
 * @param b
 *            second word
 * @param n
 *            shift distance between the exchanged bit positions
 * @param m
 *            mask selecting the bit positions of {@code b} that take part
 * @param results
 *            receives the updated {@code a} at index 0 and the updated {@code b} at index 1
 */
private static void permOp(int a, int b, final int n, final int m, final int[] results) {
    // Bits (under the mask) in which the shifted a and b differ.
    final int diff = ((a >>> n) ^ b) & m;
    results[0] = a ^ (diff << n);
    results[1] = b ^ diff;
}
}

View File

@ -0,0 +1,24 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
Simplifies common {@link java.security.MessageDigest} tasks and
includes a libc crypt(3) compatible crypt method that supports DES,
MD5, SHA-256 and SHA-512 based algorithms as well as the Apache
specific "$apr1$" variant.
</body>
</html>

View File

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
* Encodes a string into a Caverphone value.
*
* This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0
* algorithm:
*
* <p>This class is immutable and thread-safe.</p>
*
* @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $
* @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
* @since 1.5
*/
public abstract class AbstractCaverphone implements StringEncoder {
/**
* Creates an instance of the Caverphone encoder
*/
public AbstractCaverphone() {
super();
}
/**
* Encodes an Object using the caverphone algorithm. This method is provided in order to satisfy the requirements of
* the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String.
*
* @param source
* Object to encode
* @return An object (or type java.lang.String) containing the caverphone code which corresponds to the String
* supplied.
* @throws EncoderException
* if the parameter supplied is not of type java.lang.String
*/
@Override
public Object encode(final Object source) throws EncoderException {
if (!(source instanceof String)) {
throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String");
}
return this.encode((String) source);
}
/**
* Tests if the encodings of two strings are equal.
*
* This method might be promoted to a new AbstractStringEncoder superclass.
*
* @param str1
* First of two strings to compare
* @param str2
* Second of two strings to compare
* @return <code>true</code> if the encodings of these strings are identical, <code>false</code> otherwise.
* @throws EncoderException
* thrown if there is an error condition during the encoding process.
*/
public boolean isEncodeEqual(final String str1, final String str2) throws EncoderException {
return this.encode(str1).equals(this.encode(str2));
}
}

View File

@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
 * Encodes a string into a Caverphone 2.0 value by delegating to a {@link Caverphone2} instance.
 *
 * Caverphone is an algorithm created by the Caversham Project at the University of Otago.
 *
 * @version $Id: Caverphone.java 1079535 2011-03-08 20:54:37Z ggregory $
 * @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
 * @see <a href="http://caversham.otago.ac.nz/files/working/ctp150804.pdf">Caverphone 2.0 specification</a>
 * @since 1.4
 * @deprecated 1.5 Replaced by {@link Caverphone2}, will be removed in 2.0.
 */
@Deprecated
public class Caverphone implements StringEncoder {

    /**
     * The {@link Caverphone2} instance that does the actual encoding, so the algorithm is not duplicated here.
     */
    private final Caverphone2 encoder = new Caverphone2();

    /**
     * Creates an instance of the Caverphone encoder
     */
    public Caverphone() {
        super();
    }

    /**
     * Encodes the given String into a Caverphone value.
     *
     * @param source
     *            String the source string
     * @return A caverphone code for the given String
     */
    public String caverphone(final String source) {
        return this.encoder.encode(source);
    }

    /**
     * Encodes an Object using the caverphone algorithm. Only {@code java.lang.String} arguments are accepted; the
     * method exists to satisfy the Encoder interface.
     *
     * @param obj
     *            Object to encode
     * @return An object (of type java.lang.String) containing the caverphone code which corresponds to the String
     *         supplied.
     * @throws EncoderException
     *             if the parameter supplied is not of type java.lang.String
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj instanceof String) {
            return this.caverphone((String) obj);
        }
        throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String");
    }

    /**
     * Encodes a String using the Caverphone algorithm.
     *
     * @param str
     *            String object to encode
     * @return The caverphone code corresponding to the String supplied
     */
    @Override
    public String encode(final String str) {
        return this.caverphone(str);
    }

    /**
     * Tests if the caverphones of two strings are identical.
     *
     * @param str1
     *            First of two strings to compare
     * @param str2
     *            Second of two strings to compare
     * @return <code>true</code> if the caverphones of these strings are identical, <code>false</code> otherwise.
     */
    public boolean isCaverphoneEqual(final String str1, final String str2) {
        final String first = this.caverphone(str1);
        final String second = this.caverphone(str2);
        return first.equals(second);
    }
}

View File

@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
/**
 * Encodes a string into a Caverphone 1.0 value.
 *
 * <p>Caverphone is an algorithm created by the Caversham Project at the University of Otago. This class implements
 * version 1.0 of that algorithm.</p>
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $
 * @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
 * @see <a href="http://caversham.otago.ac.nz/files/working/ctp060902.pdf">Caverphone 1.0 specification</a>
 * @since 1.5
 */
public class Caverphone1 extends AbstractCaverphone {

    /** Padding appended to every code; its length (six) is also the length of the final code. */
    private static final String SIX_1 = "111111";

    /**
     * The ordered rewrite rules of the algorithm as {@code {regex, replacement}} pairs. They are applied one after
     * another with {@link String#replaceAll(String, String)}, so the order of the entries is significant.
     * <p>
     * {@code 2} is a temporary placeholder for a consonant that is no longer of interest, {@code 3} is a temporary
     * placeholder marking a vowel; both are removed by the last rules.
     * </p>
     */
    private static final String[][] REPLACEMENTS = {
        // 3. Handle various start options
        {"^cough", "cou2f"},
        {"^rough", "rou2f"},
        {"^tough", "tou2f"},
        {"^enough", "enou2f"},
        {"^gn", "2n"},
        // End of the word
        {"mb$", "m2"},
        // 4. Handle replacements
        {"cq", "2q"},
        {"ci", "si"},
        {"ce", "se"},
        {"cy", "sy"},
        {"tch", "2ch"},
        {"c", "k"},
        {"q", "k"},
        {"x", "k"},
        {"v", "f"},
        {"dg", "2g"},
        {"tio", "sio"},
        {"tia", "sia"},
        {"d", "t"},
        {"ph", "fh"},
        {"b", "p"},
        {"sh", "s2"},
        {"z", "s"},
        {"^[aeiou]", "A"},
        {"[aeiou]", "3"},
        {"3gh3", "3kh3"},
        {"gh", "22"},
        {"g", "k"},
        {"s+", "S"},
        {"t+", "T"},
        {"p+", "P"},
        {"k+", "K"},
        {"f+", "F"},
        {"m+", "M"},
        {"n+", "N"},
        {"w3", "W3"},
        {"wy", "Wy"}, // 1.0 only
        {"wh3", "Wh3"},
        {"why", "Why"}, // 1.0 only
        {"w", "2"},
        {"^h", "A"},
        {"h", "2"},
        {"r3", "R3"},
        {"ry", "Ry"}, // 1.0 only
        {"r", "2"},
        {"l3", "L3"},
        {"ly", "Ly"}, // 1.0 only
        {"l", "2"},
        {"j", "y"}, // 1.0 only
        {"y3", "Y3"}, // 1.0 only
        {"y", "2"}, // 1.0 only
        // 5. Handle removals
        {"2", ""},
        {"3", ""},
    };

    /**
     * Encodes the given String into a Caverphone 1.0 value.
     *
     * @param source
     *            the String to encode; {@code null} and the empty string both yield {@code "111111"}
     * @return the six character caverphone code for the given String
     */
    @Override
    public String encode(final String source) {
        if (source == null || source.length() == 0) {
            return SIX_1;
        }

        // 1. Convert to lowercase, 2. remove anything that is not a-z
        String txt = source.toLowerCase(java.util.Locale.ENGLISH).replaceAll("[^a-z]", "");

        // 3. - 5. Apply the rewrite rules in their declared order
        for (final String[] rule : REPLACEMENTS) {
            txt = txt.replaceAll(rule[0], rule[1]);
        }

        // 6. put six 1s on the end, 7. take the first six characters as the code
        return (txt + SIX_1).substring(0, SIX_1.length());
    }
}

View File

@ -0,0 +1,131 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
/**
 * Encodes a string into a Caverphone 2.0 value.
 *
 * <p>Caverphone is an algorithm created by the Caversham Project at the University of Otago. This class implements
 * version 2.0 of that algorithm.</p>
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $
 * @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
 * @see <a href="http://caversham.otago.ac.nz/files/working/ctp150804.pdf">Caverphone 2.0 specification</a>
 * @since 1.5
 */
public class Caverphone2 extends AbstractCaverphone {

    /** Padding appended to every code; its length (ten) is also the length of the final code. */
    private static final String TEN_1 = "1111111111";

    /**
     * The ordered rewrite rules of the algorithm as {@code {regex, replacement}} pairs. They are applied one after
     * another with {@link String#replaceAll(String, String)}, so the order of the entries is significant.
     * <p>
     * {@code 2} and {@code 3} are temporary placeholders that are removed (or, for a trailing {@code 3}, turned
     * into {@code A}) by the last rules.
     * </p>
     */
    private static final String[][] REPLACEMENTS = {
        // 2.5. Remove final e
        {"e$", ""}, // 2.0 only
        // 3. Handle various start options
        {"^cough", "cou2f"},
        {"^rough", "rou2f"},
        {"^tough", "tou2f"},
        {"^enough", "enou2f"},
        // note the spec says ^enough here again, presumably a copy and paste error
        {"^trough", "trou2f"}, // 2.0 only
        {"^gn", "2n"},
        // End of the word
        {"mb$", "m2"},
        // 4. Handle replacements
        {"cq", "2q"},
        {"ci", "si"},
        {"ce", "se"},
        {"cy", "sy"},
        {"tch", "2ch"},
        {"c", "k"},
        {"q", "k"},
        {"x", "k"},
        {"v", "f"},
        {"dg", "2g"},
        {"tio", "sio"},
        {"tia", "sia"},
        {"d", "t"},
        {"ph", "fh"},
        {"b", "p"},
        {"sh", "s2"},
        {"z", "s"},
        {"^[aeiou]", "A"},
        {"[aeiou]", "3"},
        {"j", "y"}, // 2.0 only
        {"^y3", "Y3"}, // 2.0 only
        {"^y", "A"}, // 2.0 only
        {"y", "3"}, // 2.0 only
        {"3gh3", "3kh3"},
        {"gh", "22"},
        {"g", "k"},
        {"s+", "S"},
        {"t+", "T"},
        {"p+", "P"},
        {"k+", "K"},
        {"f+", "F"},
        {"m+", "M"},
        {"n+", "N"},
        {"w3", "W3"},
        {"wh3", "Wh3"},
        {"w$", "3"}, // 2.0 only
        {"w", "2"},
        {"^h", "A"},
        {"h", "2"},
        {"r3", "R3"},
        {"r$", "3"}, // 2.0 only
        {"r", "2"},
        {"l3", "L3"},
        {"l$", "3"}, // 2.0 only
        {"l", "2"},
        // 5. Handle removals
        {"2", ""},
        {"3$", "A"}, // 2.0 only
        {"3", ""},
    };

    /**
     * Encodes the given String into a Caverphone 2.0 value.
     *
     * @param source
     *            the String to encode; {@code null} and the empty string both yield {@code "1111111111"}
     * @return the ten character caverphone code for the given String
     */
    @Override
    public String encode(final String source) {
        if (source == null || source.length() == 0) {
            return TEN_1;
        }

        // 1. Convert to lowercase, 2. remove anything that is not a-z
        String txt = source.toLowerCase(java.util.Locale.ENGLISH).replaceAll("[^a-z]", "");

        // 2.5. - 5. Apply the rewrite rules in their declared order
        for (final String[] rule : REPLACEMENTS) {
            txt = txt.replaceAll(rule[0], rule[1]);
        }

        // 6. put ten 1s on the end, 7. take the first ten characters as the code
        return (txt + TEN_1).substring(0, TEN_1.length());
    }
}

View File

@ -0,0 +1,445 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import java.util.Locale;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
* Encodes a string into a Cologne Phonetic value.
* <p>
* Implements the <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">K&ouml;lner Phonetik</a> (Cologne
* Phonetic) algorithm issued by Hans Joachim Postel in 1969.
* </p>
* <p>
* The <i>K&ouml;lner Phonetik</i> is a phonetic algorithm which is optimized for the German language. It is related to
* the well-known soundex algorithm.
* </p>
*
* <h2>Algorithm</h2>
*
* <ul>
*
* <li>
* <h3>Step 1:</h3>
* After preprocessing (conversion to upper case, transcription of <a
* href="http://en.wikipedia.org/wiki/Germanic_umlaut">germanic umlauts</a>, removal of non alphabetical characters) the
* letters of the supplied text are replaced by their phonetic code according to the following table.
* <table border="1">
* <caption style="caption-side: bottom"><small><i>(Source: <a
* href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik#Buchstabencodes">Wikipedia (de): K&ouml;lner Phonetik --
* Buchstabencodes</a>)</i></small></caption> <tbody>
* <tr>
* <th>Letter</th>
* <th>Context</th>
* <th align="center">Code</th>
* </tr>
* <tr>
* <td>A, E, I, J, O, U, Y</td>
* <td></td>
* <td align="center">0</td>
* </tr>
* <tr>
*
* <td>H</td>
* <td></td>
* <td align="center">-</td>
* </tr>
* <tr>
* <td>B</td>
* <td></td>
* <td rowspan="2" align="center">1</td>
* </tr>
* <tr>
* <td>P</td>
* <td>not before H</td>
*
* </tr>
* <tr>
* <td>D, T</td>
* <td>not before C, S, Z</td>
* <td align="center">2</td>
* </tr>
* <tr>
* <td>F, V, W</td>
* <td></td>
* <td rowspan="2" align="center">3</td>
* </tr>
* <tr>
*
* <td>P</td>
* <td>before H</td>
* </tr>
* <tr>
* <td>G, K, Q</td>
* <td></td>
* <td rowspan="3" align="center">4</td>
* </tr>
* <tr>
* <td rowspan="2">C</td>
* <td>at onset before A, H, K, L, O, Q, R, U, X</td>
*
* </tr>
* <tr>
* <td>before A, H, K, O, Q, U, X except after S, Z</td>
* </tr>
* <tr>
* <td>X</td>
* <td>not after C, K, Q</td>
* <td align="center">48</td>
* </tr>
* <tr>
* <td>L</td>
* <td></td>
*
* <td align="center">5</td>
* </tr>
* <tr>
* <td>M, N</td>
* <td></td>
* <td align="center">6</td>
* </tr>
* <tr>
* <td>R</td>
* <td></td>
* <td align="center">7</td>
* </tr>
*
* <tr>
* <td>S, Z</td>
* <td></td>
* <td rowspan="6" align="center">8</td>
* </tr>
* <tr>
* <td rowspan="3">C</td>
* <td>after S, Z</td>
* </tr>
* <tr>
* <td>at onset except before A, H, K, L, O, Q, R, U, X</td>
* </tr>
*
* <tr>
* <td>not before A, H, K, O, Q, U, X</td>
* </tr>
* <tr>
* <td>D, T</td>
* <td>before C, S, Z</td>
* </tr>
* <tr>
* <td>X</td>
* <td>after C, K, Q</td>
* </tr>
* </tbody>
* </table>
*
* <h4>Example:</h4>
*
* <code>"M</code>&uuml;<code>ller-L</code>&uuml;
* <code>denscheidt" =&gt; "MULLERLUDENSCHEIDT" =&gt; "6005507500206880022"</code>
*
* </li>
*
* <li>
* <h3>Step 2:</h3>
* Collapse of all multiple consecutive code digits.
* <h4>Example:</h4>
* <code>"6005507500206880022" =&gt; "6050750206802"</code></li>
*
* <li>
* <h3>Step 3:</h3>
* Removal of all codes "0" except at the beginning. This means that two or more identical consecutive digits can occur
* if they occur after removing the "0" digits.
*
* <h4>Example:</h4>
* <code>"6050750206802" =&gt; "65752682"</code></li>
*
* </ul>
*
* <p>
* This class is thread-safe.
* </p>
*
* @see <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">Wikipedia (de): K&ouml;lner Phonetik (in German)</a>
* @since 1.5
*/
public class ColognePhonetic implements StringEncoder {

    // Predefined char arrays for better performance and less GC load
    private static final char[] AEIJOUY = new char[] { 'A', 'E', 'I', 'J', 'O', 'U', 'Y' };
    private static final char[] SCZ = new char[] { 'S', 'C', 'Z' };
    private static final char[] WFPV = new char[] { 'W', 'F', 'P', 'V' };
    private static final char[] GKQ = new char[] { 'G', 'K', 'Q' };
    private static final char[] CKQ = new char[] { 'C', 'K', 'Q' };
    private static final char[] AHKLOQRUX = new char[] { 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X' };
    private static final char[] SZ = new char[] { 'S', 'Z' };
    private static final char[] AHOUKQX = new char[] { 'A', 'H', 'O', 'U', 'K', 'Q', 'X' };
    private static final char[] TDX = new char[] { 'T', 'D', 'X' };

    /**
     * Common base of the input and output buffers: a fixed char array plus the number of chars currently in use.
     * <p>
     * This class is not thread-safe; the field {@link #length} is mutable.
     * However, it is not shared between threads, as it is constructed on demand
     * by the method {@link ColognePhonetic#colognePhonetic(String)}
     * </p>
     * <p>
     * The buffers are static nested classes because they never touch the enclosing encoder instance.
     * </p>
     */
    private abstract static class CologneBuffer {

        protected final char[] data;

        protected int length = 0;

        /** Wraps an existing array and treats all of its chars as content. */
        public CologneBuffer(final char[] data) {
            this.data = data;
            this.length = data.length;
        }

        /** Allocates an empty buffer with room for {@code buffSize} chars. */
        public CologneBuffer(final int buffSize) {
            this.data = new char[buffSize];
            this.length = 0;
        }

        /** Copies {@code length} chars of the logical content, starting at logical index {@code start}. */
        protected abstract char[] copyData(int start, final int length);

        public int length() {
            return length;
        }

        @Override
        public String toString() {
            return new String(copyData(0, length));
        }
    }

    /** Buffer that is filled from the left; the content occupies {@code data[0 .. length)}. */
    private static class CologneOutputBuffer extends CologneBuffer {

        public CologneOutputBuffer(final int buffSize) {
            super(buffSize);
        }

        /** Appends one char behind the current content. */
        public void addRight(final char chr) {
            data[length] = chr;
            length++;
        }

        @Override
        protected char[] copyData(final int start, final int length) {
            final char[] newData = new char[length];
            System.arraycopy(data, start, newData, 0, length);
            return newData;
        }
    }

    /**
     * Buffer that is consumed from the left; the remaining content occupies the last {@code length} slots of
     * {@code data}, so a consumed char frees the slot that {@link #addLeft(char)} can reuse.
     */
    private static class CologneInputBuffer extends CologneBuffer {

        public CologneInputBuffer(final char[] data) {
            super(data);
        }

        /** Pushes one char in front of the remaining content; it becomes the next char to be read. */
        public void addLeft(final char ch) {
            length++;
            data[getNextPos()] = ch;
        }

        @Override
        protected char[] copyData(final int start, final int length) {
            final char[] newData = new char[length];
            System.arraycopy(data, data.length - this.length + start, newData, 0, length);
            return newData;
        }

        /** Returns the next unread char without consuming it. */
        public char getNextChar() {
            return data[getNextPos()];
        }

        /** Array index of the next unread char. */
        protected int getNextPos() {
            return data.length - length;
        }

        /** Consumes and returns the next unread char. */
        public char removeNext() {
            final char ch = getNextChar();
            length--;
            return ch;
        }
    }

    /**
     * Maps some Germanic characters to plain for internal processing. The following characters are mapped:
     * <ul>
     * <li>capital a, umlaut mark</li>
     * <li>capital u, umlaut mark</li>
     * <li>capital o, umlaut mark</li>
     * <li>small sharp s, German</li>
     * </ul>
     */
    private static final char[][] PREPROCESS_MAP = new char[][]{
        {'\u00C4', 'A'}, // capital a, umlaut mark
        {'\u00DC', 'U'}, // capital u, umlaut mark
        {'\u00D6', 'O'}, // capital o, umlaut mark
        {'\u00DF', 'S'}  // small sharp s, German
    };

    /*
     * Returns whether the array contains the key, or not.
     */
    private static boolean arrayContains(final char[] arr, final char key) {
        for (final char element : arr) {
            if (element == key) {
                return true;
            }
        }
        return false;
    }

    /**
     * <p>
     * Implements the <i>K&ouml;lner Phonetik</i> algorithm.
     * </p>
     * <p>
     * In contrast to the initial description of the algorithm, this implementation does the encoding in one pass.
     * </p>
     *
     * @param text The source text to encode, may be {@code null}
     * @return the corresponding encoding according to the <i>K&ouml;lner Phonetik</i> algorithm, or {@code null} if
     *         {@code text} is {@code null}
     */
    public String colognePhonetic(String text) {
        if (text == null) {
            return null;
        }

        text = preprocess(text);

        // A single input char produces at most two code chars ('X' yields "48"), hence twice the input length.
        final CologneOutputBuffer output = new CologneOutputBuffer(text.length() * 2);
        final CologneInputBuffer input = new CologneInputBuffer(text.toCharArray());

        char nextChar;

        // '-' stands for "no char" / "ignored char", '/' for "no code has been produced yet" (onset of the word).
        char lastChar = '-';
        char lastCode = '/';
        char code;
        char chr;

        int rightLength = input.length();

        while (rightLength > 0) {
            chr = input.removeNext();

            if ((rightLength = input.length()) > 0) {
                nextChar = input.getNextChar();
            } else {
                nextChar = '-';
            }

            if (arrayContains(AEIJOUY, chr)) {
                code = '0';
            } else if (chr == 'H' || chr < 'A' || chr > 'Z') {
                if (lastCode == '/') {
                    // still at the onset: skip the char entirely, lastChar and lastCode stay untouched
                    continue;
                }
                code = '-';
            } else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) {
                code = '1';
            } else if ((chr == 'D' || chr == 'T') && !arrayContains(SCZ, nextChar)) {
                code = '2';
            } else if (arrayContains(WFPV, chr)) {
                code = '3';
            } else if (arrayContains(GKQ, chr)) {
                code = '4';
            } else if (chr == 'X' && !arrayContains(CKQ, lastChar)) {
                code = '4';
                // push an 'S' back so that the next iteration emits the '8' of the "48" code
                input.addLeft('S');
                rightLength++;
            } else if (chr == 'S' || chr == 'Z') {
                code = '8';
            } else if (chr == 'C') {
                if (lastCode == '/') {
                    if (arrayContains(AHKLOQRUX, nextChar)) {
                        code = '4';
                    } else {
                        code = '8';
                    }
                } else {
                    if (arrayContains(SZ, lastChar) || !arrayContains(AHOUKQX, nextChar)) {
                        code = '8';
                    } else {
                        code = '4';
                    }
                }
            } else if (arrayContains(TDX, chr)) {
                code = '8';
            } else if (chr == 'R') {
                code = '7';
            } else if (chr == 'L') {
                code = '5';
            } else if (chr == 'M' || chr == 'N') {
                code = '6';
            } else {
                code = chr;
            }

            // Emit the code unless it is the "ignored" marker. A code in '0'..'8' is emitted only if it differs
            // from the previous code, and a '0' additionally only at the onset; anything else is always emitted.
            if (code != '-' && (lastCode != code && (code != '0' || lastCode == '/') || code < '0' || code > '8')) {
                output.addRight(code);
            }

            lastChar = chr;
            lastCode = code;
        }
        return output.toString();
    }

    /**
     * Encodes an Object using the Cologne Phonetic algorithm; only {@code java.lang.String} arguments are accepted.
     *
     * @param object
     *            the Object to encode, which has to be a String
     * @return the encoded String
     * @throws EncoderException
     *             if {@code object} is not a String; this includes {@code null}, which is reported with the
     *             type name {@code null} instead of failing with a NullPointerException
     */
    @Override
    public Object encode(final Object object) throws EncoderException {
        if (!(object instanceof String)) {
            // instanceof is false for null, so guard the getClass() call used for the message
            final String actualType = object == null ? "null" : object.getClass().getName();
            throw new EncoderException("This method's parameter was expected to be of the type " +
                String.class.getName() +
                ". But actually it was of the type " +
                actualType +
                ".");
        }
        return encode((String) object);
    }

    /**
     * Encodes a String using the Cologne Phonetic algorithm.
     *
     * @param text
     *            the String to encode, may be {@code null}
     * @return the encoded String, or {@code null} if {@code text} is {@code null}
     */
    @Override
    public String encode(final String text) {
        return colognePhonetic(text);
    }

    /**
     * Checks whether two texts have the same Cologne Phonetic encoding.
     *
     * @param text1
     *            first text of the comparison, may be {@code null}
     * @param text2
     *            second text of the comparison, may be {@code null}
     * @return {@code true} if both texts produce the same encoding; a {@code null} text encodes to {@code null} and
     *         is therefore only equal to another {@code null} text
     */
    public boolean isEncodeEqual(final String text1, final String text2) {
        final String code1 = colognePhonetic(text1);
        final String code2 = colognePhonetic(text2);
        // colognePhonetic(null) returns null, so avoid dereferencing the first code blindly
        return code1 == null ? code2 == null : code1.equals(code2);
    }

    /**
     * Converts the string to upper case and replaces germanic characters as defined in {@link #PREPROCESS_MAP}.
     */
    private String preprocess(String text) {
        text = text.toUpperCase(Locale.GERMAN);

        final char[] chrs = text.toCharArray();

        for (int index = 0; index < chrs.length; index++) {
            // only chars beyond 'Z' can have an entry in the map
            if (chrs[index] > 'Z') {
                for (final char[] element : PREPROCESS_MAP) {
                    if (chrs[index] == element[0]) {
                        chrs[index] = element[1];
                        break;
                    }
                }
            }
        }
        return new String(chrs);
    }
}

View File

@ -0,0 +1,561 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
* Encodes a string into a Daitch-Mokotoff Soundex value.
* <p>
* The Daitch-Mokotoff Soundex algorithm is a refinement of the Russell and American Soundex algorithms, yielding greater
* accuracy in matching especially Slavic and Yiddish surnames with similar pronunciation but differences in spelling.
* </p>
* <p>
* The main differences compared to the other soundex variants are:
* </p>
* <ul>
* <li>coded names are 6 digits long
* <li>the initial character of the name is coded
* <li>rules to encode multi-character n-grams
* <li>multiple possible encodings for the same name (branching)
* </ul>
* <p>
* This implementation supports branching, depending on the used method:
* <ul>
* <li>{@link #encode(String)} - branching disabled, only the first code will be returned
* <li>{@link #soundex(String)} - branching enabled, all codes will be returned, separated by '|'
* </ul>
* <p>
* Note: this implementation has additional branching rules compared to the original description of the algorithm. The
* rules can be customized by overriding the default rules contained in the resource file
* {@code org/apache/commons/codec/language/dmrules.txt}.
* </p>
* <p>
* This class is thread-safe.
* </p>
*
* @see Soundex
* @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a>
* @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a>
*
* @version $Id$
* @since 1.10
*/
public class DaitchMokotoffSoundex implements StringEncoder {
/**
 * Inner class representing a branch during DM soundex encoding: the code built so far, the replacement that was
 * processed last, and a cached String view of the code.
 */
private static final class Branch {

    /** The code accumulated so far. */
    private final StringBuilder builder = new StringBuilder();

    /** Cached result of {@code builder.toString()}; reset to {@code null} whenever the builder changes. */
    private String cachedString = null;

    /** The replacement handed to the most recent {@link #processNextReplacement(String, boolean)} call. */
    private String lastReplacement = null;

    private Branch() {
        // all fields are initialised at their declaration
    }

    /**
     * Creates a new branch that carries the same code and the same last replacement as this one.
     *
     * @return a new, identical branch
     */
    public Branch createBranch() {
        final Branch copy = new Branch();
        copy.builder.append(toString());
        copy.lastReplacement = this.lastReplacement;
        return copy;
    }

    /** Two branches are equal when their codes are equal. */
    @Override
    public boolean equals(final Object other) {
        if (this == other) {
            return true;
        }
        if (other instanceof Branch) {
            return toString().equals(((Branch) other).toString());
        }
        return false;
    }

    /**
     * Pads this branch with '0's until the maximum code length has been reached.
     */
    public void finish() {
        final int missing = MAX_LENGTH - builder.length();
        if (missing > 0) {
            for (int i = 0; i < missing; i++) {
                builder.append('0');
            }
            cachedString = null;
        }
    }

    @Override
    public int hashCode() {
        return toString().hashCode();
    }

    /**
     * Processes the next replacement to be added to this branch. The replacement is skipped when the previously
     * processed replacement ends with it, unless {@code forceAppend} is set; nothing is appended once the code
     * has reached its maximum length.
     *
     * @param replacement
     *            the next replacement to append
     * @param forceAppend
     *            indicates if the default processing shall be overridden
     */
    public void processNextReplacement(final String replacement, final boolean forceAppend) {
        final boolean repeated = lastReplacement != null && lastReplacement.endsWith(replacement);
        final boolean append = !repeated || forceAppend;

        if (append && builder.length() < MAX_LENGTH) {
            builder.append(replacement);
            // cut off everything beyond the maximum length
            if (builder.length() > MAX_LENGTH) {
                builder.setLength(MAX_LENGTH);
            }
            cachedString = null;
        }

        lastReplacement = replacement;
    }

    @Override
    public String toString() {
        String result = cachedString;
        if (result == null) {
            result = builder.toString();
            cachedString = result;
        }
        return result;
    }
}
/**
 * Inner class for storing rules: a pattern and the three replacement sets that apply at the start of the input,
 * before a vowel, and in all other positions. Each replacement set is given as one String with the alternatives
 * separated by '|'.
 */
private static final class Rule {

    private final String pattern;
    private final String[] replacementAtStart;
    private final String[] replacementBeforeVowel;
    private final String[] replacementDefault;

    protected Rule(final String pattern, final String replacementAtStart, final String replacementBeforeVowel,
            final String replacementDefault) {
        this.pattern = pattern;
        this.replacementAtStart = splitAlternatives(replacementAtStart);
        this.replacementBeforeVowel = splitAlternatives(replacementBeforeVowel);
        this.replacementDefault = splitAlternatives(replacementDefault);
    }

    /** Splits a '|' separated replacement String into its alternatives. */
    private static String[] splitAlternatives(final String replacements) {
        return replacements.split("\\|");
    }

    public int getPatternLength() {
        return pattern.length();
    }

    /**
     * Selects the replacements for this rule.
     *
     * @param context
     *            the remaining input, beginning with the text matched by this rule
     * @param atStart
     *            whether the match is located at the start of the input
     * @return the start replacements if {@code atStart}, the before-vowel replacements if the char following the
     *         pattern in {@code context} is a vowel, the default replacements otherwise
     */
    public String[] getReplacements(final String context, final boolean atStart) {
        if (atStart) {
            return replacementAtStart;
        }

        final int followingIndex = getPatternLength();
        if (followingIndex < context.length() && isVowel(context.charAt(followingIndex))) {
            return replacementBeforeVowel;
        }

        return replacementDefault;
    }

    /** Only the lower case letters a, e, i, o and u count as vowels. */
    private boolean isVowel(final char ch) {
        switch (ch) {
            case 'a':
            case 'e':
            case 'i':
            case 'o':
            case 'u':
                return true;
            default:
                return false;
        }
    }

    public boolean matches(final String context) {
        return context.startsWith(pattern);
    }

    @Override
    public String toString() {
        return String.format("%s=(%s,%s,%s)", pattern, Arrays.asList(replacementAtStart),
                Arrays.asList(replacementBeforeVowel), Arrays.asList(replacementDefault));
    }
}
private static final String COMMENT = "//";
private static final String DOUBLE_QUOTE = "\"";
private static final String MULTILINE_COMMENT_END = "*/";
private static final String MULTILINE_COMMENT_START = "/*";
/** The resource file containing the replacement and folding rules */
private static final String RESOURCE_FILE = "org/apache/commons/codec/language/dmrules.txt";
/** The code length of a DM soundex value. */
private static final int MAX_LENGTH = 6;
/** Transformation rules indexed by the first character of their pattern. */
private static final Map<Character, List<Rule>> RULES = new HashMap<Character, List<Rule>>();
/** Folding rules. */
private static final Map<Character, Character> FOLDINGS = new HashMap<Character, Character>();
// Loads the rules and foldings from the resource file on the class path, then orders each rule list so that
// longer patterns come first.
static {
    final InputStream rulesIS = DaitchMokotoffSoundex.class.getClassLoader().getResourceAsStream(RESOURCE_FILE);
    if (rulesIS == null) {
        throw new IllegalArgumentException("Unable to load resource: " + RESOURCE_FILE);
    }

    final Scanner scanner = new Scanner(rulesIS, CharEncoding.UTF_8);
    try {
        parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
    } finally {
        // close the scanner (and with it the resource stream) even if the rule file is malformed
        scanner.close();
    }

    // sort RULES by pattern length in descending order
    for (final List<Rule> ruleList : RULES.values()) {
        Collections.sort(ruleList, new Comparator<Rule>() {
            @Override
            public int compare(final Rule rule1, final Rule rule2) {
                // pattern lengths are non-negative ints, so the subtraction cannot overflow
                return rule2.getPatternLength() - rule1.getPatternLength();
            }
        });
    }
}
/**
 * Reads rule and folding definitions line by line from the scanner.
 * <p>
 * Lines inside a multi-line comment are skipped; a comment is opened by a line that starts with the multi-line
 * start marker and closed by a later line that ends with the end marker. Of all other lines everything behind a
 * line comment marker is dropped and the rest is trimmed; empty results are skipped. A line containing '=' is a
 * folding statement ({@code char=char}), every other line is a rule made of four whitespace separated parts:
 * the pattern and its three replacement sets.
 * </p>
 *
 * @param scanner
 *            the source of the lines
 * @param location
 *            name of the source, only used in error messages
 * @param ruleMapping
 *            receives the rules, keyed by the first char of their pattern
 * @param asciiFoldings
 *            receives the folding statements
 * @throws IllegalArgumentException
 *             if a folding or rule statement is malformed
 * @throws IllegalStateException
 *             if building or registering a rule fails with an IllegalArgumentException
 */
private static void parseRules(final Scanner scanner, final String location,
        final Map<Character, List<Rule>> ruleMapping, final Map<Character, Character> asciiFoldings) {
    int currentLine = 0;
    boolean inMultilineComment = false;

    while (scanner.hasNextLine()) {
        currentLine++;
        final String rawLine = scanner.nextLine();

        if (inMultilineComment) {
            if (rawLine.endsWith(MULTILINE_COMMENT_END)) {
                inMultilineComment = false;
            }
            continue;
        }

        if (rawLine.startsWith(MULTILINE_COMMENT_START)) {
            inMultilineComment = true;
            continue;
        }

        // discard comments, then trim leading-trailing whitespace
        final int commentIndex = rawLine.indexOf(COMMENT);
        final String line = (commentIndex >= 0 ? rawLine.substring(0, commentIndex) : rawLine).trim();

        if (line.length() == 0) {
            continue; // empty lines can be safely skipped
        }

        if (line.contains("=")) {
            // folding
            final String[] foldingParts = line.split("=");
            if (foldingParts.length != 2) {
                throw new IllegalArgumentException("Malformed folding statement split into " + foldingParts.length +
                        " parts: " + rawLine + " in " + location);
            }

            final String leftCharacter = foldingParts[0];
            final String rightCharacter = foldingParts[1];
            if (leftCharacter.length() != 1 || rightCharacter.length() != 1) {
                throw new IllegalArgumentException("Malformed folding statement - " +
                        "patterns are not single characters: " + rawLine + " in " + location);
            }

            asciiFoldings.put(leftCharacter.charAt(0), rightCharacter.charAt(0));
            continue;
        }

        // rule
        final String[] ruleParts = line.split("\\s+");
        if (ruleParts.length != 4) {
            throw new IllegalArgumentException("Malformed rule statement split into " + ruleParts.length +
                    " parts: " + rawLine + " in " + location);
        }

        try {
            final String pattern = stripQuotes(ruleParts[0]);
            final String replacement1 = stripQuotes(ruleParts[1]);
            final String replacement2 = stripQuotes(ruleParts[2]);
            final String replacement3 = stripQuotes(ruleParts[3]);

            final Rule r = new Rule(pattern, replacement1, replacement2, replacement3);
            final char patternKey = r.pattern.charAt(0);

            List<Rule> rules = ruleMapping.get(patternKey);
            if (rules == null) {
                rules = new ArrayList<Rule>();
                ruleMapping.put(patternKey, rules);
            }
            rules.add(r);
        } catch (final IllegalArgumentException e) {
            throw new IllegalStateException(
                    "Problem parsing line '" + currentLine + "' in " + location, e);
        }
    }
}
/**
 * Removes one leading and one trailing double quote from the given String, each only if present. The trailing
 * quote is looked for after the leading one has been removed.
 *
 * @param str
 *            the String to strip
 * @return the String without its surrounding double quotes
 */
private static String stripQuotes(String str) {
    final String withoutLeading = str.startsWith(DOUBLE_QUOTE) ? str.substring(1) : str;
    return withoutLeading.endsWith(DOUBLE_QUOTE)
            ? withoutLeading.substring(0, withoutLeading.length() - 1)
            : withoutLeading;
}
/** Whether to use ASCII folding prior to encoding; when set, input characters are mapped through FOLDINGS. */
private final boolean folding;
/**
* Creates a new instance with ASCII-folding enabled.
* <p>
* Equivalent to {@code new DaitchMokotoffSoundex(true)}.
* </p>
*/
public DaitchMokotoffSoundex() {
this(true);
}
/**
* Creates a new instance.
* <p>
* With ASCII-folding enabled, certain accented characters will be transformed to equivalent ASCII characters, e.g.
* è -&gt; e.
* </p>
*
* @param folding
* if ASCII-folding shall be performed before encoding
*/
public DaitchMokotoffSoundex(final boolean folding) {
this.folding = folding;
}
/**
 * Prepares the input string for the actual soundex transformation.
 * <p>
 * Whitespace characters are dropped, every remaining character is converted to lower case and, if folding is
 * enabled and a folding is registered for it, replaced by its folded counterpart.
 * </p>
 *
 * @param input
 *            the input string to cleanup
 * @return a cleaned up string
 */
private String cleanup(final String input) {
    final StringBuilder cleaned = new StringBuilder();
    final int inputLength = input.length();

    for (int i = 0; i < inputLength; i++) {
        final char raw = input.charAt(i);
        if (!Character.isWhitespace(raw)) {
            char current = Character.toLowerCase(raw);
            if (folding && FOLDINGS.containsKey(current)) {
                current = FOLDINGS.get(current);
            }
            cleaned.append(current);
        }
    }

    return cleaned.toString();
}
/**
 * Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
 * <p>
 * This overload exists to satisfy the Encoder interface; only {@code java.lang.String} arguments are accepted and
 * they are handed on to {@link #encode(String)}.
 * </p>
 *
 * @see #soundex(String)
 *
 * @param obj
 *            Object to encode, which has to be a String
 * @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String
 *         supplied.
 * @throws EncoderException
 *             if the parameter supplied is not of type java.lang.String
 * @throws IllegalArgumentException
 *             if a character is not mapped (as stated by the original documentation; not verifiable from this
 *             method alone)
 */
@Override
public Object encode(final Object obj) throws EncoderException {
    if (obj instanceof String) {
        return encode((String) obj);
    }
    throw new EncoderException(
            "Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String");
}
/**
 * Encodes a String using the Daitch-Mokotoff soundex algorithm without branching, i.e. only the first code
 * computed for the String is returned.
 *
 * @see #soundex(String)
 *
 * @param source
 *            A String object to encode, may be {@code null}
 * @return A DM Soundex code corresponding to the String supplied, or {@code null} if {@code source} is
 *         {@code null}
 * @throws IllegalArgumentException
 *             if a character is not mapped (as stated by the original documentation; not verifiable from this
 *             method alone)
 */
@Override
public String encode(final String source) {
    return source == null ? null : soundex(source, false)[0];
}
/**
 * Encodes a String using the Daitch-Mokotoff soundex algorithm with branching.
 * <p>
 * In case a string is encoded into multiple codes (see branching rules), the result will contain all codes,
 * separated by '|'.
 * </p>
 * <p>
 * Example: the name "AUERBACH" is encoded as both
 * </p>
 * <ul>
 * <li>097400</li>
 * <li>097500</li>
 * </ul>
 * <p>
 * Thus the result will be "097400|097500".
 * </p>
 *
 * @param source
 *            A String object to encode, may be {@code null}
 * @return A string containing a set of DM Soundex codes corresponding to the String supplied, or {@code null} if
 *         {@code source} is {@code null}
 * @throws IllegalArgumentException
 *             if a character is not mapped (as stated by the original documentation; not verifiable from this
 *             method alone)
 */
public String soundex(final String source) {
    final String[] branches = soundex(source, true);
    if (branches == null) {
        // the two-argument overload returns null for a null source; report that as null, like encode(String),
        // instead of failing with a NullPointerException in the loop below
        return null;
    }

    final StringBuilder sb = new StringBuilder();
    for (int index = 0; index < branches.length; index++) {
        if (index > 0) {
            sb.append('|');
        }
        sb.append(branches[index]);
    }
    return sb.toString();
}
/**
* Performs the actual DM Soundex algorithm on the input string.
* <p>
* The input is first passed through <code>cleanup</code>. Starting from a single empty branch, the cleaned input
* is scanned from left to right. At each position the rules registered in <code>RULES</code> for the current
* character are tried in order; the first rule that matches the remaining input is applied to every current
* branch and the scan then skips the characters covered by that rule's pattern. Whitespace and characters for
* which no rules are registered are skipped.
* </p>
* <p>
* With branching enabled, a matching rule that offers more than one replacement creates a new branch (via
* <code>Branch.createBranch()</code>) for every combination of current branch and replacement. Without
* branching only the first replacement of a rule is applied and the single initial branch is reused, so the
* returned array has exactly one element.
* </p>
*
* @param source
* A String object to encode, may be <code>null</code>
* @param branching
* If branching shall be performed
* @return A string array containing all DM Soundex codes corresponding to the String supplied depending on the
* selected branching mode, or <code>null</code> if <code>source</code> is <code>null</code>
*/
private String[] soundex(final String source, final boolean branching) {
if (source == null) {
return null;
}
final String input = cleanup(source);
// insertion-ordered set, so the resulting codes keep the order in which branches were created
final Set<Branch> currentBranches = new LinkedHashSet<Branch>();
currentBranches.add(new Branch());
// '\0' marks that no character has been processed yet
char lastChar = '\0';
for (int index = 0; index < input.length(); index++) {
final char ch = input.charAt(index);
// ignore whitespace inside a name
if (Character.isWhitespace(ch)) {
continue;
}
// rules are matched against everything from the current position to the end of the input
final String inputContext = input.substring(index);
final List<Rule> rules = RULES.get(ch);
// no rules registered for this character: skip it (lastChar is left unchanged)
if (rules == null) {
continue;
}
// use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
// (the shared EMPTY_LIST is never modified: every clear()/add() below is guarded by 'branching')
@SuppressWarnings("unchecked")
final List<Branch> nextBranches = branching ? new ArrayList<Branch>() : Collections.EMPTY_LIST;
for (final Rule rule : rules) {
if (rule.matches(inputContext)) {
// the list was created for this position and only one rule is applied per position (see the
// break at the end of this block), so this reset is purely defensive
if (branching) {
nextBranches.clear();
}
// second argument is true only while no character has been processed yet
// NOTE(review): presumably selects the start-of-word replacements - confirm against Rule
final String[] replacements = rule.getReplacements(inputContext, lastChar == '\0');
final boolean branchingRequired = replacements.length > 1 && branching;
for (final Branch branch : currentBranches) {
for (final String nextReplacement : replacements) {
// if we have multiple replacements, always create a new branch
final Branch nextBranch = branchingRequired ? branch.createBranch() : branch;
// special rule: occurrences of mn or nm are treated differently
final boolean force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm');
nextBranch.processNextReplacement(nextReplacement, force);
if (branching) {
nextBranches.add(nextBranch);
} else {
// without branching only the first replacement is ever used
break;
}
}
}
// replace the current branches by the ones produced at this position; currentBranches is a Set,
// so branches that are equal (per Branch equality) collapse into one
if (branching) {
currentBranches.clear();
currentBranches.addAll(nextBranches);
}
// skip the rest of the matched pattern; the loop's own index++ accounts for its first character
index += rule.getPatternLength() - 1;
break;
}
}
// remember the character this position started with (not the last character of a longer pattern)
lastChar = ch;
}
// finish every remaining branch and collect its string form
final String[] result = new String[currentBranches.size()];
int index = 0;
for (final Branch branch : currentBranches) {
branch.finish();
result[index++] = branch.toString();
}
return result;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,426 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import java.util.Locale;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
 * Match Rating Approach Phonetic Algorithm Developed by <CITE>Western Airlines</CITE> in 1977.
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @see <a href="http://en.wikipedia.org/wiki/Match_rating_approach">Wikipedia - Match Rating Approach</a>
 * @since 1.8
 */
public class MatchRatingApproachEncoder implements StringEncoder {

    private static final String SPACE = " ";

    private static final String EMPTY = "";

    /**
     * Constants used mainly for the min rating value.
     */
    private static final int ONE = 1, TWO = 2, THREE = 3, FOUR = 4, FIVE = 5, SIX = 6, SEVEN = 7, EIGHT = 8,
            ELEVEN = 11, TWELVE = 12;

    /**
     * The plain letter equivalent of the accented letters. Position i in this string is the replacement for
     * position i in {@link #UNICODE}, so both strings must stay aligned.
     */
    private static final String PLAIN_ASCII = "AaEeIiOoUu" + // grave
            "AaEeIiOoUuYy" + // acute
            "AaEeIiOoUuYy" + // circumflex
            "AaOoNn" + // tilde
            "AaEeIiOoUuYy" + // umlaut
            "Aa" + // ring
            "Cc" + // cedilla
            "OoUu"; // double acute

    /**
     * Unicode characters corresponding to various accented letters. For example: U+00DA is U acute etc...
     */
    private static final String UNICODE = "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9" +
            "\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD" +
            "\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177" +
            "\u00C3\u00E3\u00D5\u00F5\u00D1\u00F1" +
            "\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF" +
            "\u00C5\u00E5" + "\u00C7\u00E7" + "\u0150\u0151\u0170\u0171";

    /**
     * Every doubled consonant that {@link #removeDoubleConsonants(String)} collapses to a single letter.
     */
    private static final String[] DOUBLE_CONSONANT =
            new String[] { "BB", "CC", "DD", "FF", "GG", "HH", "JJ", "KK", "LL", "MM", "NN", "PP", "QQ", "RR", "SS",
                    "TT", "VV", "WW", "XX", "YY", "ZZ" };

    /**
     * Cleans up a name: 1. Upper-cases everything 2. Removes some common punctuation 3. Removes accents 4. Removes any
     * spaces.
     *
     * <h2>API Usage</h2>
     * <p>
     * Consider this method private, it is package protected for unit testing only.
     * </p>
     *
     * @param name
     *            The name to be cleaned
     * @return The cleaned name; may be empty if the name consisted only of punctuation and whitespace
     */
    String cleanName(final String name) {
        String upperName = name.toUpperCase(Locale.ENGLISH);
        // strip hyphen, ampersand, apostrophe, full stop and comma in one pass
        upperName = upperName.replaceAll("[-&'.,]", EMPTY);
        upperName = removeAccents(upperName);
        upperName = upperName.replaceAll("\\s+", EMPTY);
        return upperName;
    }

    /**
     * Encodes an Object using the Match Rating Approach algorithm. Method is here to satisfy the requirements of the
     * Encoder interface Throws an EncoderException if input object is not of type java.lang.String.
     *
     * @param pObject
     *            Object to encode
     * @return An object (or type java.lang.String) containing the Match Rating Approach code which corresponds to the
     *         String supplied.
     * @throws EncoderException
     *             if the parameter supplied is not of type java.lang.String
     */
    @Override
    public final Object encode(final Object pObject) throws EncoderException {
        if (!(pObject instanceof String)) {
            throw new EncoderException(
                    "Parameter supplied to Match Rating Approach encoder is not of type java.lang.String");
        }
        return encode((String) pObject);
    }

    /**
     * Encodes a String using the Match Rating Approach (MRA) algorithm.
     * <p>
     * Trivial input ({@code null}, the empty string, a single space, any single character, or a name that is empty
     * once punctuation and whitespace have been removed) is encoded as the empty string.
     * </p>
     *
     * @param name
     *            String object to encode
     * @return The MRA code corresponding to the String supplied
     */
    @Override
    public final String encode(String name) {
        // Bulletproof for trivial input - NINO
        if (name == null || EMPTY.equalsIgnoreCase(name) || SPACE.equalsIgnoreCase(name) || name.length() == 1) {
            return EMPTY;
        }

        // Preprocessing
        name = cleanName(name);

        // Input such as "  " or "-." is empty after cleaning; there is nothing left to encode and the
        // following steps would otherwise fail on the missing first letter.
        if (name.length() == 0) {
            return EMPTY;
        }

        // BEGIN: Actual encoding part of the algorithm...
        // 1. Delete all vowels unless the vowel begins the word
        name = removeVowels(name);

        // 2. Remove second consonant from any double consonant
        name = removeDoubleConsonants(name);

        // 3. Reduce codex to 6 letters by joining the first 3 and last 3 letters
        name = getFirst3Last3(name);

        return name;
    }

    /**
     * Gets the first and last 3 letters of a name (if &gt; 6 characters) Else just returns the name.
     *
     * <h2>API Usage</h2>
     * <p>
     * Consider this method private, it is package protected for unit testing only.
     * </p>
     *
     * @param name
     *            The string to get the substrings from
     * @return Annexed first and last 3 letters of input word.
     */
    String getFirst3Last3(final String name) {
        final int nameLength = name.length();
        if (nameLength > SIX) {
            final String firstThree = name.substring(0, THREE);
            final String lastThree = name.substring(nameLength - THREE, nameLength);
            return firstThree + lastThree;
        }
        return name;
    }

    /**
     * Obtains the min rating of the length sum of the 2 names. In essence the larger the sum length the smaller the
     * min rating. Values strictly from documentation.
     *
     * <h2>API Usage</h2>
     * <p>
     * Consider this method private, it is package protected for unit testing only.
     * </p>
     *
     * @param sumLength
     *            The length of 2 strings sent down
     * @return The min rating value
     */
    int getMinRating(final int sumLength) {
        if (sumLength <= FOUR) {
            return FIVE;
        } else if (sumLength >= FIVE && sumLength <= SEVEN) {
            return FOUR;
        } else if (sumLength >= EIGHT && sumLength <= ELEVEN) {
            return THREE;
        } else if (sumLength == TWELVE) {
            return TWO;
        }
        return ONE; // docs said little here.
    }

    /**
     * Determines if two names are homophonous via Match Rating Approach (MRA) algorithm. It should be noted that the
     * strings are cleaned in the same way as {@link #encode(String)}.
     * <p>
     * Trivial input ({@code null}, empty, a single space or a single character) never matches. Two names that are
     * equal ignoring case always match. A name that is empty once punctuation and whitespace have been removed
     * does not match anything else.
     * </p>
     *
     * @param name1
     *            First of the 2 strings (names) to compare
     * @param name2
     *            Second of the 2 names to compare
     * @return <code>true</code> if the encodings are identical <code>false</code> otherwise.
     */
    public boolean isEncodeEquals(String name1, String name2) {
        // Bulletproof for trivial input - NINO
        if (name1 == null || EMPTY.equalsIgnoreCase(name1) || SPACE.equalsIgnoreCase(name1)) {
            return false;
        } else if (name2 == null || EMPTY.equalsIgnoreCase(name2) || SPACE.equalsIgnoreCase(name2)) {
            return false;
        } else if (name1.length() == 1 || name2.length() == 1) {
            return false;
        } else if (name1.equalsIgnoreCase(name2)) {
            return true;
        }

        // Preprocessing
        name1 = cleanName(name1);
        name2 = cleanName(name2);

        // A name made up solely of punctuation/whitespace is empty now; treat it like the empty input
        // handled above instead of failing on the missing first letter further down.
        if (name1.length() == 0 || name2.length() == 0) {
            return false;
        }

        // Actual MRA Algorithm

        // 1. Remove vowels
        name1 = removeVowels(name1);
        name2 = removeVowels(name2);

        // 2. Remove double consonants
        name1 = removeDoubleConsonants(name1);
        name2 = removeDoubleConsonants(name2);

        // 3. Reduce down to 3 letters
        name1 = getFirst3Last3(name1);
        name2 = getFirst3Last3(name2);

        // 4. Check for length difference - if 3 or greater then no similarity
        // comparison is done
        if (Math.abs(name1.length() - name2.length()) >= THREE) {
            return false;
        }

        // 5. Obtain the minimum rating value by calculating the length sum of the
        // encoded Strings and sending it down.
        final int sumLength = name1.length() + name2.length();
        final int minRating = getMinRating(sumLength);

        // 6. Process the encoded Strings from left to right and remove any
        // identical characters found from both Strings respectively.
        final int count = leftToRightThenRightToLeftProcessing(name1, name2);

        // 7. Each PNI item that has a similarity rating equal to or greater than
        // the min is considered to be a good candidate match
        return count >= minRating;
    }

    /**
     * Determines if a letter is a vowel.
     *
     * <h2>API Usage</h2>
     * <p>
     * Consider this method private, it is package protected for unit testing only.
     * </p>
     *
     * @param letter
     *            The letter under investigation
     * @return True if a vowel, else false
     */
    boolean isVowel(final String letter) {
        return letter.equalsIgnoreCase("E") || letter.equalsIgnoreCase("A") || letter.equalsIgnoreCase("O") ||
                letter.equalsIgnoreCase("I") || letter.equalsIgnoreCase("U");
    }

    /**
     * Processes the names from left to right (first) then right to left removing identical letters in same positions.
     * Then subtracts the longer string that remains from 6 and returns this.
     *
     * <h2>API Usage</h2>
     * <p>
     * Consider this method private, it is package protected for unit testing only.
     * </p>
     *
     * @param name1
     *            The first encoded name
     * @param name2
     *            The second encoded name
     * @return The absolute difference between 6 and the length of the longer of the two strings that remain after
     *         the matching letters have been removed
     */
    int leftToRightThenRightToLeftProcessing(final String name1, final String name2) {
        final char[] name1Char = name1.toCharArray();
        final char[] name2Char = name2.toCharArray();

        final int name1Size = name1.length() - 1;
        final int name2Size = name2.length() - 1;

        // Comparisons always look at the unmodified input strings; only the working copies are blanked out.
        for (int i = 0; i < name1Char.length && i <= name2Size; i++) {
            // Left to right...
            if (name1.charAt(i) == name2.charAt(i)) {
                name1Char[i] = ' ';
                name2Char[i] = ' ';
            }

            // Right to left...
            if (name1.charAt(name1Size - i) == name2.charAt(name2Size - i)) {
                name1Char[name1Size - i] = ' ';
                name2Char[name2Size - i] = ' ';
            }
        }

        // Char arrays -> string & remove extraneous space
        final String strA = new String(name1Char).replaceAll("\\s+", EMPTY);
        final String strB = new String(name2Char).replaceAll("\\s+", EMPTY);

        // Final bit - subtract longest string from 6 and return this int value
        return Math.abs(SIX - Math.max(strA.length(), strB.length()));
    }

    /**
     * Removes accented letters and replaces with non-accented ascii equivalent Case is preserved.
     * http://www.codecodex.com/wiki/Remove_accent_from_letters_%28ex_.%C3%A9_to_e%29
     *
     * @param accentedWord
     *            The word that may have accents in it.
     * @return De-accented word, or {@code null} if the input is {@code null}
     */
    String removeAccents(final String accentedWord) {
        if (accentedWord == null) {
            return null;
        }

        final int n = accentedWord.length();
        final StringBuilder sb = new StringBuilder(n);

        for (int i = 0; i < n; i++) {
            final char c = accentedWord.charAt(i);
            final int pos = UNICODE.indexOf(c);
            sb.append(pos > -1 ? PLAIN_ASCII.charAt(pos) : c);
        }

        return sb.toString();
    }

    /**
     * Replaces any double consonant pair with the single letter equivalent.
     * <p>
     * The name is upper-cased with {@link Locale#ENGLISH}, matching {@link #cleanName(String)}, so the result does
     * not depend on the default locale of the JVM.
     * </p>
     *
     * <h2>API Usage</h2>
     * <p>
     * Consider this method private, it is package protected for unit testing only.
     * </p>
     *
     * @param name
     *            String to have double consonants removed
     * @return Single consonant word
     */
    String removeDoubleConsonants(final String name) {
        String replacedName = name.toUpperCase(Locale.ENGLISH);
        for (final String dc : DOUBLE_CONSONANT) {
            if (replacedName.contains(dc)) {
                final String singleLetter = dc.substring(0, 1);
                replacedName = replacedName.replace(dc, singleLetter);
            }
        }
        return replacedName;
    }

    /**
     * Deletes all vowels unless the vowel begins the word.
     * <p>
     * Only upper-case vowels are deleted. An empty name is returned unchanged.
     * </p>
     *
     * <h2>API Usage</h2>
     * <p>
     * Consider this method private, it is package protected for unit testing only.
     * </p>
     *
     * @param name
     *            The name to have vowels removed
     * @return De-voweled word
     */
    String removeVowels(String name) {
        // Nothing to do for an empty name, and there is no first letter to extract.
        if (name.length() == 0) {
            return name;
        }

        // Extract first letter
        final String firstLetter = name.substring(0, 1);

        name = name.replaceAll("[AEIOU]", EMPTY);
        name = name.replaceAll("\\s{2,}\\b", SPACE);

        // A leading vowel was removed together with all the others; put it back.
        if (isVowel(firstLetter)) {
            return firstLetter + name;
        }
        return name;
    }
}

View File

@ -0,0 +1,430 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
 * Encodes a string into a Metaphone value.
 * <p>
 * Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>.
 * Permission given by <CITE>wbrogden</CITE> for code to be used anywhere.
 * <p>
 * <CITE>Hanging on the Metaphone</CITE> by <CITE>Lawrence Philips</CITE> in <CITE>Computer Language of Dec. 1990,
 * p 39.</CITE>
 * <p>
 * Note, that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
 * </p>
 * <ul>
 * <li><a href="http://search.cpan.org/~mschwern/Text-Metaphone-1.96/Metaphone.pm">Text:Metaphone-1.96</a>
 *  (broken link 4/30/2013) </li>
 * <li><a href="https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm">Text:Metaphone-1.96</a>
 *  (link checked 4/30/2013) </li>
 * </ul>
 * <p>
 * They have had undocumented changes from the originally published algorithm.
 * For more information, see <a href="https://issues.apache.org/jira/browse/CODEC-57">CODEC-57</a>.
 * <p>
 * This class is conditionally thread-safe.
 * The instance field {@link #maxCodeLen} is mutable {@link #setMaxCodeLen(int)}
 * but is not volatile, and accesses are not synchronized.
 * If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization
 * is used to ensure safe publication of the value between threads, and must not invoke {@link #setMaxCodeLen(int)}
 * after initial setup.
 *
 * @version $Id$
 */
public class Metaphone implements StringEncoder {

    /**
     * The five vowels of the English language.
     */
    private static final String VOWELS = "AEIOU";

    /**
     * The letters which, when they follow a C or a G, select the soft encoding (S resp. J) of that letter.
     */
    private static final String FRONTV = "EIY";

    /**
     * The letters after which an H is not encoded.
     */
    private static final String VARSON = "CSPTG";

    /**
     * The maximum length of a generated code; 4 unless changed through {@link #setMaxCodeLen(int)}.
     */
    private int maxCodeLen = 4;

    /**
     * Creates an instance of the Metaphone encoder
     */
    public Metaphone() {
        super();
    }

    /**
     * Find the metaphone value of a String. This is similar to the
     * soundex algorithm, but better at finding similar sounding words.
     * All input is converted to upper case.
     * Limitations: Input format is expected to be a single ASCII word
     * with only characters in the A - Z range, no punctuation or numbers.
     * <p>
     * A {@code null} or empty input yields the empty string; a single character is returned upper-cased.
     * </p>
     *
     * @param txt String to find the metaphone code for
     * @return A metaphone code corresponding to the String supplied
     */
    public String metaphone(final String txt) {
        if (txt == null || txt.length() == 0) {
            return "";
        }
        final String upper = txt.toUpperCase(java.util.Locale.ENGLISH);
        // single character is itself
        if (txt.length() == 1) {
            return upper;
        }

        final char[] letters = upper.toCharArray();
        final StringBuilder word = new StringBuilder(40); // working copy with the initial letters fixed
        final StringBuilder code = new StringBuilder(10); // the code being built

        // Exceptions for the first two characters: decide whether the first one is dropped...
        final char first = letters[0];
        final boolean dropFirst;
        switch (first) {
        case 'K':
        case 'G':
        case 'P': // KN, GN, PN -> N
            dropFirst = letters[1] == 'N';
            break;
        case 'A': // AE -> E
            dropFirst = letters[1] == 'E';
            break;
        case 'W': // WR -> R, WH -> W (the H is rewritten below)
            dropFirst = letters[1] == 'R' || letters[1] == 'H';
            break;
        default:
            dropFirst = false;
            break;
        }
        if (dropFirst) {
            word.append(letters, 1, letters.length - 1);
        } else {
            word.append(letters);
        }
        // ...and whether the new first character has to be replaced.
        if (first == 'W' && letters[1] == 'H') {
            word.setCharAt(0, 'W'); // WH -> W
        } else if (first == 'X') {
            word.setCharAt(0, 'S'); // initial X becomes S
        }

        final int length = word.length();
        int pos = 0;

        while (code.length() < this.getMaxCodeLen() && pos < length) { // max code size of 4 works well
            final char current = word.charAt(pos);
            // a repeated letter is skipped, except for C
            final boolean duplicate = current != 'C' && isPreviousChar(word, pos, current);
            if (!duplicate) {
                switch (current) {
                case 'A':
                case 'E':
                case 'I':
                case 'O':
                case 'U':
                    // a vowel is only kept when it is the leading character
                    if (pos == 0) {
                        code.append(current);
                    }
                    break;
                case 'B':
                    // B is silent if the word ends in MB
                    if (!(isLastChar(length, pos) && isPreviousChar(word, pos, 'M'))) {
                        code.append(current);
                    }
                    break;
                case 'C': { // lots of C special cases
                    final boolean afterS = isPreviousChar(word, pos, 'S');
                    final boolean beforeFrontVowel =
                            !isLastChar(length, pos) && FRONTV.indexOf(word.charAt(pos + 1)) >= 0;
                    if (afterS && beforeFrontVowel) {
                        break; // discard if SCI, SCE or SCY
                    }
                    if (regionMatch(word, pos, "CIA")) {
                        code.append('X'); // "CIA" -> X
                    } else if (beforeFrontVowel) {
                        code.append('S'); // CI, CE, CY -> S
                    } else if (isNextChar(word, pos, 'H')) {
                        // SCH -> SK; a word-initial CH whose third letter is a vowel -> K; any other CH -> X
                        final boolean hardCh = afterS || (pos == 0 && length >= 3 && isVowel(word, 2));
                        code.append(hardCh ? 'K' : 'X');
                    } else {
                        code.append('K');
                    }
                    break;
                }
                case 'D':
                    if (isNextChar(word, pos, 'G') &&
                        pos + 2 < length &&
                        FRONTV.indexOf(word.charAt(pos + 2)) >= 0) { // DGE DGI DGY -> J
                        code.append('J');
                        pos += 2;
                    } else {
                        code.append('T');
                    }
                    break;
                case 'G': {
                    // GH is silent at the end of the word or in front of a non-vowel
                    if (isNextChar(word, pos, 'H') &&
                        (isLastChar(length, pos + 1) || !isVowel(word, pos + 2))) {
                        break;
                    }
                    if (pos > 0 &&
                        (regionMatch(word, pos, "GN") || regionMatch(word, pos, "GNED"))) {
                        break; // silent G
                    }
                    // NOTE: Given that duplicated chars are removed, it is unclear how this can ever be true
                    final boolean hard = isPreviousChar(word, pos, 'G');
                    final boolean beforeFrontVowel =
                            !isLastChar(length, pos) && FRONTV.indexOf(word.charAt(pos + 1)) >= 0;
                    code.append((beforeFrontVowel && !hard) ? 'J' : 'K');
                    break;
                }
                case 'H': {
                    // H is only kept in front of a vowel, and never at the end or after one of C, S, P, T, G
                    final boolean afterVarson = pos > 0 && VARSON.indexOf(word.charAt(pos - 1)) >= 0;
                    if (!isLastChar(length, pos) && !afterVarson && isVowel(word, pos + 1)) {
                        code.append('H'); // Hvowel
                    }
                    break;
                }
                case 'F':
                case 'J':
                case 'L':
                case 'M':
                case 'N':
                case 'R':
                    code.append(current);
                    break;
                case 'K':
                    // K is silent after C (an initial K is always kept)
                    if (pos == 0 || !isPreviousChar(word, pos, 'C')) {
                        code.append(current);
                    }
                    break;
                case 'P':
                    // PH -> F
                    code.append(isNextChar(word, pos, 'H') ? 'F' : current);
                    break;
                case 'Q':
                    code.append('K');
                    break;
                case 'S': {
                    final boolean hushing = regionMatch(word, pos, "SH") ||
                                            regionMatch(word, pos, "SIO") ||
                                            regionMatch(word, pos, "SIA");
                    code.append(hushing ? 'X' : 'S');
                    break;
                }
                case 'T':
                    if (regionMatch(word, pos, "TIA") || regionMatch(word, pos, "TIO")) {
                        code.append('X');
                    } else if (regionMatch(word, pos, "TCH")) {
                        // Silent if in "TCH"
                    } else if (regionMatch(word, pos, "TH")) {
                        // substitute numeral 0 for TH (resembles theta after all)
                        code.append('0');
                    } else {
                        code.append('T');
                    }
                    break;
                case 'V':
                    code.append('F');
                    break;
                case 'W':
                case 'Y':
                    // silent if not followed by vowel
                    if (!isLastChar(length, pos) && isVowel(word, pos + 1)) {
                        code.append(current);
                    }
                    break;
                case 'X':
                    code.append('K').append('S');
                    break;
                case 'Z':
                    code.append('S');
                    break;
                default:
                    // anything else does not contribute to the code
                    break;
                } // end switch
            }
            pos++;
            // X contributes two characters and may have pushed the code past the limit
            if (code.length() > this.getMaxCodeLen()) {
                code.setLength(this.getMaxCodeLen());
            }
        }
        return code.toString();
    }

    /**
     * Tells whether the character at the given index is one of A, E, I, O, U.
     */
    private boolean isVowel(final StringBuilder text, final int index) {
        final char candidate = text.charAt(index);
        return VOWELS.indexOf(candidate) != -1;
    }

    /**
     * Tells whether the character in front of the given index exists and equals c.
     */
    private boolean isPreviousChar(final StringBuilder text, final int index, final char c) {
        return index > 0 && index < text.length() && text.charAt(index - 1) == c;
    }

    /**
     * Tells whether the character following the given index exists and equals c.
     */
    private boolean isNextChar(final StringBuilder text, final int index, final char c) {
        return index >= 0 && index < text.length() - 1 && text.charAt(index + 1) == c;
    }

    /**
     * Tells whether the given text contains test starting exactly at the given index.
     */
    private boolean regionMatch(final StringBuilder text, final int index, final String test) {
        final int end = index + test.length();
        if (index < 0 || end > text.length()) {
            return false;
        }
        return text.substring(index, end).equals(test);
    }

    /**
     * Tells whether n is the index of the last character of a word of length wdsz.
     */
    private boolean isLastChar(final int wdsz, final int n) {
        return n == wdsz - 1;
    }

    /**
     * Encodes an Object using the metaphone algorithm.  This method
     * is provided in order to satisfy the requirements of the
     * Encoder interface, and will throw an EncoderException if the
     * supplied object is not of type java.lang.String.
     *
     * @param obj Object to encode
     * @return An object (or type java.lang.String) containing the
     *         metaphone code which corresponds to the String supplied.
     * @throws EncoderException if the parameter supplied is not
     *                          of type java.lang.String
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj instanceof String) {
            return metaphone((String) obj);
        }
        throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String");
    }

    /**
     * Encodes a String using the Metaphone algorithm.
     *
     * @param str String object to encode
     * @return The metaphone code corresponding to the String supplied
     */
    @Override
    public String encode(final String str) {
        return metaphone(str);
    }

    /**
     * Tests if the metaphones of two strings are identical.
     *
     * @param str1 First of two strings to compare
     * @param str2 Second of two strings to compare
     * @return <code>true</code> if the metaphones of these strings are identical,
     *        <code>false</code> otherwise.
     */
    public boolean isMetaphoneEqual(final String str1, final String str2) {
        final String code1 = metaphone(str1);
        final String code2 = metaphone(str2);
        return code1.equals(code2);
    }

    /**
     * Returns the maxCodeLen.
     * @return int
     */
    public int getMaxCodeLen() {
        return this.maxCodeLen;
    }

    /**
     * Sets the maxCodeLen.
     * @param maxCodeLen The maxCodeLen to set
     */
    public void setMaxCodeLen(final int maxCodeLen) {
        this.maxCodeLen = maxCodeLen;
    }
}

View File

@ -0,0 +1,319 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import java.util.regex.Pattern;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
* Encodes a string into a NYSIIS value. NYSIIS is an encoding used to relate similar names, but can also be used as a
* general purpose scheme to find words with similar phonemes.
* <p>
* NYSIIS features an accuracy increase of 2.7% over the traditional Soundex algorithm.
* <p>
* Algorithm description:
* <pre>
* 1. Transcode first characters of name
* 1a. MAC -&gt; MCC
* 1b. KN -&gt; NN
* 1c. K -&gt; C
* 1d. PH -&gt; FF
* 1e. PF -&gt; FF
* 1f. SCH -&gt; SSS
* 2. Transcode last characters of name
* 2a. EE, IE -&gt; Y
* 2b. DT,RT,RD,NT,ND -&gt; D
* 3. First character of key = first character of name
* 4. Transcode remaining characters by following these rules, incrementing by one character each time
* 4a. EV -&gt; AF else A,E,I,O,U -&gt; A
* 4b. Q -&gt; G
* 4c. Z -&gt; S
* 4d. M -&gt; N
* 4e. KN -&gt; N else K -&gt; C
* 4f. SCH -&gt; SSS
* 4g. PH -&gt; FF
* 4h. H -&gt; If previous or next is nonvowel, previous
* 4i. W -&gt; If previous is vowel, previous
* 4j. Add current to key if current != last key character
* 5. If last character is S, remove it
* 6. If last characters are AY, replace with Y
* 7. If last character is A, remove it
* 8. Collapse all strings of repeated characters
* 9. Add original first character of name as first character of key
* </pre>
* <p>
* This class is immutable and thread-safe.
*
* @see <a href="http://en.wikipedia.org/wiki/NYSIIS">NYSIIS on Wikipedia</a>
* @see <a href="http://www.dropby.com/NYSIIS.html">NYSIIS on dropby.com</a>
* @see Soundex
* @since 1.7
* @version $Id$
*/
public class Nysiis implements StringEncoder {
// Replacement character sequences handed out by transcodeRemaining(char, char, char, char).
// The same array instances are returned on every call, so they must not be modified.
private static final char[] CHARS_A = new char[] { 'A' };
private static final char[] CHARS_AF = new char[] { 'A', 'F' };
private static final char[] CHARS_C = new char[] { 'C' };
private static final char[] CHARS_FF = new char[] { 'F', 'F' };
private static final char[] CHARS_G = new char[] { 'G' };
private static final char[] CHARS_N = new char[] { 'N' };
private static final char[] CHARS_NN = new char[] { 'N', 'N' };
private static final char[] CHARS_S = new char[] { 'S' };
private static final char[] CHARS_SSS = new char[] { 'S', 'S', 'S' };
// Patterns anchored at the start of the name ('^'); nysiis(String) uses them to transcode the first characters.
private static final Pattern PAT_MAC = Pattern.compile("^MAC");
private static final Pattern PAT_KN = Pattern.compile("^KN");
private static final Pattern PAT_K = Pattern.compile("^K");
private static final Pattern PAT_PH_PF = Pattern.compile("^(PH|PF)");
private static final Pattern PAT_SCH = Pattern.compile("^SCH");
// Patterns anchored at the end of the name ('$'); nysiis(String) uses them to transcode the last characters.
private static final Pattern PAT_EE_IE = Pattern.compile("(EE|IE)$");
private static final Pattern PAT_DT_ETC = Pattern.compile("(DT|RT|RD|NT|ND)$");
// Stand-in for the "next" and "after next" characters when the sliding window reaches past the end of the name.
private static final char SPACE = ' ';
// NOTE(review): presumably the maximum key length applied in strict mode - confirm against nysiis(String).
private static final int TRUE_LENGTH = 6;
/**
 * Tests if the given character is one of the upper-case vowels A, E, I, O or U.
 *
 * @param c
 *            the character to test
 * @return <code>true</code> if the character is a vowel, <code>false</code> otherwise
 */
private static boolean isVowel(final char c) {
    switch (c) {
    case 'A':
    case 'E':
    case 'I':
    case 'O':
    case 'U':
        return true;
    default:
        return false;
    }
}
/**
 * Transcodes the remaining parts of the String. The method operates on a sliding window, looking at 4 characters at
 * a time: [i-1, i, i+1, i+2].
 * <p>
 * The result is chosen by the current character:
 * </p>
 * <ul>
 * <li>EV -&gt; AF, any other vowel -&gt; A</li>
 * <li>Q -&gt; G, Z -&gt; S, M -&gt; N</li>
 * <li>KN -&gt; NN, any other K -&gt; C</li>
 * <li>SCH -&gt; SSS, PH -&gt; FF</li>
 * <li>H -&gt; the previous character, if the previous or the next character is not a vowel</li>
 * <li>W -&gt; the previous character, if the previous character is a vowel</li>
 * <li>anything else -&gt; the current character itself</li>
 * </ul>
 *
 * @param prev
 *            the previous character
 * @param curr
 *            the current character
 * @param next
 *            the next character
 * @param aNext
 *            the after next character
 * @return a transcoded array of characters, starting from the current position
 */
private static char[] transcodeRemaining(final char prev, final char curr, final char next, final char aNext) {
    // Vowels first: EV is the only vowel combination with its own replacement.
    if (curr == 'E' && next == 'V') {
        return CHARS_AF;
    }
    if (isVowel(curr)) {
        return CHARS_A;
    }

    // From here on curr is not a vowel; every remaining rule is keyed on a single current character.
    switch (curr) {
    case 'Q':
        return CHARS_G;
    case 'Z':
        return CHARS_S;
    case 'M':
        return CHARS_N;
    case 'K':
        return next == 'N' ? CHARS_NN : CHARS_C;
    case 'S':
        if (next == 'C' && aNext == 'H') {
            return CHARS_SSS;
        }
        break;
    case 'P':
        if (next == 'H') {
            return CHARS_FF;
        }
        break;
    case 'H':
        // replaced by the previous character unless it sits between two vowels
        if (!(isVowel(prev) && isVowel(next))) {
            return new char[] { prev };
        }
        break;
    case 'W':
        if (isVowel(prev)) {
            return new char[] { prev };
        }
        break;
    default:
        break;
    }

    // No rule applied: the character stands for itself.
    return new char[] { curr };
}
/** Indicates the strict mode; set once by the constructor and reported by {@link #isStrict()}. */
private final boolean strict;
/**
* Creates an instance of the {@link Nysiis} encoder with strict mode (original form),
* i.e. encoded strings have a maximum length of 6.
* <p>
* Equivalent to <code>new Nysiis(true)</code>.
* </p>
*/
public Nysiis() {
this(true);
}
/**
* Create an instance of the {@link Nysiis} encoder with the specified strict mode:
*
* <ul>
* <li><code>true</code>: encoded strings have a maximum length of 6</li>
* <li><code>false</code>: encoded strings may have arbitrary length</li>
* </ul>
*
* @param strict
* the strict mode; the value is fixed for the lifetime of the instance and can be queried with
* {@link #isStrict()}
*/
public Nysiis(final boolean strict) {
this.strict = strict;
}
/**
 * Encodes an Object using the NYSIIS algorithm. This method is provided in order to satisfy the requirements of the
 * Encoder interface: a {@link String} argument is handed to {@link #nysiis(String)}, anything else (including
 * <code>null</code>) is rejected with an {@link EncoderException}.
 *
 * @param obj
 *            Object to encode
 * @return An object (or a {@link String}) containing the NYSIIS code which corresponds to the given String.
 * @throws EncoderException
 *             if the parameter supplied is not of a {@link String}
 */
@Override
public Object encode(final Object obj) throws EncoderException {
    if (obj instanceof String) {
        return this.nysiis((String) obj);
    }
    throw new EncoderException("Parameter supplied to Nysiis encode is not of type java.lang.String");
}
/**
* Encodes a String using the NYSIIS algorithm. Delegates directly to {@link #nysiis(String)}.
*
* @param str
* A String object to encode
* @return A Nysiis code corresponding to the String supplied; <code>null</code> if <code>str</code> is
* <code>null</code>
* @throws IllegalArgumentException
* if a character is not mapped (NOTE(review): no statement in {@link #nysiis(String)} visibly throws
* this - confirm whether this tag is still accurate)
*/
@Override
public String encode(final String str) {
return this.nysiis(str);
}
/**
* Indicates the strict mode for this {@link Nysiis} encoder, as chosen at construction time.
*
* @return <code>true</code> if the encoder is configured for strict mode, <code>false</code> otherwise
*/
public boolean isStrict() {
return this.strict;
}
/**
* Retrieves the NYSIIS code for a given String object.
* <p>
* Processing order: the input is reduced to upper-case letters by {@link SoundexUtils#clean(String)}; the
* leading and trailing substitutions are applied; every character after the first is transcoded in place;
* a trailing S, AY or A is trimmed from the key; and, in strict mode, the key is cut to at most
* <code>TRUE_LENGTH</code> characters.
*
* @param str
* String to encode using the NYSIIS algorithm
* @return A NYSIIS code for the String supplied; <code>null</code> if <code>str</code> is <code>null</code>, and
* the empty cleaned string if no letters remain after cleaning
*/
public String nysiis(String str) {
if (str == null) {
return null;
}
// Use the same clean rules as Soundex
str = SoundexUtils.clean(str);
if (str.length() == 0) {
return str;
}
// Translate first characters of name:
// MAC -> MCC, KN -> NN, K -> C, PH | PF -> FF, SCH -> SSS
// (the PAT_* patterns are declared elsewhere in this class; presumably anchored at the start - verify there)
str = PAT_MAC.matcher(str).replaceFirst("MCC");
str = PAT_KN.matcher(str).replaceFirst("NN");
str = PAT_K.matcher(str).replaceFirst("C");
str = PAT_PH_PF.matcher(str).replaceFirst("FF");
str = PAT_SCH.matcher(str).replaceFirst("SSS");
// Translate last characters of name:
// EE -> Y, IE -> Y, DT | RT | RD | NT | ND -> D
// (presumably anchored at the end of the string - verify against the pattern declarations)
str = PAT_EE_IE.matcher(str).replaceFirst("Y");
str = PAT_DT_ETC.matcher(str).replaceFirst("D");
// First character of key = first character of name.
final StringBuilder key = new StringBuilder(str.length());
key.append(str.charAt(0));
// Transcode remaining characters, incrementing by one character each time
final char[] chars = str.toCharArray();
final int len = chars.length;
for (int i = 1; i < len; i++) {
// SPACE stands in for the look-ahead characters that would lie beyond the end of the array.
final char next = i < len - 1 ? chars[i + 1] : SPACE;
final char aNext = i < len - 2 ? chars[i + 2] : SPACE;
final char[] transcoded = transcodeRemaining(chars[i - 1], chars[i], next, aNext);
// Write the result back into the working array starting at i. A multi-character result also overwrites the
// following positions, so later iterations read already-transcoded values as their prev/curr characters.
System.arraycopy(transcoded, 0, chars, i, transcoded.length);
// only append the current char to the key if it is different from the last one
if (chars[i] != chars[i - 1]) {
key.append(chars[i]);
}
}
if (key.length() > 1) {
char lastChar = key.charAt(key.length() - 1);
// If last character is S, remove it.
if (lastChar == 'S') {
key.deleteCharAt(key.length() - 1);
// key still holds at least one character here because its length was > 1 before the deletion.
lastChar = key.charAt(key.length() - 1);
}
if (key.length() > 2) {
final char last2Char = key.charAt(key.length() - 2);
// If last characters are AY, replace with Y.
if (last2Char == 'A' && lastChar == 'Y') {
key.deleteCharAt(key.length() - 2);
}
}
// If last character is A, remove it.
// lastChar is intentionally not re-read after the AY step: in that case it is still 'Y', so this test is false.
if (lastChar == 'A') {
key.deleteCharAt(key.length() - 1);
}
}
final String string = key.toString();
// Strict mode caps the key length at TRUE_LENGTH; otherwise the full key is returned.
return this.isStrict() ? string.substring(0, Math.min(TRUE_LENGTH, string.length())) : string;
}
}

View File

@ -0,0 +1,205 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
* Encodes a string into a Refined Soundex value. A refined soundex code is
* optimized for spell checking words. Soundex method originally developed by
* <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>.
*
* <p>This class is immutable and thread-safe.</p>
*
* @version $Id$
*/
public class RefinedSoundex implements StringEncoder {

    /**
     * Default refined-soundex mapping for US English: one code character for each of the letters A to Z, in
     * alphabetical order.
     *
     * @since 1.4
     */
    public static final String US_ENGLISH_MAPPING_STRING = "01360240043788015936020505";

    /**
     * RefinedSoundex is *refined* for a number of reasons one being that the
     * mappings have been altered. This implementation contains default
     * mappings for US English.
     */
    private static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray();

    /**
     * Every letter of the alphabet is "mapped" to a numerical value. This char
     * array holds the values to which each letter is mapped, indexed by the
     * letter's distance from 'A'. This implementation contains a default map
     * for US_ENGLISH.
     */
    private final char[] soundexMapping;

    /**
     * This static variable contains an instance of the RefinedSoundex using
     * the US_ENGLISH mapping. It must stay declared after US_ENGLISH_MAPPING,
     * because the constructor reads that array while this field is initialized.
     */
    public static final RefinedSoundex US_ENGLISH = new RefinedSoundex();

    /**
     * Creates an instance of the RefinedSoundex object using the default US
     * English mapping.
     */
    public RefinedSoundex() {
        this.soundexMapping = US_ENGLISH_MAPPING;
    }

    /**
     * Creates a refined soundex instance using a custom mapping. This
     * constructor can be used to customize the mapping, and/or possibly
     * provide an internationalized mapping for a non-Western character set.
     * The array is copied, so later changes to the argument do not affect
     * this instance.
     *
     * @param mapping
     *            Mapping array to use when finding the corresponding code for
     *            a given character
     */
    public RefinedSoundex(final char[] mapping) {
        this.soundexMapping = new char[mapping.length];
        System.arraycopy(mapping, 0, this.soundexMapping, 0, mapping.length);
    }

    /**
     * Creates a refined Soundex instance using a custom mapping. This constructor can be used to customize the
     * mapping, and/or possibly provide an internationalized mapping for a non-Western character set.
     *
     * @param mapping
     *            Mapping string to use when finding the corresponding code for a given character
     * @since 1.4
     */
    public RefinedSoundex(final String mapping) {
        this.soundexMapping = mapping.toCharArray();
    }

    /**
     * Returns the number of characters in the two encoded Strings that are the
     * same. This return value ranges from 0 to the length of the shortest
     * encoded String: 0 indicates little or no similarity, and 4 out of 4 (for
     * example) indicates strong similarity or identical values. For refined
     * Soundex, the return value can be greater than 4.
     *
     * @param s1
     *            A String that will be encoded and compared.
     * @param s2
     *            A String that will be encoded and compared.
     * @return The number of characters in the two encoded Strings that are the
     *         same from 0 to the length of the shortest encoded String.
     *
     * @see SoundexUtils#difference(StringEncoder,String,String)
     * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
     *      MS T-SQL DIFFERENCE</a>
     *
     * @throws EncoderException
     *             if an error occurs encoding one of the strings
     * @since 1.3
     */
    public int difference(final String s1, final String s2) throws EncoderException {
        return SoundexUtils.difference(this, s1, s2);
    }

    /**
     * Encodes an Object using the refined soundex algorithm. This method is
     * provided in order to satisfy the requirements of the Encoder interface,
     * and will throw an EncoderException if the supplied object is not of type
     * java.lang.String.
     *
     * @param obj
     *            Object to encode
     * @return An object (or type java.lang.String) containing the refined
     *         soundex code which corresponds to the String supplied.
     * @throws EncoderException
     *             if the parameter supplied is not of type java.lang.String
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (!(obj instanceof String)) {
            throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String");
        }
        return soundex((String) obj);
    }

    /**
     * Encodes a String using the refined soundex algorithm.
     *
     * @param str
     *            A String object to encode
     * @return A Soundex code corresponding to the String supplied
     */
    @Override
    public String encode(final String str) {
        return soundex(str);
    }

    /**
     * Returns the mapping code for a given character. The mapping codes are
     * maintained in an internal char array named soundexMapping, and the
     * default values of these mappings are US English.
     * <p>
     * Characters that are not letters, and letters that have no slot in the
     * mapping array (for example accented or non-Latin letters, or any letter
     * beyond the end of a short custom mapping), yield the char value 0, which
     * {@link #soundex(String)} treats as "no code".
     *
     * @param c
     *            char to get mapping for
     * @return A character (really a numeral) to return for the given char, or
     *         0 when the character has no mapping
     */
    char getMappingCode(final char c) {
        if (!Character.isLetter(c)) {
            return 0;
        }
        final int index = Character.toUpperCase(c) - 'A';
        // Character.isLetter also accepts letters outside A-Z, and a custom mapping may hold fewer than 26
        // entries. Without this guard such input fails with ArrayIndexOutOfBoundsException.
        if (index < 0 || index >= this.soundexMapping.length) {
            return 0;
        }
        return this.soundexMapping[index];
    }

    /**
     * Retrieves the Refined Soundex code for a given String object. The code
     * consists of the first letter of the cleaned input followed by the
     * mapping codes of all its letters (the first one included), where a code
     * equal to the immediately preceding one is dropped.
     *
     * @param str
     *            String to encode using the Refined Soundex algorithm
     * @return A soundex code for the String supplied; <code>null</code> if
     *         <code>str</code> is <code>null</code>, and the empty cleaned
     *         string if it contains no letters
     */
    public String soundex(String str) {
        if (str == null) {
            return null;
        }
        str = SoundexUtils.clean(str);
        if (str.length() == 0) {
            return str;
        }

        final StringBuilder sBuf = new StringBuilder();
        sBuf.append(str.charAt(0));

        char last, current;
        // '*' is used as a start marker; it only needs to differ from the first letter's code.
        last = '*';

        for (int i = 0; i < str.length(); i++) {

            current = getMappingCode(str.charAt(i));
            if (current == last) {
                continue;
            } else if (current != 0) {
                sBuf.append(current);
            }

            last = current;

        }

        return sBuf.toString();
    }
}

View File

@ -0,0 +1,254 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
* Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names, but can also be used as a
* general purpose scheme to find words with similar phonemes.
*
* This class is thread-safe.
* Although not strictly immutable, the {@link #maxLength} field is not actually used.
*
* @version $Id$
*/
public class Soundex implements StringEncoder {

    /**
     * Default mapping for the 26 letters used in US English, one code character per letter in alphabetical
     * order. In {@link #soundex(String)} a letter mapped to <code>0</code> is not encoded, and a letter mapped to
     * <code>#</code> is passed over without changing the code that the next letter is compared with.
     * <p>
     * (This constant is provided as both an implementation convenience and to allow Javadoc to pick
     * up the value for the constant values page.)
     * </p>
     *
     * @see #US_ENGLISH_MAPPING
     */
    public static final String US_ENGLISH_MAPPING_STRING = "0123012#02245501262301#202";

    /**
     * Array form of {@link #US_ENGLISH_MAPPING_STRING}; shared by every instance created with the no-argument
     * constructor.
     *
     * @see Soundex#Soundex(char[])
     */
    private static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray();

    /**
     * An instance of Soundex using the US_ENGLISH_MAPPING mapping. Declared after that array, which the
     * constructor reads while this field is being initialized.
     *
     * @see #US_ENGLISH_MAPPING
     */
    public static final Soundex US_ENGLISH = new Soundex();

    /**
     * The maximum length of a Soundex code - Soundex codes are only four characters by definition. The value is
     * only stored and returned by the accessor pair; {@link #soundex(String)} does not read it.
     *
     * @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
     */
    @Deprecated
    private int maxLength = 4;

    /**
     * Code table of this instance: the entry at position <code>letter - 'A'</code> is the code for that
     * upper-case letter.
     */
    private final char[] soundexMapping;

    /**
     * Creates an instance using US_ENGLISH_MAPPING
     *
     * @see Soundex#Soundex(char[])
     * @see Soundex#US_ENGLISH_MAPPING
     */
    public Soundex() {
        this.soundexMapping = US_ENGLISH_MAPPING;
    }

    /**
     * Creates a soundex instance using the given mapping. This constructor can be used to provide an
     * internationalized mapping for a non-Western character set.
     * <p>
     * The array is copied, so the caller may modify its own array afterwards without affecting this instance.
     *
     * @param mapping
     *            Mapping array to use when finding the corresponding code for a given character
     */
    public Soundex(final char[] mapping) {
        this.soundexMapping = mapping.clone();
    }

    /**
     * Creates a soundex instance using a custom mapping given as a String. This constructor can be used to
     * customize the mapping, and/or possibly provide an internationalized mapping for a non-Western character set.
     *
     * @param mapping
     *            Mapping string to use when finding the corresponding code for a given character
     * @since 1.4
     */
    public Soundex(final String mapping) {
        this.soundexMapping = mapping.toCharArray();
    }

    /**
     * Encodes the Strings and returns the number of characters in the two encoded Strings that are the same. This
     * return value ranges from 0 through 4: 0 indicates little or no similarity, and 4 indicates strong similarity
     * or identical values.
     *
     * @param s1
     *            A String that will be encoded and compared.
     * @param s2
     *            A String that will be encoded and compared.
     * @return The number of characters in the two encoded Strings that are the same from 0 to 4.
     *
     * @see SoundexUtils#difference(StringEncoder,String,String)
     * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> MS
     *      T-SQL DIFFERENCE </a>
     *
     * @throws EncoderException
     *             if an error occurs encoding one of the strings
     * @since 1.3
     */
    public int difference(final String s1, final String s2) throws EncoderException {
        return SoundexUtils.difference(this, s1, s2);
    }

    /**
     * Encodes an Object using the soundex algorithm. This overload exists to satisfy the Encoder interface: a
     * String argument is handed to {@link #soundex(String)}, any other argument is rejected.
     *
     * @param obj
     *            Object to encode
     * @return An object (or type java.lang.String) containing the soundex code which corresponds to the String
     *         supplied.
     * @throws EncoderException
     *             if the parameter supplied is not of type java.lang.String
     * @throws IllegalArgumentException
     *             if a character is not mapped
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj instanceof String) {
            return soundex((String) obj);
        }
        throw new EncoderException("Parameter supplied to Soundex encode is not of type java.lang.String");
    }

    /**
     * Encodes a String using the soundex algorithm.
     *
     * @param str
     *            A String object to encode
     * @return A Soundex code corresponding to the String supplied
     * @throws IllegalArgumentException
     *             if a character is not mapped
     */
    @Override
    public String encode(final String str) {
        return soundex(str);
    }

    /**
     * Returns the maxLength. Standard Soundex
     *
     * @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
     * @return int
     */
    @Deprecated
    public int getMaxLength() {
        return this.maxLength;
    }

    /**
     * Returns the soundex mapping.
     *
     * @return the internal mapping array itself (not a copy).
     */
    private char[] getSoundexMapping() {
        return this.soundexMapping;
    }

    /**
     * Maps the given upper-case character to its Soundex code.
     *
     * @param ch
     *            An upper-case character.
     * @return A Soundex code.
     * @throws IllegalArgumentException
     *             Thrown if <code>ch</code> is not mapped, i.e. its offset from 'A' lies outside the mapping array.
     */
    private char map(final char ch) {
        final char[] table = this.getSoundexMapping();
        final int offset = ch - 'A';
        if (offset >= 0 && offset < table.length) {
            return table[offset];
        }
        throw new IllegalArgumentException("The character is not mapped: " + ch);
    }

    /**
     * Sets the maxLength.
     *
     * @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
     * @param maxLength
     *            The maxLength to set
     */
    @Deprecated
    public void setMaxLength(final int maxLength) {
        this.maxLength = maxLength;
    }

    /**
     * Retrieves the Soundex code for a given String object. The code is always four characters long for
     * non-empty cleaned input: the first letter followed by up to three codes, padded with <code>0</code>.
     *
     * @param str
     *            String to encode using the Soundex algorithm
     * @return A soundex code for the String supplied; <code>null</code> for <code>null</code> input and the empty
     *         cleaned string when the input contains no letters
     * @throws IllegalArgumentException
     *             if a character is not mapped
     */
    public String soundex(String str) {
        if (str == null) {
            return null;
        }
        str = SoundexUtils.clean(str);
        if (str.length() == 0) {
            return str;
        }

        final char[] code = {'0', '0', '0', '0'};
        final char first = str.charAt(0);
        code[0] = first;
        // map() throws IllegalArgumentException
        char previous = this.map(first);
        int filled = 1;

        for (int pos = 1; pos < str.length() && filled < code.length; pos++) {
            final char digit = this.map(str.charAt(pos));
            if (digit == '#') {
                // skipped letter: neither stored nor remembered as the previous code
                continue;
            }
            if (digit == '0') {
                // not encoded, but it separates two equal codes on either side of it
                previous = digit;
            } else if (digit != previous) {
                code[filled++] = digit;
                previous = digit;
            }
        }
        return new String(code);
    }
}

View File

@ -0,0 +1,124 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
* Utility methods for {@link Soundex} and {@link RefinedSoundex} classes.
*
* <p>This class is immutable and thread-safe.</p>
*
* @version $Id$
* @since 1.3
*/
final class SoundexUtils {

    /**
     * Prepares a string for Soundex processing: every character that is not a letter is dropped and the remainder
     * is converted to upper case using the English locale.
     *
     * @param str
     *            The String to clean.
     * @return A clean String; <code>null</code> and the empty string are returned unchanged.
     */
    static String clean(final String str) {
        if (str == null || str.length() == 0) {
            return str;
        }
        final StringBuilder letters = new StringBuilder(str.length());
        for (final char c : str.toCharArray()) {
            if (Character.isLetter(c)) {
                letters.append(c);
            }
        }
        // When nothing was filtered out, upper-case the input directly instead of the copy.
        final String kept = letters.length() == str.length() ? str : letters.toString();
        return kept.toUpperCase(java.util.Locale.ENGLISH);
    }

    /**
     * Encodes both Strings with the given encoder and counts the positions at which the two encoded Strings hold
     * the same character.
     * <ul>
     * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
     * little or no similarity, and 4 indicates strong similarity or identical
     * values.</li>
     * <li>For refined Soundex, the return value can be greater than 4.</li>
     * </ul>
     *
     * @param encoder
     *            The encoder to use to encode the Strings.
     * @param s1
     *            A String that will be encoded and compared.
     * @param s2
     *            A String that will be encoded and compared.
     * @return The number of characters in the two Soundex encoded Strings that
     *         are the same.
     *
     * @see #differenceEncoded(String,String)
     * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
     *      MS T-SQL DIFFERENCE</a>
     *
     * @throws EncoderException
     *             if an error occurs encoding one of the strings
     */
    static int difference(final StringEncoder encoder, final String s1, final String s2) throws EncoderException {
        final String first = encoder.encode(s1);
        final String second = encoder.encode(s2);
        return differenceEncoded(first, second);
    }

    /**
     * Counts the positions at which two already encoded Strings hold the same character. Only the positions
     * present in both Strings are compared.
     * <ul>
     * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
     * little or no similarity, and 4 indicates strong similarity or identical
     * values.</li>
     * <li>For refined Soundex, the return value can be greater than 4.</li>
     * </ul>
     *
     * @param es1
     *            An encoded String.
     * @param es2
     *            An encoded String.
     * @return The number of characters in the two Soundex encoded Strings that
     *         are the same; 0 if either argument is <code>null</code>.
     *
     * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
     *      MS T-SQL DIFFERENCE</a>
     */
    static int differenceEncoded(final String es1, final String es2) {
        if (es1 == null || es2 == null) {
            return 0;
        }
        final int limit = Math.min(es1.length(), es2.length());
        int same = 0;
        int pos = 0;
        while (pos < limit) {
            if (es1.charAt(pos) == es2.charAt(pos)) {
                same++;
            }
            pos++;
        }
        return same;
    }
}

View File

@ -0,0 +1,181 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language.bm;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
/**
* Encodes strings into their Beider-Morse phonetic encoding.
* <p>
* Beider-Morse phonetic encodings are optimised for family names. However, they may be useful for a wide range of
* words.
* <p>
* This encoder is intentionally mutable to allow dynamic configuration through bean properties. As such, it is mutable,
* and may not be thread-safe. If you require a guaranteed thread-safe encoding then use {@link PhoneticEngine}
* directly.
* <p>
* <b>Encoding overview</b>
* <p>
* Beider-Morse phonetic encoding is a multi-step process. Firstly, a table of rules is consulted to guess what
* language the word comes from. For example, if it ends in "<code>ault</code>" then it infers that the word is French.
* Next, the word is translated into a phonetic representation using a language-specific phonetics table. Some runs of
* letters can be pronounced in multiple ways, and a single run of letters may be potentially broken up into phonemes at
* different places, so this stage results in a set of possible language-specific phonetic representations. Lastly, this
* language-specific phonetic representation is processed by a table of rules that re-writes it phonetically taking into
* account systematic pronunciation differences between languages, to move it towards a pan-indo-european phonetic
* representation. Again, sometimes there are multiple ways this could be done and sometimes things that can be
* pronounced in several ways in the source language have only one way to represent them in this average phonetic
* language, so the result is again a set of phonetic spellings.
* <p>
* Some names are treated as having multiple parts. This can be due to two things. Firstly, they may be hyphenated. In
* this case, each individual hyphenated word is encoded, and then these are combined end-to-end for the final encoding.
* Secondly, some names have standard prefixes, for example, "<code>Mac/Mc</code>" in Scottish (English) names. As
* sometimes it is ambiguous whether the prefix is intended or is an accident of the spelling, the word is encoded once
* with the prefix and once without it. The resulting encoding contains one and then the other result.
* <p>
* <b>Encoding format</b>
* <p>
* Individual phonetic spellings of an input word are represented in upper- and lower-case roman characters. Where there
* are multiple possible phonetic representations, these are joined with a pipe (<code>|</code>) character. If multiple
* hyphenated words were found, or if the word may contain a name prefix, each encoded word is placed in parentheses and
* these blocks are then joined with hyphens. For example, "<code>d'ortley</code>" has a possible prefix. The form
* without prefix encodes to "<code>ortlaj|ortlej</code>", while the form with prefix encodes to "
* <code>dortlaj|dortlej</code>". Thus, the full, combined encoding is "<code>(ortlaj|ortlej)-(dortlaj|dortlej)</code>".
* <p>
* The encoded forms are often quite a bit longer than the input strings. This is because a single input may have many
* potential phonetic interpretations. For example, "<code>Renault</code>" encodes to "
* <code>rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult</code>". The <code>APPROX</code> rules will tend to produce larger
* encodings as they consider a wider range of possible, approximate phonetic interpretations of the original word.
* Down-stream applications may wish to further process the encoding for indexing or lookup purposes, for example, by
* splitting on pipe (<code>|</code>) and indexing under each of these alternatives.
* <p>
* <b>Note</b>: this version of the Beider-Morse encoding is equivalent with v3.4 of the reference implementation.
*
* @see <a href="http://stevemorse.org/phonetics/bmpm.htm">Beider-Morse Phonetic Matching</a>
* @see <a href="http://stevemorse.org/phoneticinfo.htm">Reference implementation</a>
*
* @since 1.6
* @version $Id$
*/
public class BeiderMorseEncoder implements StringEncoder {
    // Implementation note: this class is a bean-style (spring-friendly) facade over PhoneticEngine. Every setter
    // replaces the delegate with a freshly constructed engine that differs in exactly one setting; all encoding
    // work is delegated to the current engine.

    // the engine currently delegated to
    private PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);

    /**
     * Encodes an Object, which has to be a String.
     *
     * @param source
     *            the object to encode
     * @return the result of {@link #encode(String)} for the given String
     * @throws EncoderException
     *             if <code>source</code> is not a String
     */
    @Override
    public Object encode(final Object source) throws EncoderException {
        if (source instanceof String) {
            return encode((String) source);
        }
        throw new EncoderException("BeiderMorseEncoder encode parameter is not of type String");
    }

    /**
     * Encodes a String by delegating to the current engine.
     *
     * @param source
     *            the String to encode
     * @return the engine's encoding of <code>source</code>, or <code>null</code> if <code>source</code> is
     *         <code>null</code>
     * @throws EncoderException
     *             declared by the interface method this overrides
     */
    @Override
    public String encode(final String source) throws EncoderException {
        return source == null ? null : this.engine.encode(source);
    }

    /**
     * Gets the name type currently in operation.
     *
     * @return the NameType currently being used
     */
    public NameType getNameType() {
        return this.engine.getNameType();
    }

    /**
     * Gets the rule type currently in operation.
     *
     * @return the RuleType currently being used
     */
    public RuleType getRuleType() {
        return this.engine.getRuleType();
    }

    /**
     * Discovers if multiple possible encodings are concatenated.
     *
     * @return true if multiple encodings are concatenated, false if just the first one is returned
     */
    public boolean isConcat() {
        return this.engine.isConcat();
    }

    /**
     * Sets how multiple possible phonetic encodings are combined. The name type, rule type and maximum number of
     * phonemes are carried over from the current engine.
     *
     * @param concat
     *            true if multiple encodings are to be combined with a '|', false if just the first one is
     *            to be considered
     */
    public void setConcat(final boolean concat) {
        final PhoneticEngine current = this.engine;
        this.engine = new PhoneticEngine(current.getNameType(), current.getRuleType(), concat,
                current.getMaxPhonemes());
    }

    /**
     * Sets the type of name. Use {@link NameType#GENERIC} unless you specifically want phonetic encodings
     * optimized for Ashkenazi or Sephardic Jewish family names. The other settings are carried over from the
     * current engine.
     *
     * @param nameType
     *            the NameType in use
     */
    public void setNameType(final NameType nameType) {
        final PhoneticEngine current = this.engine;
        this.engine = new PhoneticEngine(nameType, current.getRuleType(), current.isConcat(),
                current.getMaxPhonemes());
    }

    /**
     * Sets the rule type to apply. This will widen or narrow the range of phonetic encodings considered. The
     * other settings are carried over from the current engine.
     *
     * @param ruleType
     *            {@link RuleType#APPROX} or {@link RuleType#EXACT} for approximate or exact phonetic matches
     */
    public void setRuleType(final RuleType ruleType) {
        final PhoneticEngine current = this.engine;
        this.engine = new PhoneticEngine(current.getNameType(), ruleType, current.isConcat(),
                current.getMaxPhonemes());
    }

    /**
     * Sets the maximum number of phonemes that shall be considered by the engine. The other settings are carried
     * over from the current engine.
     *
     * @param maxPhonemes
     *            the maximum number of phonemes returned by the engine
     * @since 1.7
     */
    public void setMaxPhonemes(final int maxPhonemes) {
        final PhoneticEngine current = this.engine;
        this.engine = new PhoneticEngine(current.getNameType(), current.getRuleType(), current.isConcat(),
                maxPhonemes);
    }
}

View File

@ -0,0 +1,231 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language.bm;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Pattern;
/**
* Language guessing utility.
* <p>
* This class encapsulates rules used to guess the possible languages that a word originates from. This is
* done by reference to a whole series of rules distributed in resource files.
* <p>
* Instances of this class are typically managed through the static factory method instance().
* Unless you are developing your own language guessing rules, you will not need to interact with this class directly.
* <p>
* This class is intended to be immutable and thread-safe.
* <p>
* <b>Lang resources</b>
* <p>
* Language guessing rules are typically loaded from resource files. These are UTF-8 encoded text files.
* They are systematically named following the pattern:
* <blockquote>org/apache/commons/codec/language/bm/<i>nameType</i>_lang.txt</blockquote>
* The format of these resources is the following:
* <ul>
* <li><b>Rules:</b> whitespace separated strings.
* There should be 3 columns to each row, and these will be interpreted as:
* <ol>
* <li>pattern: a regular expression.</li>
* <li>languages: a '+'-separated list of languages.</li>
* <li>acceptOnMatch: 'true' or 'false' indicating if a match rules in or rules out the language.</li>
* </ol>
* </li>
* <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be
* discarded as a comment.</li>
* <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode.
* This will skip all content until a line ending in '*' and '/' is found.</li>
* <li><b>Blank lines:</b> All blank lines will be skipped.</li>
* </ul>
* <p>
* Port of lang.php
*
* @since 1.6
* @version $Id$
*/
public class Lang {
// Implementation note: This class is divided into two sections. The first part is a static factory interface that
// exposes the LANGUAGE_RULES_RN resource as a Lang instance. The second part is the Lang instance methods that
// encapsulate a particular language-guessing rule table and the language guessing itself.
//
// It may make sense in the future to expose the private constructor to allow power users to build custom language-
// guessing rules, perhaps by marking it protected and allowing sub-classing. However, the vast majority of users
// should be strongly encouraged to use the static factory <code>instance</code> method to get their Lang instances.
private static final class LangRule {
    /** Regular expression that is searched for in the candidate text. */
    private final Pattern pattern;
    /** Languages this rule refers to. */
    private final Set<String> languages;
    /** Whether a match rules the languages in (<code>true</code>) or out (<code>false</code>). */
    private final boolean acceptOnMatch;

    /**
     * Stores the three components of one rule as given.
     *
     * @param pattern
     *            the regular expression of the rule
     * @param languages
     *            the languages the rule refers to
     * @param acceptOnMatch
     *            whether a match accepts or rejects those languages
     */
    private LangRule(final Pattern pattern, final Set<String> languages, final boolean acceptOnMatch) {
        this.acceptOnMatch = acceptOnMatch;
        this.languages = languages;
        this.pattern = pattern;
    }

    /**
     * Tests whether the rule's pattern occurs anywhere in the given text (a partial match is sufficient).
     *
     * @param txt
     *            the text to search
     * @return <code>true</code> if the pattern is found in <code>txt</code>
     */
    public boolean matches(final String txt) {
        final boolean found = pattern.matcher(txt).find();
        return found;
    }
}
private static final Map<NameType, Lang> Langs = new EnumMap<NameType, Lang>(NameType.class);
private static final String LANGUAGE_RULES_RN = "org/apache/commons/codec/language/bm/%s_lang.txt";
static {
for (final NameType s : NameType.values()) {
Langs.put(s, loadFromResource(String.format(LANGUAGE_RULES_RN, s.getName()), Languages.getInstance(s)));
}
}
/**
* Gets a Lang instance for one of the supported NameTypes.
*
* @param nameType
* the NameType to look up
* @return a Lang encapsulating the language guessing rules for that name type
*/
public static Lang instance(final NameType nameType) {
return Langs.get(nameType);
}
/**
* Loads language rules from a resource.
* <p>
* In normal use, you will obtain instances of Lang through the {@link #instance(NameType)} method.
* You will only need to call this yourself if you are developing custom language mapping rules.
*
* @param languageRulesResourceName
* the fully-qualified resource name to load
* @param languages
* the languages that these rules will support
* @return a Lang encapsulating the loaded language-guessing rules.
*/
public static Lang loadFromResource(final String languageRulesResourceName, final Languages languages) {
final List<LangRule> rules = new ArrayList<LangRule>();
final InputStream lRulesIS = Lang.class.getClassLoader().getResourceAsStream(languageRulesResourceName);
if (lRulesIS == null) {
throw new IllegalStateException("Unable to resolve required resource:" + LANGUAGE_RULES_RN);
}
final Scanner scanner = new Scanner(lRulesIS, ResourceConstants.ENCODING);
try {
boolean inExtendedComment = false;
while (scanner.hasNextLine()) {
final String rawLine = scanner.nextLine();
String line = rawLine;
if (inExtendedComment) {
// check for closing comment marker, otherwise discard doc comment line
if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
inExtendedComment = false;
}
} else {
if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
inExtendedComment = true;
} else {
// discard comments
final int cmtI = line.indexOf(ResourceConstants.CMT);
if (cmtI >= 0) {
line = line.substring(0, cmtI);
}
// trim leading-trailing whitespace
line = line.trim();
if (line.length() == 0) {
continue; // empty lines can be safely skipped
}
// split it up
final String[] parts = line.split("\\s+");
if (parts.length != 3) {
throw new IllegalArgumentException("Malformed line '" + rawLine +
"' in language resource '" + languageRulesResourceName + "'");
}
final Pattern pattern = Pattern.compile(parts[0]);
final String[] langs = parts[1].split("\\+");
final boolean accept = parts[2].equals("true");
rules.add(new LangRule(pattern, new HashSet<String>(Arrays.asList(langs)), accept));
}
}
}
} finally {
scanner.close();
}
return new Lang(rules, languages);
}
private final Languages languages;
private final List<LangRule> rules;
private Lang(final List<LangRule> rules, final Languages languages) {
this.rules = Collections.unmodifiableList(rules);
this.languages = languages;
}
/**
* Guesses the language of a word.
*
* @param text
* the word
* @return the language that the word originates from or {@link Languages#ANY} if there was no unique match
*/
public String guessLanguage(final String text) {
final Languages.LanguageSet ls = guessLanguages(text);
return ls.isSingleton() ? ls.getAny() : Languages.ANY;
}
/**
* Guesses the languages of a word.
*
* @param input
* the word
* @return a Set of Strings of language names that are potential matches for the input word
*/
public Languages.LanguageSet guessLanguages(final String input) {
final String text = input.toLowerCase(Locale.ENGLISH);
final Set<String> langs = new HashSet<String>(this.languages.getLanguages());
for (final LangRule rule : this.rules) {
if (rule.matches(text)) {
if (rule.acceptOnMatch) {
langs.retainAll(rule.languages);
} else {
langs.removeAll(rule.languages);
}
}
}
final Languages.LanguageSet ls = Languages.LanguageSet.from(langs);
return ls.equals(Languages.NO_LANGUAGES) ? Languages.ANY_LANGUAGE : ls;
}
}

View File

@ -0,0 +1,295 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language.bm;
import java.io.InputStream;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.Set;
/**
 * Language codes.
 * <p>
 * Language codes are typically loaded from resource files. These are UTF-8 encoded text files. They are
 * systematically named following the pattern:
 * <blockquote>org/apache/commons/codec/language/bm/${NameType#getName}_languages.txt</blockquote>
 * <p>
 * The format of these resources is the following:
 * <ul>
 * <li><b>Language:</b> a single string containing no whitespace</li>
 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be
 * discarded as a comment.</li>
 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode.
 * This will skip all content until a line ending in '*' and '/' is found.</li>
 * <li><b>Blank lines:</b> All blank lines will be skipped.</li>
 * </ul>
 * <p>
 * Ported from language.php
 * <p>
 * This class is immutable and thread-safe.
 *
 * @since 1.6
 * @version $Id$
 */
public class Languages {
    // Implementation note: This class is divided into two sections. The first part is a static factory interface that
    // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in NameType.* as a list of supported
    // languages, and a second part that provides instance methods for accessing this set for supported languages.

    /**
     * A set of languages.
     */
    public static abstract class LanguageSet {

        /**
         * Wraps a set of language names.
         *
         * @param langs
         *            the language names
         * @return {@link Languages#NO_LANGUAGES} if <code>langs</code> is empty, otherwise a set backed by it
         */
        public static LanguageSet from(final Set<String> langs) {
            return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs);
        }

        /** @return true if the given language belongs to this set */
        public abstract boolean contains(String language);

        /** @return some member of this set; the empty set and the any-language set throw NoSuchElementException */
        public abstract String getAny();

        /** @return true if this set holds no language */
        public abstract boolean isEmpty();

        /** @return true if this set holds exactly one language */
        public abstract boolean isSingleton();

        /** @return the languages common to this set and <code>other</code> */
        public abstract LanguageSet restrictTo(LanguageSet other);

        /** Combines this set with <code>other</code>; see the implementations for the exact result. */
        abstract LanguageSet merge(LanguageSet other);
    }

    /**
     * Some languages, explicitly enumerated.
     */
    public static final class SomeLanguages extends LanguageSet {
        private final Set<String> languages;

        private SomeLanguages(final Set<String> languages) {
            // read-only view; the caller's set is wrapped, not copied
            this.languages = Collections.unmodifiableSet(languages);
        }

        @Override
        public boolean contains(final String language) {
            return this.languages.contains(language);
        }

        @Override
        public String getAny() {
            return this.languages.iterator().next();
        }

        public Set<String> getLanguages() {
            return this.languages;
        }

        @Override
        public boolean isEmpty() {
            return this.languages.isEmpty();
        }

        @Override
        public boolean isSingleton() {
            return this.languages.size() == 1;
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            if (other == NO_LANGUAGES) {
                return other;
            } else if (other == ANY_LANGUAGE) {
                return this;
            } else {
                // intersection of the two explicit sets
                final SomeLanguages sl = (SomeLanguages) other;
                final Set<String> ls = new HashSet<String>(Math.min(languages.size(), sl.languages.size()));
                for (String lang : languages) {
                    if (sl.languages.contains(lang)) {
                        ls.add(lang);
                    }
                }
                return from(ls);
            }
        }

        @Override
        public LanguageSet merge(final LanguageSet other) {
            if (other == NO_LANGUAGES) {
                return this;
            } else if (other == ANY_LANGUAGE) {
                return other;
            } else {
                // union of the two explicit sets
                final SomeLanguages sl = (SomeLanguages) other;
                final Set<String> ls = new HashSet<String>(languages);
                for (String lang : sl.languages) {
                    ls.add(lang);
                }
                return from(ls);
            }
        }

        @Override
        public String toString() {
            return "Languages(" + languages.toString() + ")";
        }
    }

    /** Name returned by {@link Lang#guessLanguage(String)} when no single language could be determined. */
    public static final String ANY = "any";

    /** One Languages instance per NameType, populated once by the static initializer below. */
    private static final Map<NameType, Languages> LANGUAGES = new EnumMap<NameType, Languages>(NameType.class);

    static {
        for (final NameType s : NameType.values()) {
            LANGUAGES.put(s, getInstance(langResourceName(s)));
        }
    }

    /**
     * Gets the languages supported for a name type.
     *
     * @param nameType
     *            the NameType to look up
     * @return the Languages loaded for that name type
     */
    public static Languages getInstance(final NameType nameType) {
        return LANGUAGES.get(nameType);
    }

    /**
     * Loads a list of languages from a resource, one language per line.
     * <p>
     * Multi-line comments, end-of-line '//' comments and blank lines are skipped.
     *
     * @param languagesResourceName
     *            the fully-qualified resource name to load
     * @return a Languages holding an unmodifiable set of the language names found
     * @throws IllegalArgumentException
     *             if the resource cannot be found on the class path
     */
    public static Languages getInstance(final String languagesResourceName) {
        // read languages list
        final Set<String> ls = new HashSet<String>();
        final InputStream langIS = Languages.class.getClassLoader().getResourceAsStream(languagesResourceName);
        if (langIS == null) {
            throw new IllegalArgumentException("Unable to resolve required resource: " + languagesResourceName);
        }
        final Scanner lsScanner = new Scanner(langIS, ResourceConstants.ENCODING);
        try {
            boolean inExtendedComment = false;
            while (lsScanner.hasNextLine()) {
                String line = lsScanner.nextLine().trim();
                if (inExtendedComment) {
                    if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
                        inExtendedComment = false;
                    }
                    continue;
                }
                if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
                    inExtendedComment = true;
                    continue;
                }
                // discard end-of-line comments, as the documented resource format promises
                final int cmtI = line.indexOf(ResourceConstants.CMT);
                if (cmtI >= 0) {
                    line = line.substring(0, cmtI).trim();
                }
                if (line.length() > 0) {
                    ls.add(line);
                }
            }
        } finally {
            lsScanner.close();
        }
        return new Languages(Collections.unmodifiableSet(ls));
    }

    /** Builds the resource name of the languages file for a name type. */
    private static String langResourceName(final NameType nameType) {
        return String.format("org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName());
    }

    private final Set<String> languages;

    /**
     * No languages at all.
     */
    public static final LanguageSet NO_LANGUAGES = new LanguageSet() {
        @Override
        public boolean contains(final String language) {
            return false;
        }

        @Override
        public String getAny() {
            throw new NoSuchElementException("Can't fetch any language from the empty language set.");
        }

        @Override
        public boolean isEmpty() {
            return true;
        }

        @Override
        public boolean isSingleton() {
            return false;
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            return this;
        }

        @Override
        public LanguageSet merge(final LanguageSet other) {
            return other;
        }

        @Override
        public String toString() {
            return "NO_LANGUAGES";
        }
    };

    /**
     * Any/all languages.
     */
    public static final LanguageSet ANY_LANGUAGE = new LanguageSet() {
        @Override
        public boolean contains(final String language) {
            return true;
        }

        @Override
        public String getAny() {
            throw new NoSuchElementException("Can't fetch any language from the any language set.");
        }

        @Override
        public boolean isEmpty() {
            return false;
        }

        @Override
        public boolean isSingleton() {
            return false;
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            return other;
        }

        @Override
        public LanguageSet merge(final LanguageSet other) {
            // NOTE(review): SomeLanguages.merge(ANY_LANGUAGE) yields ANY_LANGUAGE, but this direction yields
            // 'other', so merge is not symmetric - confirm this is intended before relying on it as a union.
            return other;
        }

        @Override
        public String toString() {
            return "ANY_LANGUAGE";
        }
    };

    private Languages(final Set<String> languages) {
        this.languages = languages;
    }

    /**
     * Gets the names of all languages held by this instance.
     *
     * @return the set of language names as passed to the constructor
     */
    public Set<String> getLanguages() {
        return this.languages;
    }
}

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language.bm;
/**
 * The kinds of names the encoder can be tuned for. {@link #GENERIC} is the one to pick unless particular
 * family names are being matched; it should also cope reasonably well with words that are not names. The
 * remaining types are tuned specifically to family names and may perform poorly on general text.
 *
 * @since 1.6
 * @version $Id$
 */
public enum NameType {

    /** Ashkenazi family names */
    ASHKENAZI("ash"),

    /** Generic names and words */
    GENERIC("gen"),

    /** Sephardic family names */
    SEPHARDIC("sep");

    /** Short identifier of this name type, as handed to the constructor. */
    private final String shortName;

    NameType(final String shortName) {
        this.shortName = shortName;
    }

    /**
     * Returns the abbreviated form of this name type.
     *
     * @return the NameType short string
     */
    public String getName() {
        return shortName;
    }
}

View File

@ -0,0 +1,529 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language.bm;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.codec.language.bm.Languages.LanguageSet;
import org.apache.commons.codec.language.bm.Rule.Phoneme;
/**
 * Converts words into potential phonetic representations.
 * <p>
 * This is a two-stage process. Firstly, the word is converted into a phonetic representation that takes
 * into account the likely source language. Next, this phonetic representation is converted into a
 * pan-European 'average' representation, allowing comparison between different versions of essentially
 * the same word from different languages.
 * <p>
 * This class is intentionally immutable and thread-safe.
 * If you wish to alter the settings for a PhoneticEngine, you
 * must make a new one with the updated settings.
 * <p>
 * Ported from phoneticengine.php
 *
 * @since 1.6
 * @version $Id$
 */
public class PhoneticEngine {

    /**
     * Utility for manipulating a set of phonemes as they are being built up. Not intended for use outside
     * this package, and probably not outside the {@link PhoneticEngine} class.
     *
     * @since 1.6
     */
    static final class PhonemeBuilder {

        /**
         * An empty builder where all phonemes must come from some set of languages. This will contain a single
         * phoneme of zero characters. This can then be appended to. This should be the only way to create a new
         * phoneme from scratch.
         *
         * @param languages the set of languages
         * @return a new, empty phoneme builder
         */
        public static PhonemeBuilder empty(final Languages.LanguageSet languages) {
            return new PhonemeBuilder(new Rule.Phoneme("", languages));
        }

        private final Set<Rule.Phoneme> phonemes;

        private PhonemeBuilder(final Rule.Phoneme phoneme) {
            this.phonemes = new LinkedHashSet<Rule.Phoneme>();
            this.phonemes.add(phoneme);
        }

        /** Wraps the given set directly (no copy); the builder is only as mutable as that set. */
        private PhonemeBuilder(final Set<Rule.Phoneme> phonemes) {
            this.phonemes = phonemes;
        }

        /**
         * Appends <code>str</code> to every phoneme held by this builder, in place.
         *
         * @param str the characters to append to the phonemes
         */
        public void append(final CharSequence str) {
            for (final Rule.Phoneme ph : this.phonemes) {
                ph.append(str);
            }
        }

        /**
         * Applies the given phoneme expression to all phonemes in this phoneme builder.
         * <p>
         * This will lengthen phonemes that have compatible language sets to the expression, and drop those that are
         * incompatible. The builder's phoneme set is replaced in place with the result.
         *
         * @param phonemeExpr the expression to apply
         * @param maxPhonemes the maximum number of phonemes to build up
         */
        public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
            final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<Rule.Phoneme>(maxPhonemes);
            EXPR: for (final Rule.Phoneme left : this.phonemes) {
                for (final Rule.Phoneme right : phonemeExpr.getPhonemes()) {
                    final LanguageSet languages = left.getLanguages().restrictTo(right.getLanguages());
                    if (!languages.isEmpty()) {
                        final Rule.Phoneme join = new Phoneme(left, right, languages);
                        if (newPhonemes.size() < maxPhonemes) {
                            newPhonemes.add(join);
                            if (newPhonemes.size() >= maxPhonemes) {
                                // limit reached: stop combining altogether
                                break EXPR;
                            }
                        }
                    }
                }
            }
            this.phonemes.clear();
            this.phonemes.addAll(newPhonemes);
        }

        /**
         * Gets underlying phoneme set. Please don't mutate.
         *
         * @return the phoneme set
         */
        public Set<Rule.Phoneme> getPhonemes() {
            return this.phonemes;
        }

        /**
         * Stringifies the phoneme set. This produces a single string of the strings of each phoneme,
         * joined with a pipe. This is explicitly provided in place of toString as it is a potentially
         * expensive operation, which should be avoided when debugging.
         *
         * @return the stringified phoneme set
         */
        public String makeString() {
            final StringBuilder sb = new StringBuilder();
            for (final Rule.Phoneme ph : this.phonemes) {
                if (sb.length() > 0) {
                    sb.append("|");
                }
                sb.append(ph.getPhonemeText());
            }
            return sb.toString();
        }
    }

    /**
     * A function closure capturing the application of a list of rules to an input sequence at a particular offset.
     * After invocation, the values <code>i</code> and <code>found</code> are updated. <code>i</code> points to the
     * index of the next char in <code>input</code> that must be processed next (the input up to that index having been
     * processed already), and <code>found</code> indicates if a matching rule was found or not. In the case where a
     * matching rule was found, the phonemes held by <code>phonemeBuilder</code> are updated in place by that rule.
     *
     * Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads
     * as it is constructed as needed by the calling methods.
     * @since 1.6
     */
    private static final class RulesApplication {
        private final Map<String, List<Rule>> finalRules;
        private final CharSequence input;
        private PhonemeBuilder phonemeBuilder;
        private int i;
        private final int maxPhonemes;
        private boolean found;

        public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input,
                                final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) {
            if (finalRules == null) {
                throw new NullPointerException("The finalRules argument must not be null");
            }
            this.finalRules = finalRules;
            this.phonemeBuilder = phonemeBuilder;
            this.input = input;
            this.i = i;
            this.maxPhonemes = maxPhonemes;
        }

        public int getI() {
            return this.i;
        }

        public PhonemeBuilder getPhonemeBuilder() {
            return this.phonemeBuilder;
        }

        /**
         * Invokes the rules. Loops over the rules list, stopping at the first one that has a matching context
         * and pattern. Then applies this rule to the phoneme builder to produce updated phonemes. If there was no
         * match, <code>i</code> is advanced by one; what happens to the unmatched character is up to the caller.
         *
         * @return <code>this</code>
         */
        public RulesApplication invoke() {
            this.found = false;
            int patternLength = 1;
            // Candidate rules are looked up by the single character at position i. The map is keyed by String,
            // so convert explicitly: a non-String CharSequence would otherwise never equal any key.
            final String key = this.input.subSequence(this.i, this.i + patternLength).toString();
            final List<Rule> rules = this.finalRules.get(key);
            if (rules != null) {
                for (final Rule rule : rules) {
                    final String pattern = rule.getPattern();
                    patternLength = pattern.length();
                    if (rule.patternAndContextMatches(this.input, this.i)) {
                        this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
                        this.found = true;
                        break;
                    }
                }
            }
            if (!this.found) {
                // no rule consumed anything: step over exactly one character
                patternLength = 1;
            }
            this.i += patternLength;
            return this;
        }

        public boolean isFound() {
            return this.found;
        }
    }

    /** Name prefixes recognised for each name type; see {@link #encode(String, Languages.LanguageSet)}. */
    private static final Map<NameType, Set<String>> NAME_PREFIXES = new EnumMap<NameType, Set<String>>(NameType.class);

    static {
        NAME_PREFIXES.put(NameType.ASHKENAZI,
                Collections.unmodifiableSet(
                        new HashSet<String>(Arrays.asList("bar", "ben", "da", "de", "van", "von"))));
        NAME_PREFIXES.put(NameType.SEPHARDIC,
                Collections.unmodifiableSet(
                        new HashSet<String>(Arrays.asList("al", "el", "da", "dal", "de", "del", "dela", "de la",
                                "della", "des", "di", "do", "dos", "du", "van", "von"))));
        NAME_PREFIXES.put(NameType.GENERIC,
                Collections.unmodifiableSet(
                        new HashSet<String>(Arrays.asList("da", "dal", "de", "del", "dela", "de la", "della",
                                "des", "di", "do", "dos", "du", "van", "von"))));
    }

    /**
     * Joins some strings with an internal separator.
     * @param strings Strings to join
     * @param sep String to separate them with
     * @return a single String consisting of each element of <code>strings</code> interleaved by <code>sep</code>
     */
    private static String join(final Iterable<String> strings, final String sep) {
        final StringBuilder sb = new StringBuilder();
        final Iterator<String> si = strings.iterator();
        if (si.hasNext()) {
            sb.append(si.next());
        }
        while (si.hasNext()) {
            sb.append(sep).append(si.next());
        }
        return sb.toString();
    }

    private static final int DEFAULT_MAX_PHONEMES = 20;

    private final Lang lang;
    private final NameType nameType;
    private final RuleType ruleType;
    private final boolean concat;
    private final int maxPhonemes;

    /**
     * Generates a new, fully-configured phonetic engine using the default phoneme limit.
     *
     * @param nameType
     *            the type of names it will use
     * @param ruleType
     *            the type of rules it will apply
     * @param concat
     *            if it will concatenate multiple encodings
     */
    public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat) {
        this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES);
    }

    /**
     * Generates a new, fully-configured phonetic engine.
     *
     * @param nameType
     *            the type of names it will use
     * @param ruleType
     *            the type of rules it will apply
     * @param concat
     *            if it will concatenate multiple encodings
     * @param maxPhonemes
     *            the maximum number of phonemes that will be handled
     * @throws IllegalArgumentException
     *             if <code>ruleType</code> is {@link RuleType#RULES}
     * @since 1.7
     */
    public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat,
                          final int maxPhonemes) {
        if (ruleType == RuleType.RULES) {
            throw new IllegalArgumentException("ruleType must not be " + RuleType.RULES);
        }
        this.nameType = nameType;
        this.ruleType = ruleType;
        this.concat = concat;
        this.lang = Lang.instance(nameType);
        this.maxPhonemes = maxPhonemes;
    }

    /**
     * Applies the final rules to convert from a language-specific phonetic representation to a
     * language-independent representation.
     *
     * @param phonemeBuilder the current phonemes
     * @param finalRules the final rules to apply
     * @return the resulting phonemes; the same builder if <code>finalRules</code> is empty
     */
    private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder,
                                           final Map<String, List<Rule>> finalRules) {
        if (finalRules == null) {
            throw new NullPointerException("finalRules can not be null");
        }
        if (finalRules.isEmpty()) {
            return phonemeBuilder;
        }
        final Map<Rule.Phoneme, Rule.Phoneme> phonemes =
                new TreeMap<Rule.Phoneme, Rule.Phoneme>(Rule.Phoneme.COMPARATOR);
        for (final Rule.Phoneme phoneme : phonemeBuilder.getPhonemes()) {
            PhonemeBuilder subBuilder = PhonemeBuilder.empty(phoneme.getLanguages());
            final String phonemeText = phoneme.getPhonemeText().toString();
            for (int i = 0; i < phonemeText.length();) {
                final RulesApplication rulesApplication =
                        new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).invoke();
                final boolean found = rulesApplication.isFound();
                subBuilder = rulesApplication.getPhonemeBuilder();
                if (!found) {
                    // not found, appending as-is
                    subBuilder.append(phonemeText.subSequence(i, i + 1));
                }
                i = rulesApplication.getI();
            }
            // the phonemes map orders the phonemes only based on their text, but ignores the language set
            // when adding new phonemes, check for equal phonemes and merge their language set, otherwise
            // phonemes with the same text but different language set get lost
            for (final Rule.Phoneme newPhoneme : subBuilder.getPhonemes()) {
                if (phonemes.containsKey(newPhoneme)) {
                    final Rule.Phoneme oldPhoneme = phonemes.remove(newPhoneme);
                    final Rule.Phoneme mergedPhoneme = oldPhoneme.mergeWithLanguage(newPhoneme.getLanguages());
                    phonemes.put(mergedPhoneme, mergedPhoneme);
                } else {
                    phonemes.put(newPhoneme, newPhoneme);
                }
            }
        }
        // The builder wraps the TreeMap key set directly; that view does not support add/addAll, so the
        // returned builder must only be read from (getPhonemes/makeString), never passed to apply().
        return new PhonemeBuilder(phonemes.keySet());
    }

    /**
     * Encodes a string to its phonetic representation, guessing the candidate languages from the input.
     *
     * @param input
     *            the String to encode
     * @return the encoding of the input
     */
    public String encode(final String input) {
        final Languages.LanguageSet languageSet = this.lang.guessLanguages(input);
        return encode(input, languageSet);
    }

    /**
     * Encodes an input string into an output phonetic representation, given a set of possible origin languages.
     *
     * @param input
     *            String to phoneticise; a String with dashes or spaces separating each word
     * @param languageSet
     *            set of possible origin languages
     * @return a phonetic representation of the input; a String containing '-'-separated phonetic representations of the
     *         input
     */
    public String encode(String input, final Languages.LanguageSet languageSet) {
        final Map<String, List<Rule>> rules = Rule.getInstanceMap(this.nameType, RuleType.RULES, languageSet);
        // rules common across many (all) languages
        final Map<String, List<Rule>> finalRules1 = Rule.getInstanceMap(this.nameType, this.ruleType, "common");
        // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
        final Map<String, List<Rule>> finalRules2 = Rule.getInstanceMap(this.nameType, this.ruleType, languageSet);

        // tidy the input
        // lower case is a locale-dependent operation
        input = input.toLowerCase(Locale.ENGLISH).replace('-', ' ').trim();

        if (this.nameType == NameType.GENERIC) {
            if (input.startsWith("d'")) { // check for d'
                final String remainder = input.substring(2);
                final String combined = "d" + remainder;
                return "(" + encode(remainder) + ")-(" + encode(combined) + ")";
            }
            for (final String l : NAME_PREFIXES.get(this.nameType)) {
                // handle generic prefixes
                if (input.startsWith(l + " ")) {
                    // check for any prefix in the words list
                    final String remainder = input.substring(l.length() + 1); // input without the prefix
                    final String combined = l + remainder; // input with prefix without space
                    return "(" + encode(remainder) + ")-(" + encode(combined) + ")";
                }
            }
        }

        final List<String> words = Arrays.asList(input.split("\\s+"));
        final List<String> words2 = new ArrayList<String>();

        // special-case handling of word prefixes based upon the name type
        switch (this.nameType) {
        case SEPHARDIC:
            for (final String aWord : words) {
                final String[] parts = aWord.split("'");
                // a word made only of apostrophes splits into an empty array; treat it as an empty last part
                final String lastPart = parts.length == 0 ? "" : parts[parts.length - 1];
                words2.add(lastPart);
            }
            words2.removeAll(NAME_PREFIXES.get(this.nameType));
            break;
        case ASHKENAZI:
            words2.addAll(words);
            words2.removeAll(NAME_PREFIXES.get(this.nameType));
            break;
        case GENERIC:
            words2.addAll(words);
            break;
        default:
            throw new IllegalStateException("Unreachable case: " + this.nameType);
        }

        if (this.concat) {
            // concat mode enabled
            input = join(words2, " ");
        } else if (words2.size() == 1) {
            // not a multi-word name
            // NOTE(review): this takes the first ORIGINAL word rather than the single entry of words2, so a
            // removed prefix or apostrophe part can reappear - confirm against the reference implementation.
            input = words.iterator().next();
        } else if (!words2.isEmpty()) {
            // encode each word in a multi-word name separately (normally used for approx matches)
            final StringBuilder result = new StringBuilder();
            for (final String word : words2) {
                result.append("-").append(encode(word));
            }
            // return the result without the leading "-"
            return result.substring(1);
        }
        // else: prefix removal left no words at all (e.g. the input is just a prefix); fall through and
        // encode the tidied input as a single word instead of failing on an empty result

        PhonemeBuilder phonemeBuilder = PhonemeBuilder.empty(languageSet);

        // loop over each char in the input - we will handle the increment manually
        for (int i = 0; i < input.length();) {
            final RulesApplication rulesApplication =
                    new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).invoke();
            i = rulesApplication.getI();
            phonemeBuilder = rulesApplication.getPhonemeBuilder();
        }

        // Apply the general rules
        phonemeBuilder = applyFinalRules(phonemeBuilder, finalRules1);
        // Apply the language-specific rules
        phonemeBuilder = applyFinalRules(phonemeBuilder, finalRules2);

        return phonemeBuilder.makeString();
    }

    /**
     * Gets the Lang language guessing rules being used.
     *
     * @return the Lang in use
     */
    public Lang getLang() {
        return this.lang;
    }

    /**
     * Gets the NameType being used.
     *
     * @return the NameType in use
     */
    public NameType getNameType() {
        return this.nameType;
    }

    /**
     * Gets the RuleType being used.
     *
     * @return the RuleType in use
     */
    public RuleType getRuleType() {
        return this.ruleType;
    }

    /**
     * Gets whether the words of a multi-word input are joined and encoded as one string, or encoded
     * separately and joined with '-'.
     *
     * @return true if the words are joined before encoding, false if each word is encoded on its own
     */
    public boolean isConcat() {
        return this.concat;
    }

    /**
     * Gets the maximum number of phonemes the engine will calculate for a given input.
     *
     * @return the maximum number of phonemes
     * @since 1.7
     */
    public int getMaxPhonemes() {
        return this.maxPhonemes;
    }
}

View File

@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language.bm;
import org.apache.commons.codec.CharEncoding;
/**
 * Shared constants for parsing the text resource files of this package: the character encoding and the
 * comment markers.
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @since 1.6
 * @version $Id$
 */
class ResourceConstants {

    /** Character encoding used when reading resource files. */
    static final String ENCODING = CharEncoding.UTF_8;

    /** Marker that starts an end-of-line comment. */
    static final String CMT = "//";

    /** Marker that opens a multi-line comment. */
    static final String EXT_CMT_START = "/*";

    /** Marker that closes a multi-line comment. */
    static final String EXT_CMT_END = "*/";
}

View File

@ -0,0 +1,720 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language.bm;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.language.bm.Languages.LanguageSet;
/**
* A phoneme rule.
* <p>
* Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
* and a logical flag indicating if all languages must be in play. A rule matches if:
* <ul>
* <li>the pattern matches at the current position</li>
* <li>the string up until the beginning of the pattern matches the left context</li>
* <li>the string from the end of the pattern matches the right context</li>
* <li>logical is ALL and all languages are in scope; or</li>
* <li>logical is any other value and at least one language is in scope</li>
* </ul>
* <p>
* Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
* to explicitly construct their own.
* <p>
* Rules are immutable and thread-safe.
* <p>
* <b>Rules resources</b>
* <p>
* Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
* named following the pattern:
* <blockquote>org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote>
* <p>
* The format of these resources is the following:
* <ul>
* <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
* will be interpreted as:
* <ol>
* <li>pattern</li>
* <li>left context</li>
* <li>right context</li>
* <li>phoneme</li>
* </ol>
* </li>
* <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded
* as a comment.</li>
* <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. This will skip
* all content until a line ending in '*' and '/' is found.</li>
* <li><b>Blank lines:</b> All blank lines will be skipped.</li>
* </ul>
*
* @since 1.6
* @version $Id$
*/
public class Rule {
/**
 * A piece of phoneme text together with the set of languages it applies to.
 * <p>
 * The text is held in a {@link StringBuilder}, so {@link #append(CharSequence)} modifies this instance in place.
 */
public static final class Phoneme implements PhonemeExpr {
    /**
     * Orders phonemes by their text, comparing character by character. When one text is a prefix of the
     * other, the shorter text sorts first. The language sets take no part in the comparison.
     */
    public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() {
        @Override
        public int compare(final Phoneme o1, final Phoneme o2) {
            final CharSequence left = o1.phonemeText;
            final CharSequence right = o2.phonemeText;
            final int leftLength = left.length();
            final int rightLength = right.length();
            final int shared = Math.min(leftLength, rightLength);
            for (int pos = 0; pos < shared; pos++) {
                final int diff = left.charAt(pos) - right.charAt(pos);
                if (diff != 0) {
                    return diff;
                }
            }
            // the common prefix is identical, so the length decides
            if (leftLength > rightLength) {
                return +1;
            }
            if (leftLength < rightLength) {
                return -1;
            }
            return 0;
        }
    };

    private final StringBuilder phonemeText;
    private final Languages.LanguageSet languages;

    /**
     * Creates a phoneme from a copy of the given text and the given language set.
     *
     * @param phonemeText the phoneme text; its characters are copied
     * @param languages the languages this phoneme applies to
     */
    public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) {
        this.phonemeText = new StringBuilder(phonemeText);
        this.languages = languages;
    }

    /**
     * Creates a phoneme whose text is the left text followed by the right text, carrying the left
     * phoneme's language set.
     *
     * @param phonemeLeft the phoneme supplying the leading text and the languages
     * @param phonemeRight the phoneme supplying the trailing text
     */
    public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) {
        this(phonemeLeft.phonemeText, phonemeLeft.languages);
        phonemeText.append(phonemeRight.phonemeText);
    }

    /**
     * Creates a phoneme whose text is the left text followed by the right text, with an explicit language set.
     *
     * @param phonemeLeft the phoneme supplying the leading text
     * @param phonemeRight the phoneme supplying the trailing text
     * @param languages the languages of the new phoneme
     */
    public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) {
        this(phonemeLeft.phonemeText, languages);
        phonemeText.append(phonemeRight.phonemeText);
    }

    /**
     * Appends text to this phoneme in place.
     *
     * @param str the text to append
     * @return this instance
     */
    public Phoneme append(final CharSequence str) {
        phonemeText.append(str);
        return this;
    }

    /**
     * Gets the language set of this phoneme.
     *
     * @return the language set
     */
    public Languages.LanguageSet getLanguages() {
        return languages;
    }

    @Override
    public Iterable<Phoneme> getPhonemes() {
        return Collections.singleton(this);
    }

    /**
     * Gets the phoneme text. The returned sequence is the internal buffer, not a copy.
     *
     * @return the phoneme text
     */
    public CharSequence getPhonemeText() {
        return phonemeText;
    }

    /**
     * Deprecated since 1.9.
     *
     * @param right the Phoneme to join
     * @return a new Phoneme whose text is this text followed by the right text and whose languages are
     *         this language set restricted to the right one
     * @deprecated since 1.9
     */
    @Deprecated
    public Phoneme join(final Phoneme right) {
        final String joinedText = phonemeText.toString() + right.phonemeText.toString();
        final Languages.LanguageSet restricted = languages.restrictTo(right.languages);
        return new Phoneme(joinedText, restricted);
    }

    /**
     * Returns a new Phoneme with the same text but a union of its
     * current language set and the given one.
     *
     * @param lang the language set to merge
     * @return a new Phoneme
     */
    public Phoneme mergeWithLanguage(final LanguageSet lang) {
        final String text = phonemeText.toString();
        return new Phoneme(text, languages.merge(lang));
    }

    @Override
    public String toString() {
        final StringBuilder out = new StringBuilder(phonemeText.toString());
        out.append("[").append(languages).append("]");
        return out.toString();
    }
}
/**
 * Something that can supply phonemes. In this file it is implemented by {@code Phoneme} (which supplies
 * itself) and {@code PhonemeList} (which supplies its list).
 */
public interface PhonemeExpr {
/**
 * Gets the phonemes represented by this expression.
 *
 * @return the phonemes
 */
Iterable<Phoneme> getPhonemes();
}
/**
 * A phoneme expression backed by a list of alternatives. The list passed to the constructor is stored
 * and returned as-is, without copying.
 */
public static final class PhonemeList implements PhonemeExpr {
    private final List<Phoneme> phonemes;

    /**
     * Wraps the given list of phonemes.
     *
     * @param phonemes the phonemes to expose
     */
    public PhonemeList(final List<Phoneme> phonemes) {
        this.phonemes = phonemes;
    }

    /**
     * Gets the wrapped list.
     *
     * @return the same list instance that was passed to the constructor
     */
    @Override
    public List<Phoneme> getPhonemes() {
        return phonemes;
    }
}
/**
 * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations.
 */
public interface RPattern {
/**
 * Tests whether the input satisfies this pattern.
 *
 * @param input the character sequence to test
 * @return true if the input matches
 */
boolean isMatch(CharSequence input);
}
/** An {@link RPattern} that reports a match for every input. */
public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() {
@Override
public boolean isMatch(final CharSequence input) {
return true;
}
};
/** The literal "ALL". NOTE(review): presumably the 'logical' flag value described in the class comment; it is not referenced in this part of the file. */
public static final String ALL = "ALL";
/** Quote character removed from both ends of each rule column by stripQuotes. */
private static final String DOUBLE_QUOTE = "\"";
/** Prefix that marks an include statement in a rules resource. */
private static final String HASH_INCLUDE = "#include";
/**
 * All parsed rules, populated once by the static initializer. The nesting is:
 * name type, then rule type, then language name (plus "common" for rule types other than RULES),
 * then the first character of the rule pattern, then the rules in file order.
 */
private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES =
new EnumMap<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>>(NameType.class);
// Loads every rules resource once, for each combination of name type, rule type and language, and
// publishes the result through unmodifiable maps in RULES.
static {
    for (final NameType s : NameType.values()) {
        final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts =
                new EnumMap<RuleType, Map<String, Map<String, List<Rule>>>>(RuleType.class);
        for (final RuleType rt : RuleType.values()) {
            final Map<String, Map<String, List<Rule>>> rs = new HashMap<String, Map<String, List<Rule>>>();
            final Languages ls = Languages.getInstance(s);
            for (final String l : ls.getLanguages()) {
                final Scanner scanner = createScanner(s, rt, l);
                try {
                    rs.put(l, parseRules(scanner, createResourceName(s, rt, l)));
                } catch (final IllegalStateException e) {
                    throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e);
                } finally {
                    // closing the scanner also releases the underlying resource stream
                    scanner.close();
                }
            }
            if (!rt.equals(RuleType.RULES)) {
                // every rule type except RULES also has a shared "common" resource
                final Scanner scanner = createScanner(s, rt, "common");
                try {
                    rs.put("common", parseRules(scanner, createResourceName(s, rt, "common")));
                } finally {
                    scanner.close();
                }
            }
            rts.put(rt, Collections.unmodifiableMap(rs));
        }
        RULES.put(s, Collections.unmodifiableMap(rts));
    }
}
/**
 * Tells whether a character occurs anywhere in a character sequence.
 *
 * @param chars the sequence to search
 * @param input the character to look for
 * @return true if at least one position of {@code chars} holds {@code input}
 */
private static boolean contains(final CharSequence chars, final char input) {
    int pos = 0;
    while (pos < chars.length()) {
        if (chars.charAt(pos) == input) {
            return true;
        }
        pos++;
    }
    return false;
}
/**
 * Builds the classpath name of the rules resource for a name type, rule type and language.
 *
 * @param nameType the name type whose name forms the first part of the file name
 * @param rt the rule type whose name forms the second part of the file name
 * @param lang the language (or "common") forming the last part of the file name
 * @return the resource name
 */
private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) {
    final String namePart = nameType.getName();
    final String rulePart = rt.getName();
    return String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", namePart, rulePart, lang);
}
/**
 * Opens a scanner over the rules resource for a name type, rule type and language.
 *
 * @param nameType the name type
 * @param rt the rule type
 * @param lang the language (or "common")
 * @return a scanner reading the resource with the encoding named by ResourceConstants.ENCODING
 * @throws IllegalArgumentException if the resource cannot be found on the class path
 */
private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) {
    final String resourceName = createResourceName(nameType, rt, lang);
    final InputStream stream = Languages.class.getClassLoader().getResourceAsStream(resourceName);
    if (stream != null) {
        return new Scanner(stream, ResourceConstants.ENCODING);
    }
    throw new IllegalArgumentException("Unable to load resource: " + resourceName);
}
/**
 * Opens a scanner over a resource addressed by its bare name, as used by include statements.
 *
 * @param lang the resource name without directory and without the ".txt" extension
 * @return a scanner reading the resource with the encoding named by ResourceConstants.ENCODING
 * @throws IllegalArgumentException if the resource cannot be found on the class path
 */
private static Scanner createScanner(final String lang) {
    final String resourceName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang);
    final InputStream stream = Languages.class.getClassLoader().getResourceAsStream(resourceName);
    if (stream != null) {
        return new Scanner(stream, ResourceConstants.ENCODING);
    }
    throw new IllegalArgumentException("Unable to load resource: " + resourceName);
}
/**
 * Tells whether a character sequence ends with the given suffix.
 *
 * @param input the sequence to inspect
 * @param suffix the suffix to look for; an empty suffix always matches
 * @return true if the trailing characters of {@code input} equal {@code suffix}
 */
private static boolean endsWith(final CharSequence input, final CharSequence suffix) {
    final int suffixLength = suffix.length();
    final int offset = input.length() - suffixLength;
    if (offset < 0) {
        // the suffix is longer than the input
        return false;
    }
    // compare from the last character backwards
    int k = suffixLength - 1;
    while (k >= 0) {
        if (input.charAt(offset + k) != suffix.charAt(k)) {
            return false;
        }
        k--;
    }
    return true;
}
/**
 * Gets rules for a combination of name type, rule type and languages, flattened into one list.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a new list holding every rule of the map returned by
 *         {@link #getInstanceMap(NameType, RuleType, Languages.LanguageSet)}
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
        final Languages.LanguageSet langs) {
    final Map<String, List<Rule>> byFirstChar = getInstanceMap(nameType, rt, langs);
    final List<Rule> flattened = new ArrayList<Rule>();
    for (final Map.Entry<String, List<Rule>> entry : byFirstChar.entrySet()) {
        flattened.addAll(entry.getValue());
    }
    return flattened;
}
/**
 * Gets rules for a combination of name type, rule type and a single language.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param lang
 *            the language to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
    // wrap the single language in a set so the LanguageSet overload can be reused
    final HashSet<String> single = new HashSet<String>();
    single.add(lang);
    return getInstance(nameType, rt, LanguageSet.from(single));
}
/**
 * Gets rules for a combination of name type, rule type and languages.
 * <p>
 * A singleton language set selects the rules of that one language; any other set selects the rules
 * stored under {@code Languages.ANY}.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern
 * @since 1.9
 */
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
        final Languages.LanguageSet langs) {
    if (langs.isSingleton()) {
        return getInstanceMap(nameType, rt, langs.getAny());
    }
    return getInstanceMap(nameType, rt, Languages.ANY);
}
/**
 * Gets rules for a combination of name type, rule type and a single language.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param lang
 *            the language to consider
 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern
 * @throws IllegalArgumentException
 *             if no rules are registered under the given language
 * @since 1.9
 */
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
        final String lang) {
    final Map<RuleType, Map<String, Map<String, List<Rule>>>> byRuleType = RULES.get(nameType);
    final Map<String, Map<String, List<Rule>>> byLanguage = byRuleType.get(rt);
    final Map<String, List<Rule>> found = byLanguage.get(lang);
    if (found != null) {
        return found;
    }
    throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.",
            nameType.getName(), rt.getName(), lang));
}
/**
 * Parses a single phoneme of the form {@code text} or {@code text[lang1+lang2]}.
 *
 * @param ph the phoneme source text
 * @return a Phoneme limited to the bracketed languages, or one valid for Languages.ANY_LANGUAGE when
 *         no bracket is present
 * @throws IllegalArgumentException if a '[' is present but the text does not end in ']'
 */
private static Phoneme parsePhoneme(final String ph) {
    final int bracketAt = ph.indexOf("[");
    if (bracketAt < 0) {
        // no language restriction given
        return new Phoneme(ph, Languages.ANY_LANGUAGE);
    }
    if (!ph.endsWith("]")) {
        throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'");
    }
    final String text = ph.substring(0, bracketAt);
    final String languageList = ph.substring(bracketAt + 1, ph.length() - 1);
    final String[] names = languageList.split("[+]");
    final Set<String> langs = new HashSet<String>(Arrays.asList(names));
    return new Phoneme(text, Languages.LanguageSet.from(langs));
}
/**
 * Parses a phoneme expression: either a single phoneme or a parenthesised list of alternatives
 * separated by '|'.
 * <p>
 * When the list body starts or ends with '|', an empty phoneme for Languages.ANY_LANGUAGE is added
 * after the parsed alternatives.
 *
 * @param ph the expression source text
 * @return a Phoneme, or a PhonemeList when the text starts with '('
 * @throws IllegalArgumentException if the text starts with '(' but does not end with ')'
 */
private static PhonemeExpr parsePhonemeExpr(final String ph) {
    if (!ph.startsWith("(")) {
        return parsePhoneme(ph);
    }
    // from here on we have a bracketed list of options
    if (!ph.endsWith(")")) {
        throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'");
    }
    final String body = ph.substring(1, ph.length() - 1);
    final List<Phoneme> alternatives = new ArrayList<Phoneme>();
    final String[] parts = body.split("[|]");
    for (int idx = 0; idx < parts.length; idx++) {
        alternatives.add(parsePhoneme(parts[idx]));
    }
    final boolean hasEmptyAlternative = body.startsWith("|") || body.endsWith("|");
    if (hasEmptyAlternative) {
        alternatives.add(new Phoneme("", Languages.ANY_LANGUAGE));
    }
    return new PhonemeList(alternatives);
}
/**
 * Parses a rules resource into rules grouped by the first character of their pattern.
 * <p>
 * Lines inside a multi-line comment are ignored, end-of-line comments are stripped, and blank lines
 * are skipped. A {@code #include} line parses the named resource recursively and merges its map into
 * the result with {@code putAll}. Any other line must consist of exactly four whitespace separated
 * columns: pattern, left context, right context and phoneme expression.
 * <p>
 * The scanner passed in is not closed here; closing it is left to the caller. Scanners opened for
 * include statements are closed by this method.
 *
 * @param scanner the source of rule lines
 * @param location a description of the resource, used in error messages and in Rule.toString()
 * @return a map from the first character of the pattern to the rules with that first character
 * @throws IllegalArgumentException if an include statement or a rule line is malformed
 * @throws IllegalStateException if the columns of a rule line cannot be parsed
 */
private static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) {
    final Map<String, List<Rule>> lines = new HashMap<String, List<Rule>>();
    int currentLine = 0;
    boolean inMultilineComment = false;
    while (scanner.hasNextLine()) {
        currentLine++;
        final String rawLine = scanner.nextLine();
        String line = rawLine;
        if (inMultilineComment) {
            if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
                inMultilineComment = false;
            }
        } else {
            if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
                inMultilineComment = true;
            } else {
                // discard comments
                final int cmtI = line.indexOf(ResourceConstants.CMT);
                if (cmtI >= 0) {
                    line = line.substring(0, cmtI);
                }
                // trim leading-trailing whitespace
                line = line.trim();
                if (line.length() == 0) {
                    continue; // empty lines can be safely skipped
                }
                if (line.startsWith(HASH_INCLUDE)) {
                    // include statement
                    final String incl = line.substring(HASH_INCLUDE.length()).trim();
                    if (incl.contains(" ")) {
                        throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " +
                                location);
                    } else {
                        final Scanner includeScanner = createScanner(incl);
                        try {
                            lines.putAll(parseRules(includeScanner, location + "->" + incl));
                        } finally {
                            // release the included resource even when parsing it fails
                            includeScanner.close();
                        }
                    }
                } else {
                    // rule
                    final String[] parts = line.split("\\s+");
                    if (parts.length != 4) {
                        throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
                                " parts: " + rawLine + " in " + location);
                    } else {
                        try {
                            final String pat = stripQuotes(parts[0]);
                            final String lCon = stripQuotes(parts[1]);
                            final String rCon = stripQuotes(parts[2]);
                            final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
                            final int cLine = currentLine;
                            // anonymous subclass so toString() can report where the rule came from
                            final Rule r = new Rule(pat, lCon, rCon, ph) {
                                private final int myLine = cLine;
                                private final String loc = location;
                                @Override
                                public String toString() {
                                    final StringBuilder sb = new StringBuilder();
                                    sb.append("Rule");
                                    sb.append("{line=").append(myLine);
                                    sb.append(", loc='").append(loc).append('\'');
                                    sb.append(", pat='").append(pat).append('\'');
                                    sb.append(", lcon='").append(lCon).append('\'');
                                    sb.append(", rcon='").append(rCon).append('\'');
                                    sb.append('}');
                                    return sb.toString();
                                }
                            };
                            // rules are bucketed by the first character of their pattern
                            final String patternKey = r.pattern.substring(0,1);
                            List<Rule> rules = lines.get(patternKey);
                            if (rules == null) {
                                rules = new ArrayList<Rule>();
                                lines.put(patternKey, rules);
                            }
                            rules.add(r);
                        } catch (final IllegalArgumentException e) {
                            throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " +
                                    location, e);
                        }
                    }
                }
            }
        }
    }
    return lines;
}
/**
 * Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case.
 * <p>
 * The fast paths cover: a regex without '[' that is anchored with '^' and/or '$' (handled as a
 * literal exact, prefix or suffix comparison), and an anchored regex that consists of one single
 * character class, optionally negated. Everything else is compiled with {@link Pattern} and
 * evaluated with {@code find()}.
 *
 * @param regex
 *            the regular expression to compile
 * @return an RPattern that will match this regex
 */
private static RPattern pattern(final String regex) {
    final boolean startsWith = regex.startsWith("^");
    final boolean endsWith = regex.endsWith("$");
    final String content = regex.substring(startsWith ? 1 : 0, endsWith ? regex.length() - 1 : regex.length());
    final boolean boxes = content.contains("[");
    if (!boxes) {
        if (startsWith && endsWith) {
            // exact match
            if (content.length() == 0) {
                // empty
                return new RPattern() {
                    @Override
                    public boolean isMatch(final CharSequence input) {
                        return input.length() == 0;
                    }
                };
            } else {
                return new RPattern() {
                    @Override
                    public boolean isMatch(final CharSequence input) {
                        // contentEquals compares characters, so it also works when input is not a String
                        return content.contentEquals(input);
                    }
                };
            }
        } else if ((startsWith || endsWith) && content.length() == 0) {
            // matches every string
            return ALL_STRINGS_RMATCHER;
        } else if (startsWith) {
            // matches from start
            return new RPattern() {
                @Override
                public boolean isMatch(final CharSequence input) {
                    return startsWith(input, content);
                }
            };
        } else if (endsWith) {
            // matches from end
            return new RPattern() {
                @Override
                public boolean isMatch(final CharSequence input) {
                    return endsWith(input, content);
                }
            };
        }
    } else {
        final boolean startsWithBox = content.startsWith("[");
        final boolean endsWithBox = content.endsWith("]");
        if (startsWithBox && endsWithBox) {
            String boxContent = content.substring(1, content.length() - 1);
            if (!boxContent.contains("[")) {
                // box containing alternatives
                final boolean negate = boxContent.startsWith("^");
                if (negate) {
                    boxContent = boxContent.substring(1);
                }
                final String bContent = boxContent;
                final boolean shouldMatch = !negate;
                if (startsWith && endsWith) {
                    // exact match
                    return new RPattern() {
                        @Override
                        public boolean isMatch(final CharSequence input) {
                            return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch;
                        }
                    };
                } else if (startsWith) {
                    // first char
                    return new RPattern() {
                        @Override
                        public boolean isMatch(final CharSequence input) {
                            return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch;
                        }
                    };
                } else if (endsWith) {
                    // last char
                    return new RPattern() {
                        @Override
                        public boolean isMatch(final CharSequence input) {
                            return input.length() > 0 &&
                                   contains(bContent, input.charAt(input.length() - 1)) == shouldMatch;
                        }
                    };
                }
            }
        }
    }
    // no fast path applies: use a real regular expression
    return new RPattern() {
        private final Pattern pattern = Pattern.compile(regex);
        @Override
        public boolean isMatch(final CharSequence input) {
            final Matcher matcher = pattern.matcher(input);
            return matcher.find();
        }
    };
}
/**
 * Tells whether a character sequence begins with the given prefix.
 *
 * @param input the sequence to inspect
 * @param prefix the prefix to look for; an empty prefix always matches
 * @return true if the leading characters of {@code input} equal {@code prefix}
 */
private static boolean startsWith(final CharSequence input, final CharSequence prefix) {
    final int prefixLength = prefix.length();
    if (input.length() < prefixLength) {
        return false;
    }
    int pos = 0;
    while (pos < prefixLength) {
        if (prefix.charAt(pos) != input.charAt(pos)) {
            return false;
        }
        pos++;
    }
    return true;
}
/**
 * Removes at most one leading and at most one trailing double quote from a string.
 *
 * @param str the possibly quoted text
 * @return the text without its surrounding quotes
 */
private static String stripQuotes(String str) {
    final String withoutLeading = str.startsWith(DOUBLE_QUOTE) ? str.substring(1) : str;
    // the trailing check runs on what is left, so a lone quote becomes the empty string
    return withoutLeading.endsWith(DOUBLE_QUOTE)
            ? withoutLeading.substring(0, withoutLeading.length() - 1)
            : withoutLeading;
}
/** Matcher for the text to the left of the pattern; built by the constructor from the left context followed by "$". */
private final RPattern lContext;
/** The literal text this rule matches at the current position. */
private final String pattern;
/** The phoneme expression produced when this rule matches. */
private final PhonemeExpr phoneme;
/** Matcher for the text to the right of the pattern; built by the constructor from "^" followed by the right context. */
private final RPattern rContext;
/**
* Creates a new rule.
*
* @param pattern
* the pattern
* @param lContext
* the left context
* @param rContext
* the right context
* @param phoneme
* the resulting phoneme
*/
public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) {
this.pattern = pattern;
this.lContext = pattern(lContext + "$");
this.rContext = pattern("^" + rContext);
this.phoneme = phoneme;
}
/**
 * Gets the left context. This is the compiled form of the left-context regular expression, which must
 * match the input to the left of the pattern.
 *
 * @return the left context matcher
 */
public RPattern getLContext() {
return this.lContext;
}
/**
 * Gets the pattern. This is a string literal that must match the input exactly at the tested position.
 *
 * @return the pattern
 */
public String getPattern() {
return this.pattern;
}
/**
 * Gets the phoneme. If the rule matches, this is the phoneme expression associated with the pattern match.
 *
 * @return the phoneme expression
 */
public PhonemeExpr getPhoneme() {
return this.phoneme;
}
/**
 * Gets the right context. This is the compiled form of the right-context regular expression, which must
 * match the input to the right of the pattern.
 *
 * @return the right context matcher
 */
public RPattern getRContext() {
return this.rContext;
}
/**
 * Decides if the pattern and context match the input starting at a position. It is a match if the
 * <code>lContext</code> matches <code>input</code> up to <code>i</code>, <code>pattern</code> matches at i and
 * <code>rContext</code> matches from the end of the match of <code>pattern</code> to the end of <code>input</code>.
 * <p>
 * The pattern is compared character by character, so the result does not depend on the concrete
 * {@link CharSequence} implementation of <code>input</code>.
 *
 * @param input
 *            the input String
 * @param i
 *            the int position within the input
 * @return true if the pattern and left/right context match, false otherwise
 * @throws IndexOutOfBoundsException
 *             if <code>i</code> is negative
 */
public boolean patternAndContextMatches(final CharSequence input, final int i) {
    if (i < 0) {
        throw new IndexOutOfBoundsException("Can not match pattern at negative indexes");
    }
    final int patternLength = this.pattern.length();
    final int ipl = i + patternLength;
    if (ipl > input.length()) {
        // not enough room for the pattern to match
        return false;
    }
    // evaluate the pattern, left context and right context
    // fail early if any of the evaluations is not successful
    if (!this.pattern.contentEquals(input.subSequence(i, ipl))) {
        // equals() would be false for any subSequence that is not a String, even with the same characters
        return false;
    } else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) {
        return false;
    }
    return this.lContext.isMatch(input.subSequence(0, i));
}
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language.bm;
/**
* Types of rule.
*
* @since 1.6
* @version $Id$
*/
public enum RuleType {

    /** Approximate rules, which will lead to the largest number of phonetic interpretations. */
    APPROX("approx"),

    /** Exact rules, which will lead to a minimum number of phonetic interpretations. */
    EXACT("exact"),

    /** For internal use only. Please use {@link #APPROX} or {@link #EXACT}. */
    RULES("rules");

    /** Lower-case name of this rule type. */
    private final String name;

    RuleType(final String name) {
        this.name = name;
    }

    /**
     * Gets the rule name.
     *
     * @return the lower-case name given to this constant ("approx", "exact" or "rules")
     */
    public String getName() {
        return name;
    }
}

View File

@ -0,0 +1,21 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
Implementation details of the Beider-Morse codec.
</body>
</html>

View File

@ -0,0 +1,21 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
Language and phonetic encoders.
</body>
</html>

View File

@ -0,0 +1,251 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.net;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.apache.commons.codec.Charsets;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringDecoder;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.binary.Base64;
/**
* Identical to the Base64 encoding defined by <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>
* and allows a character set to be specified.
* <p>
* <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
* text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message
* handling software.
* <p>
* This class is immutable and thread-safe.
*
* @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
* Header Extensions for Non-ASCII Text</a>
*
* @since 1.3
* @version $Id$
*/
public class BCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
/**
 * The default charset used for string decoding and encoding. Set once by the constructor.
 */
private final Charset charset;
/**
 * Default constructor. Uses UTF-8 as the default charset.
 */
public BCodec() {
this(Charsets.UTF_8);
}
/**
 * Constructor which allows for the selection of a default charset.
 *
 * @param charset
 *            the default string charset to use.
 *
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 1.7
 */
public BCodec(final Charset charset) {
this.charset = charset;
}
/**
 * Constructor which allows for the selection of a default charset by name. The name is resolved with
 * {@link Charset#forName(String)}.
 *
 * @param charsetName
 *            the name of the default charset to use.
 * @throws java.nio.charset.UnsupportedCharsetException
 *             If the named charset is unavailable
 * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 */
public BCodec(final String charsetName) {
this(Charset.forName(charsetName));
}
/**
 * Returns the identifier of this encoding.
 *
 * @return the string "B"
 */
@Override
protected String getEncoding() {
return "B";
}
/**
 * Base64-encodes the given bytes.
 *
 * @param bytes the raw bytes, may be null
 * @return the Base64 encoded bytes, or null if the input is null
 */
@Override
protected byte[] doEncoding(final byte[] bytes) {
    return bytes == null ? null : Base64.encodeBase64(bytes);
}
/**
 * Base64-decodes the given bytes.
 *
 * @param bytes the Base64 encoded bytes, may be null
 * @return the decoded bytes, or null if the input is null
 */
@Override
protected byte[] doDecoding(final byte[] bytes) {
    return bytes == null ? null : Base64.decodeBase64(bytes);
}
/**
 * Encodes a string into its Base64 form using the specified charset. The work is delegated to the
 * inherited {@code encodeText}.
 *
 * @param value
 *            string to convert to Base64 form
 * @param charset
 *            the charset for <code>value</code>
 * @return Base64 string, or null if <code>value</code> is null
 * @throws EncoderException
 *             thrown if a failure condition is encountered during the encoding process.
 * @since 1.7
 */
public String encode(final String value, final Charset charset) throws EncoderException {
    return value == null ? null : encodeText(value, charset);
}
/**
 * Encodes a string into its Base64 form using the charset with the given name. The work is delegated
 * to the inherited {@code encodeText}.
 *
 * @param value
 *            string to convert to Base64 form
 * @param charset
 *            the name of the charset for <code>value</code>
 * @return Base64 string, or null if <code>value</code> is null
 * @throws EncoderException
 *             thrown if a failure condition is encountered during the encoding process; an
 *             UnsupportedEncodingException is wrapped and kept as the cause.
 */
public String encode(final String value, final String charset) throws EncoderException {
    if (value == null) {
        return null;
    }
    final String encoded;
    try {
        encoded = this.encodeText(value, charset);
    } catch (final UnsupportedEncodingException e) {
        throw new EncoderException(e.getMessage(), e);
    }
    return encoded;
}
/**
 * Encodes a string into its Base64 form using the default charset of this codec.
 *
 * @param value
 *            string to convert to Base64 form
 * @return Base64 string, or null if <code>value</code> is null
 * @throws EncoderException
 *             thrown if a failure condition is encountered during the encoding process.
 */
@Override
public String encode(final String value) throws EncoderException {
    return value == null ? null : encode(value, this.getCharset());
}
/**
 * Decodes a Base64 string into its original form. The work is delegated to the inherited
 * {@code decodeText}.
 *
 * @param value
 *            Base64 string to convert into its original form
 * @return original string, or null if <code>value</code> is null
 * @throws DecoderException
 *             A decoder exception is thrown if a failure condition is encountered during the decode process;
 *             an UnsupportedEncodingException is wrapped and kept as the cause.
 */
@Override
public String decode(final String value) throws DecoderException {
    if (value == null) {
        return null;
    }
    final String decoded;
    try {
        decoded = this.decodeText(value);
    } catch (final UnsupportedEncodingException e) {
        throw new DecoderException(e.getMessage(), e);
    }
    return decoded;
}
/**
 * Encodes an object into its Base64 form using the default charset. Only strings are supported.
 *
 * @param value
 *            object to convert to Base64 form
 * @return Base64 object, or null if <code>value</code> is null
 * @throws EncoderException
 *             thrown if the argument is not a <code>String</code>, or if a failure condition is
 *             encountered during the encoding process.
 */
@Override
public Object encode(final Object value) throws EncoderException {
    if (value == null) {
        return null;
    }
    if (!(value instanceof String)) {
        final String typeName = value.getClass().getName();
        throw new EncoderException("Objects of type " + typeName + " cannot be encoded using BCodec");
    }
    return encode((String) value);
}
/**
 * Decodes a Base64 object into its original form. Only strings are supported.
 *
 * @param value
 *            Base64 object to convert into its original form
 * @return original object, or null if <code>value</code> is null
 * @throws DecoderException
 *             Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered
 *             during the decode process.
 */
@Override
public Object decode(final Object value) throws DecoderException {
    if (value == null) {
        return null;
    }
    if (!(value instanceof String)) {
        final String typeName = value.getClass().getName();
        throw new DecoderException("Objects of type " + typeName + " cannot be decoded using BCodec");
    }
    return decode((String) value);
}
/**
 * Gets the default charset used for string decoding and encoding.
 *
 * @return the default charset
 * @since 1.7
 */
public Charset getCharset() {
return this.charset;
}
/**
 * Gets the name of the default charset used for string decoding and encoding.
 *
 * @return the default charset name, as reported by {@link Charset#name()}
 */
public String getDefaultCharset() {
return this.charset.name();
}
}

View File

@ -0,0 +1,358 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.net;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.BitSet;
import org.apache.commons.codec.Charsets;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringDecoder;
import org.apache.commons.codec.StringEncoder;
/**
* Similar to the Quoted-Printable content-transfer-encoding defined in
* <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII
* characters to be decipherable on an ASCII terminal without decoding.
* <p>
* <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
* text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message
* handling software.
* <p>
* This class is conditionally thread-safe.
* The instance field {@link #encodeBlanks} is mutable {@link #setEncodeBlanks(boolean)}
* but is not volatile, and accesses are not synchronised.
* If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronisation
* is used to ensure safe publication of the value between threads, and must not invoke
* {@link #setEncodeBlanks(boolean)} after initial setup.
*
* @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
* Header Extensions for Non-ASCII Text</a>
*
* @since 1.3
* @version $Id$
*/
public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
/**
 * The default charset used for string decoding and encoding. Set once by the constructor.
 */
private final Charset charset;
/**
 * BitSet of printable characters as defined in RFC 1522.
 */
private static final BitSet PRINTABLE_CHARS = new BitSet(256);
// Static initializer for printable chars collection
static {
    // punctuation and symbols; note that '=', '?' and '_' are not part of the set
    final String symbols = " !\"#$%&'()*+,-./:;<>@[\\]^`{|}~";
    for (int idx = 0; idx < symbols.length(); idx++) {
        PRINTABLE_CHARS.set(symbols.charAt(idx));
    }
    // digits and letters; the upper bound of a range is exclusive
    PRINTABLE_CHARS.set('0', '9' + 1);
    PRINTABLE_CHARS.set('A', 'Z' + 1);
    PRINTABLE_CHARS.set('a', 'z' + 1);
}
private static final byte BLANK = 32;
private static final byte UNDERSCORE = 95;
private boolean encodeBlanks = false;
/**
* Default constructor.
*/
public QCodec() {
this(Charsets.UTF_8);
}
/**
* Constructor which allows for the selection of a default charset.
*
* @param charset
* the default string charset to use.
*
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
* @since 1.7
*/
public QCodec(final Charset charset) {
super();
this.charset = charset;
}
/**
* Constructor which allows for the selection of a default charset.
*
* @param charsetName
* the charset to use.
* @throws java.nio.charset.UnsupportedCharsetException
* If the named charset is unavailable
* @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
*/
public QCodec(final String charsetName) {
this(Charset.forName(charsetName));
}
@Override
protected String getEncoding() {
return "Q";
}
@Override
protected byte[] doEncoding(final byte[] bytes) {
if (bytes == null) {
return null;
}
final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
if (this.encodeBlanks) {
for (int i = 0; i < data.length; i++) {
if (data[i] == BLANK) {
data[i] = UNDERSCORE;
}
}
}
return data;
}
@Override
protected byte[] doDecoding(final byte[] bytes) throws DecoderException {
if (bytes == null) {
return null;
}
boolean hasUnderscores = false;
for (final byte b : bytes) {
if (b == UNDERSCORE) {
hasUnderscores = true;
break;
}
}
if (hasUnderscores) {
final byte[] tmp = new byte[bytes.length];
for (int i = 0; i < bytes.length; i++) {
final byte b = bytes[i];
if (b != UNDERSCORE) {
tmp[i] = b;
} else {
tmp[i] = BLANK;
}
}
return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
}
return QuotedPrintableCodec.decodeQuotedPrintable(bytes);
}
/**
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
*
* @param str
* string to convert to quoted-printable form
* @param charset
* the charset for str
* @return quoted-printable string
* @throws EncoderException
* thrown if a failure condition is encountered during the encoding process.
* @since 1.7
*/
public String encode(final String str, final Charset charset) throws EncoderException {
if (str == null) {
return null;
}
return encodeText(str, charset);
}
/**
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
*
* @param str
* string to convert to quoted-printable form
* @param charset
* the charset for str
* @return quoted-printable string
* @throws EncoderException
* thrown if a failure condition is encountered during the encoding process.
*/
public String encode(final String str, final String charset) throws EncoderException {
if (str == null) {
return null;
}
try {
return encodeText(str, charset);
} catch (final UnsupportedEncodingException e) {
throw new EncoderException(e.getMessage(), e);
}
}
/**
* Encodes a string into its quoted-printable form using the default charset. Unsafe characters are escaped.
*
* @param str
* string to convert to quoted-printable form
* @return quoted-printable string
* @throws EncoderException
* thrown if a failure condition is encountered during the encoding process.
*/
@Override
public String encode(final String str) throws EncoderException {
if (str == null) {
return null;
}
return encode(str, getCharset());
}
/**
* Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original
* representation.
*
* @param str
* quoted-printable string to convert into its original form
* @return original string
* @throws DecoderException
* A decoder exception is thrown if a failure condition is encountered during the decode process.
*/
@Override
public String decode(final String str) throws DecoderException {
if (str == null) {
return null;
}
try {
return decodeText(str);
} catch (final UnsupportedEncodingException e) {
throw new DecoderException(e.getMessage(), e);
}
}
/**
* Encodes an object into its quoted-printable form using the default charset. Unsafe characters are escaped.
*
* @param obj
* object to convert to quoted-printable form
* @return quoted-printable object
* @throws EncoderException
* thrown if a failure condition is encountered during the encoding process.
*/
@Override
public Object encode(final Object obj) throws EncoderException {
if (obj == null) {
return null;
} else if (obj instanceof String) {
return encode((String) obj);
} else {
throw new EncoderException("Objects of type " +
obj.getClass().getName() +
" cannot be encoded using Q codec");
}
}
/**
* Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
* representation.
*
* @param obj
* quoted-printable object to convert into its original form
* @return original object
* @throws DecoderException
* Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered
* during the decode process.
*/
@Override
public Object decode(final Object obj) throws DecoderException {
if (obj == null) {
return null;
} else if (obj instanceof String) {
return decode((String) obj);
} else {
throw new DecoderException("Objects of type " +
obj.getClass().getName() +
" cannot be decoded using Q codec");
}
}
/**
* Gets the default charset name used for string decoding and encoding.
*
* @return the default charset name
* @since 1.7
*/
public Charset getCharset() {
return this.charset;
}
/**
* Gets the default charset name used for string decoding and encoding.
*
* @return the default charset name
*/
public String getDefaultCharset() {
return this.charset.name();
}
/**
* Tests if optional transformation of SPACE characters is to be used
*
* @return <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
*/
public boolean isEncodeBlanks() {
return this.encodeBlanks;
}
/**
* Defines whether optional transformation of SPACE characters is to be used
*
* @param b
* <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
*/
public void setEncodeBlanks(final boolean b) {
this.encodeBlanks = b;
}
}

View File

@ -0,0 +1,602 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.net;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.BitSet;
import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.Charsets;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringDecoder;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.binary.StringUtils;
/**
* Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
* <p>
* The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
* printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
* unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
* data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
* to ensure the integrity of the data should the message pass through a character-translating, and/or line-wrapping
* gateway.
* <p>
* Note:
* <p>
* Depending on the selected {@code strict} parameter, this class will implement a different set of rules of the
* quoted-printable spec:
* <ul>
* <li>{@code strict=false}: only rules #1 and #2 are implemented
* <li>{@code strict=true}: all rules #1 through #5 are implemented
* </ul>
* Originally, this class only supported the non-strict mode, but the codec in this partial form could already be used
* for certain applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance
* Q codec. The strict mode has been added in 1.10.
* <p>
* This class is immutable and thread-safe.
*
* @see <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
* Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
*
* @since 1.3
* @version $Id$
*/
public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
    /**
     * The default charset used for string decoding and encoding.
     */
    private final Charset charset;

    /**
     * Indicates whether soft line breaks shall be used during encoding (rule #3-5).
     */
    private final boolean strict;

    /**
     * BitSet of printable characters as defined in RFC 1521.
     */
    private static final BitSet PRINTABLE_CHARS = new BitSet(256);

    /** The character that introduces an escaped octet or a soft line break. */
    private static final byte ESCAPE_CHAR = '=';

    private static final byte TAB = 9;

    private static final byte SPACE = 32;

    private static final byte CR = 13;

    private static final byte LF = 10;

    /**
     * Safe line length for quoted printable encoded text.
     */
    private static final int SAFE_LENGTH = 73;

    // Static initializer for printable chars collection:
    // octets 33..126 except '=' (61), plus TAB and SPACE.
    static {
        for (int i = 33; i <= 60; i++) {
            PRINTABLE_CHARS.set(i);
        }
        for (int i = 62; i <= 126; i++) {
            PRINTABLE_CHARS.set(i);
        }
        PRINTABLE_CHARS.set(TAB);
        PRINTABLE_CHARS.set(SPACE);
    }

    /**
     * Default constructor, assumes default charset of {@link Charsets#UTF_8}
     */
    public QuotedPrintableCodec() {
        this(Charsets.UTF_8, false);
    }

    /**
     * Constructor which allows for the selection of the strict mode.
     *
     * @param strict
     *            if {@code true}, soft line breaks will be used
     * @since 1.10
     */
    public QuotedPrintableCodec(final boolean strict) {
        this(Charsets.UTF_8, strict);
    }

    /**
     * Constructor which allows for the selection of a default charset.
     *
     * @param charset
     *            the default string charset to use.
     * @since 1.7
     */
    public QuotedPrintableCodec(final Charset charset) {
        this(charset, false);
    }

    /**
     * Constructor which allows for the selection of a default charset and strict mode.
     *
     * @param charset
     *            the default string charset to use.
     * @param strict
     *            if {@code true}, soft line breaks will be used
     * @since 1.10
     */
    public QuotedPrintableCodec(final Charset charset, final boolean strict) {
        this.charset = charset;
        this.strict = strict;
    }

    /**
     * Constructor which allows for the selection of a default charset.
     *
     * @param charsetName
     *            the default string charset to use.
     * @throws UnsupportedCharsetException
     *             If no support for the named charset is available
     *             in this instance of the Java virtual machine
     * @throws IllegalArgumentException
     *             If the given charsetName is null
     * @throws IllegalCharsetNameException
     *             If the given charset name is illegal
     *
     * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
     */
    public QuotedPrintableCodec(final String charsetName)
            throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException {
        this(Charset.forName(charsetName), false);
    }

    /**
     * Encodes byte into its quoted-printable representation: the escape character followed by
     * two upper-case hexadecimal digits.
     *
     * @param b
     *            byte to encode
     * @param buffer
     *            the buffer to write to
     * @return The number of bytes written to the <code>buffer</code>
     */
    private static final int encodeQuotedPrintable(final int b, final ByteArrayOutputStream buffer) {
        buffer.write(ESCAPE_CHAR);
        final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
        final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
        buffer.write(hex1);
        buffer.write(hex2);
        return 3;
    }

    /**
     * Return the byte at position <code>index</code> of the byte array and
     * make sure it is unsigned.
     *
     * @param index
     *            position in the array
     * @param bytes
     *            the byte array
     * @return the unsigned octet at position <code>index</code> from the array
     */
    private static int getUnsignedOctet(final int index, final byte[] bytes) {
        int b = bytes[index];
        if (b < 0) {
            b = 256 + b;
        }
        return b;
    }

    /**
     * Write a byte to the buffer.
     *
     * @param b
     *            byte to write
     * @param encode
     *            indicates whether the octet shall be encoded
     * @param buffer
     *            the buffer to write to
     * @return the number of bytes that have been written to the buffer
     */
    private static int encodeByte(final int b, final boolean encode,
                                  final ByteArrayOutputStream buffer) {
        if (encode) {
            return encodeQuotedPrintable(b, buffer);
        } else {
            buffer.write(b);
            return 1;
        }
    }

    /**
     * Checks whether the given byte is whitespace.
     *
     * @param b
     *            byte to be checked
     * @return <code>true</code> if the byte is either a space or tab character
     */
    private static boolean isWhitespace(final int b) {
        return b == SPACE || b == TAB;
    }

    /**
     * Writes a soft line break (escape character, CR, LF) to the buffer.
     *
     * @param buffer
     *            the buffer to write to
     */
    private static void writeSoftLineBreak(final ByteArrayOutputStream buffer) {
        buffer.write(ESCAPE_CHAR);
        buffer.write(CR);
        buffer.write(LF);
    }

    /**
     * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
     * <p>
     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     *
     * @param printable
     *            bitset of characters deemed quoted-printable
     * @param bytes
     *            array of bytes to be encoded
     * @return array of bytes containing quoted-printable data
     */
    public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes) {
        return encodeQuotedPrintable(printable, bytes, false);
    }

    /**
     * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
     * <p>
     * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
     * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     * <p>
     * In strict mode, inputs shorter than three bytes (including the empty array) are encoded octet by octet,
     * escaping a space or tab in the final position.
     *
     * @param printable
     *            bitset of characters deemed quoted-printable; if {@code null}, the default printable set of this
     *            class is used
     * @param bytes
     *            array of bytes to be encoded
     * @param strict
     *            if {@code true} the full ruleset is used, otherwise only rule #1 and rule #2
     * @return array of bytes containing quoted-printable data, or {@code null} if {@code bytes} is {@code null}
     * @since 1.10
     */
    public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes, boolean strict) {
        if (bytes == null) {
            return null;
        }
        if (printable == null) {
            printable = PRINTABLE_CHARS;
        }
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();

        if (strict) {
            final int length = bytes.length;
            int pos = 1;
            // encode up to length - 3, the last three octets will be treated
            // separately for simplification of note #3
            for (int i = 0; i < length - 3; i++) {
                final int b = getUnsignedOctet(i, bytes);
                if (pos < SAFE_LENGTH) {
                    // up to this length it is safe to add any byte, encoded or not
                    pos += encodeByte(b, !printable.get(b), buffer);
                } else {
                    // rule #3: whitespace at the end of a line *must* be encoded
                    encodeByte(b, !printable.get(b) || isWhitespace(b), buffer);

                    // rule #5: soft line break
                    writeSoftLineBreak(buffer);
                    pos = 1;
                }
            }

            // The third-from-last octet only exists for inputs of at least three bytes;
            // reading it unconditionally would index before the start of a shorter array.
            if (length >= 3) {
                // rule #3: whitespace at the end of a line *must* be encoded
                // if we would do a soft break line after this octet, encode whitespace
                final int b = getUnsignedOctet(length - 3, bytes);
                final boolean encode = !printable.get(b) || (isWhitespace(b) && pos > SAFE_LENGTH - 5);
                pos += encodeByte(b, encode, buffer);

                // note #3: '=' *must not* be the ultimate or penultimate character
                // simplification: if < 6 bytes left, do a soft line break as we may need
                // exactly 6 bytes space for the last 2 bytes
                if (pos > SAFE_LENGTH - 2) {
                    writeSoftLineBreak(buffer);
                }
            }

            // last (up to) two octets; start is clamped to 0 for inputs shorter than two bytes
            for (int i = Math.max(0, length - 2); i < length; i++) {
                final int b = getUnsignedOctet(i, bytes);
                // rule #3: trailing whitespace shall be encoded
                final boolean encode = !printable.get(b) || (i == length - 1 && isWhitespace(b));
                encodeByte(b, encode, buffer);
            }
        } else {
            for (final byte c : bytes) {
                int b = c;
                if (b < 0) {
                    b = 256 + b;
                }
                if (printable.get(b)) {
                    buffer.write(b);
                } else {
                    encodeQuotedPrintable(b, buffer);
                }
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
     * back to their original representation.
     * <p>
     * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as
     * defined in RFC 1521.
     *
     * @param bytes
     *            array of quoted-printable characters
     * @return array of original bytes, or {@code null} if {@code bytes} is {@code null}
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful
     */
    public static final byte[] decodeQuotedPrintable(final byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        for (int i = 0; i < bytes.length; i++) {
            final int b = bytes[i];
            if (b == ESCAPE_CHAR) {
                try {
                    // if the next octet is a CR we have found a soft line break;
                    // the LF that follows is dropped by the CR/LF branch below
                    if (bytes[++i] == CR) {
                        continue;
                    }
                    final int u = Utils.digit16(bytes[i]);
                    final int l = Utils.digit16(bytes[++i]);
                    buffer.write((char) ((u << 4) + l));
                } catch (final ArrayIndexOutOfBoundsException e) {
                    // the escape sequence was cut off by the end of the input
                    throw new DecoderException("Invalid quoted-printable encoding", e);
                }
            } else if (b != CR && b != LF) {
                // every other octet is appended except for CR & LF
                buffer.write(b);
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
     * <p>
     * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
     * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     *
     * @param bytes
     *            array of bytes to be encoded
     * @return array of bytes containing quoted-printable data
     */
    @Override
    public byte[] encode(final byte[] bytes) {
        return encodeQuotedPrintable(PRINTABLE_CHARS, bytes, strict);
    }

    /**
     * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
     * back to their original representation.
     * <p>
     * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as
     * defined in RFC 1521.
     *
     * @param bytes
     *            array of quoted-printable characters
     * @return array of original bytes
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful
     */
    @Override
    public byte[] decode(final byte[] bytes) throws DecoderException {
        return decodeQuotedPrintable(bytes);
    }

    /**
     * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
     * <p>
     * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
     * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     *
     * @param str
     *            string to convert to quoted-printable form
     * @return quoted-printable string
     * @throws EncoderException
     *             Thrown if quoted-printable encoding is unsuccessful
     *
     * @see #getCharset()
     */
    @Override
    public String encode(final String str) throws EncoderException {
        return this.encode(str, getCharset());
    }

    /**
     * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
     * are converted back to their original representation.
     *
     * @param str
     *            quoted-printable string to convert into its original form
     * @param charset
     *            the original string charset
     * @return original string, or {@code null} if {@code str} is {@code null}
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful
     * @since 1.7
     */
    public String decode(final String str, final Charset charset) throws DecoderException {
        if (str == null) {
            return null;
        }
        return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset);
    }

    /**
     * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
     * are converted back to their original representation.
     *
     * @param str
     *            quoted-printable string to convert into its original form
     * @param charset
     *            the name of the original string charset
     * @return original string, or {@code null} if {@code str} is {@code null}
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful
     * @throws UnsupportedEncodingException
     *             Thrown if charset is not supported
     */
    public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
    }

    /**
     * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
     * converted back to their original representation.
     *
     * @param str
     *            quoted-printable string to convert into its original form
     * @return original string
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported.
     * @see #getCharset()
     */
    @Override
    public String decode(final String str) throws DecoderException {
        return this.decode(str, this.getCharset());
    }

    /**
     * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
     * Only {@code byte[]} and {@code String} arguments are supported.
     *
     * @param obj
     *            string or byte array to convert to a quoted-printable form
     * @return quoted-printable object, or {@code null} if {@code obj} is {@code null}
     * @throws EncoderException
     *             Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
     *             unsuccessful
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return encode((byte[]) obj);
        } else if (obj instanceof String) {
            return encode((String) obj);
        } else {
            throw new EncoderException("Objects of type " +
                  obj.getClass().getName() +
                  " cannot be quoted-printable encoded");
        }
    }

    /**
     * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
     * representation.
     *
     * @param obj
     *            quoted-printable object to convert into its original form
     * @return original object, or {@code null} if {@code obj} is {@code null}
     * @throws DecoderException
     *             Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
     *             condition is encountered during the decode process.
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        } else if (obj instanceof String) {
            return decode((String) obj);
        } else {
            throw new DecoderException("Objects of type " +
                  obj.getClass().getName() +
                  " cannot be quoted-printable decoded");
        }
    }

    /**
     * Gets the default charset used for string decoding and encoding.
     *
     * @return the default charset
     * @since 1.7
     */
    public Charset getCharset() {
        return this.charset;
    }

    /**
     * Gets the name of the default charset used for string decoding and encoding.
     *
     * @return the default charset name
     */
    public String getDefaultCharset() {
        return this.charset.name();
    }

    /**
     * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
     * <p>
     * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
     * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     *
     * @param str
     *            string to convert to quoted-printable form
     * @param charset
     *            the charset for str
     * @return quoted-printable string, or {@code null} if {@code str} is {@code null}
     * @since 1.7
     */
    public String encode(final String str, final Charset charset) {
        if (str == null) {
            return null;
        }
        return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset)));
    }

    /**
     * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
     * <p>
     * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset
     * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     *
     * @param str
     *            string to convert to quoted-printable form
     * @param charset
     *            the name of the charset for str
     * @return quoted-printable string, or {@code null} if {@code str} is {@code null}
     * @throws UnsupportedEncodingException
     *             Thrown if the charset is not supported
     */
    public String encode(final String str, final String charset) throws UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
    }
}

View File

@ -0,0 +1,186 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.net;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.binary.StringUtils;
/**
* Implements methods common to all codecs defined in RFC 1522.
* <p>
* <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
* encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
* is unlikely to confuse existing message handling software.
* <p>
* This class is immutable and thread-safe.
*
* @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
* Message Header Extensions for Non-ASCII Text</a>
*
* @since 1.3
* @version $Id$
*/
abstract class RFC1522Codec {
/** Separator. */
protected static final char SEP = '?';
/** Prefix. */
protected static final String POSTFIX = "?=";
/** Postfix. */
protected static final String PREFIX = "=?";
/**
* Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
* <p>
* This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
* {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
*
* @param text
* a string to encode
* @param charset
* a charset to be used
* @return RFC 1522 compliant "encoded-word"
* @throws EncoderException
* thrown if there is an error condition during the Encoding process.
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
*/
protected String encodeText(final String text, final Charset charset) throws EncoderException {
if (text == null) {
return null;
}
final StringBuilder buffer = new StringBuilder();
buffer.append(PREFIX);
buffer.append(charset);
buffer.append(SEP);
buffer.append(this.getEncoding());
buffer.append(SEP);
final byte [] rawData = this.doEncoding(text.getBytes(charset));
buffer.append(StringUtils.newStringUsAscii(rawData));
buffer.append(POSTFIX);
return buffer.toString();
}
/**
* Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
* <p>
* This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
* {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
*
* @param text
* a string to encode
* @param charsetName
* the charset to use
* @return RFC 1522 compliant "encoded-word"
* @throws EncoderException
* thrown if there is an error condition during the Encoding process.
* @throws UnsupportedEncodingException
* if charset is not available
*
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
*/
protected String encodeText(final String text, final String charsetName)
throws EncoderException, UnsupportedEncodingException {
if (text == null) {
return null;
}
return this.encodeText(text, Charset.forName(charsetName));
}
/**
* Applies an RFC 1522 compliant decoding scheme to the given string of text.
* <p>
* This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
* {@link #doEncoding(byte [])} method of a concrete class to perform the specific decoding.
*
* @param text
* a string to decode
* @return A new decoded String or <code>null</code> if the input is <code>null</code>.
* @throws DecoderException
* thrown if there is an error condition during the decoding process.
* @throws UnsupportedEncodingException
* thrown if charset specified in the "encoded-word" header is not supported
*/
protected String decodeText(final String text)
throws DecoderException, UnsupportedEncodingException {
if (text == null) {
return null;
}
if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
throw new DecoderException("RFC 1522 violation: malformed encoded content");
}
final int terminator = text.length() - 2;
int from = 2;
int to = text.indexOf(SEP, from);
if (to == terminator) {
throw new DecoderException("RFC 1522 violation: charset token not found");
}
final String charset = text.substring(from, to);
if (charset.equals("")) {
throw new DecoderException("RFC 1522 violation: charset not specified");
}
from = to + 1;
to = text.indexOf(SEP, from);
if (to == terminator) {
throw new DecoderException("RFC 1522 violation: encoding token not found");
}
final String encoding = text.substring(from, to);
if (!getEncoding().equalsIgnoreCase(encoding)) {
throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
}
from = to + 1;
to = text.indexOf(SEP, from);
byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
data = doDecoding(data);
return new String(data, charset);
}
/**
 * Returns the codec name (referred to as encoding in the RFC 1522).
 * <p>
 * {@link #decodeText(String)} compares this value, ignoring case, with the encoding token found in
 * the "encoded-word" header and rejects the input when the two differ.
 *
 * @return name of the codec
 */
protected abstract String getEncoding();
/**
 * Encodes an array of bytes using the defined encoding scheme.
 * <p>
 * NOTE(review): the call site is outside this excerpt; presumably this is the encoding counterpart
 * of {@link #doDecoding(byte[])} used when building an "encoded-word" - confirm against the
 * encoding method of this class.
 *
 * @param bytes
 *            Data to be encoded
 * @return A byte array containing the encoded data
 * @throws EncoderException
 *             thrown if the Encoder encounters a failure condition during the encoding process.
 */
protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
/**
 * Decodes an array of bytes using the defined encoding scheme.
 * <p>
 * {@link #decodeText(String)} passes the US-ASCII bytes of the encoded-text portion of the
 * "encoded-word" and builds the resulting String from the returned bytes using the charset named
 * in the header.
 *
 * @param bytes
 *            Data to be decoded
 * @return a byte array that contains decoded data
 * @throws DecoderException
 *             A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
 */
protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
}

View File

@ -0,0 +1,368 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.net;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.BitSet;
import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringDecoder;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.binary.StringUtils;
/**
 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
 * <p>
 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and
 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below
 * 1.4 rely on the platform's default charset encoding.
 * <p>
 * This class is immutable and thread-safe.
 *
 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a>
 *          of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a>
 *
 * @since 1.2
 * @version $Id$
 */
public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {

    /**
     * Radix used in encoding and decoding.
     */
    static final int RADIX = 16;

    /**
     * The default charset used for string decoding and encoding.
     *
     * @deprecated TODO: This field will be changed to a private final Charset in 2.0.
     */
    @Deprecated
    protected String charset;

    /**
     * Release 1.5 made this field final.
     */
    protected static final byte ESCAPE_CHAR = '%';

    /**
     * BitSet of www-form-url safe characters.
     */
    protected static final BitSet WWW_FORM_URL = new BitSet(256);

    // Static initializer for www_form_url
    static {
        // alpha characters (the upper bound of BitSet.set(from, to) is exclusive)
        WWW_FORM_URL.set('a', 'z' + 1);
        WWW_FORM_URL.set('A', 'Z' + 1);
        // numeric characters
        WWW_FORM_URL.set('0', '9' + 1);
        // special chars
        WWW_FORM_URL.set('-');
        WWW_FORM_URL.set('_');
        WWW_FORM_URL.set('.');
        WWW_FORM_URL.set('*');
        // blank to be replaced with +
        WWW_FORM_URL.set(' ');
    }

    /**
     * Default constructor; uses UTF-8 as the default string charset.
     */
    public URLCodec() {
        this(CharEncoding.UTF_8);
    }

    /**
     * Constructor which allows for the selection of a default charset.
     *
     * @param charset the default string charset to use.
     */
    public URLCodec(final String charset) {
        this.charset = charset;
    }

    /**
     * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
     * <p>
     * Bytes flagged in <code>urlsafe</code> are copied unchanged, except that a space is written as
     * <code>+</code>. Every other byte is written as <code>%</code> followed by two upper-case
     * hexadecimal digits.
     *
     * @param urlsafe
     *            bitset of characters deemed URL safe; {@link #WWW_FORM_URL} is used when this is
     *            <code>null</code>
     * @param bytes
     *            array of bytes to convert to URL safe characters
     * @return array of bytes containing URL safe characters, or <code>null</code> if <code>bytes</code>
     *         is <code>null</code>
     */
    public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) {
        if (bytes == null) {
            return null;
        }
        if (urlsafe == null) {
            urlsafe = WWW_FORM_URL;
        }
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        for (final byte c : bytes) {
            // Widen to the unsigned value 0..255 so negative bytes index the BitSet correctly.
            final int b = c & 0xFF;
            if (urlsafe.get(b)) {
                buffer.write(b == ' ' ? '+' : b);
            } else {
                buffer.write(ESCAPE_CHAR);
                buffer.write(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)));
                buffer.write(Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)));
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
     * back to their original representation.
     * <p>
     * A <code>+</code> is decoded as a space and <code>%XX</code> as the byte with hexadecimal value
     * <code>XX</code>; all other bytes are copied unchanged.
     *
     * @param bytes
     *            array of URL safe characters
     * @return array of original bytes, or <code>null</code> if <code>bytes</code> is <code>null</code>
     * @throws DecoderException
     *             Thrown if URL decoding is unsuccessful: an escape is cut off by the end of the
     *             input, or an escape contains a character that is not a hexadecimal digit
     */
    public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        for (int i = 0; i < bytes.length; i++) {
            final int b = bytes[i];
            if (b == '+') {
                buffer.write(' ');
            } else if (b == ESCAPE_CHAR) {
                // An escape needs two more bytes; check explicitly instead of relying on an
                // ArrayIndexOutOfBoundsException to detect truncated input.
                if (bytes.length - i < 3) {
                    throw new DecoderException("Invalid URL encoding: incomplete trailing escape (%) pattern");
                }
                final int u = Utils.digit16(bytes[++i]);
                final int l = Utils.digit16(bytes[++i]);
                buffer.write((char) ((u << 4) + l));
            } else {
                buffer.write(b);
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
     *
     * @param bytes
     *            array of bytes to convert to URL safe characters
     * @return array of bytes containing URL safe characters
     */
    @Override
    public byte[] encode(final byte[] bytes) {
        return encodeUrl(WWW_FORM_URL, bytes);
    }

    /**
     * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
     * back to their original representation.
     *
     * @param bytes
     *            array of URL safe characters
     * @return array of original bytes
     * @throws DecoderException
     *             Thrown if URL decoding is unsuccessful
     */
    @Override
    public byte[] decode(final byte[] bytes) throws DecoderException {
        return decodeUrl(bytes);
    }

    /**
     * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
     *
     * @param str
     *            string to convert to a URL safe form
     * @param charset
     *            the charset for str
     * @return URL safe string, or <code>null</code> if <code>str</code> is <code>null</code>
     * @throws UnsupportedEncodingException
     *             Thrown if charset is not supported
     */
    public String encode(final String str, final String charset) throws UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
    }

    /**
     * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped.
     *
     * @param str
     *            string to convert to a URL safe form
     * @return URL safe string, or <code>null</code> if <code>str</code> is <code>null</code>
     * @throws EncoderException
     *             Thrown if URL encoding is unsuccessful
     *
     * @see #getDefaultCharset()
     */
    @Override
    public String encode(final String str) throws EncoderException {
        if (str == null) {
            return null;
        }
        try {
            return encode(str, getDefaultCharset());
        } catch (final UnsupportedEncodingException e) {
            throw new EncoderException(e.getMessage(), e);
        }
    }

    /**
     * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted
     * back to their original representation.
     *
     * @param str
     *            URL safe string to convert into its original form
     * @param charset
     *            the original string charset
     * @return original string, or <code>null</code> if <code>str</code> is <code>null</code>
     * @throws DecoderException
     *             Thrown if URL decoding is unsuccessful
     * @throws UnsupportedEncodingException
     *             Thrown if charset is not supported
     */
    public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
    }

    /**
     * Decodes a URL safe string into its original form using the default string charset. Escaped characters are
     * converted back to their original representation.
     *
     * @param str
     *            URL safe string to convert into its original form
     * @return original string, or <code>null</code> if <code>str</code> is <code>null</code>
     * @throws DecoderException
     *             Thrown if URL decoding is unsuccessful
     * @see #getDefaultCharset()
     */
    @Override
    public String decode(final String str) throws DecoderException {
        if (str == null) {
            return null;
        }
        try {
            return decode(str, getDefaultCharset());
        } catch (final UnsupportedEncodingException e) {
            throw new DecoderException(e.getMessage(), e);
        }
    }

    /**
     * Encodes an object into its URL safe form. Unsafe characters are escaped.
     *
     * @param obj
     *            a <code>String</code> or <code>byte[]</code> to convert to a URL safe form
     * @return URL safe object, or <code>null</code> if <code>obj</code> is <code>null</code>
     * @throws EncoderException
     *             Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj == null) {
            return null;
        }
        if (obj instanceof byte[]) {
            return encode((byte[]) obj);
        }
        if (obj instanceof String) {
            return encode((String) obj);
        }
        throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded");
    }

    /**
     * Decodes a URL safe object into its original form. Escaped characters are converted back to their original
     * representation.
     *
     * @param obj
     *            URL safe object to convert into its original form
     * @return original object, or <code>null</code> if <code>obj</code> is <code>null</code>
     * @throws DecoderException
     *             Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
     *             condition is encountered during the decode process.
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj == null) {
            return null;
        }
        if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        }
        if (obj instanceof String) {
            return decode((String) obj);
        }
        throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded");
    }

    /**
     * The default charset used for string decoding and encoding.
     *
     * @return the default string charset.
     */
    public String getDefaultCharset() {
        return this.charset;
    }

    /**
     * The <code>String</code> encoding used for decoding and encoding.
     *
     * @return Returns the encoding.
     *
     * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0.
     */
    @Deprecated
    public String getEncoding() {
        return this.charset;
    }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.net;
import org.apache.commons.codec.DecoderException;
/**
 * Package-private helper methods shared by the codecs in this package.
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @version $Id$
 * @since 1.4
 */
class Utils {

    /**
     * Interprets the byte <code>b</code> as a character and returns its value as a radix 16 digit.
     *
     * @param b
     *            The byte to be converted.
     * @return The numeric value represented by the character in radix 16.
     *
     * @throws DecoderException
     *             Thrown when {@link Character#digit(char,int)} does not recognise the byte as a
     *             radix 16 digit
     */
    static int digit16(final byte b) throws DecoderException {
        final int value = Character.digit((char) b, URLCodec.RADIX);
        if (value != -1) {
            return value;
        }
        throw new DecoderException("Invalid URL encoding: not a valid digit (radix " + URLCodec.RADIX + "): " + b);
    }
}

View File

@ -0,0 +1,23 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
<p>
Network related encoding and decoding.
</p>
</body>
</html>

View File

@ -0,0 +1,29 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- $Id$ -->
<html>
<body>
<p>
This document is the API specification for the Apache Commons Codec Library, version 1.3.
</p>
<p>
This library requires a JRE version of 1.2.2 or greater.
The hypertext links originating from this document point to Sun's version 1.3 API as the 1.2.2 API documentation
is no longer on-line.
</p>
</body>
</html>

View File

@ -0,0 +1,100 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
</head>
<body>
<p>Interfaces and classes used by
the various implementations in the sub-packages.</p>
<p>Definitive implementations of commonly used encoders and decoders.</p>
<p>Codec currently comprises a modest set of utilities and a
simple framework for String encoding and decoding in three categories:
Binary Encoders, Language Encoders, and Network Encoders. </p>
<h4><a name="Common Encoders">Binary Encoders</a></h4>
<table border="1" width="100%" cellspacing="2" cellpadding="3">
<tbody>
<tr>
<td>
<a href="binary/Base64.html">
org.apache.commons.codec.binary.Base64</a>
</td>
<td>
Provides Base64 content-transfer-encoding as defined in
<a href="http://www.ietf.org/rfc/rfc2045.txt"> RFC 2045</a>
</td>
<td>Production</td>
</tr>
<tr>
<td>
<a href="binary/Hex.html">
org.apache.commons.codec.binary.Hex</a>
</td>
<td>
Converts an array of bytes into an array of characters
representing the hexadecimal values of each byte in order
</td>
<td>Production</td>
</tr>
</tbody>
</table>
<h4>
<a name="Language Encoders">Language Encoders</a>
</h4>
<p>
Codec contains a number of commonly used language and phonetic
encoders
</p>
<table border="1" width="100%" cellspacing="2" cellpadding="3">
<tbody>
<tr>
<td>
<a href="#">org.apache.commons.codec.language.Soundex</a>
</td>
<td>Implementation of the Soundex algorithm.</td>
<td>Production</td>
</tr>
<tr>
<td>
<a href="#">org.apache.commons.codec.language.Metaphone</a>
</td>
<td>Implementation of the Metaphone algorithm.</td>
<td>Production</td>
</tr>
</tbody>
</table>
<h4><a name="Network_Encoders">Network Encoders</a></h4>
<h4> </h4>
<p> Codec contains network related encoders </p>
<table border="1" width="100%" cellspacing="2" cellpadding="3">
<tbody>
<tr>
<td>
<a href="#">org.apache.commons.codec.net.URLCodec</a>
</td>
<td>Implements the 'www-form-urlencoded' encoding scheme.</td>
<td>Production</td>
</tr>
</tbody>
</table>
<br>
</body>
</html>

View File

@ -0,0 +1,594 @@
/**
* Copyright 2011 The Buzz Media, LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.imgscalr;
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.awt.image.BufferedImageOp;
import java.awt.image.ImagingOpException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.imgscalr.Scalr.Method;
import org.imgscalr.Scalr.Mode;
import org.imgscalr.Scalr.Rotation;
/**
* Class used to provide the asynchronous versions of all the methods defined in
* {@link Scalr} for the purpose of efficiently handling large amounts of image
* operations via a select number of processing threads asynchronously.
* <p/>
* Given that image-scaling operations, especially when working with large
* images, can be very hardware-intensive (both CPU and memory), in large-scale
* deployments (e.g. a busy web application) it becomes increasingly important
* that the scale operations performed by imgscalr be manageable so as not to
* fire off too many simultaneous operations that the JVM's heap explodes and
* runs out of memory or pegs the CPU on the host machine, starving all other
* running processes.
* <p/>
* Up until now it was left to the caller to implement their own serialization
* or limiting logic to handle these use-cases. Given imgscalr's popularity in
* web applications it was determined that this requirement be common enough
* that it should be integrated directly into the imgscalr library for everyone
* to benefit from.
* <p/>
* Every method in this class wraps the matching methods in the {@link Scalr}
* class in new {@link Callable} instances that are submitted to an internal
* {@link ExecutorService} for execution at a later date. A {@link Future} is
* returned to the caller representing the task that is either currently
* performing the scale operation or will at a future date depending on where it
* is in the {@link ExecutorService}'s queue. {@link Future#get()} or
* {@link Future#get(long, TimeUnit)} can be used to block on the
* <code>Future</code>, waiting for the scale operation to complete and return
* the resultant {@link BufferedImage} to the caller.
* <p/>
* This design provides the following features:
* <ul>
* <li>Non-blocking, asynchronous scale operations that can continue execution
* while waiting on the scaled result.</li>
* <li>Serialize all scale requests down into a maximum number of
* <em>simultaneous</em> scale operations with no additional/complex logic. The
* number of simultaneous scale operations is caller-configurable (see
* {@link #THREAD_COUNT}) so as best to optimize the host system (e.g. 1 scale
* thread per core).</li>
* <li>No need to worry about overloading the host system with too many scale
* operations, they will simply queue up in this class and execute in-order.</li>
* <li>Synchronous/blocking behavior can still be achieved (if desired) by
* calling <code>get()</code> or <code>get(long, TimeUnit)</code> immediately on
* the returned {@link Future} from any of the methods below.</li>
* </ul>
* <h3>Performance</h3>
* When tuning this class for optimal performance, benchmarking your particular
* hardware is the best approach. For some rough guidelines though, there are
* two resources you want to watch closely:
* <ol>
* <li>JVM Heap Memory (Assume physical machine memory is always sufficiently
* large)</li>
* <li># of CPU Cores</li>
* </ol>
* You never want to allocate more scaling threads than you have CPU cores and
* on a sufficiently busy host where some of the cores may be busy running a
* database or a web server, you will want to allocate even fewer scaling
* threads.
* <p/>
* So as a maximum you would never want more scaling threads than CPU cores in
* any situation and less so on a busy server.
* <p/>
* If you allocate more threads than you have available CPU cores, your scaling
* operations will slow down as the CPU will spend a considerable amount of time
* context-switching between threads on the same core trying to finish all the
* tasks in parallel. You might still be tempted to do this because of the I/O
* delay some threads will encounter reading images off disk, but when you do
* your own benchmarking you'll likely find (as I did) that the actual disk I/O
* necessary to pull the image data off disk is a much smaller portion of the
* execution time than the actual scaling operations.
* <p/>
* If you are executing on a storage medium that is unexpectedly slow and I/O is
* a considerable portion of the scaling operation (e.g. S3 or EBS volumes),
* feel free to try using more threads than CPU cores to see if that helps; but
* in most normal cases, it will only slow down all other parallel scaling
* operations.
* <p/>
* As for memory, every time an image is scaled it is decoded into a
* {@link BufferedImage} and stored in the JVM Heap space (decoded image
* instances are always larger than the source images on-disk). For larger
* images, that can use up quite a bit of memory. You will need to benchmark
* your particular use-cases on your hardware to get an idea of where the sweet
* spot is for this; if you are operating within tight memory bounds, you may
* want to limit simultaneous scaling operations to 1 or 2 regardless of the
* number of cores just to avoid having too many {@link BufferedImage} instances
* in JVM Heap space at the same time.
* <p/>
* These are rough metrics and behaviors to give you an idea of how best to tune
* this class for your deployment, but nothing can replace writing a small
* Java class that scales a handful of images in a number of different ways and
* testing that directly on your deployment hardware.
* <h3>Resource Overhead</h3>
* The {@link ExecutorService} utilized by this class won't be initialized until
* one of the operation methods is called, at which point the
* <code>service</code> will be instantiated for the first time and operation
* queued up.
* <p/>
* More specifically, if you have no need for asynchronous image processing
* offered by this class, you don't need to worry about wasted resources or
* hanging/idle threads as they will never be created if you never use this
* class.
* <h3>Cleaning up Service Threads</h3>
* By default the {@link Thread}s created by the internal
* {@link ThreadPoolExecutor} do not run in <code>daemon</code> mode; which
* means they will block the host VM from exiting until they are explicitly shut
* down in a client application; in a server application the container will shut
* down the pool forcibly.
* <p/>
* If you have used the {@link AsyncScalr} class and are trying to shut down a
* client application, you will need to call {@link #getService()} then
* {@link ExecutorService#shutdown()} or {@link ExecutorService#shutdownNow()}
* to have the threads terminated; you may also want to look at the
* {@link ExecutorService#awaitTermination(long, TimeUnit)} method if you'd like
* to more closely monitor the shutting down process (and finalization of
* pending scale operations).
* <h3>Reusing Shutdown AsyncScalr</h3>
* If you have previously called <code>shutdown</code> on the underlying service
* utilized by this class, subsequent calls to any of the operations this class
* provides will invoke the internal {@link #checkService()} method which will
* replace the terminated underlying {@link ExecutorService} with a new one via
* the {@link #createService()} method.
* <h3>Custom Implementations</h3>
* If a subclass wants to customize the {@link ExecutorService} or
* {@link ThreadFactory} used under the covers, this can be done by overriding
* the {@link #createService()} method which is invoked by this class anytime a
* new {@link ExecutorService} is needed.
* <p/>
* By default the {@link #createService()} method delegates to the
* {@link #createService(ThreadFactory)} method with a new instance of
* {@link DefaultThreadFactory}. Either of these methods can be overridden and
* customized easily if desired.
* <p/>
* <strong>TIP</strong>: A common customization to this class is to make the
* {@link Thread}s generated by the underlying factory more server-friendly, in
* which case the caller would want to use an instance of the
* {@link ServerThreadFactory} when creating the new {@link ExecutorService}.
* <p/>
* This can be done in one line by overriding {@link #createService()} and
* returning the result of:
* <code>return createService(new ServerThreadFactory());</code>
* <p/>
* By default this class uses a {@link ThreadPoolExecutor} internally to handle
* execution of queued image operations. If a different type of
* {@link ExecutorService} is desired, again, simply overriding the
* {@link #createService()} method of choice is the right way to do that.
*
* @author Riyad Kalla (software@thebuzzmedia.com)
* @since 3.2
*/
@SuppressWarnings("javadoc")
public class AsyncScalr {
/**
 * System property name used to set the number of threads the default
 * underlying {@link ExecutorService} will use to process async image
 * operations.
 * <p/>
 * Value is "<code>imgscalr.async.threadCount</code>".
 * <p/>
 * The property is read through {@link Integer#getInteger(String, int)} when
 * {@link #THREAD_COUNT} is initialized.
 */
public static final String THREAD_COUNT_PROPERTY_NAME = "imgscalr.async.threadCount";
/**
 * Number of threads the internal {@link ExecutorService} will use to
 * simultaneously execute scale requests.
 * <p/>
 * This value can be changed by setting the
 * <code>imgscalr.async.threadCount</code> system property (see
 * {@link #THREAD_COUNT_PROPERTY_NAME}) to a valid integer value &gt; 0.
 * <p/>
 * Default value is <code>2</code>; {@link Integer#getInteger(String, int)}
 * also falls back to this default when the property is not set or cannot be
 * parsed as an integer. Values less than 1 are rejected by the static
 * initializer of this class.
 */
public static final int THREAD_COUNT = Integer.getInteger(
        THREAD_COUNT_PROPERTY_NAME, 2);
/**
* Initializer used to verify the THREAD_COUNT system property.
*/
static {
if (THREAD_COUNT < 1)
throw new RuntimeException("System property '"
+ THREAD_COUNT_PROPERTY_NAME + "' set THREAD_COUNT to "
+ THREAD_COUNT + ", but THREAD_COUNT must be > 0.");
}
/**
 * Executor that the operation methods of this class submit their tasks to;
 * exposed to callers through {@link #getService()}. It has no initializer
 * here, and every operation method calls <code>checkService()</code> before
 * using it.
 */
protected static ExecutorService service;
/**
 * Gives access to the {@link ExecutorService} this class currently uses to
 * process scale operations.
 * <p/>
 * <strong>NOTE</strong>: Any service currently set on this class has to be
 * shut down explicitly before the host JVM exits.
 * <p/>
 * {@link ExecutorService#shutdown()} lets all scaling operations complete
 * first, whereas {@link ExecutorService#shutdownNow()} kills any in-process
 * operations and purges all pending operations before exiting.
 * <p/>
 * After issuing a shutdown command,
 * {@link ExecutorService#awaitTermination(long, TimeUnit)} can be used to
 * try and wait until the service has finished all tasks.
 *
 * @return the current {@link ExecutorService} used by this class to process
 *         scale operations.
 */
public static ExecutorService getService() {
    return AsyncScalr.service;
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#apply(BufferedImage, BufferedImageOp...)}: after
 * <code>checkService()</code> has run, the call is wrapped in a
 * {@link Callable} and submitted to the internal service.
 *
 * @return a {@link Future} for the image produced by the submitted task.
 *
 * @see Scalr#apply(BufferedImage, BufferedImageOp...)
 */
public static Future<BufferedImage> apply(final BufferedImage src,
        final BufferedImageOp... ops) throws IllegalArgumentException,
        ImagingOpException {
    checkService();
    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.apply(src, ops);
        }
    };
    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#crop(BufferedImage, int, int, BufferedImageOp...)}: after
 * <code>checkService()</code> has run, the call is wrapped in a
 * {@link Callable} and submitted to the internal service.
 *
 * @return a {@link Future} for the image produced by the submitted task.
 *
 * @see Scalr#crop(BufferedImage, int, int, BufferedImageOp...)
 */
public static Future<BufferedImage> crop(final BufferedImage src,
        final int width, final int height, final BufferedImageOp... ops)
        throws IllegalArgumentException, ImagingOpException {
    checkService();
    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.crop(src, width, height, ops);
        }
    };
    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#crop(BufferedImage, int, int, int, int, BufferedImageOp...)}:
 * after <code>checkService()</code> has run, the call is wrapped in a
 * {@link Callable} and submitted to the internal service.
 *
 * @return a {@link Future} for the image produced by the submitted task.
 *
 * @see Scalr#crop(BufferedImage, int, int, int, int, BufferedImageOp...)
 */
public static Future<BufferedImage> crop(final BufferedImage src,
        final int x, final int y, final int width, final int height,
        final BufferedImageOp... ops) throws IllegalArgumentException,
        ImagingOpException {
    checkService();
    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.crop(src, x, y, width, height, ops);
        }
    };
    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#pad(BufferedImage, int, BufferedImageOp...)}: after
 * <code>checkService()</code> has run, the call is wrapped in a
 * {@link Callable} and submitted to the internal service.
 *
 * @return a {@link Future} for the image produced by the submitted task.
 *
 * @see Scalr#pad(BufferedImage, int, BufferedImageOp...)
 */
public static Future<BufferedImage> pad(final BufferedImage src,
        final int padding, final BufferedImageOp... ops)
        throws IllegalArgumentException, ImagingOpException {
    checkService();
    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.pad(src, padding, ops);
        }
    };
    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#pad(BufferedImage, int, Color, BufferedImageOp...)}: after
 * <code>checkService()</code> has run, the call is wrapped in a
 * {@link Callable} and submitted to the internal service.
 *
 * @return a {@link Future} for the image produced by the submitted task.
 *
 * @see Scalr#pad(BufferedImage, int, Color, BufferedImageOp...)
 */
public static Future<BufferedImage> pad(final BufferedImage src,
        final int padding, final Color color, final BufferedImageOp... ops)
        throws IllegalArgumentException, ImagingOpException {
    checkService();
    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.pad(src, padding, color, ops);
        }
    };
    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#resize(BufferedImage, int, BufferedImageOp...)}: after
 * <code>checkService()</code> has run, the call is wrapped in a
 * {@link Callable} and submitted to the internal service.
 *
 * @return a {@link Future} for the image produced by the submitted task.
 *
 * @see Scalr#resize(BufferedImage, int, BufferedImageOp...)
 */
public static Future<BufferedImage> resize(final BufferedImage src,
        final int targetSize, final BufferedImageOp... ops)
        throws IllegalArgumentException, ImagingOpException {
    checkService();
    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.resize(src, targetSize, ops);
        }
    };
    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#resize(BufferedImage, Method, int, BufferedImageOp...)}:
 * after <code>checkService()</code> has run, the call is wrapped in a
 * {@link Callable} and submitted to the internal service.
 *
 * @return a {@link Future} for the image produced by the submitted task.
 *
 * @see Scalr#resize(BufferedImage, Method, int, BufferedImageOp...)
 */
public static Future<BufferedImage> resize(final BufferedImage src,
        final Method scalingMethod, final int targetSize,
        final BufferedImageOp... ops) throws IllegalArgumentException,
        ImagingOpException {
    checkService();
    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.resize(src, scalingMethod, targetSize, ops);
        }
    };
    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#resize(BufferedImage, Mode, int, BufferedImageOp...)}: after
 * <code>checkService()</code> has run, the call is wrapped in a
 * {@link Callable} and submitted to the internal service.
 *
 * @return a {@link Future} for the image produced by the submitted task.
 *
 * @see Scalr#resize(BufferedImage, Mode, int, BufferedImageOp...)
 */
public static Future<BufferedImage> resize(final BufferedImage src,
        final Mode resizeMode, final int targetSize,
        final BufferedImageOp... ops) throws IllegalArgumentException,
        ImagingOpException {
    checkService();
    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.resize(src, resizeMode, targetSize, ops);
        }
    };
    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#resize(BufferedImage, Method, Mode, int, BufferedImageOp...)}:
 * after <code>checkService()</code> has run, the call is wrapped in a
 * {@link Callable} and submitted to the internal service.
 *
 * @return a {@link Future} for the image produced by the submitted task.
 *
 * @see Scalr#resize(BufferedImage, Method, Mode, int, BufferedImageOp...)
 */
public static Future<BufferedImage> resize(final BufferedImage src,
        final Method scalingMethod, final Mode resizeMode,
        final int targetSize, final BufferedImageOp... ops)
        throws IllegalArgumentException, ImagingOpException {
    checkService();
    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.resize(src, scalingMethod, resizeMode, targetSize, ops);
        }
    };
    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#resize(BufferedImage, int, int, BufferedImageOp...)}.
 * <p/>
 * Calls {@link #checkService()} and then submits a task to
 * <code>service</code>; the task forwards every argument unchanged to
 * <code>Scalr.resize</code> and its result is exposed through the returned
 * {@link Future}.
 *
 * @param src
 *            image forwarded to <code>Scalr.resize</code>
 * @param targetWidth
 *            target width forwarded to <code>Scalr.resize</code>
 * @param targetHeight
 *            target height forwarded to <code>Scalr.resize</code>
 * @param ops
 *            optional operations forwarded to <code>Scalr.resize</code>
 * @return handle to the submitted task
 *
 * @see Scalr#resize(BufferedImage, int, int, BufferedImageOp...)
 */
public static Future<BufferedImage> resize(final BufferedImage src,
        final int targetWidth, final int targetHeight,
        final BufferedImageOp... ops) throws IllegalArgumentException,
        ImagingOpException {
    checkService();

    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.resize(src, targetWidth, targetHeight, ops);
        }
    };

    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#resize(BufferedImage, Method, int, int, BufferedImageOp...)}.
 * <p/>
 * Calls {@link #checkService()} and then submits a task to
 * <code>service</code>; the task forwards every argument unchanged to
 * <code>Scalr.resize</code> and its result is exposed through the returned
 * {@link Future}.
 * <p/>
 * The <code>throws</code> clause is declared only so this overload reads the
 * same as its siblings; both exception types are unchecked, so existing
 * callers are unaffected.
 *
 * @param src
 *            image forwarded to <code>Scalr.resize</code>
 * @param scalingMethod
 *            scaling method forwarded to <code>Scalr.resize</code>
 * @param targetWidth
 *            target width forwarded to <code>Scalr.resize</code>
 * @param targetHeight
 *            target height forwarded to <code>Scalr.resize</code>
 * @param ops
 *            optional operations forwarded to <code>Scalr.resize</code>
 * @return handle to the submitted task
 *
 * @see Scalr#resize(BufferedImage, Method, int, int, BufferedImageOp...)
 */
public static Future<BufferedImage> resize(final BufferedImage src,
        final Method scalingMethod, final int targetWidth,
        final int targetHeight, final BufferedImageOp... ops)
        throws IllegalArgumentException, ImagingOpException {
    checkService();
    return service.submit(new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.resize(src, scalingMethod, targetWidth,
                    targetHeight, ops);
        }
    });
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#resize(BufferedImage, Mode, int, int, BufferedImageOp...)}.
 * <p/>
 * Calls {@link #checkService()} and then submits a task to
 * <code>service</code>; the task forwards every argument unchanged to
 * <code>Scalr.resize</code> and its result is exposed through the returned
 * {@link Future}.
 *
 * @param src
 *            image forwarded to <code>Scalr.resize</code>
 * @param resizeMode
 *            resize mode forwarded to <code>Scalr.resize</code>
 * @param targetWidth
 *            target width forwarded to <code>Scalr.resize</code>
 * @param targetHeight
 *            target height forwarded to <code>Scalr.resize</code>
 * @param ops
 *            optional operations forwarded to <code>Scalr.resize</code>
 * @return handle to the submitted task
 *
 * @see Scalr#resize(BufferedImage, Mode, int, int, BufferedImageOp...)
 */
public static Future<BufferedImage> resize(final BufferedImage src,
        final Mode resizeMode, final int targetWidth,
        final int targetHeight, final BufferedImageOp... ops)
        throws IllegalArgumentException, ImagingOpException {
    checkService();

    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.resize(src, resizeMode, targetWidth, targetHeight,
                    ops);
        }
    };

    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#resize(BufferedImage, Method, Mode, int, int, BufferedImageOp...)}.
 * <p/>
 * Calls {@link #checkService()} and then submits a task to
 * <code>service</code>; the task forwards every argument unchanged to
 * <code>Scalr.resize</code> and its result is exposed through the returned
 * {@link Future}.
 *
 * @param src
 *            image forwarded to <code>Scalr.resize</code>
 * @param scalingMethod
 *            scaling method forwarded to <code>Scalr.resize</code>
 * @param resizeMode
 *            resize mode forwarded to <code>Scalr.resize</code>
 * @param targetWidth
 *            target width forwarded to <code>Scalr.resize</code>
 * @param targetHeight
 *            target height forwarded to <code>Scalr.resize</code>
 * @param ops
 *            optional operations forwarded to <code>Scalr.resize</code>
 * @return handle to the submitted task
 *
 * @see Scalr#resize(BufferedImage, Method, Mode, int, int,
 *      BufferedImageOp...)
 */
public static Future<BufferedImage> resize(final BufferedImage src,
        final Method scalingMethod, final Mode resizeMode,
        final int targetWidth, final int targetHeight,
        final BufferedImageOp... ops) throws IllegalArgumentException,
        ImagingOpException {
    checkService();

    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.resize(src, scalingMethod, resizeMode,
                    targetWidth, targetHeight, ops);
        }
    };

    return service.submit(task);
}
/**
 * Asynchronous counterpart of
 * {@link Scalr#rotate(BufferedImage, Rotation, BufferedImageOp...)}.
 * <p/>
 * Calls {@link #checkService()} and then submits a task to
 * <code>service</code>; the task forwards every argument unchanged to
 * <code>Scalr.rotate</code> and its result is exposed through the returned
 * {@link Future}.
 *
 * @param src
 *            image forwarded to <code>Scalr.rotate</code>
 * @param rotation
 *            rotation forwarded to <code>Scalr.rotate</code>
 * @param ops
 *            optional operations forwarded to <code>Scalr.rotate</code>
 * @return handle to the submitted task
 *
 * @see Scalr#rotate(BufferedImage, Rotation, BufferedImageOp...)
 */
public static Future<BufferedImage> rotate(final BufferedImage src,
        final Rotation rotation, final BufferedImageOp... ops)
        throws IllegalArgumentException, ImagingOpException {
    checkService();

    final Callable<BufferedImage> task = new Callable<BufferedImage>() {
        public BufferedImage call() throws Exception {
            return Scalr.rotate(src, rotation, ops);
        }
    };

    return service.submit(task);
}
/**
 * Creates an {@link ExecutorService} whose threads come from a fresh
 * {@link DefaultThreadFactory}.
 *
 * @return the service built by {@link #createService(ThreadFactory)}
 */
protected static ExecutorService createService() {
    final ThreadFactory defaultFactory = new DefaultThreadFactory();
    return createService(defaultFactory);
}
/**
 * Creates a fixed-size thread pool of <code>THREAD_COUNT</code> threads
 * that obtains its threads from the given factory.
 *
 * @param factory
 *            factory used by the pool to create its threads
 * @return a new fixed thread pool
 * @throws IllegalArgumentException
 *             if <code>factory</code> is <code>null</code>
 */
protected static ExecutorService createService(ThreadFactory factory)
        throws IllegalArgumentException {
    if (factory != null) {
        return Executors.newFixedThreadPool(THREAD_COUNT, factory);
    }

    throw new IllegalArgumentException("factory cannot be null");
}
/**
 * Makes sure <code>service</code> refers to an {@link ExecutorService} that
 * can still accept work.
 * <p/>
 * When <code>service</code> is <code>null</code>, or reports itself as
 * shut down or terminated, it is replaced with the value returned by
 * {@link #createService()}; otherwise it is left untouched.
 * <p/>
 * Note that {@link #createService()} is <code>static</code>, so the call
 * made here is bound to this class's own method; a subclass declaring a
 * method of the same name hides it rather than overriding it.
 */
protected static void checkService() {
    final boolean usable = service != null && !service.isShutdown()
            && !service.isTerminated();

    if (usable) {
        return;
    }

    /*
     * Dropping the reference to a shut down or terminated service lets it
     * be garbage collected once it has finished shutting down.
     */
    service = createService();
}
/**
 * Default {@link ThreadFactory} used by the internal
 * {@link ExecutorService} to create the {@link Thread}s that execute image
 * operations.
 * <p/>
 * More or less a copy of the hidden class backing the
 * {@link Executors#defaultThreadFactory()} method, but exposed here to make
 * it easier for implementors to extend and customize.
 *
 * @author Doug Lea
 * @author Riyad Kalla (software@thebuzzmedia.com)
 * @since 4.0
 */
protected static class DefaultThreadFactory implements ThreadFactory {
    /** Counter shared by all factories; supplies the number in "pool-N". */
    protected static final AtomicInteger poolNumber = new AtomicInteger(1);

    /** Thread group every thread created by this factory is placed in. */
    protected final ThreadGroup group;

    /** Per-factory counter; supplies the number after "-thread-". */
    protected final AtomicInteger threadNumber = new AtomicInteger(1);

    /** Name prefix of the form "pool-N-thread-". */
    protected final String namePrefix;

    DefaultThreadFactory() {
        final SecurityManager manager = System.getSecurityManager();

        /*
         * Use the security manager's thread group when one is installed,
         * otherwise the group of the thread constructing this factory.
         */
        if (manager != null) {
            group = manager.getThreadGroup();
        } else {
            group = Thread.currentThread().getThreadGroup();
        }

        // Each factory instance claims the next pool number for its names.
        namePrefix = "pool-" + poolNumber.getAndIncrement() + "-thread-";
    }

    /**
     * Creates a {@link Thread} that will run the given {@link Runnable}.
     * <p/>
     * The thread belongs to {@link #group}, is named with
     * {@link #namePrefix} followed by the next thread number, is a
     * non-daemon thread and runs at {@link Thread#NORM_PRIORITY}.
     */
    public Thread newThread(Runnable r) {
        final String threadName = namePrefix
                + threadNumber.getAndIncrement();

        // A stack size of 0 leaves the choice to the VM.
        final Thread worker = new Thread(group, r, threadName, 0);

        // Defaults; subclasses may adjust the returned thread further.
        worker.setDaemon(false);
        worker.setPriority(Thread.NORM_PRIORITY);
        return worker;
    }
}
/**
 * An extension of the {@link DefaultThreadFactory} class that makes two
 * changes to the execution {@link Thread}s it generates:
 * <ol>
 * <li>Threads are set to be daemon threads instead of user threads.</li>
 * <li>Threads execute with a priority of {@link Thread#MIN_PRIORITY} to
 * make them more compatible with server environment deployments.</li>
 * </ol>
 * This class is provided as a convenience for subclasses that want this
 * (common) customization of the {@link Thread}s used internally by
 * {@link AsyncScalr} without having to write the implementation
 * themselves.
 *
 * @author Riyad Kalla (software@thebuzzmedia.com)
 * @since 4.0
 */
protected static class ServerThreadFactory extends DefaultThreadFactory {
    /**
     * Takes the thread built by the parent factory, lowers its priority to
     * {@link Thread#MIN_PRIORITY} and marks it as a daemon thread before
     * returning it.
     */
    @Override
    public Thread newThread(Runnable r) {
        final Thread worker = super.newThread(r);

        worker.setPriority(Thread.MIN_PRIORITY);
        worker.setDaemon(true);

        return worker;
    }
}
}

2349
src/org/imgscalr/Scalr.java Normal file

File diff suppressed because it is too large Load Diff

View File

@ -33,6 +33,7 @@ import org.objectweb.asm.tree.ClassNode;
import the.bytecode.club.bytecodeviewer.api.ClassNodeLoader;
import the.bytecode.club.bytecodeviewer.gui.ClassViewer;
import the.bytecode.club.bytecodeviewer.gui.FileNavigationPane;
import the.bytecode.club.bytecodeviewer.gui.BootScreen;
import the.bytecode.club.bytecodeviewer.gui.MainViewerGUI;
import the.bytecode.club.bytecodeviewer.gui.RunOptions;
import the.bytecode.club.bytecodeviewer.gui.SearchingPane;
@ -67,7 +68,6 @@ import the.bytecode.club.bytecodeviewer.plugin.PluginManager;
* TODO:
*
* 3.0.0: (RETIREMENT PARTY, WOHOOO)
* maybe just do AMS5 then obfuscate the dex2jar shit.
* Add obfuscation:
* - Add integer boxing and other obfuscation methods contra implemented
* - Insert unadded/debug opcodes to try to fuck up decompilers
@ -91,11 +91,13 @@ import the.bytecode.club.bytecodeviewer.plugin.PluginManager;
* refresh appears under panes that are non refreshable
* make ez-injection plugin console show all sys.out calls
* edit then save issues?
*
* Search open doesnt append .class
* Search open doesnt append .class to tab name
*
* -----2.9.7-----:
* 07/02/2015 - Added adjustable font size.
* 07/05/2015 - Started working on the new Boot Screen.
* 07/06/2015 - Moved the font size to be under the view menu.
* 07/06/2015 - Fixed a bug with plugins not being able to grab the currently viewed class.
*
* @author Konloch
*
@ -123,6 +125,7 @@ public class BytecodeViewer {
private static String pluginsName = getBCVDirectory() + fs + "recentplugins.bcv";
public static String settingsName = getBCVDirectory() + fs + "settings.bcv";
public static String tempDirectory = getBCVDirectory() + fs + "bcv_temp" + fs;
public static String libsDirectory = getBCVDirectory() + fs + "libs" + fs;
public static String krakatauWorkingDirectory = getBCVDirectory() + fs + "krakatau_" + krakatauVersion + fs + "Krakatau-master";
private static ArrayList<String> recentFiles = DiskReader.loadArrayList(filesName, false);
private static ArrayList<String> recentPlugins = DiskReader.loadArrayList(pluginsName, false);
@ -132,11 +135,12 @@ public class BytecodeViewer {
public static ArrayList<Process> krakatau = new ArrayList<Process>();
public static Refactorer refactorer = new Refactorer();
public static boolean pingback = false;
public static boolean deleteForiegnLibraries = true;
/**
* The version checker thread
*/
private static Thread versionChecker = new Thread() {
public static Thread versionChecker = new Thread() {
@Override
public void run() {
try {
@ -294,6 +298,39 @@ public class BytecodeViewer {
}
};
// Thread that, when started, issues one HTTP request to the bytecodeviewer.com "add.php" URL.
public static Thread PingBack = new Thread() {
public void run() {
try {
// The response is read, but whatever read() returns is discarded.
new HTTPRequest(new URL("https://bytecodeviewer.com/add.php")).read();
} catch(Exception e) {
//ignore
// Best effort only: any failure while contacting the server is dropped silently.
}
}
};
/**
 * Asks the user, through a Yes/No option dialog, whether Bytecode Viewer may
 * "ping back" to bytecodeviewer.com, and starts the {@code PingBack} thread
 * when the answer is "Yes".
 * <p>
 * Any other outcome (choosing "No" or a value that matches neither option)
 * does nothing. An exception raised while starting the thread is handed to
 * {@code ExceptionUI}.
 */
public static void pingback() {
    final Object[] options = new String[] { "Yes", "No" };

    final JOptionPane pane = new JOptionPane(
            "Would you like to 'pingback' to https://bytecodeviewer.com to be counted in the global users for BCV?");
    pane.setOptions(options);

    final JDialog dialog = pane.createDialog(BytecodeViewer.viewer,
            "Bytecode Viewer - Optional Pingback");
    // NOTE(review): presumably a modal dialog, so this blocks until it is dismissed - confirm.
    dialog.setVisible(true);

    // Only the first option ("Yes") triggers the pingback.
    final Object chosen = pane.getValue();
    if (!options[0].equals(chosen)) {
        return;
    }

    try {
        PingBack.start();
    } catch (Exception e) {
        new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
    }
}
/**
* Grab the byte array from the loaded Class object
* @param clazz
@ -317,6 +354,15 @@ public class BytecodeViewer {
*/
public static void main(String[] args) {
// Install the security manager held in the sm field before anything else runs.
System.setSecurityManager(sm);
try {
// Switch Swing to the platform's system look and feel before any window is created.
UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
} catch (Exception e) {
// Hand the failure to ExceptionUI and carry on with start-up.
new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
}
// Create the boot screen and pass it the command-line arguments.
new BootScreen().DO_FIRST_BOOT(args);
}
public static void BOOT(String[] args) {
checkKrakatau();
System.out.println("https://the.bytecode.club - Created by @Konloch - Bytecode Viewer " + version);
cleanup();
@ -329,18 +375,13 @@ public class BytecodeViewer {
cleanup();
}
});
try {
UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
} catch (Exception e) {
new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
}
viewer = new MainViewerGUI();
Settings.loadGUI();
resetRecentFilesMenu();
if (viewer.chckbxmntmNewCheckItem_12.isSelected()) // start only if selected
versionChecker.start();
/*if (viewer.chckbxmntmNewCheckItem_12.isSelected()) // start only if selected
versionChecker.start();*/
viewer.setVisible(true);
System.out.println("Start up took " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
@ -350,33 +391,10 @@ public class BytecodeViewer {
openFiles(new File[] { new File(s) }, true);
}
if(!pingback) {
/*if(!pingback) {
pingback = true;
pingback();
}
}
public static void pingback() {
JOptionPane pane = new JOptionPane(
"Would you like to 'pingback' to https://bytecodeviewer.com to be counted in the global users for BCV?");
Object[] options = new String[] { "Yes", "No" };
pane.setOptions(options);
JDialog dialog = pane.createDialog(BytecodeViewer.viewer,
"Bytecode Viewer - Optional Pingback");
dialog.setVisible(true);
Object obj = pane.getValue();
int result = -1;
for (int k = 0; k < options.length; k++)
if (options[k].equals(obj))
result = k;
if (result == 0) {
try {
new HTTPRequest(new URL("https://bytecodeviewer.com/add.php")).read();
} catch (Exception e) {
new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
}
}
}*/
}
/**
@ -569,7 +587,8 @@ public class BytecodeViewer {
public static void openFiles(final File[] files, boolean recentFiles) {
if(recentFiles)
for (File f : files)
BytecodeViewer.addRecentFile(f);
if(f.exists())
BytecodeViewer.addRecentFile(f);
BytecodeViewer.viewer.setIcon(true);
update = true;

View File

@ -77,6 +77,8 @@ public class JarUtils {
* @throws IOException
*/
public static void loadResources(final File zipFile) throws IOException {
if(!zipFile.exists())
return; //just ignore
ZipInputStream jis = new ZipInputStream(new FileInputStream(zipFile));
ZipEntry entry;
while ((entry = jis.getNextEntry()) != null) {

View File

@ -189,6 +189,7 @@ public class Settings {
DiskWriter.writeNewLine(BytecodeViewer.settingsName, String.valueOf(BytecodeViewer.viewer.panel2JDGUI_E.isSelected()), false);
DiskWriter.writeNewLine(BytecodeViewer.settingsName, String.valueOf(BytecodeViewer.viewer.panel3JDGUI_E.isSelected()), false);
DiskWriter.writeNewLine(BytecodeViewer.settingsName, String.valueOf(BytecodeViewer.viewer.fontSpinner.getValue()), false);
DiskWriter.writeNewLine(BytecodeViewer.settingsName, String.valueOf(BytecodeViewer.deleteForiegnLibraries), false);
} catch(Exception e) {
new the.bytecode.club.bytecodeviewer.api.ExceptionUI(e);
}
@ -376,6 +377,7 @@ public class Settings {
BytecodeViewer.viewer.panel2JDGUI_E.setSelected(Boolean.parseBoolean(DiskReader.loadString(BytecodeViewer.settingsName, 110, false)));
BytecodeViewer.viewer.panel3JDGUI_E.setSelected(Boolean.parseBoolean(DiskReader.loadString(BytecodeViewer.settingsName, 111, false)));
BytecodeViewer.viewer.fontSpinner.setValue(Integer.parseInt(DiskReader.loadString(BytecodeViewer.settingsName, 112, false)));
BytecodeViewer.deleteForiegnLibraries = Boolean.parseBoolean(DiskReader.loadString(BytecodeViewer.settingsName, 113, false));
} catch(Exception e) {
//ignore because errors are expected, first start up and outdated settings.
//e.printStackTrace();

View File

@ -34,7 +34,7 @@ public class AboutWindow extends JFrame {
getContentPane().add(txtrBytecodeViewerIs, "name_140466526081695");txtrBytecodeViewerIs.setEnabled(false);
this.setResizable(false);
this.setLocationRelativeTo(null);
}
}
@Override
public void setVisible(boolean b) {

View File

@ -0,0 +1,281 @@
package the.bytecode.club.bytecodeviewer.gui;
import javax.swing.JEditorPane;
import javax.swing.JFrame;
import java.awt.Dimension;
import java.awt.GridBagLayout;
import javax.swing.JProgressBar;
import java.awt.GridBagConstraints;
import javax.swing.JScrollPane;
import java.awt.Insets;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import javax.swing.text.html.HTMLEditorKit;
import the.bytecode.club.bytecodeviewer.BytecodeViewer;
import the.bytecode.club.bytecodeviewer.Resources;
import me.konloch.kontainer.io.HTTPRequest;
/**
 * Boot screen shown while Bytecode Viewer prepares its runtime libraries.
 * <p>
 * {@link #DO_FIRST_BOOT(String[])} reads the project's GitHub {@code libs} listing, downloads
 * every library that is missing from the local libs directory, optionally deletes local files
 * that are not part of that listing, adds every remaining jar to the system class path and
 * finally hands over to {@code BytecodeViewer.BOOT}.
 * <p>
 * Download failed? Corrupt jar? Pass {@code -clean} as the first start-up argument to wipe the
 * libs directory so that everything is downloaded again.
 *
 * @author Konloch
 *
 */
public class BootScreen extends JFrame {

    private static final long serialVersionUID = -1098467609722393444L;

    /** Prefix shared by the GitHub "blob" page URL of every file in the remote libs folder. */
    private static final String LIBS_BLOB_PREFIX = "https://github.com/Konloch/bytecode-viewer/blob/master/libs/";

    /** Prefix of the raw-download URL of a file in the remote libs folder. */
    private static final String LIBS_RAW_PREFIX = "https://github.com/Konloch/bytecode-viewer/raw/master/libs/";

    /** Set once the boot sequence has been started so it never runs twice. */
    private static boolean FIRST_BOOT = false;

    private JProgressBar progressBar = new JProgressBar();

    /**
     * Builds the window: a scrollable HTML pane above a progress bar.
     */
    public BootScreen() {
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        this.setIconImages(Resources.iconList);
        setSize(new Dimension(600, 800));
        setTitle("Bytecode Viewer Boot Screen - Starting Up");

        GridBagLayout gridBagLayout = new GridBagLayout();
        gridBagLayout.columnWidths = new int[]{0, 0};
        gridBagLayout.rowHeights = new int[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
        gridBagLayout.columnWeights = new double[]{1.0, Double.MIN_VALUE};
        gridBagLayout.rowWeights = new double[]{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, Double.MIN_VALUE};
        getContentPane().setLayout(gridBagLayout);

        // The scroll pane spans the first 24 grid rows; the progress bar sits in row 24.
        JScrollPane scrollPane = new JScrollPane();
        GridBagConstraints gbc_scrollPane = new GridBagConstraints();
        gbc_scrollPane.gridheight = 24;
        gbc_scrollPane.insets = new Insets(0, 0, 5, 0);
        gbc_scrollPane.fill = GridBagConstraints.BOTH;
        gbc_scrollPane.gridx = 0;
        gbc_scrollPane.gridy = 0;
        getContentPane().add(scrollPane, gbc_scrollPane);

        JEditorPane editorPane = new JEditorPane();
        editorPane.setEditorKit(new HTMLEditorKit());
        editorPane.setText("http://www.icesoft.org/java/home.jsf");
        scrollPane.setViewportView(editorPane);

        GridBagConstraints gbc_progressBar = new GridBagConstraints();
        gbc_progressBar.fill = GridBagConstraints.HORIZONTAL;
        gbc_progressBar.gridx = 0;
        gbc_progressBar.gridy = 24;
        getContentPane().add(progressBar, gbc_progressBar);

        this.setLocationRelativeTo(null);
    }

    /**
     * Shows the boot screen and, the first time it is called, runs the whole boot sequence:
     * synchronise the libs directory with the GitHub listing, load the jars, then call
     * {@code BytecodeViewer.BOOT(args)} and hide this window.
     * <p>
     * Any exception raised while synchronising or loading libraries is reported through
     * {@code ExceptionUI} and the boot continues anyway. The one exception is an empty GitHub
     * listing: it is reported and the method returns without booting.
     *
     * @param args
     *            the program's start-up arguments; {@code -clean} as the first argument wipes
     *            the libs directory first
     */
    public void DO_FIRST_BOOT(String args[]) {
        this.setVisible(true);
        if (FIRST_BOOT)
            return;
        FIRST_BOOT = true;

        setTitle("Bytecode Viewer Boot Screen - Checking Libraries...");
        try {
            List<String> urlList = fetchLibraryUrls();
            if (urlList.isEmpty()) {
                new the.bytecode.club.bytecodeviewer.api.ExceptionUI("Bytecode Viewer ran into an issue, for some reason github is not returning what we're expecting. Please try rebooting, if this issue persists please contact @Konloch.");
                return;
            }

            File libsDirectory = new File(BytecodeViewer.libsDirectory);
            // File.delete() cannot remove a non-empty directory, so -clean has to recurse.
            if (args != null && args.length >= 1 && args[0].equalsIgnoreCase("-clean"))
                deleteRecursively(libsDirectory);
            if (!libsDirectory.exists())
                libsDirectory.mkdirs();

            List<String> libsList = new ArrayList<String>();
            List<String> libsFileList = new ArrayList<String>();
            File[] existing = libsDirectory.listFiles();
            // listFiles() returns null when the directory cannot be read.
            if (existing != null) {
                for (File f : existing) {
                    libsList.add(f.getName());
                    libsFileList.add(f.getAbsolutePath());
                }
            }

            // Download every listed library that is not present locally yet.
            progressBar.setMaximum(urlList.size());
            int completedCheck = 0;
            for (String url : urlList) {
                String fileName = fileNameOf(url);
                if (!libsList.contains(fileName)) {
                    setTitle("Bytecode Viewer Boot Screen - Downloading " + fileName);
                    libsFileList.add(download(url, fileName));
                }
                completedCheck++;
                progressBar.setValue(completedCheck);
            }

            if (BytecodeViewer.deleteForiegnLibraries) {
                setTitle("Bytecode Viewer Boot Screen - Checking & Deleting Foriegn/Outdated Libraries...");
                libsFileList = deleteForeignLibraries(libsFileList, urlList);
            }

            setTitle("Bytecode Viewer Boot Screen - Loading Libraries...");
            for (String path : libsFileList) {
                if (path.endsWith(".jar"))
                    loadJar(new File(path));
            }

            setTitle("Bytecode Viewer Boot Screen - Booting!");
        } catch (Exception e) {
            StringWriter sw = new StringWriter();
            e.printStackTrace(new PrintWriter(sw));
            e.printStackTrace();
            new the.bytecode.club.bytecodeviewer.api.ExceptionUI("Bytecode Viewer ran into an error while booting, trying to force it anyways."+ BytecodeViewer.nl+ BytecodeViewer.nl+
                    "Please ensure you have an active internet connection and restart BCV. If this presists please visit http://github.com/Konloch/Bytecode-Viewer or http://bytecodeviewer.com"+ BytecodeViewer.nl + BytecodeViewer.nl + sw.toString());
        }

        setTitle("Bytecode Viewer Boot Screen - Finished");
        BytecodeViewer.BOOT(args);

        // NOTE(review): this condition looks inverted. As written the thread only starts when
        // pingback is already true, and the assignment inside is a no-op. Left unchanged
        // because flipping it would begin sending pings; confirm the intended behaviour.
        if (BytecodeViewer.pingback) {
            BytecodeViewer.PingBack.start();
            BytecodeViewer.pingback = true;
        }

        if (BytecodeViewer.viewer.chckbxmntmNewCheckItem_12.isSelected())
            BytecodeViewer.versionChecker.start();

        this.setVisible(false);
    }

    /**
     * Reads the GitHub libs listing page and collects the "blob" URL of every listed file.
     *
     * @return the collected URLs; empty when the page contained no matching link
     */
    private static List<String> fetchLibraryUrls() throws Exception {
        List<String> urlList = new ArrayList<String>();
        HTTPRequest req = new HTTPRequest(new URL("https://github.com/Konloch/bytecode-viewer/tree/master/libs"));
        for (String s : req.read())
            if (s.contains("href=\"/Konloch/bytecode-viewer/blob/master/libs/"))
                urlList.add("https://github.com" + s.split("<a href=")[1].split("\"")[1]);
        return urlList;
    }

    /** Strips the blob-page prefix from a library URL, leaving the file name. */
    private static String fileNameOf(String blobUrl) {
        return blobUrl.substring(LIBS_BLOB_PREFIX.length());
    }

    /**
     * Downloads one library into the libs directory.
     * <p>
     * If the transfer fails the partially written file is deleted, so the next boot does not
     * mistake it for a complete library, and the exception is rethrown.
     *
     * @return the path of the downloaded file
     */
    private static String download(String blobUrl, String fileName) throws IOException {
        String path = BytecodeViewer.libsDirectory + BytecodeViewer.fs + fileName;
        File target = new File(path);
        try (InputStream is = new URL(LIBS_RAW_PREFIX + fileName).openConnection().getInputStream();
             FileOutputStream fos = new FileOutputStream(target)) {
            System.out.println("Downloading from " + blobUrl);
            byte[] buffer = new byte[8192];
            long downloaded = 0;
            long lastReportedMb = 0;
            int len;
            while ((len = is.read(buffer)) > 0) {
                fos.write(buffer, 0, len);
                // Count the bytes actually read, not the buffer size.
                downloaded += len;
                long mbs = downloaded / 1048576;
                // Report once each time another 5MB boundary is crossed.
                if (mbs >= lastReportedMb + 5) {
                    lastReportedMb = mbs - (mbs % 5);
                    System.out.println("Downloaded " + mbs + "MBs so far");
                }
            }
        } catch (IOException e) {
            // Both streams are already closed here, so the partial file can be removed.
            target.delete();
            throw e;
        }
        System.out.println("Download finished!");
        return path;
    }

    /**
     * Deletes every local file whose name is not in the remote listing.
     *
     * @return the paths that were kept, i.e. the ones that should still be loaded
     */
    private static List<String> deleteForeignLibraries(List<String> localPaths, List<String> urlList) {
        List<String> expectedNames = new ArrayList<String>();
        for (String url : urlList)
            expectedNames.add(fileNameOf(url));

        List<String> kept = new ArrayList<String>();
        for (String path : localPaths) {
            File f = new File(path);
            if (expectedNames.contains(f.getName())) {
                kept.add(path);
            } else {
                // Dropped from the load list too; opening a deleted jar would abort the loading.
                f.delete();
                System.out.println("Detected & Deleted Foriegn/Outdated Jar/File: " + f.getName());
            }
        }
        return kept;
    }

    /**
     * Adds the jar to the system class path and asks the system class loader to load every
     * class entry it contains. Classes that fail to load are skipped.
     */
    private void loadJar(File jar) throws IOException {
        setTitle("Bytecode Viewer Boot Screen - Loading Library " + jar.getName());
        System.out.println(jar.getName());

        try (JarFile jarFile = new JarFile(jar)) {
            ClassPathHack.addFile(jar);
            Enumeration<JarEntry> entries = jarFile.entries();
            while (entries.hasMoreElements()) {
                JarEntry je = entries.nextElement();
                if (je.isDirectory() || !je.getName().endsWith(".class"))
                    continue;

                try {
                    // Turn "a/b/C.class" into "a.b.C".
                    String className = je.getName().substring(0, je.getName().length() - 6);
                    className = className.replace('/', '.');
                    ClassLoader.getSystemClassLoader().loadClass(className);
                } catch (LinkageError | Exception ignored) {
                    // Deliberately ignored: one class that cannot be loaded must not stop the boot.
                }
            }
        }
    }

    /** Deletes a file, or a directory together with everything inside it. */
    private static void deleteRecursively(File file) {
        File[] children = file.listFiles();
        if (children != null)
            for (File child : children)
                deleteRecursively(child);
        file.delete();
    }

    /**
     * Adds files to the system class loader at runtime by reflectively calling
     * {@code URLClassLoader.addURL}.
     */
    public static class ClassPathHack {
        private static final Class<?>[] parameters = new Class[] {URL.class};

        /**
         * Adds the given file to the system class path.
         */
        public static void addFile(File f) throws IOException {
            // f.toURL is deprecated
            addURL(f.toURI().toURL());
        }

        /**
         * Adds the given URL to the system class loader. A reflection failure is printed to the
         * error stream and otherwise ignored.
         */
        protected static void addURL(URL u) throws IOException {
            // The cast throws ClassCastException when the system class loader is not a URLClassLoader.
            URLClassLoader sysloader = (URLClassLoader) ClassLoader.getSystemClassLoader();
            Class<?> sysclass = URLClassLoader.class;

            try {
                Method method = sysclass.getDeclaredMethod("addURL", parameters);
                method.setAccessible(true);
                method.invoke(sysloader, u);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

View File

@ -222,7 +222,6 @@ public class ClassViewer extends Viewer {
private static final long serialVersionUID = -8650495368920680024L;
ArrayList<MethodData> lnData = new ArrayList<MethodData>();
String name;
public ClassNode cn;
JSplitPane sp;
JSplitPane sp2;
public JPanel panel1Search = new JPanel(new BorderLayout());

View File

@ -1293,13 +1293,6 @@ public class MainViewerGUI extends JFrame implements FileChangeNotifier {
mnSettings.add(decodeAPKResources);
mnSettings.add(separator_36);
mnSettings.add(mnFontSize);
fontSpinner.setModel(new SpinnerNumberModel(new Integer(12), new Integer(1), null, new Integer(1)));
mnFontSize.add(fontSpinner);
mnSettings.add(separator_13);
mntmSetPythonDirectory.addActionListener(new ActionListener() {
@Override
public void actionPerformed(ActionEvent arg0) {
@ -1758,6 +1751,13 @@ public class MainViewerGUI extends JFrame implements FileChangeNotifier {
panelGroup3.add(panel3Smali);
panelGroup3.add(panel3Bytecode);
panelGroup3.add(panel3Hexcode);
mnNewMenu_6.add(separator_13);
fontSpinner.setPreferredSize(new Dimension(42, 20));
fontSpinner.setSize(new Dimension(42, 20));
fontSpinner.setModel(new SpinnerNumberModel(new Integer(12), new Integer(1), null, new Integer(1)));
mnNewMenu_6.add(mnFontSize);
mnFontSize.add(fontSpinner);
panelGroup1.setSelected(panel1Proc.getModel(), true);//my one true love

View File

@ -17,6 +17,7 @@ import the.bytecode.club.bytecodeviewer.api.Plugin;
import the.bytecode.club.bytecodeviewer.plugin.PluginLaunchStrategy;
/**
* @author Konloch
* @author Bibl (don't ban me pls)
* @created 1 Jun 2015
*/