RandomStringGenerator.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.IntUnaryOperator;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
/**
* Generates random Unicode strings containing the specified number of code points. Instances are created using a builder class, which allows the callers to
* define the properties of the generator. See the documentation for the {@link Builder} class to see available properties.
*
* <pre>
* // Generates a 20 code point string, using only the letters a-z
* RandomStringGenerator generator = RandomStringGenerator.builder().withinRange('a', 'z').build();
* String randomLetters = generator.generate(20);
* </pre>
* <pre>
* // Using Apache Commons RNG for randomness
* UniformRandomProvider rng = RandomSource.create(...);
* // Generates a 20 code point string, using only the letters a-z
* RandomStringGenerator generator = RandomStringGenerator.builder()
* .withinRange('a', 'z')
* .usingRandom(rng::nextInt)
* .build();
* String randomLetters = generator.generate(20);
* </pre>
* <p>
* {@code RandomStringGenerator} instances are thread-safe when using the default random number generator (RNG). If a custom RNG is set by calling the method
* {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety must be ensured externally.
* </p>
*
* @since 1.1
*/
public final class RandomStringGenerator {
/**
* A builder for generating {@code RandomStringGenerator} instances.
*
* <p>
* The behavior of a generator is controlled by properties set by this builder. Each property has a default value, which can be overridden by calling the
* methods defined in this class, prior to calling {@link #build()}.
* </p>
* <p>
* All the property setting methods return the {@code Builder} instance to allow for method chaining.
* </p>
* <p>
* The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The default values are {@code 0} and
* {@link Character#MAX_CODE_POINT} respectively.
* </p>
* <p>
* The source of randomness can be set using {@link #usingRandom(TextRandomProvider)}, otherwise {@link ThreadLocalRandom} is used.
* </p>
* <p>
* The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)}, which defines a collection of tests that are applied
* to the randomly generated code points. The code points will only be included in the result if they pass at least one of the tests. Some commonly used
* predicates are provided by the {@link CharacterPredicates} enum.
* </p>
* <p>
* This class is not thread safe.
* </p>
*
* @since 1.1
*/
public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {
/**
* The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
* ({@value}).
*/
public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
/**
* The default string length produced by this builder: {@value}.
*/
public static final int DEFAULT_LENGTH = 0;
/**
* The default minimum code point allowed: {@value}.
*/
public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
/**
* The minimum code point allowed.
*/
private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;
/**
* The maximum code point allowed.
*/
private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;
/**
* Filters for code points.
*/
private Set<CharacterPredicate> inclusivePredicates;
/**
* The source of randomness.
*/
private IntUnaryOperator random;
/**
* The source of provided characters.
*/
private Set<Character> characterSet = new HashSet<>();
/**
* Whether calls accumulates the source of provided characters. The default is {@code false}.
*/
private boolean accumulate;
/**
* Creates a new instance.
*/
public Builder() {
// empty
}
/**
* Builds a new {@code RandomStringGenerator}.
*
* @return A new {@code RandomStringGenerator}
* @deprecated Use {@link #get()}.
*/
@Deprecated
@Override
public RandomStringGenerator build() {
return get();
}
/**
* Limits the characters in the generated string to those that match at least one of the predicates supplied.
*
* <p>
* Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method
* will replace the previously stored predicates.
* </p>
*
* @param predicates the predicates, may be {@code null} or empty.
* @return {@code this} instance.
*/
public Builder filteredBy(final CharacterPredicate... predicates) {
if (ArrayUtils.isEmpty(predicates)) {
inclusivePredicates = null;
return this;
}
if (inclusivePredicates == null) {
inclusivePredicates = new HashSet<>();
} else {
inclusivePredicates.clear();
}
Collections.addAll(inclusivePredicates, predicates);
return this;
}
/**
* Builds a new {@code RandomStringGenerator}.
*
* @return A new {@code RandomStringGenerator}.
* @since 1.12.0
*/
@Override
public RandomStringGenerator get() {
return new RandomStringGenerator(this);
}
private void initCharList() {
if (!accumulate) {
characterSet = new HashSet<>();
}
}
/**
* Limits the characters in the generated string to those who match at supplied list of Character.
*
* <p>
* Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method
* will replace the previously stored Character.
* </p>
*
* @param chars set of predefined Characters for random string generation the Character can be, may be {@code null} or empty
* @return {@code this} instance.
* @since 1.2
*/
public Builder selectFrom(final char... chars) {
initCharList();
if (chars != null) {
for (final char c : chars) {
characterSet.add(c);
}
}
return this;
}
/**
* Sets whether calls accumulates the source of provided characters. The default is {@code false}.
*
* <pre>
* {@code
* RandomStringGenerator gen = RandomStringGenerator.builder()
* .setAccumulate(true)
* .withinRange(new char[][] { { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } })
* .selectFrom('!', '"', '#', '$', '&', '\'', '(', ')', ',', '.', ':', ';', '?', '@', '[',
* '\\', ']', '^', '_', '`', '{', '|', '}', '~') // punctuation
* // additional builder calls as needed
* .build();
* }
* </pre>
*
* @param accumulate whether calls accumulates the source of provided characters. The default is {@code false}.
* @return {@code this} instance.
* @since 1.14.0
*/
public Builder setAccumulate(final boolean accumulate) {
this.accumulate = accumulate;
return this;
}
/**
* Overrides the default source of randomness. It is highly recommended that a random number generator library like
* <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
*
* <p>
* {@link TextRandomProvider} is a functional interface and need not be explicitly implemented:
* </p>
*
* <pre>
* {@code
* UniformRandomProvider rng = RandomSource.create(...);
* RandomStringGenerator gen = RandomStringGenerator.builder()
* .usingRandom(rng::nextInt)
* // additional builder calls as needed
* .build();
* }
* </pre>
*
* <p>
* Passing {@code null} to this method will revert to the default source of randomness.
* </p>
*
* @param random the source of randomness, may be {@code null}.
* @return {@code this} instance.
* @since 1.14.0
*/
public Builder usingRandom(final IntUnaryOperator random) {
this.random = random;
return this;
}
/**
* Overrides the default source of randomness. It is highly recommended that a random number generator library like
* <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
*
* <p>
* {@link TextRandomProvider} is a functional interface and need not be explicitly implemented:
* </p>
*
* <pre>
* {@code
* UniformRandomProvider rng = RandomSource.create(...);
* RandomStringGenerator gen = RandomStringGenerator.builder()
* .usingRandom(rng::nextInt)
* // additional builder calls as needed
* .build();
* }
* </pre>
*
* <p>
* Passing {@code null} to this method will revert to the default source of randomness.
* </p>
*
* @param random the source of randomness, may be {@code null}.
* @return {@code this} instance.
*/
public Builder usingRandom(final TextRandomProvider random) {
this.random = random;
return this;
}
/**
* Sets the array of minimum and maximum char allowed in the generated string.
*
* For example:
*
* <pre>
* {@code
* char[][] pairs = { { '0', '9' } };
* char[][] pairs = { { 'a', 'z' } };
* char[][] pairs = { { 'a', 'z' }, { '0', '9' } };
* }
* </pre>
*
* @param pairs array of characters array, expected is to pass min, max pairs through this arg.
* @return {@code this} instance.
*/
public Builder withinRange(final char[]... pairs) {
initCharList();
if (pairs != null) {
for (final char[] pair : pairs) {
Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point");
final int minimumCodePoint = pair[0];
final int maximumCodePoint = pair[1];
Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
maximumCodePoint);
for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
characterSet.add((char) index);
}
}
}
return this;
}
/**
* Sets the minimum and maximum code points allowed in the generated string.
*
* @param minimumCodePoint the smallest code point allowed (inclusive).
* @param maximumCodePoint the largest code point allowed (inclusive).
* @return {@code this} instance.
* @throws IllegalArgumentException if {@code maximumCodePoint >} {@link Character#MAX_CODE_POINT}.
* @throws IllegalArgumentException if {@code minimumCodePoint < 0}.
* @throws IllegalArgumentException if {@code minimumCodePoint > maximumCodePoint}.
*/
public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
maximumCodePoint);
Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT, "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
this.minimumCodePoint = minimumCodePoint;
this.maximumCodePoint = maximumCodePoint;
return this;
}
}
/**
* Constructs a new builder.
*
* @return a new builder.
* @since 1.11.0
*/
public static Builder builder() {
return new Builder();
}
/**
* The smallest allowed code point (inclusive).
*/
private final int minimumCodePoint;
/**
* The largest allowed code point (inclusive).
*/
private final int maximumCodePoint;
/**
* Filters for code points.
*/
private final Set<CharacterPredicate> inclusivePredicates;
/**
* The source of randomness for this generator.
*/
private final IntUnaryOperator random;
/**
* The source of provided characters.
*/
private final List<Character> characterList;
/**
* Constructs the generator.
*
* @param minimumCodePoint smallest allowed code point (inclusive).
* @param maximumCodePoint largest allowed code point (inclusive).
* @param inclusivePredicates filters for code points.
* @param random source of randomness.
* @param characterSet list of predefined set of characters.
*/
private RandomStringGenerator(final Builder builder) {
this.minimumCodePoint = builder.minimumCodePoint;
this.maximumCodePoint = builder.maximumCodePoint;
this.inclusivePredicates = builder.inclusivePredicates;
this.random = builder.random;
this.characterList = new ArrayList<>(builder.characterSet);
}
/**
* Generates a random string, containing the specified number of code points.
*
* <p>
* Code points are randomly selected between the minimum and maximum values defined in the generator. Surrogate and private use characters are not returned,
* although the resulting string may contain pairs of surrogates that together encode a supplementary character.
* </p>
* <p>
* Note: the number of {@code char} code units generated will exceed {@code length} if the string contains supplementary characters. See the
* {@link Character} documentation to understand how Java stores Unicode values.
* </p>
*
* @param length the number of code points to generate.
* @return The generated string.
* @throws IllegalArgumentException if {@code length < 0}.
*/
public String generate(final int length) {
if (length == 0) {
return StringUtils.EMPTY;
}
Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
final StringBuilder builder = new StringBuilder(length);
long remaining = length;
do {
final int codePoint;
if (characterList != null && !characterList.isEmpty()) {
codePoint = generateRandomNumber(characterList);
} else {
codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
}
switch (Character.getType(codePoint)) {
case Character.UNASSIGNED:
case Character.PRIVATE_USE:
case Character.SURROGATE:
continue;
default:
}
if (inclusivePredicates != null) {
boolean matchedFilter = false;
for (final CharacterPredicate predicate : inclusivePredicates) {
if (predicate.test(codePoint)) {
matchedFilter = true;
break;
}
}
if (!matchedFilter) {
continue;
}
}
builder.appendCodePoint(codePoint);
remaining--;
} while (remaining != 0);
return builder.toString();
}
/**
* Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive) number of code points.
*
* @param minLengthInclusive the minimum (inclusive) number of code points to generate.
* @param maxLengthInclusive the maximum (inclusive) number of code points to generate.
* @return The generated string.
* @throws IllegalArgumentException if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}.
* @see RandomStringGenerator#generate(int)
* @since 1.2
*/
public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
Validate.isTrue(minLengthInclusive <= maxLengthInclusive, "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive,
minLengthInclusive);
return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
}
/**
* Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
*
* @param minInclusive the minimum value allowed.
* @param maxInclusive the maximum value allowed.
* @return The random number.
*/
private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
if (random != null) {
return random.applyAsInt(maxInclusive - minInclusive + 1) + minInclusive;
}
return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
}
/**
* Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
*
* @param characterList predefined char list.
* @return The random number.
*/
private int generateRandomNumber(final List<Character> characterList) {
final int listSize = characterList.size();
if (random != null) {
return String.valueOf(characterList.get(random.applyAsInt(listSize))).codePointAt(0);
}
return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
}
}