/* * [TestCollisionProbability.java] * * Summary: Calculate theoretical and simulated probability of generating a non-unique random names. * * Copyright: (c) 2009-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2009-01-01 initial version */ package com.mindprod.example; import com.mindprod.fastcat.FastCat; import java.util.BitSet; import java.util.Random; import static java.lang.System.*; /** * Calculate theoretical and simulated probability of generating a non-unique random names. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2009-01-01 initial version * @since 2009-01-01 */ public final class TestCollisionProbability { /** * maximum number of acceptable trials */ private static final int MAX_TRIALS = 1000000; /** * minimum number of acceptable trials */ private static final int MIN_TRIALS = 100; /** * we can't compute accurately above this limit on n */ private static final long APPROXIMATING_LIMIT_FOR_N = 100000000L; /** * we can't simulate above this limit on m */ private static final long SIMULATING_LIMIT_FOR_M = 10000000L; /** * we can't simulate above this limit on n */ private static final long SIMULATING_LIMIT_FOR_N = 1000000L; /** * sun's random number generator to use in simulations */ private static final Random wheel = new Random(); /** * Calculate the probability for a true random number generator avoiding collisions. * * @param n count of names generated * @param m count of possible names * * @return probability of no collisions */ private static double calculateTheoreticalProbability( long n, long m ) { double probabilityOfNoCollision = 1.0d; // loop would fail utterly for n= Long.MAX_VALUE and would never // complete for very large n. for ( long i = 0; i < n; i++ ) { probabilityOfNoCollision *= ( double ) ( m - i ) / m; } return probabilityOfNoCollision; } /** * Display the probability for a random number generator avoiding collisions. * * @param probabilityOfNoCollision probability of no collisions 0..1 * @param description sort of generator used. * @param n count of names generated * @param m count of possible names */ private static void display( double probabilityOfNoCollision, String description, long n, long m ) { double probabilityOfCollision = 1.0d - probabilityOfNoCollision; out.println( "Generating n=" + n + " Strings of m=" + m + " possible Strings\n" + "that you could generate with " + description + ",\n" + "the probability of collision is " + probabilityOfCollision + "\n" + "where 0 is never, and 1 is always," ); if ( probabilityOfCollision <= 0.0 ) { out.println( "in other words, don't worry about collisions." ); } else if ( probabilityOfCollision >= 1.0 ) { out.println( "in other words, collisions will always happen." ); } else { out.println( "or one in " + 1 / probabilityOfCollision ); } } /** * Display some random strings where m = Long.MAX_VALUE+1 */ private static void displaySamples() { /* * here is how you might generate random strings in practice. Note there * is no such method as Random.nextLong( long ), so you have to trim the * result to 0..Long.MAX_VALUE yourself. */ /* * to get strings with letters digits 0-9 try this: */ out.println( "Sample random decimal string: " + Long.toString( wheel.nextLong() & Long .MAX_VALUE ) ); /* to get strings with letters a-f and digits 0-9 try this: */ out.println( "Sample random hex string: " + Long.toHexString( wheel .nextLong() & Long .MAX_VALUE ) ); /* to get strings with letters \\p{Lower} and digits 0-9 try this: */ out.println( "Sample random alphabetic string: " + Long.toString( wheel.nextLong() & Long .MAX_VALUE, 36 ) ); } /** * Estimate probability for a true random number generator avoiding collisions. for large n this would take * impossibly long to compute, so we approximate to get a pessimistic number. * * @param n count of names generated * @param m count of possible names * * @return probability of no collisions */ private static double estimateTheoreticalProbability( long n, long m ) { out.println( "approximating..." ); return Math.pow( ( double ) ( m - n ) / m, n ); } /** * Estimate probability for a true random number generator avoiding collisions. for large n this would take * impossibly long to compute, so we approximate to get a pessimistic number. * * @param n count of names generated * @param m count of possible names * @param trials number of trials in simulation * * @return probability of no collisions */ private static double simulateActualProbability( long n, long m, int trials ) { assert trials >= MIN_TRIALS : "not enough trials"; assert trials <= MAX_TRIALS : "too many trials"; assert n <= SIMULATING_LIMIT_FOR_N : "n too large"; assert m <= SIMULATING_LIMIT_FOR_M : "m too large"; assert SIMULATING_LIMIT_FOR_N < Integer .MAX_VALUE : "SIMULATING_LIMIT_FOR_N too large."; assert SIMULATING_LIMIT_FOR_M < Integer .MAX_VALUE : "SIMULATING_LIMIT_FOR_M too large."; final FastCat sb = new FastCat( 10 ); sb.append( "\nSimulating ", trials, " trials of n=", n, " numbers " ); sb.append( "each in range 0 .. ", m - 1, " where m=", m, "." ); out.println( sb.toString() ); int m1 = ( int ) m; // will consume roughly m/8 bytes. BitSet b = new BitSet( m1 ); int collisions = 0; for ( int t = 0; t < trials; t++ ) { b.clear(); for ( int i = 0; i < n; i++ ) { int poss = wheel.nextInt( m1 ); if ( b.get( poss ) ) { // oops had a collision this trials. collisions++; break; } else { b.set( poss ); } } } return ( double ) ( trials - collisions ) / trials; } /** * main Calculate theoretical probability of generating a non-unique random hex name in n Strings of a set of m * possibilities via
private static Random wheel = new Random(); // seed with elapsedTime of day
...
* String almostUniqueAndRandom =
Long.toHexString( wheel.nextLong() & * Long.MAX_VALUE);
you get * Strings of form "0" .. "7fffffffffffffff", a maximum 16 characters long.

try out with:
java * com.mindprod.example.TestCollisionProbability 20000 9223372036854775807 1000
or to see a simulation, try * something smaller like
java com.mindprod.example.TestCollisionProbability 200 10000000 1000
* * @param args n number of Strings you plan to generate
m string you will generate over the range 0..m *

* If you are generating 32-bit hashcodes use m=4_294_967_296 * If you are using 60-bit hashcodes, use m=1_152_921_504_606_846_976 * 64-bit hash codes will overamp this program. * If you are using it to solwe the birthday problem use n=number of friends m=365. *
* trials how many sets of n numbers to generate in the simulation */ public static void main( String[] args ) { // n = how many "unique" Strings you plan to generate if ( args.length != 3 ) { throw new IllegalArgumentException( "You must have n, the number of generated Strings and m, the number of possible Strings, and t, " + "the number of trials on the command line." ); } long n = Long.parseLong( args[ 0 ] ); long m = Long.parseLong( args[ 1 ] ); int trials = Integer.parseInt( args[ 2 ] ); if ( !( 0 < n && n < m ) ) { throw new IllegalArgumentException( "You must have 0 < n < m" ); } if ( !( MIN_TRIALS <= trials && trials <= MAX_TRIALS ) ) { throw new IllegalArgumentException( "Must have " + MIN_TRIALS + " <= trials <= " + MAX_TRIALS ); } // Show some typical random strings displaySamples(); double probabilityOfNoCollision; if ( n <= APPROXIMATING_LIMIT_FOR_N ) { probabilityOfNoCollision = calculateTheoreticalProbability( n, m ); } else { probabilityOfNoCollision = estimateTheoreticalProbability( n, m ); } display( probabilityOfNoCollision, "a true number generator", n, m ); if ( n <= SIMULATING_LIMIT_FOR_N && m <= SIMULATING_LIMIT_FOR_M ) { probabilityOfNoCollision = simulateActualProbability( n, m, trials ); display( probabilityOfNoCollision, "Sun's java.util.Random pseudo-random number generator", n, m ); } else { out.println( "The problem too big to simulate." ); } } // end main }