/* * [TestSignum.java] * * Summary: example use of java.Long.signum and alternative implementations. * * Copyright: (c) 2009-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.1 2006-02-01 use System.nanotime instead of System.CurrentTimeInMillis add wibble2 alogorithm * 1.2 2006-02-02 rename wibble2 to Kobzda, add signHalf * 1.3 2006-02-03 add piotr, more encapsulated conformance tests. * 1.4 2006-02-18 run entire competition twice to ensure all optimisation complete in for the * second "warm" heat. * add percent of cpuTimeToBeatInNanos * add commas to numeric printouts * cleaned up typos in comments. * 1.5 2006-02-19 redefine speed % so bigger is better. 100% = top contender so far. Unfortunately * it obsoletes old values when there is a new champ, so likely will leave value as is for while even if * someone gets over 100% on my hardware. * Remove benchmark results from program listings. * 1.6 2006-02-19 give optimisers a rest period to finish analysis and optimising after the warmup. * Emit HTML summary of results. * add lohi32 algorithm * 1.7 2006-02-20 drop sgn and Stephan from timing trials since they don't compute the correct results. The * code is still there if you want to hook it up again in trial. They are still part of the conformance * testing. * add glossary * adjust to discount timing overheadInNanos. * refactor to move code into BenchmarkMeasurement class, rename methods and variables * changed definition of iterations, so counts actual number of signum calls, makes it. * added elapsed vs cpu elapsedTime, and per-call elapsedTime to displays. * specify elapsedTime to beat is per-call elapsedTime. * 1.8 2006-02-22 add JET's machine language solution to piotr and lohi. JET got both perfect. Explanation * of why piotr beat lohi even though it takes theoretically one more machine cycle. */ package com.mindprod.example; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Random; import static java.lang.System.*; /** * example use of java.Long.signum and alternative implementations. *

* Compare ten different ways of doing a signum. *

* Results of benchmarks to find the fastest Long.signum implementation. *

* BenchmarkMeasurement results are posted at http://mindprod.com/jgloss/benchmark.html Terminolgy used here: *

*

*

benchmark
*
one running of this program on a given hardware/software combo. Obviously you can't do more * than one benchmark per run.
*

*

trial
*
a cold or warm set of measurements.
*

*

cold
*
a trial done immediately after starting the runtime.
*

*

warm
*
a trial done after exercising the runtime * and letting it rest, so that it has opportunity to do on-the-fly optimisation before you clock it.
*

*

measurement
*
one candidate algorithm tested over a period of seconds with repeated executions of a TEST * suite.
*

*

elapsed time
*
wall clock time.
*

*

cpu time
*
time crudely adjusted to remove cpu overhead of measuring.
*

*

call ns
*
cpu time in ns, (not clock cycles) for one signum calculation, just * a few nanoseconds. For a very fast machine, this number could be less than 1.
*

*

speed
*
speed index, aka SI. HowToProcess fast this condender is relative to the record. The record holder should be * about 100%, others * proportionately less.
*
* * @author Roedy Green, Canadian Mind Products * @version 1.8 2006-02-22 add JET's machine language solution to piotr and lohi. JET got both perfect. Explanation * of why piotr beat lohi even though it takes theoretically one more machine cycle. * @since 2009 */ @SuppressWarnings( { "UnusedDeclaration", "JavaDoc" } ) final class TestSignum { /** * how many times to run the conformance tests 20,000,000 = 20 million times, number of SIGNUM invocations to handle * conformance tests. */ private static final int SIGNUM_CONFORMANCE_INVOCATIONS = 20000000; /** * how many times to run each set of 21 diff values for one candidate. Computed in static init. */ private static final int suiteIterations; /** * HowToProcess long to rest after the warm up trial to give background optimisers elapsedTime to complete their * analysis. */ private static final long REST_PERIOD_IN_MILLIS = 10000; /** * how many times to invoke signum for each candidate in a trial times. It can be bigger than Integer.MAX_VALUE, but * suiteIterations which depends on it, may not. suggest 2,000,000,000L for a live TEST, 20,000,000L to TEST. */ private static final long SIGNUM_INVOCATIONS = 2000000000L; /** * crucial corner values to TEST for conformance. */ @SuppressWarnings( { "NumericOverflow" } ) private static final long testSuiteValues[] = { Long.MIN_VALUE, Long.MIN_VALUE + 1, Long.MIN_VALUE + Integer.MIN_VALUE - 2, Integer.MIN_VALUE - 1, Integer.MIN_VALUE, Integer.MIN_VALUE + 1, Integer.MIN_VALUE + 2, -2, -1, 0, 1, 2, Integer.MAX_VALUE - 2, Integer.MAX_VALUE - 1, Integer.MAX_VALUE, Integer.MAX_VALUE + 1, Integer.MAX_VALUE + 2, Long.MAX_VALUE - 2, Long.MAX_VALUE - 1, Long.MAX_VALUE }; // 0 decimal places, commas, rounded, locale-sensitive. private static final DecimalFormat df0 = new DecimalFormat( "#,##0" ); // 3 digits, left-zero, 2 decimal places, rounded, locale-sensitive. private static final DecimalFormat df2 = new DecimalFormat( "#,##0.00" ); /** * where we accumulate the HTML version of the results. */ private static final StringBuilder html = new StringBuilder( 3000 ); /** * best elapsedTime so far. Contestants are rated as a percent of that performance If you change the number of * iterations. Defined later in static init. Constast for entire run. */ private static final double cpuTimeToBeatInNanos; /** * reigning champ's best elapsedTime to complete one signum call in fractional ns. */ private static final double NANOS_PER_CALL_TO_BEAT = 1.22d; /** * dummy variable stop removal of code by optimisation. *

* public helps discourage the optimiser a little. */ private static int globalSum; /** * elapsedTime per candidate in nanoseconds spent with dummy algorithm that does nothing. It is recomputed for each * trial, after all other measurements done, but before we display the results. We would rather underestimate than * overestimate it. It represents the elapsedTime spent looping, fetching operands, toting up the results, call/ret * to signum, in other words all but computing the signum itself. */ private static long overheadInNanos; /** * compute other run invariants now we have configurables nailed down. This * code must follow static finals. */ static { // compute suiteIterations, number of TEST suites to run. long proposedSuiteIterations = SIGNUM_INVOCATIONS / testSuiteValues.length; if ( proposedSuiteIterations >= Integer.MAX_VALUE ) { throw new IllegalArgumentException( "SIGNUM_INVOCATIONS is too big." ); } suiteIterations = ( int ) proposedSuiteIterations; cpuTimeToBeatInNanos = ( long ) ( NANOS_PER_CALL_TO_BEAT * SIGNUM_INVOCATIONS ); } /** * TEST all four signum-like algorithms for conformance. To pass must return a + int for a positive long, a -ve int * for a negative long and 0 for a zero long. * * @param diff number to be collapsed to an int preserving sign and zeroness. */ private static void checkConformanceOfAllCandidates( long diff ) { // overhead deliberately does not conform. We leave item out. checkConformanceOfOneValue( kobzda( diff ), diff, "kobzda" ); checkConformanceOfOneValue( lohi32( diff ), diff, "lohi32" ); checkConformanceOfOneValue( piotr( diff ), diff, "piotr" ); checkConformanceOfOneValue( sgn( diff ), diff, "sgn" ); checkConformanceOfOneValue( signHalf( diff ), diff, "signHalf" ); checkConformanceOfOneValue( signOf( diff ), diff, "signOf" ); checkConformanceOfOneValue( stephan( diff ), diff, "stephan" ); checkConformanceOfOneValue( Long.signum( diff ), diff, "Sun Long.signum" ); checkConformanceOfOneValue( twoifs( diff ), diff, "twoifs" ); checkConformanceOfOneValue( wibble( diff ), diff, "Wibble" ); } /** * check that algorithm conforms to the gold standard signum with two ifs * * @param answer answer TEST algorithm gave * @param diff TEST number * @param algorithm name of algorithm */ private static void checkConformanceOfOneValue( int answer, long diff, String algorithm ) { // we don't insist on exact signum answer only than signum of answer be // correct. if ( twoifs( answer ) != twoifs( diff ) ) { err.println( algorithm + " fails at " + df0.format( diff ) + " giving " + df0.format( answer ) ); } } /** * emit HTML to head this trail * * @param platform short name for the platform * @param hardwarePlatform description of the hardware * @param softwarePlatform description of the software, e.g. OS, JVM */ private static void echoCommandLineHTML( String platform, String hardwarePlatform, String softwarePlatform ) { html.append( "" ); html.append( "\n" ); html.append( "" ); html.append( ""java.exe"\n" ); html.append( "com.mindprod.example.TestSignum\n" ); html.append( """ ); html.append( platform ); html.append( ""\n" + """ ); html.append( hardwarePlatform ); html.append( ""\n" + """ ); html.append( softwarePlatform ); html.append( ""\n" + "\n" ); } /** * emit HTML to head this trail * * @param platform short name for the platform * @param hardwarePlatform description of the hardware * @param softwarePlatform description of the software, e.g. OS, JVM * @param temperature usually "Warm" or "Cold" or any other descriptive String for benchmark. */ private static void emitTrialHeaderHTML( String platform, String hardwarePlatform, String softwarePlatform, String temperature ) { html.append( "\n" ); html.append( platform ); html.append( "\n" + "" ); html.append( hardwarePlatform ); html.append( "\n" + "" ); html.append( softwarePlatform ); html.append( "\n" + "" ); html.append( temperature ); html.append( "\n" + "\n" ); html.append( BenchmarkMeasurement.toHTMLHeading() ); } /** * aux method for stephan * * @param left first of two numbers to compare * @param right second of two number to compare * * @return 1 if first is less than the second. */ private static int isLessThan( final long left, final long right ) { return ( int ) -( ( left - right ) >> 63 ); } /** * alternate to signum for use in compare. Not a true signum, since it returns ints other than +-1. Where there is * any possibility of overflow, you should compare two longs with < rather than subtraction. * * @param diff number to be collapsed to an int preserving sign and zeroness. usually represents the difference of * two long. * * @return sign of diff, some -ve int, 0 or some -ve int. * @author Peter Kobzda, who came up with it 17 hours before Wibble, whom I earlier gave the attribution to. */ private static int kobzda( long diff ) { // the beauty of this method is most of the work is in int, not long // which works well on 32-bit machines. return ( int ) ( diff >>> 32 ) | ( int ) diff >>> 1 | ( int ) diff & 1; } /** * Pads the string out to the given length by applying blanks on the left. Method stolen and simplified from * com.mindprod.common18.ST to make this class standalone for pedagogical purposes. * * @param s String to be padded/chopped. * @param newLen length of new String desired. * @param chop true if Strings longer than newLen should be truncated to newLen chars. * * @return String padded on left/chopped to the desired length. */ @SuppressWarnings( { "SameParameterValue" } ) private static String leftPad( String s, int newLen, boolean chop ) { int grow = newLen - s.length(); if ( grow <= 0 ) { if ( chop ) { return s.substring( 0, newLen ); } else { return s; } } else { // com.mindprod.common18.ST handles problem of out of range // subscript here. return " ".substring( 0, grow ) + s; } } // end leftPad /** * alternate to signum for use in compare. Not a true signum, since it returns ints other than +-1. Where there is * any possibility of overflow, you should compare two longs with < rather than subtraction. * * @param diff number to be collapsed to an int preserving sign and zeroness. usually represents the difference of * two long. * * @return sign of diff, some -ve int, 0 or some -ve int. * @author Roedy Green, Canadian Mind Products */ private static int lohi32( long diff ) { /** *

         *   I designed this algorithm with the 32-bit Pentium architecture in mind.
         *   It makes no sense for a 64-bit machine.
         *   I am trying to coax the optimiser into generating inline
         *   Pentium code like this that would take only 3 clock cycles:
         *   diff=edx:eax  result=eax
         *   TEST edx,edx ; TEST hi 32 bits
         *   jz isZero    ; lo is the result, already eax=lo
         *   mov eax,edx  ; result is hi
         * isZero:
         *  JET used the stack rather than edx:eax for parameter passing,
         *  but other than that it hit got my intended algorithm bang on.
         * 
*/ final int hi = ( int ) ( diff >>> 32 ); if ( hi != 0 ) { return hi; } else { return ( int ) diff; } } /** * Time the Kobzda signum implementation, formerly known as wibble2 * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ private static long measureKobzda() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { localSum += kobzda( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the lohi32 signum implementation * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ private static long measureLoHi32() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { localSum += lohi32( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the how much overheadInNanos there is in measuring * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ private static long measureOverhead() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { // Overhead will be underestimated if compiler // precomputes localSum. // We would prefer to underestimate that overestimate. // We are sightly overestimating the overheadInNanos by // including an // long to int conversion, which should be optimised away. // If we were to leave item out, the overheadInNanos of // maintaining diff // for the loop would be lost. localSum += ( int ) diff; } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the piotr signum implementation * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ private static long measurePiotr() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { localSum += piotr( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the signOf signum implementation * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ @SuppressWarnings( "unused" ) private static long measureSgn() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { localSum += sgn( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the signHalf signum implementation * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ private static long measureSignHalf() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { localSum += signHalf( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the signOf signum implementation * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ private static long measureSignOf() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { localSum += signOf( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the Stephan's signum implementation * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ @SuppressWarnings( "unused" ) private static long measureStephan() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { localSum += stephan( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the sun's signum implementation * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ private static long measureSun() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { // Sun's Long.signum is implemented as: // (int) ((i >> 63) | (-i >>> 63)); localSum += Long.signum( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the standard signum implementation * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ private static long measureTwoifs() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { localSum += twoifs( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * Time the wibble signum implementation * * @return nanoseconds elapsed time to run the testSuite for this candidate. */ private static long measureWibble() { int localSum = 0; long start = System.nanoTime(); for ( int i = 0; i < suiteIterations; i++ ) { for ( long diff : testSuiteValues ) { localSum += wibble( diff ); } } globalSum += localSum; return System.nanoTime() - start; } /** * alternate to signum for use in compare. Not a true signum, since it returns ints other than +-1. Where there is * any possibility of overflow, you should compare two longs with < rather than subtraction. In Pentium assembler * you could implement this algorthm with following code: *

*

*

*

     *  diff = edx:eax result = eax
     *  mov ebx,eax
     *  shl eax,1
     *  or  eax,ebx
     *  slr eax,1
     *  or  eax,edx
     *  which would take 5 cycles, 2 more that lohi.  However, JET did even
     * better,
     *  with code essentially this using a clever trick to implement piotr.
     *   lea    ecx,0(eax,eax)  ; shifts lo left by doubling, keeps copy of lo
     *   or     eax,ecx
     *   shr    eax,1
     *   or     eax,edx
     *  This is 4 cycles, still one more than lohi. Why was Piotr so much
     * faster
     * on JET?
     *  Piotr has no pipeline-confounding jumps. Further, the lo then high
     * operands actually
     *  come from the ram-based stack. Piotr nicely separates the accesses
     * giving plenty of
     *  for pre-emptive fetch of hi. lohi insists on having them both upfront,
     * so it has to wait
     *  for memory access. Piotr does not have to wait.
     *  Modern CPUS hurry up and wait for RAM most of the time.
     * 
* * @param diff number to be collapsed to an int preserving sign and zeroness. usually represents the difference of * two long. * * @return sign of diff, some -ve int, 0 or some -ve int. * @author Peter Kobzda */ private static int piotr( long diff ) { return ( int ) ( diff >>> 32 ) | ( ( int ) diff | ( int ) diff << 1 ) >>> 1; } /** * Thomas Hawtin's algorithm Alternate to signum for use in compare. Not a true signum, since it returns ints other * than +-1. Where there is any possibility of overflow, you should compare two longs with < rather than * subtraction. * * @param diff number to be collapsed to an int preserving sign and zeroness. usually represents the difference of * two long. * * @return sign of diff, some -ve int, 0 or some -ve int. * @author Thomas Hawtin */ private static int sgn( long diff ) { return ( ( int ) ( diff >> 63 ) ) | ( ( int ) ( ( ~diff ) >>> 63 ) ); } /** * alternate to signum for use in compare. Not a true signum, since it returns ints other than +-1. Where there is * any possibility of overflow, you should compare two longs with < rather than subtraction. * * @param diff number to be collapsed to an int preserving sign and zeroness. usually represents the difference of * two long. * * @return sign of diff, some -ve int, 0 or some -ve int. * @author Peter Kobzda */ private static int signHalf( long diff ) { return ( diff <= 0 ) ? ( int ) ( diff >>> 32 ) : 1; } /** * alternate to signum for use in compare. Not a true signum, since it returns ints other than +-1. Where there is * any possibility of overflow, you should compare two longs with < rather than subtraction. * * @param diff number to be collapsed to an int preserving sign and zeroness. usually represents the difference of * two long. * * @return sign of diff, some -ve int, 0 or some -ve int. * @author Roedy Green, Canadian Mind Products */ private static int signOf( long diff ) { return ( diff != 0 ) ? ( ( int ) ( diff >>> 32 ) | 1 ) : 0; } /** * stephan Ram's signum algorithm * * @param diff usually represents the difference of two long. * * @return signum of diff, +1, 0 or -1. * @author Stephan Ram */ private static int stephan( long diff ) { return isLessThan( 0, diff ) - isLessThan( diff, 0 ); } /** * Run a suite of tests on all candidates, both conformance and speed. We run this twice. * * @param platform short description of platform * @param hardwarePlatform description of the hardware * @param softwarePlatform description of the software, e.g. OS, JVM * @param temperature usually "Warm" or "Cold" or any other descriptive String for benchmark. */ private static void trial( String platform, String hardwarePlatform, String softwarePlatform, String temperature ) { out.println(); out.println( "---- " + hardwarePlatform + " : " + softwarePlatform + " : " + temperature + " ----" ); emitTrialHeaderHTML( platform, hardwarePlatform, softwarePlatform, temperature ); out.println( "Checking conformance on crucial corner values..." ); for ( long diff : testSuiteValues ) { checkConformanceOfAllCandidates( diff ); } out.println( "Checking conformance on " + df0.format( SIGNUM_CONFORMANCE_INVOCATIONS ) + " random longs..." ); Random wheel = new Random(); for ( int i = 0; i < SIGNUM_CONFORMANCE_INVOCATIONS; i++ ) { checkConformanceOfAllCandidates( wheel.nextLong() ); } out.println( "Running timing tests with " + df0.format( SIGNUM_INVOCATIONS ) + " signum invocations per candidate." ); ArrayList results = new ArrayList<>( 15 ); results.add( new BenchmarkMeasurement( measureKobzda(), "kobzda" ) ); results.add( new BenchmarkMeasurement( measureLoHi32(), "lohi32" ) ); results.add( new BenchmarkMeasurement( measurePiotr(), "piotr" ) ); // sgn gives wrong answers // results.add( new BenchmarkMeasurement( measureSgn(), "sgn" ) ); results.add( new BenchmarkMeasurement( measureSignHalf(), "signHalf" ) ); results.add( new BenchmarkMeasurement( measureSignOf(), "signOf" ) ); // stephan gives wrong answers // results.add( new BenchmarkMeasurement( measureStephan(), "stephan" ) // ); results .add( new BenchmarkMeasurement( measureSun(), "Sun Long.signum" ) ); results.add( new BenchmarkMeasurement( measureTwoifs(), "twoifs" ) ); results.add( new BenchmarkMeasurement( measureWibble(), "wibble" ) ); // finish off line of dots out.println(); // measure elapsedTime spent with dummy algorithm that does nothing // recomputed for each trial overheadInNanos = measureOverhead(); out.println( "Timing overheadInNanos, (included below in elapsed time but not cpu time),\n" + "is " + df0.format( overheadInNanos ) + " nanoseconds per candidate." ); /* sort fastest first, in ascending order my elapsedTime. */ Collections.sort( results ); /* print BenchmarkMeasurement results, fastest first */ out.println( BenchmarkMeasurement.toStringHeading() ); for ( BenchmarkMeasurement result : results ) { // display results for a contender out.println( result ); // prepare html version of results for a contender html.append( result.toHTML() ); } out.println( "finished trial" ); } /** * Collapse number down to +1 0 or -1 depending on sign. Typically used in compare routines to collapse a difference * of two longs to an int. * * @param diff number to be collapsed to an int preserving sign and zeroness. usually represents the difference of * two long. * * @return true signum of diff, +1, 0 or -1. * @author Roedy Green, Canadian Mind Products */ private static int twoifs( long diff ) { if ( diff > 0 ) { return 1; } if ( diff < 0 ) { return -1; } else { return 0; } } /** * alternate to signum for use in compare. Not a true signum, since it returns ints other than +-1. Where there is * any possibility of overflow, you should compare two longs with < rather than subtraction. * * @param diff number to be collapsed to an int preserving sign and zeroness. usually represents the difference of * two long. * * @return sign of diff, some -ve int, 0 or some -ve int. * @author wibble */ private static int wibble( long diff ) { return ( int ) ( diff >>> 32 ) | ( ( int ) diff >>> 1 ) & Integer.MAX_VALUE | ( int ) diff & 1; } /** * Test harness to compare the various signum methods. * * @param args * [0] = platform parameter in quotes, a short description of the platform you are testing, e.g. JET. *

* [1] = hardware platform parameter in quotes, a description of the platform you are testing. *

* e.g. "Athlon XP 2000+ 1.692 GHz 512MB" *

* [2] = software platform description e.g. "Win2K Sun java.exe 1.5 -server" */ public static void main( String[] args ) { if ( args.length != 3 ) { throw new IllegalArgumentException( "Must have " + "\"short platform desc\" " + "\"hardware platform desc\" " + "\"software platform desc\" " + "on command line." ); } String platform = args[ 0 ].trim(); String hardwarePlatform = args[ 1 ].trim(); String softwarePlatform = args[ 2 ].trim(); echoCommandLineHTML( platform, hardwarePlatform, softwarePlatform ); // run a cold set of tests, HotSpot style optimisers will kick in // presumably some elapsedTime during this TEST, or during the rest // after they have gathered enough statistics. trial( platform, hardwarePlatform, softwarePlatform, "Cold" ); // Rest for a short while to give background, low priority optimising // threads elapsedTime to catch up and optimise. out.println( "Resting to give optimisers a chance..." ); try { Thread.sleep( REST_PERIOD_IN_MILLIS ); } catch ( InterruptedException e ) { out.println( "awakened prematurely" ); } // by now, optimisations should be complete. trial( platform, hardwarePlatform, softwarePlatform, "Warm" ); // display the HTML summary on the console, // not to a file to simplify use without configuring. // You can redirect item to files with append etc. as you see fit. err.println( html ); out.println( "all done" ); } /** * This is how Sun's Long.signum is implemented. * * @param diff usually represents the difference of two long. * * @return signum of diff, +1, 0 or -1. * @author Lee Boynton of Sun */ public static int sunSignum( long diff ) { return ( int ) ( ( diff >> 63 ) | ( -diff >>> 63 ) ); } /** * nested static class for results from timing one candidate algorithm */ static final class BenchmarkMeasurement implements Comparable { /** * name of algorithm */ final String contender; /** * raw execution elapsedTime in nanoseconds, unadjusted for overheadInNanos */ final long elapsedTime; /** * constructor * * @param elapsedTime nanoseconds this benchmark took, raw elapsedTime, no overheadInNanos adjustment. * @param contender name of the candidate algorithm */ BenchmarkMeasurement( long elapsedTime, String contender ) { // let user know something is happening out.print( '.' ); this.elapsedTime = elapsedTime; this.contender = contender; } /** * Display a html header row suitable for heading up toHTML one trial. * * @return HTML for one table row header describing 5 values, elapsed, cpu, per call, speed percentage and * contender, with room later for notes. */ public static String toHTMLHeading() { return "" + "\n" // blank col supplied by line above. + "elapsed ns\n" + "cpu ns\n" + "call ns\n" + "SI\n" + "candidate\n" + "notes\n" + "\n"; } /** * prepare a string to act as header for this.toString * * @return headings for five columns, describing one candidate's results for one trial: elapsed, cpu, per call, * speed percentage and contender. */ public static String toStringHeading() { // _______elapsed_ns_:____________cpu_ns_:_call_ns_:_____SI_:_candidate // ----1,932,157,579_:_------932,157,579_:_---4.66_:_-64.37_:_piotr // --------14-------___-----------14----___---7--___---6---___ return "" + " elapsed ns : " + " cpu ns : " + "call ns : " + " SI : " + "candidate"; } /** * Sort by elaspedTime, fastest first. * Defines default the sort order for BenchmarkMeasurement Objects. * Compare this BenchmarkMeasurement with another BenchmarkMeasurement. * Compares elapsedTime. * Informally, returns (this-other) or +ve if this is more positive than other. * * @param other other BenchmarkMeasurement to compare with this one * * @return +ve if this>other, 0 if this==other, -ve if this<other */ public final int compareTo( BenchmarkMeasurement other ) { return Long.signum( this.elapsedTime - other.elapsedTime ); } /** * Get speed as a percent of the current contender. usually 0..100, adjusted for overheadInNanos and * cpuTimeToBeatInNanos. * * @return speed */ public double getSpeedIndex() { // Rate candidate as percent of nominal best performance. // bigger is better. // 100% is the current record holder. return ( cpuTimeToBeatInNanos / ( double ) ( this .elapsedTime - overheadInNanos ) ) * 100.0d; } /** * Display benchmark result in as a HTML table row for one contender in one trial. * * @return HTML for one table row describing 5 values, elapsed, cpu, per call, speed percentage and contender. */ public String toHTML() { // display total elapsed elapsedTime in a fixed width field String elapsedNanosToDisplay = df0.format( this.elapsedTime ); // display total cpu elapsedTime in a fixed width field String cpuNanosToDisplay = df0.format( this.elapsedTime - overheadInNanos ); // display cpu/elapsedTime per signum call in a fixed width field String cpuPerInvocationNanosToDisplay = df2 .format( ( double ) ( this.elapsedTime - overheadInNanos ) / ( double ) SIGNUM_INVOCATIONS ); String speedToDisplay = df2.format( this.getSpeedIndex() ); return "" + "" /* indent one col */ + elapsedNanosToDisplay + "" + cpuNanosToDisplay + "" + cpuPerInvocationNanosToDisplay + "" + speedToDisplay + "" + contender + "\n"/* last col not used */; } /** * Display benchmark result in a human-comprehensible String, for one contender in one trial. * * @return comma decorated String describing elapsed, cpu, per call, speed percentage and contender. Time is * always 17 chars wide and speed is always 7, not counting colons and spaces to separate fields. */ public String toString() { // _______elapsed_ns_:____________cpu_ns_:_call_ns_:_____SI_:_candidate // ----1,932,157,579_:_------932,157,579_:_---4.66_:_-64.37_:_piotr // --------14-------___-----------14----___---7--___---6---___ // display total elapsed elapsedTime in a fixed width field String elapsedNanosToDisplay = df0.format( this.elapsedTime ); elapsedNanosToDisplay = leftPad( elapsedNanosToDisplay, 17, false ); // display total cpu elapsedTime in a fixed width field String cpuNanosToDisplay = df0.format( this.elapsedTime - overheadInNanos ); cpuNanosToDisplay = leftPad( cpuNanosToDisplay, 17, false ); // display cpu/elapsedTime per signum call in a fixed width field String cpuPerInvocationNanosToDisplay = df2 .format( ( double ) ( this.elapsedTime - overheadInNanos ) / ( double ) SIGNUM_INVOCATIONS ); cpuPerInvocationNanosToDisplay = leftPad( cpuPerInvocationNanosToDisplay, 7, false ); String speedToDisplay = df2.format( this.getSpeedIndex() ); speedToDisplay = leftPad( speedToDisplay, 6, false ); return elapsedNanosToDisplay + " : " + cpuNanosToDisplay + " : " + cpuPerInvocationNanosToDisplay + " : " + speedToDisplay + " : " + this .contender; } } }