/*
* [Merge.java]
*
* Summary: Various ways of merging two SortedArrayLists, also deduping.
*
* Copyright: (c) 2003-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.8+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.2 2003-10-06
* 1.3 2007-07-29 IntelliJ inspector lint, preparze ANT script,
*/
package com.mindprod.sorted;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import static java.lang.System.*;
/**
* Various ways of merging two SortedArrayLists, also deduping.
*
* All methods are static.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.3 2007-07-29 IntelliJ inspector lint, preparze ANT script,
* @since 2003-08-23
*/
public final class Merge
{
/**
* True if you want the TEST harness code.
*/
private static final boolean DEBUGGING = false;
/**
* Remove any matching exceptions from an array list.
*
* @param generic type of the SortedArrayLists, e.g. String
* @param primary primary SortedArrayList, will be sorted by the comparator.
* @param unwanted List of exception objects which will be removed if they exist in the primary list. Will be
* sorted by the comparator.
* @param comparator Comparator for comparing two elements in the SortedArrayLists.
*
* @return primary list with any matching objects in the exception list removed, and sorted in order by the
* comparator.
*/
public static SortedArrayList allBut( SortedArrayList primary,
SortedArrayList unwanted,
Comparator comparator )
{
return genericCombiner( primary,
false,
true,
unwanted,
false,
false,
comparator,
false );
}
/**
* Remove any duplicates. The first object encountered is kept. Subsequent ones are removed.
*
* @param generic type of the SortedArrayLists, e.g. String
* @param withDups SortedArrayList with duplicates. It will be sorted by the comparator.
* @param comparator Compator for comparing two elements in the SortedArrayLists to see if they are duplicates.
*
* @return list with any duplicates removed and sorted in order by the comparator.
*/
@SuppressWarnings( { "SameParameterValue", "WeakerAccess" } )
public static SortedArrayList deDupKeepingFirst( SortedArrayList withDups,
Comparator comparator )
{
assert ( comparator != null ) : "can't have a null comparator.";
withDups.sort( comparator );
int size = withDups.size();
SortedArrayList result = new SortedArrayList<>( size );
result.setComparator( comparator );
if ( size <= 1 )
{
if ( size == 1 )
{
result.add( withDups.get( 0 ) );
}
result.sorted = true;
return result;
}
E prev = withDups.get( 0 );
result.add( prev );
for ( int i = 1; i < size; i++ )
{
E o = withDups.get( i );
if ( comparator.compare( o, prev ) != 0 )
{
result.add( o );
prev = o;
}
}
result.sorted = true;
return result;
}
/**
* Remove any duplicates. The last duplicate object encountered is kept. previous duplicates are removed. *
*
* @param generic type of the SortedArrayLists, e.g. String
* @param withDups SortedArrayList with duplicates. It will be sorted by the comparator.
* @param comparator Comparator for comparing two elements in the SortedArrayLists to see if they are duplicates.
* must not be null.
*
* @return list with any duplicates removed and sorted in order by the comparator.
*/
public static SortedArrayList deDupKeepingLast( SortedArrayList withDups,
Comparator comparator )
{
assert ( comparator != null ) : "can't have a null comparator.";
withDups.sort( comparator );
int size = withDups.size();
SortedArrayList result = new SortedArrayList<>( size );
result.setComparator( comparator );
if ( size <= 1 )
{
if ( size == 1 )
{
result.add( withDups.get( 0 ) );
}
result.sorted = true;
return result;
}
E prev = withDups.get( 0 );
for ( int i = 1; i < size; i++ )
{
E o = withDups.get( i );
if ( comparator.compare( o, prev ) != 0 )
{
result.add( prev );
}
prev = o;
}
result.add( prev );
result.sorted = true;
return result;
}
/**
* Combine two SortedArrayLists matching using the supplied comparator. You decide precisely what you want included
* in the result. If there are duplicates in primary and secondary they match correspondingly with the excess of
* eitther considered as non-matching. pri sec matched=m u m u=unmatched possibilities include - F F F F --
* empty result
- F F F T -- allBut with primary and secondary function reversed.
- F F T F --
* intersection ( matches, taking secondary )
- F F T T -- secondary only
- F T F F -- allBut,
* pairwise=false
- F T F T -- differences
- F T T F -- replace
- F T T T -- update
* - T F F F -- intersection ( matches, taking primary ), pairwise = false. onlyif, justLegal (considering
* secondary a list of legal possibilities)
- T F F T -- replace with primary and secondary function
* reversed.
- T F T F -- matches (both primary and secondary included )
- T F T T -- secondaries +
* matching primaries
- T T F F -- primary only
- T T F T -- update with primary and secondary
* functions reversed.
- T T T F -- primaries + matching secondaries
- T T T T -- union, combine
* everything
/ul> * @param generic type of the SortedArrayLists, e.g. String
*
* @param primary primary SortedArrayList, will be sorted by the comparator.
* @param includeMatchedPrimary true if you want elements from the primary with a match in the secondary
* included in the result.
* @param includeUnmatchedPrimary true if you want elements from the primary without a match in the secondary
* included in the result.
* @param secondary SortedArrayList of updates to the primary list. Will be sorted by the
* comparator.
* @param includeMatchedSecondary true if you want elements from the secondary with a match in the primary
* included in the result. If you say true to both includeMatchedPrimaries and
* includeMatchedSecondaries you will get two copies of that matched element.
* @param includeUnmatchedSecondary true if you want elements from the secondary without a match in the primary
* included in the result.
* @param comparator Comparator for comparing two elements in the SortedArrayLists.
* @param pairwise true if duplicate primaries must have a duplicate secondary to match. false if
* duplicate primaries repeatedly match the first duplicate secondary, and
* duplicate secondaries are considered as unmatching.
*
* @return New SortedArrayList with elements combined from primary and secondary.
*/
@SuppressWarnings( { "WeakerAccess", "MethodParameterNamingConvention" } )
public static SortedArrayList genericCombiner( SortedArrayList primary,
boolean includeMatchedPrimary,
boolean includeUnmatchedPrimary,
SortedArrayList secondary,
boolean includeMatchedSecondary,
boolean includeUnmatchedSecondary,
Comparator super E> comparator,
boolean pairwise )
{
assert ( comparator != null ) : "can't have a null comparator.";
// put in same order so can find matches easily.
primary.sort( comparator );
secondary.sort( comparator );
SortedArrayList result;
int primarySize = primary.size();
if ( primarySize == 0 )
{
if ( includeUnmatchedSecondary )
{
return secondary.clone();
}
else
{
result = new SortedArrayList<>( 0 );
result.setComparator( comparator );
result.sorted = true;
return result;
}
}
int secondarySize = secondary.size();
if ( secondarySize == 0 )
{
if ( includeUnmatchedPrimary )
{
return primary.clone();
}
else
{
result = new SortedArrayList<>( 0 );
result.setComparator( comparator );
result.sorted = true;
return result;
}
}
result = new SortedArrayList<>( primarySize + secondarySize );
result.setComparator( comparator );
int i = 0, j = 0;
while ( i < primarySize && j < secondarySize )
{
E p = primary.get( i );
E s = secondary.get( j );
int diff = comparator.compare( p, s );
if ( diff < 0 )
{
if ( includeUnmatchedPrimary )
{
result.add( p );
}
i++;
}
else if ( diff == 0 )
{
// we have a match,
if ( includeMatchedPrimary )
{
result.add( p );
}
if ( includeMatchedSecondary )
{
result.add( s );
}
i++;
if ( pairwise )
{
j++;
}
}
else
/* ( diff > 0 ) */
{
// merge in unmatched secondary.
if ( includeUnmatchedSecondary )
{
result.add( s );
}
j++;
}
} // end while
if ( includeUnmatchedPrimary )
{
// process any remaining primary
for ( ; i < primarySize; i++ )
{
result.add( primary.get( i ) );
}
}
if ( includeUnmatchedSecondary )
{
// process any remaining secondary
for ( ; j < secondarySize; j++ )
{
result.add( secondary.get( j ) );
}
}
result.sorted = true;
return result;
}
/**
* only primaries that have a match in the list of legal possibilities.
*
* @param generic type of the SortedArrayLists, e.g. String
* @param primary primary SortedArrayList, will be sorted by the comparator.
* @param secondary List of objects which are are legal matches. Will be sorted by the comparator.
* @param comparator Comparator for comparing two elements in the SortedArrayLists.
*
* @return primary list with any elements without matches is the legal list removed.
*/
public static SortedArrayList intersection( SortedArrayList primary,
SortedArrayList secondary,
Comparator super E> comparator )
{
return genericCombiner( primary,
true,
false,
secondary,
false,
false,
comparator,
false );
}
/**
* TEST harness
*
* @param args not used
*/
public static void main( String[] args )
{
if ( DEBUGGING )
{
// TEST allBut
{
SortedArrayList primary = new SortedArrayList<>();
primary.add( "apple" );
primary.add( "peach" );
primary.add( "strawberry" );
primary.add( "lemon" );
primary.add( "apricot" );
primary.add( "pear" );
primary.add( "grapefruit" );
SortedArrayList unwanted = new SortedArrayList<>();
unwanted.add( "GRAPEFRUIT" );
unwanted.add( "lemon" );
SortedArrayList r =
allBut( primary, unwanted, String.CASE_INSENSITIVE_ORDER );
int size = r.size();
out.println( "allBut --------------" );
//noinspection ForLoopReplaceableByForEach
for ( int i = 0; i < size; i++ )
{
out.println( r.get( i ) );
}
}
// TEST deDupKeepingFirst
{
SortedArrayList dups = new SortedArrayList<>();
dups.add( "APPLE" );
dups.add( "apple" );
dups.add( "apple" );
dups.add( "peach" );
dups.add( "strawberry" );
dups.add( "apricot" );
dups.add( "STRAWBERRY" );
dups.add( "grapefruit" );
SortedArrayList r =
deDupKeepingFirst( dups, String.CASE_INSENSITIVE_ORDER );
int size = r.size();
out.println( "deDupKeepingFirst --------------" );
for ( String aR : r )
{
out.println( aR );
}
}
// TEST deDupKeepingLast
{
SortedArrayList dups = new SortedArrayList<>();
dups.add( "APPLE" );
dups.add( "apple" );
dups.add( "apple" );
dups.add( "peach" );
dups.add( "strawberry" );
dups.add( "apricot" );
dups.add( "STRAWBERRY" );
dups.add( "grapefruit" );
SortedArrayList r =
deDupKeepingLast( dups, String.CASE_INSENSITIVE_ORDER );
int size = r.size();
out.println( "deDupKeepingLast --------------" );
for ( String aR : r )
{
out.println( aR );
}
}
// TEST intersection
{
SortedArrayList primary = new SortedArrayList<>();
primary.add( "apple" );
primary.add( "apple" );
primary.add( "APPLE" );
primary.add( "apricot" );
primary.add( "grapefruit" );
primary.add( "peach" );
primary.add( "strawberry" );
primary.add( "STRAWBERRY" );
SortedArrayList legal = new SortedArrayList<>();
legal.add( "aPPLE" );
legal.add( "apPLE" );
legal.add( "ugli fruit" );
SortedArrayList r =
intersection( primary,
legal,
String.CASE_INSENSITIVE_ORDER );
int size = r.size();
out.println( "intersection --------------" );
for ( String aR : r )
{
out.println( aR );
}
}
// TEST pruneNulls
{
SortedArrayList primary = new SortedArrayList<>();
primary.add( "apple" );
primary.add( "apricot" );
primary.add( "apricot" );
primary.add( "apricot" );
primary.add( "apricot" );
primary.add( "GRAPEFRUIT" );
primary.add( "grapefruit" );
primary.add( "lemon" );
primary.add( "lemon" );
primary.add( "peach" );
primary.add( "pear" );
primary.add( "strawberry" );
primary.set( 2, null );
primary.set( 6, null );
SortedArrayList r = pruneNulls( primary );
int size = r.size();
out.println( "pruneNulls --------------" );
for ( String aR : r )
{
out.println( aR );
}
}
// TEST range
{
SortedArrayList primary = new SortedArrayList<>();
primary.add( "apple" );
primary.add( "apricot" );
primary.add( "apricot" );
primary.add( "apricot" );
primary.add( "apricot" );
primary.add( "GRAPEFRUIT" );
primary.add( "grapefruit" );
primary.add( "lemon" );
primary.add( "lemon" );
primary.add( "peach" );
primary.add( "pear" );
primary.add( "strawberry" );
SortedArrayList r =
range( primary,
String.CASE_INSENSITIVE_ORDER,
"apricot",
"lemon" );
int size = r.size();
out.println( "range --------------" );
for ( String aR : r )
{
out.println( aR );
}
}
// TEST replace
{
SortedArrayList primary = new SortedArrayList<>();
primary.add( "apple" );
primary.add( "apple" );
primary.add( "APPLE" );
primary.add( "apricot" );
primary.add( "grapefruit" );
primary.add( "peach" );
primary.add( "strawberry" );
primary.add( "STRAWBERRY" );
SortedArrayList update = new SortedArrayList<>();
update.add( "aPPLE" );
update.add( "apPLE" );
update.add( "ugli fruit" );
SortedArrayList r =
replace( primary, update, String.CASE_INSENSITIVE_ORDER );
int size = r.size();
out.println( "replace --------------" );
for ( String aR : r )
{
out.println( aR );
}
}
// TEST union
{
SortedArrayList primary = new SortedArrayList<>();
primary.add( "apple" );
primary.add( "apple" );
primary.add( "peach" );
primary.add( "strawberry" );
primary.add( "APPLE" );
primary.add( "apricot" );
primary.add( "STRAWBERRY" );
primary.add( "grapefruit" );
SortedArrayList secondary = new SortedArrayList<>();
secondary.add( "kiwi" );
secondary.add( "apple" );
secondary.add( "ugli fruit" );
SortedArrayList r =
union( primary, secondary, String.CASE_INSENSITIVE_ORDER );
int size = r.size();
out.println( "union --------------" );
for ( String aR : r )
{
out.println( aR );
}
}
// TEST update
{
SortedArrayList primary = new SortedArrayList<>();
primary.add( "apple" );
primary.add( "apple" );
primary.add( "APPLE" );
primary.add( "apricot" );
primary.add( "grapefruit" );
primary.add( "peach" );
primary.add( "strawberry" );
primary.add( "STRAWBERRY" );
SortedArrayList update = new SortedArrayList<>();
update.add( "aPPLE" );
update.add( "apPLE" );
update.add( "ugli fruit" );
SortedArrayList r =
update( primary, update, String.CASE_INSENSITIVE_ORDER );
int size = r.size();
out.println( "update --------------" );
for ( String aR : r )
{
out.println( aR );
}
}
} // end if DEBUG
} // end main
/**
* Prune out any null elements from the array. Array will not be automatically sorted.
*
* @param generic type of the SortedArrayLists, e.g. String
* @param a SortedArrayList to be pruned of null elements.
*
* @return SortedArrayList with null elements removed.
*/
public static SortedArrayList pruneNulls( SortedArrayList a )
{
// We don't bother with two passes to get a perfectly accurate size
// estimate.
// Sort order does not matter.
int size = a.size();
SortedArrayList result = new SortedArrayList<>( size );
result.setComparator( a.getComparator() );
for ( E elt : a )
{
if ( elt != null )
{
result.add( elt );
}
}
result.sorted = a.sorted;
return result;
}
/**
* Prune out any null elements from the array.
*
* @param generic type of the SortedArrayLists, e.g. String
* @param a ArrayList to be pruned of null elements.
*
* @return ArrayList with null elements removed.
*/
public static ArrayList pruneNulls( ArrayList a )
{
// we don't bother with two passes to get a perfectly accurate size
// estimate.
// Sort order does not matter.
int size = a.size();
ArrayList result = new ArrayList<>( size );
//noinspection ForLoopReplaceableByForEach
for ( int i = 0; i < size; i++ )
{
E elt = a.get( i );
if ( elt != null )
{
result.add( elt );
}
}
return result;
}
/**
* Extract a range of an SortedArrayList Finds end points are provided as indices. Does no sort first
*
* @param generic type of the SortedArrayLists, e.g. String
* @param a SortedArrayList to extract the range from.
* @param from lowest index to return. Must exist.
* @param to Highest index to return. Must exist.
*
* @return subrange of the array from through to.
*/
@SuppressWarnings( { "WeakerAccess" } )
public static SortedArrayList range( SortedArrayList a,
int from,
int to )
{
SortedArrayList result;
if ( from <= to )
{
int resultSize = to - from + 1;
result = new SortedArrayList<>( resultSize );
for ( int i = from; i <= to; i++ )
{
result.add( a.get( i ) );
}
}
else
{
// return an empty list
result = new SortedArrayList<>( 0 );
}
result.setComparator( a.getComparator() );
result.sorted = a.sorted;
return result;
}
/**
* Extract a range of an SortedArrayList Finds end points of the range by binary search of the elements.
* Automatically sorts first if necessary.
*
* @param generic type of the SortedArrayLists, e.g. String
* @param a SortedArrayList to extract the range from.
* @param comparator comparator Comparator for comparing two elements in the SortedArrayLists
* @param from lowest object to return. Need not exactually exist.
* @param to Highest object to return, need not actually exist.
*
* @return subrange of the array starting with first element matching from, and ending with last element matching
* to.
*/
@SuppressWarnings( { "SameParameterValue", "WeakerAccess" } )
public static SortedArrayList range( SortedArrayList a,
Comparator super E> comparator,
E from,
E to )
{
assert ( comparator != null ) : "can't have a null comparator.";
a.sort( comparator );
int size = a.size();
// find low element.
int fromIndex = Collections.binarySearch( a, from, comparator );
if ( fromIndex < 0 )
{
fromIndex = -( fromIndex + 1 );
}
else
{
// if there are dups, no guarantee we hit the first one.
// Back up till hit something
// too small
for ( int i = fromIndex - 1; i >= 0; i-- )
{
if ( comparator.compare( a.get( i ), from ) < 0 )
{
// we went too far.
break;
}
else
{
fromIndex = i;
}
} // end for
} // end else
// find high element.
int toIndex = Collections.binarySearch( a, to, comparator );
if ( toIndex < 0 )
{
toIndex = -( toIndex + 1 ) - 1;
}
else
{
// if there are dups, no guarantee we hit the first one.
// look ahead till hit something
// too big
for ( int i = toIndex + 1; i < size; i++ )
{
if ( comparator.compare( a.get( i ), to ) > 0 )
{
// we went too far.
break;
}
else
{
toIndex = i;
}
} // end for
} // end else
return range( a, fromIndex, toIndex );
}
/**
* Update a primary list with list of updates. Updates with a match will replace the corresponding primary. Updates
* without a match will be discarded. Primaries without a match are included. Similar to update.
*
* @param generic type of the SortedArrayLists, e.g. String
* @param primary primary SortedArrayList, will be sorted by the comparator.
* @param update array list of changed to the primary list.
* @param comparator Comparator for comparing two elements in the SortedArrayLists.
*
* @return primary list with any matching objects replaced. If there are duplicates in the primary and update file,
* they will match pairwise. Updates without a match will be discarded.
*/
public static SortedArrayList replace( SortedArrayList primary,
SortedArrayList update,
Comparator super E> comparator )
{
return genericCombiner( primary,
false,
true,
update,
true,
false,
comparator,
true );
}
/**
* Combine two SortedArrayLists into one. All elements are included from both lists.
*
* @param generic type of the SortedArrayLists, e.g. String
* @param primary main SortedArrayList, will be sorted by the comparator.
* @param secondary extra SortedArrayList, will be sorted by the comparator.
* @param comparator Comparator for comparing two elements in the SortedArrayLists.
*
* @return primary list with all the elements sorted in order by the comparator. Where there are duplicates, the
* primaries will come first.
*/
public static SortedArrayList union( SortedArrayList primary,
SortedArrayList secondary,
Comparator super E> comparator )
{
return genericCombiner( primary,
true,
true,
secondary,
true,
true,
comparator,
true );
}
/**
* Update a primary list with list of updates. Updates with a match will replace the corresponding primary. Updates
* without a match will be included. Primaries without a match are included. Similar to replace.
*
* @param generic type of the SortedArrayLists, e.g. String
* @param primary primary SortedArrayList, will be sorted by the comparator.
* @param update array list of changed to the primary list.
* @param comparator Comparator for comparing two elements in the SortedArrayLists.
*
* @return primary list with any matching objects replaced. If there are duplicates in the primary and update file,
* they will match pairwise. Updates without a match will be inserted.
*/
@SuppressWarnings( { "SameParameterValue", "WeakerAccess" } )
public static SortedArrayList update( SortedArrayList primary,
SortedArrayList update,
Comparator super E> comparator )
{
return genericCombiner( primary,
false,
true,
update,
true,
true,
comparator,
true );
}
} // end Merge