Intro
This time, I present the ObjectBucketSort, a stable sorting algorithm for objects.
Code
io.github.coderodde.util.ObjectBucketSort.java:
package io.github.coderodde.util;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.RandomAccess;
/**
* This class contains the implementations of object bucket sort algorithm.
*/
public class ObjectBucketSort {
private ObjectBucketSort() {
}
/**
* Sorts the input list containing {@link Comparable} objects.
*
* @param <T> the element type.
* @param list the list to sort.
*/
public static <T extends Comparable<? super T>>
void sort(List<T> list) {
if (!(list instanceof RandomAccess)) {
// Use an ArrayList as a proxy:
List<T> aux = new ArrayList<>(list);
sort(aux);
list.clear();
list.addAll(aux);
return;
}
// Maps a bucket representative to the actual bucket:
Map<T, List<T>> m = new HashMap<>();
// Build the buckets:
for (T elem : list) {
m.computeIfAbsent(elem, (k) -> new ArrayList<>()).add(elem);
}
// Sort the keys:
List<T> keys = new ArrayList<>(m.keySet());
Collections.sort(keys);
list.clear(); // Does not shrink the array list.
// Traverse buckets in sorted key order:
for (T key : keys) {
list.addAll(m.get(key));
}
}
/**
* Sorts the input list using an explicit comparator.
*
* @param <T> the element type.
* @param list the list to sort.
* @param cmp the comparator object.
*/
public static <T> void sort(List<T> list, Comparator<? super T> cmp) {
if (!(list instanceof RandomAccess)) {
// Use an ArrayList as a proxy:
List<T> aux = new ArrayList<>(list);
sort(aux, cmp);
list.clear();
list.addAll(aux);
return;
}
// Maps a bucket representative to the actual bucket:
Map<T, List<T>> m = new HashMap<>();
// Build the buckets:
for (T elem : list) {
m.computeIfAbsent(elem, (k) -> new ArrayList<>()).add(elem);
}
// Sort the keys:
List<T> keys = new ArrayList<>(m.keySet());
Collections.sort(keys, cmp);
list.clear(); // Does not shrink the array list.
// Traverse buckets in sorted key order:
for (T key : keys) {
list.addAll(m.get(key));
}
}
}
io.github.coderodde.util.ObjectBucketSortTest.java:
package io.github.coderodde.util;
import java.util.LinkedList;
import java.util.List;
import org.junit.Test;
import static org.junit.Assert.*;
public class ObjectBucketSortTest {
@Test
public void sortLinkedList() {
List<Integer> list = new LinkedList<>();
list.addAll(List.of(3, 1, 5, 4, 2));
ObjectBucketSort.sort(list);
for (int i = 0, key = 1; i < list.size(); ++i, ++key) {
assertEquals(Integer.valueOf(key), list.get(i));
}
}
}
io.github.coderodde.util.demo.BenchmarkRunner.java:
package io.github.coderodde.util.demo;
import java.util.ArrayList;
import java.util.List;
/**
* This class implements a benchmark runner.
*/
public class BenchmarkRunner implements Runnable {
@FunctionalInterface
public interface Sorter {
void sort(List<Integer> list);
}
public static final int NUMBER_OF_LISTS = 5;
public static final int LIST_LENGTH = 1_000_000;
private final Sorter sorter;
private final List<Integer>[] lists = new List[NUMBER_OF_LISTS];
private final String name;
private int listsAdded = 0;
private int listsBenchmarked = 0;
public BenchmarkRunner(Sorter sorter, String name) {
this.sorter = sorter;
this.name = name;
}
public String getSorterName() {
return name;
}
public void addList(List<Integer> list) {
if (listsAdded >= lists.length) {
throw new IllegalStateException("Cannot accommodate more lists.");
}
lists[listsAdded++] = new ArrayList<>(list);
}
public int getNumberOfIterations() {
return lists.length;
}
@Override
public void run() {
if (listsBenchmarked >= lists.length) {
throw new IllegalStateException("Benchmark exhausted.");
}
sorter.sort(lists[listsBenchmarked++]);
}
@Override
public boolean equals(Object o) {
if (o == this) {
return true;
}
if (o == null) {
return false;
}
if (!getClass().equals(o.getClass())) {
return false;
}
BenchmarkRunner other = (BenchmarkRunner) o;
if (lists.length != other.lists.length) {
return false;
}
if (listsBenchmarked != other.listsBenchmarked) {
return false;
}
for (int i = 0; i < lists.length; ++i) {
if (!lists[i].equals(other.lists[i])) {
return false;
}
}
return true;
}
}
io.github.coderodde.util.demo.BenchmarkRunnerProvider.java:
package io.github.coderodde.util.demo;
import io.github.coderodde.util.demo.BenchmarkRunner.Sorter;
/**
* This interface defines the API for benchmark runner provision.
*/
public abstract class BenchmarkRunnerProvider {
protected final Sorter sorter;
protected final String sorterName;
/**
* Constructs this class.
*
* @param sorter the actual sorting algorithm.
* @param sorterName the name of the sorting algorithm.
*/
public BenchmarkRunnerProvider(Sorter sorter, String sorterName) {
this.sorter = sorter;
this.sorterName = sorterName;
}
/**
* Creates the benchmark runner.
*
* @return benchmark provider.
*/
public abstract BenchmarkRunner provide();
}
io.github.coderodde.util.demo.AscendingBenchmarkProvider.java:
package io.github.coderodde.util.demo;
import io.github.coderodde.util.demo.BenchmarkRunner.Sorter;
import java.util.ArrayList;
import java.util.List;
/**
* This class provides the sparse data benchmark runner.
*/
public class AscendingBenchmarkProvider extends BenchmarkRunnerProvider {
public AscendingBenchmarkProvider(Sorter sorter,
String sorterName) {
super(sorter, sorterName);
}
@Override
public BenchmarkRunner provide() {
BenchmarkRunner br =
new BenchmarkRunner(
(lst) -> sorter.sort(lst), sorterName);
for (int i = 0; i < BenchmarkRunner.NUMBER_OF_LISTS; ++i) {
br.addList(createRandomList());
}
return br;
}
private List<Integer> createRandomList() {
List<Integer> list = new ArrayList<>(BenchmarkRunner.LIST_LENGTH);
for (int i = 0; i < BenchmarkRunner.LIST_LENGTH; ++i) {
list.add(i);
}
return list;
}
}
io.github.coderodde.util.demo.DescendingBenchmarkProvider.java:
package io.github.coderodde.util.demo;
import io.github.coderodde.util.demo.BenchmarkRunner.Sorter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* This class provides the sparse data benchmark runner.
*/
public class DescendingBenchmarkProvider extends BenchmarkRunnerProvider {
public DescendingBenchmarkProvider(Sorter sorter, String sorterName) {
super(sorter, sorterName);
}
@Override
public BenchmarkRunner provide() {
BenchmarkRunner br = new BenchmarkRunner((lst) -> sorter.sort(lst),
sorterName);
for (int i = 0; i < BenchmarkRunner.NUMBER_OF_LISTS; ++i) {
br.addList(createRandomList());
}
return br;
}
private List<Integer> createRandomList() {
List<Integer> list = new ArrayList<>(BenchmarkRunner.LIST_LENGTH);
for (int i = 0; i < BenchmarkRunner.LIST_LENGTH; ++i) {
list.add(i);
}
Collections.reverse(list);
return list;
}
}
io.github.coderodde.util.demo.DenseBenchmarkProvider.java:
package io.github.coderodde.util.demo;
import io.github.coderodde.util.demo.BenchmarkRunner.Sorter;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
* This class provides the dense data benchmark runner.
*/
public class DenseBenchmarkProvider extends BenchmarkRunnerProvider {
private static final int KEY_UPPER_BOUND = 1_000;
private final Random random;
public DenseBenchmarkProvider(Sorter sorter,
String sorterName,
Random random) {
super(sorter, sorterName);
this.random = random;
}
@Override
public BenchmarkRunner provide() {
BenchmarkRunner br = new BenchmarkRunner((lst) -> sorter.sort(lst),
sorterName);
for (int i = 0; i < BenchmarkRunner.NUMBER_OF_LISTS; ++i) {
br.addList(createRandomList());
}
return br;
}
private List<Integer> createRandomList() {
List<Integer> list = new ArrayList<>(BenchmarkRunner.LIST_LENGTH);
for (int i = 0; i < BenchmarkRunner.LIST_LENGTH; ++i) {
list.add(random.nextInt(KEY_UPPER_BOUND));
}
return list;
}
}
io.github.coderodde.util.demo.SparseBenchmarkProvider.java:
package io.github.coderodde.util.demo;
import io.github.coderodde.util.demo.BenchmarkRunner.Sorter;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
* This class provides the sparse data benchmark runner.
*/
public class SparseBenchmarkProvider extends BenchmarkRunnerProvider {
private final Random random;
public SparseBenchmarkProvider(Sorter sorter,
String sorterName,
Random random) {
super(sorter, sorterName);
this.random = random;
}
@Override
public BenchmarkRunner provide() {
BenchmarkRunner br = new BenchmarkRunner((lst) -> sorter.sort(lst),
sorterName);
for (int i = 0; i < BenchmarkRunner.NUMBER_OF_LISTS; ++i) {
br.addList(createRandomList());
}
return br;
}
private List<Integer> createRandomList() {
List<Integer> list = new ArrayList<>(BenchmarkRunner.LIST_LENGTH);
for (int i = 0; i < BenchmarkRunner.LIST_LENGTH; ++i) {
list.add(random.nextInt());
}
return list;
}
}
io.github.coderodde.util.demo.Demo.java:
package io.github.coderodde.util.demo;
import io.github.coderodde.statistics.run.RunStatistics;
import io.github.coderodde.statistics.run.Runner;
import io.github.coderodde.util.ObjectBucketSort;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
public class Demo {
public static void main(String[] args) {
BenchmarkRunner.Sorter sort1 = (lst -> Collections.sort(lst));
BenchmarkRunner.Sorter sort2 = (lst -> ObjectBucketSort.sort(lst));
long seed = System.currentTimeMillis();
Random rnd1 = new Random(seed);
Random rnd2 = new Random(seed);
final String str1 = "Arrays.sort";
final String str2 = "OCS.sort";
BenchmarkRunnerProvider brp1;
BenchmarkRunnerProvider brp2;
BenchmarkRunnerProvider brp3;
BenchmarkRunnerProvider brp4;
BenchmarkRunnerProvider brp5;
BenchmarkRunnerProvider brp6;
BenchmarkRunnerProvider brp7;
BenchmarkRunnerProvider brp8;
brp1 = new SparseBenchmarkProvider(sort1, str1, rnd1);
brp2 = new SparseBenchmarkProvider(sort2, str2, rnd2);
brp3 = new DenseBenchmarkProvider(sort1, str1, rnd1);
brp4 = new DenseBenchmarkProvider(sort2, str2, rnd2);
brp5 = new AscendingBenchmarkProvider(sort1, str1);
brp6 = new AscendingBenchmarkProvider(sort2, str2);
brp7 = new DescendingBenchmarkProvider(sort1, str1);
brp8 = new DescendingBenchmarkProvider(sort2, str2);
List<BenchmarkRunner> benchmarkRunners = new ArrayList<>(8);
for (BenchmarkRunnerProvider brp :
new BenchmarkRunnerProvider[]{ brp1, brp2, brp3, brp4,
brp5, brp6, brp7, brp8 }) {
BenchmarkRunner br = brp.provide();
RunStatistics rs = Runner.measure(br,
br.getNumberOfIterations());
benchmarkRunners.add(br);
System.out.printf(
"[%s %s]\n",
br.getSorterName(),
brp.getClass().getSimpleName());
System.out.println(rs);
System.out.println();
}
System.out.println("Algorithms agree: " + eq(benchmarkRunners));
}
private static <T> boolean eq(List<BenchmarkRunner> benchmarkRunners) {
BenchmarkRunner br1 = benchmarkRunners.get(0);
BenchmarkRunner br2 = benchmarkRunners.get(1);
BenchmarkRunner br3 = benchmarkRunners.get(2);
BenchmarkRunner br4 = benchmarkRunners.get(3);
BenchmarkRunner br5 = benchmarkRunners.get(4);
BenchmarkRunner br6 = benchmarkRunners.get(5);
BenchmarkRunner br7 = benchmarkRunners.get(6);
BenchmarkRunner br8 = benchmarkRunners.get(7);
return br1.equals(br2) &&
br3.equals(br4) &&
br5.equals(br6) &&
br6.equals(br7) &&
br7.equals(br8);
}
}
Typical output
[Arrays.sort SparseBenchmarkProvider]
min = 531,566,200 ns, max = 672,968,100 ns, mean = 577918600 ns, median = 550253900 ns, sd = 52843165.593 ns
[OCS.sort SparseBenchmarkProvider]
min = 1,572,636,900 ns, max = 1,821,448,400 ns, mean = 1702789700 ns, median = 1734070900 ns, sd = 101202366.774 ns
[Arrays.sort DenseBenchmarkProvider]
min = 355,720,500 ns, max = 360,599,600 ns, mean = 358244400 ns, median = 358792700 ns, sd = 1740666.223 ns
[OCS.sort DenseBenchmarkProvider]
min = 46,216,200 ns, max = 49,644,200 ns, mean = 47113560 ns, median = 46492300 ns, sd = 1278291.314 ns
[Arrays.sort AscendingBenchmarkProvider]
min = 3,755,000 ns, max = 23,740,000 ns, mean = 7857020 ns, median = 3832900 ns, sd = 7942575.863 ns
[OCS.sort AscendingBenchmarkProvider]
min = 229,201,100 ns, max = 589,209,000 ns, mean = 484994140 ns, median = 534701000 ns, sd = 129834965.528 ns
[Arrays.sort DescendingBenchmarkProvider]
min = 12,214,200 ns, max = 19,883,200 ns, mean = 14614880 ns, median = 12547000 ns, sd = 3036998.386 ns
[OCS.sort DescendingBenchmarkProvider]
min = 256,077,100 ns, max = 519,751,000 ns, mean = 404646240 ns, median = 401456000 ns, sd = 90362445.816 ns
Algorithms agree: true
While inferior in general, it out-performs the Arrays.sort on dense data (\$k << n\$), where the running time complexity of my object bucket sort is, in fact, \$\Theta(n + k \log k)\$, where \$n\$ is the size of the input list \$(x_0, x_1, \ldots, x_{n - 1})\$ and \$k = \vert \{ x_0, x_1, \ldots, x_{n - 1} \} \vert\$ is the number of distinct keys.
Critique request
As always, I am eager to receive any constructive commentary.
Outro
You will need this for compiling and running.