#!/bin/sh -e
# Driver script: for each sorting library below, prepare the library in
# $top/<name>, compile the matching bench<bits>-<name>.cc program, run it
# $runs times collecting its output, and finally hand the results to ./plot.

# Options on the shebang line are dropped when the script is started as
# "sh scriptname", so enable exit-on-error explicitly as well.
set -e

# number of times each benchmark binary is executed
runs=8
# directory the script was started from; all work directories live below it
top=$(pwd)

BENCHCLANG="clang++ -std=c++17 -O3 -march=native"
BENCHGCC="g++ -O3 -march=native"
# these are for compiling bench-* including any code in library headers
# while each library makes its own selection of compiler for its precompiled code


# =====

# djbsort: uses the already-installed library; benchmarks 32- and 64-bit.
X=djbsort
url=https://sorting.cr.yp.to
# the first line printed by djbsort-speed is reported as the version
version="installed version = $(djbsort-speed | head -n 1)"
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

# (Re)create the work directory unless the skiprebuild marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  mkdir "$X"
  cd "$X"
  # assume djbsort installed already, so nothing to build here
  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32 64
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it holds the compiler and its flags
    $BENCHCLANG \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    -ldjbsort -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)


# =====

# radixwrapper: source shipped next to this script; 32-bit only; skipped on ARM.
X=radixwrapper
url='based on https://cr.yp.to/2026/20260202-sort.c'
version='version included in sortbench'
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo "skipping for $(arch)"
    ;;
  *)

# Copy the sources into a fresh work directory and precompile the object
# file unless the skiprebuild marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  mkdir "$X"
  cp "$X.h" "$X/$X.h"
  # the .c file is stored as .cc, so it is compiled by the C++ compiler
  cp "$X.c" "$X/$X.cc"
  cd "$X"

  # $BENCHCLANG is intentionally unquoted: it holds the compiler and its flags
  $BENCHCLANG -c "$X.cc"
  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    $BENCHCLANG \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    "$X.o" -ldjbsort -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)

esac


# =====

# aspas: cloned from git at a pinned commit; 32-bit only; skipped on ARM.
X=aspas
url=https://github.com/vtsynergy/aspas_sort
version=738ec7a1c051f6005ecb0cfec159eff9387b5f9d # 2018-02-17, latest as of 2026-01-22
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo "skipping for $(arch)"
    ;;
  *)

# Fetch a fresh checkout of the pinned commit unless the skiprebuild
# marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  git clone "$url" "$X"
  cd "$X"
  git checkout "$version"
  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it holds the compiler and its flags
    $BENCHCLANG \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    -I include -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)

esac


# =====

# far (fast-and-robust): cloned from git at a pinned commit; 32-bit only;
# skipped on ARM.
X=far
url=https://github.com/simd-sorting/fast-and-robust
version=054f2e2e9f7c00be4dc8d69567f92ddf9832a8f3 # 2021-08-20, latest as of 2026-01-22
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo "skipping for $(arch)"
    ;;
  *)

# Fetch a fresh checkout of the pinned commit unless the skiprebuild
# marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  git clone "$url" "$X"
  cd "$X"
  git checkout "$version"
  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # seems noticeably faster with gcc than with clang
    # ($BENCHGCC is intentionally unquoted: it holds the compiler and its flags)
    $BENCHGCC \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    -I avx2_sort_demo -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      "./bench${bits}-$X"
    # output name derived from $X like every other section (same file name)
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)

esac


# =====

# herf: source shipped next to this script; 32-bit only; runs on every
# architecture.
X=herf
url='based on https://stereopsis.com/radix.html but simplified for int32'
version='version included in sortbench'
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

# Copy the sources into a fresh work directory unless the skiprebuild
# marker exists. Nothing is precompiled here; the .cc file is compiled
# together with the benchmark program below.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  mkdir "$X"
  cp "$X.h" "$X/$X.h"
  # the .c file is stored as .cc, so it is compiled by the C++ compiler
  cp "$X.c" "$X/$X.cc"
  cd "$X"
  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it holds the compiler and its flags
    $BENCHCLANG \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    "$X.cc" -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)


# =====

# sid1607 (avx2-merge-sort): cloned from git at a pinned commit; 32-bit only;
# skipped on ARM.
X=sid1607
url=https://github.com/sid1607/avx2-merge-sort
version=f1c4e0f5f0e28cde5d3cdf16ebf0ce745201366a # 2016-12-12, latest as of 2026-02-08
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo "skipping for $(arch)"
    ;;
  *)

# Fetch a fresh checkout of the pinned commit and build merge_sort.o
# unless the skiprebuild marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  git clone "$url" "$X"
  cd "$X"
  git checkout "$version"

  # same flags as in Makefile
  # (not calling make here since that produces a test program without merge_sort.o)
  g++ -m64 -std=c++0x -O3 -Wall -mavx2 -c merge_sort.cpp

  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it holds the compiler and its flags
    $BENCHCLANG \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    merge_sort.o -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)

esac


# =====

# vxsort: cloned from git at a pinned commit and built with cmake;
# benchmarks 32- and 64-bit; skipped on ARM.
X=vxsort
url=https://github.com/damageboy/vxsort-cpp
version=2c7f79ba539a5c1ad2acf1c2e3c0fa828da25635 # 2023-05-10, latest as of 2026-01-22
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo "skipping for $(arch)"
    ;;
  *)

# Fetch a fresh checkout of the pinned commit, patch its CMakeLists.txt and
# build the library unless the skiprebuild marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  git clone "$url" "$X"
  cd "$X"
  git checkout "$version"

  # Patch the top-level CMakeLists.txt in place: pin the minimum cmake
  # version to 3.15 and comment out the GBench/GTest includes and the
  # bench/ and tests/ subdirectories, so only the library itself is built.
  sed -i 's/^cmake_minimum_required(.*)$/cmake_minimum_required(VERSION 3.15)/' CMakeLists.txt
  sed -i 's/include(ConfigGBench)/# include(ConfigGBench)/' CMakeLists.txt
  sed -i 's/include(ConfigGTest)/# include(ConfigGTest)/' CMakeLists.txt
  sed -i 's!add_subdirectory(${PROJECT_SOURCE_DIR}/bench/)!# add_subdirectory(${PROJECT_SOURCE_DIR}/bench/)!' CMakeLists.txt
  sed -i 's!add_subdirectory(${PROJECT_SOURCE_DIR}/tests/)!# add_subdirectory(${PROJECT_SOURCE_DIR}/tests/)!' CMakeLists.txt

  # out-of-tree build; the nested subshell keeps the cwd change local
  ( mkdir build-release
    cd build-release
    cmake ..
    make -j 4
  )

  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32 64
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it holds the compiler and its flags
    $BENCHCLANG -std=c++17 \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    -I vxsort \
    -I vxsort/vector_machine \
    -I build-release/_deps/cpu_features-src/include \
    -L build-release/vxsort \
    -L build-release/_deps/cpu_features-build \
    -lvxsort \
    -lcpu_features -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)

esac


# =====

# x86simdsort: cloned from git at a pinned commit and built with meson;
# benchmarks 32- and 64-bit; skipped on ARM.
X=x86simdsort
url=https://github.com/intel/x86-simd-sort.git
version=6a7a01da4b0dfde108aa626a2364c954e2c50fe1 # 2025-09-11, latest as of 2026-01-22
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo "skipping for $(arch)"
    ;;
  *)

# Fetch a fresh checkout of the pinned commit and build the shared library
# unless the skiprebuild marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  git clone "$url" "$X"
  cd "$X"
  git checkout "$version"

  ( # many -Wmaybe-uninitialized errors with current compiler
    # so save output to a file
    exec > mesonlog 2>&1 || exit 1

    # Each step is checked explicitly: this subshell is the left side of
    # an || list, where "set -e" does not apply.
    meson setup --buildtype release builddir || exit 1
    cd builddir || exit 1
    meson compile
  ) || {
    # the build output is hidden in mesonlog, so say where to look
    echo "$X: meson build failed; last lines of $top/$X/mesonlog:" >&2
    tail -n 40 mesonlog >&2
    exit 1
  }

  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32 64
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHGCC is intentionally unquoted: it holds the compiler and its flags
    $BENCHGCC -std=c++20 \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    -I lib -L builddir -L builddir/lib \
    -lx86simdsortcpp -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      # prepend builddir to the runtime library search path, keeping any
      # existing LD_LIBRARY_PATH entries after it
      env LD_LIBRARY_PATH="$top/$X/builddir${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH" \
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)

esac


# =====

# vqsort (from the highway repository): cloned from git at a pinned commit
# and built with cmake; benchmarks 32- and 64-bit on every architecture.
X=vqsort
url=https://github.com/google/highway
version=271a9a0ed9de1232d9117f1572c3fe28f8542ec1 # 2026-01-20
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

# Fetch a fresh checkout of the pinned commit and build the libraries
# unless the skiprebuild marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  git clone "$url" "$X"
  cd "$X"

  # nested subshell keeps the cwd change local to the build
  ( git checkout "$version"
    # mkdir and cd are separate statements so that "set -e" stops the build
    # if either fails (inside an && list the first failure would be ignored
    # and cmake would run in the wrong directory)
    mkdir -p build
    cd build
    cmake -DBUILD_TESTING=OFF -DHWY_ENABLE_TESTS=OFF -DHWY_TEST_STANDALONE=ON ..
    make -j 4
  )

  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32 64
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it holds the compiler and its flags
    $BENCHCLANG \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    -I . -I build/googletest-src/googletest/include -L build -L build/lib \
    -lhwy_contrib -lhwy -pthread -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)


# =====

# stdsort: std::sort from the toolchain's standard library, used as the
# baseline; benchmarks 32- and 64-bit on every architecture.
X=stdsort
url='std::sort'
version='installed version'
# section banner in bold blue; ESC is spelled \033 so no raw control byte
# has to survive in the source file
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

# (Re)create the work directory unless the skiprebuild marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  mkdir "$X"
  cd "$X"
  # will use whichever version comes with OS, so nothing to build here
  touch skiprebuild
)

# Compile and run the benchmarks unless the skipbench marker exists.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32 64
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it holds the compiler and its flags
    $BENCHCLANG \
    -o "bench${bits}-$X" "bench${bits}-$X.cc" \
    -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      # progress goes to stderr; $(date -u) is left unquoted so the line
      # keeps its whitespace-collapsed form
      echo "bench $X $bits $run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)


# =====

echo '[1;34m===== create plot32.pdf, plot64.pdf [0m'

cd $top
for bits in 32 64
do
  ./plot $bits \
  stdsort black \
  herf slategray \
  aspas darkcyan \
  sid1607 black \
  vqsort red \
  vxsort darkviolet \
  x86simdsort darkgreen \
  far sienna \
  djbsort blue \
  radixwrapper orangered
done
