#!/bin/sh

# This script, if executed as follows:
#
#              ./histRocFilter.sh s b  
#
#  expects an input stream like this:
#
#              s 34.2
#              b 44.7
#              b 32.2
#              s  2.1
#              b 82.2
#
# where the labels can be arbitrary so long as there are two types,
# and the script then outputs co-ordinates for a ROC curve in the form of
# (x,y) pairs, y being the signal efficiency and x the background efficiency.
# In the example above, "s" represents signal, and "b" background.
# It is assumed that high input values are more signal-like, and that a
# selection cut is therefore high-sided (records at or above the cut are kept).

# Both label arguments are mandatory; a third, optional argument sets the
# interpolation tolerance. The usage text goes to stderr because stdout is
# the data channel of this filter, and it names the script as it was actually
# invoked rather than a hard-coded (and possibly stale) file name.
if [ "$#" -lt 2 ]; then
  cat >&2 << EOF

Usage:

${0##*/}  signalLabel backgroundLabel [tolerance]

EOF
  exit 2
fi

# Positional arguments:
#   $1  label marking signal records
#   $2  label marking background records
#   $3  optional tolerance for the curve output (defaults to DEFAULT_TOL)
export s="$1"
export b="$2"
export DEFAULT_TOL=0
# The default must be expanded with a leading "$"; without it tol would be
# set to the literal text "DEFAULT_TOL" instead of the value 0.
export tol="${3:-$DEFAULT_TOL}"

# Scratch file that holds the regularised, sorted copy of the input stream.
# The assignment is kept separate from "export" so that a mktemp failure is
# not masked by the exit status of export.
af=$(mktemp) || {
  printf '%s: cannot create temporary file\n' "${0##*/}" >&2
  exit 1
}
export af

# Remove the scratch file on any exit path, including interruption.
trap 'rm -f -- "$af"' EXIT
trap 'exit 1' HUP INT TERM

# Record the input stream.
#
# Regularise the input to one "label value" pair per line, whatever the
# original line breaks were: whitespace-separated tokens are taken two at a
# time across the whole stream. The pairs are then sorted by value, highest
# first, and stored in the scratch file.
#
# Tokens are printed through an explicit "%s" format so that a label
# containing "%" is copied verbatim instead of being interpreted as a printf
# format string.
awk '{
  for (i = 1; i <= NF; ++i) {
    if (++seen % 2) {
      printf "%s ", $i
    } else {
      print $i
    }
  }
}' | sort -r -n -k 2 > "$af"

#grep "^$s " $af | sort -n -k 2 > $sf
#grep "^$b " $af | sort -n -k 2 > $bf

# count_label FILE LABEL
#
# Print the number of records in FILE whose first field is exactly LABEL.
# The label is compared as a plain string, never as a regular expression, so
# labels containing characters such as "." or "+" are counted correctly.
count_label() {
  ROC_LABEL="$2" awk '
    BEGIN { want = ENVIRON["ROC_LABEL"] "" }
    ($1 "") == want { ++n }
    END { print n + 0 }
  ' < "$1"
}

# roc_points FILE SIGNAL BACKGROUND NS NB [TOL]
#
# Read "label value" records from FILE (expected to be sorted by value,
# highest first) and print "x y" pairs, one per line:
#   x = background records seen so far / NB
#   y = signal records seen so far     / NS
# A pair is printed before any record is read (0 0) and after every record
# whose label is SIGNAL or BACKGROUND. Records with any other label are
# skipped, so x and y stay within [0,1] when NS and NB are the label counts.
#
# TOL (default 0) controls interpolation:
#   TOL == 0  every point is printed as is;
#   TOL  > 0  a point closer than TOL to the previous one is printed as is,
#             otherwise 100 evenly spaced points are printed, walking from
#             the previous point up to and including the new one;
#   TOL  < 0  every point after the first is interpolated in that way.
#
# Labels and numbers are handed to awk through the environment instead of
# being spliced into the program text, so a label containing quotes or other
# awk syntax cannot break or alter the program.
#
# Returns non-zero, printing a message on stderr and nothing on stdout, when
# NS or NB is not a positive integer (the efficiencies would be undefined).
roc_points() {
  if ! [ "${4:-0}" -gt 0 ] 2>/dev/null || ! [ "${5:-0}" -gt 0 ] 2>/dev/null; then
    printf '%s: need at least one "%s" record and one "%s" record\n' \
      "${0##*/}" "$2" "$3" >&2
    return 1
  fi

  ROC_SIG="$2" ROC_BKG="$3" ROC_NS="$4" ROC_NB="$5" ROC_TOL="${6:-0}" awk '
    # Print the curve point for the current running totals, interpolating
    # from the previous point when the tolerance rule asks for it.
    function out(    i, steps, dx, dy, px, py) {
      x = tb / nb
      y = ts / ns
      dx = x - lastx
      dy = y - lasty
      if (first || tol == 0 || (tol > 0 && dx * dx + dy * dy < tol * tol)) {
        print x, y
        first = 0
      } else {
        steps = 100
        # Count i downwards so the points run from just after the previous
        # point to the new point, i.e. in the order the curve is traversed.
        for (i = steps - 1; i >= 0; --i) {
          px = (x * (steps - i) + lastx * i) / steps
          py = (y * (steps - i) + lasty * i) / steps
          print px, py
        }
      }
      lastx = x
      lasty = y
    }

    BEGIN {
      # Appending "" forces string comparison even for numeric-looking labels.
      s = ENVIRON["ROC_SIG"] ""
      b = ENVIRON["ROC_BKG"] ""
      ns = ENVIRON["ROC_NS"] + 0
      nb = ENVIRON["ROC_NB"] + 0
      tol = ENVIRON["ROC_TOL"] + 0
      ts = 0
      tb = 0
      first = 1
      out()
    }

    {
      if (($1 "") == s) {
        ++ts
      } else if (($1 "") == b) {
        ++tb
      } else {
        next
      }
      out()
    }
  ' < "$1"
}

# Totals used as the efficiency denominators.
ns=$(count_label "$af" "$s")
nb=$(count_label "$af" "$b")
export ns nb

# On failure roc_points has already reported the problem on stderr; execution
# simply continues with the statements that follow.
roc_points "$af" "$s" "$b" "$ns" "$nb" "$tol"

# Remove the scratch file. The path is quoted so that a temporary directory
# containing spaces or glob characters cannot split or expand it, and "--"
# stops a path beginning with "-" from being read as an option.
rm -f -- "$af"
