#!/usr/bin/perl -w
# p2n Testbed
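# the worker (p2n) reads a .phn phoneme file on standard input
# and writes the recognized words to stdout, one per line
# (output lines that start with # are ignored by this tester)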

# At least two arguments are required: the worker program and one test file.
# With fewer, print the usage text and stop.
unless ( @ARGV >= 2 ) {
  print <<'USAGE';
** Phoneme to Number Tester **
usage: p2nT worker file(s)
  worker is program to be tested
  . input is .phn file on standard in
  . output is one recognized word per line
  file(s) is a list of test files
example: p2nT p2n ~don/corpus/*
USAGE
  exit;
}

# The first argument is the program under test (the student's program);
# whatever remains on @ARGV is the list of test files.
$worker = shift @ARGV;

# every remaining argument on ARGV names a test file; each one is
# checked by consider() in the loop at the end of this file

# %vocab: the set of words the tester keeps when it normalizes a human
# transcript (see normalize); every other word is discarded before the
# comparison.  Besides number words it holds a few connectives such as
# "and", "dash", "double", "is", "it's" and "right".
#
# Built with map instead of "foreach $word qw(...) {": using qw() as the
# loop's parentheses is a syntax error since Perl 5.18.
%vocab = map { $_ => 1 } qw(
  and dash double eight eighteen eighth eighty eleven eleventh
  fifteen fifth fifty five forty four fourteen fourth hundred is
  it's nine nineteen ninety oh one right seven seventeen
  seventeenth seventh seventy six sixteen sixth sixty ten third
  thirteen thirteenth thirty thousand three twelve twenty two zero
);

# normalize TEXT
#   Reduce a human transcript to the canonical form used for comparison:
#   a string that starts with one space and contains every in-vocabulary
#   word of TEXT, in order, each followed by one space (" one two ").
#   A transcript with no vocabulary words yields " ".
#
#   TEXT - raw contents of a .txt transcript file
#   Reads the global %vocab; has no side effects.
sub normalize { # text
  my ( $txt ) = @_;
  $txt = " $txt ";
  # Line breaks and tabs separate words too.  Turn them into spaces first;
  # otherwise the character filter below deletes them and fuses the
  # neighbouring words ("one\ntwo" would become "onetwo").
  $txt =~ s/\s+/ /g;
  $txt =~ s/[.][a-z]+//g;     # drop a dot followed by lowercase letters (presumably markers like .br)
  $txt =~ s/[<][a-z]+[>]//g;  # drop <tag>-style annotations
  $txt =~ s/[^a-z' ]//g;      # keep only lowercase letters, apostrophes and spaces
  my $out = " ";
  foreach my $w ( split / +/, $txt ) {
    $out .= "$w " if defined $vocab{$w};
  }
  return $out;
}

# _slurp_text PATH
#   Return the whole content of PATH as one string.  If the file cannot be
#   opened, warn on STDERR and return "" so the run continues.
sub _slurp_text {
  my ( $path ) = @_;
  my $fh;
  unless ( open $fh, '<', $path ) {
    warn "cannot read $path: $!\n";
    return "";
  }
  local $/;    # undef record separator: read the file in one go
  my $text = <$fh>;
  close $fh;
  return defined $text ? $text : "";
}

# consider FILE
#   Test one utterance.  FILE may be any of the utterance's corpus files
#   (.phn, .txt, .wrd, .wav); the extension is stripped to get the base name
#   and each base name is tested only once.
#
#   Runs the global $worker with BASE.phn on standard input, joins its
#   non-comment output lines, and compares the result with the normalized
#   transcript in BASE.txt.  Prints "okay ..." on a match, otherwise "err"
#   followed by the phonemes, the expected and actual strings and the raw
#   worker output.
#
#   Updates the globals %done, $count (utterances tested) and $okay
#   (utterances passed).  Returns nothing useful.
sub consider {
  my ( $file ) = @_;

  # Strip one known extension.  The dot is escaped so that only a real
  # ".phn"/".txt"/".wrd"/".wav" suffix is removed.
  ( my $base = $file ) =~ s/\.(?:phn|txt|wrd|wav)$//;

  # Test each utterance once.  The stored index is 0 for the first
  # utterance, so test for existence rather than truth; a truth test
  # would let the first utterance be counted a second time.
  return if exists $done{$base};
  $done{$base} = $count++;

  # let the test program do its translation
  # $worker stays a shell command (it may carry arguments); only the file
  # name is quoted so that spaces or shell metacharacters in it are safe.
  ( my $quoted = "$base.phn" ) =~ s/'/'\\''/g;
  my $res0 = `$worker < '$quoted'`;
  $res0 = "" unless defined $res0;

  # ignore output lines that start with # (comments, intermediate results)
  my $res1 = " ";
  foreach my $line ( split /\n/, $res0 ) {
    next if $line =~ /^#/;
    $res1 .= "$line ";
  }

  # find out what the human transcriber did
  my $txt1 = normalize( _slurp_text("$base.txt") );
  if ( $res1 eq $txt1 ) {
    print "okay $base ($res1)\n";
    $okay++;
    return;
  }

  print "err  $base\n";
  # provide the phonemes for convenient comparison
  my $phons = " ";
  foreach my $line ( split /\n/, _slurp_text("$base.phn") ) {
    $line =~ s/[\r\n]+//;
    next if $line =~ /MillisecondsPerFrame:/;
    next if $line =~ /END OF HEADER/;
    # data lines look like "<number> <number> <phoneme>"
    if ( $line =~ /^\d+ \d+ (.*)/ ) {
      $phons .= "$1 ";
    }
    else {
      print "weird: ($line)\n";
    }
  }
  print "  #  phn: ($phons)\n";
  print "  #  tru: ($txt1)\n";
  print "  #  stu: ($res1)\n";
  print "###\n$res0\n###\n";
  return;
}

# Driver: reset the counters, run consider() on every file matched by each
# command-line argument, then report how many utterances passed.
$count = 0;
$okay  = 0;

foreach $arg (@ARGV) {
  foreach $file ( glob $arg ) {
    consider($file);
  }
}

# Percentage of passed utterances; 0 when nothing was tested.
$ave = $count ? 100 * $okay / $count : 0;

printf "score: %s/%s (%.1f%%)\n", $okay, $count, $ave;