#!/usr/bin/perl -w
# p2n Testbed
# p2n input is a .phn phoneme file on standard input
# p2n output is one line to stdout

# The harness needs the worker program plus at least one test file;
# with fewer arguments it prints the usage text and stops.
unless ( scalar(@ARGV) >= 2 ) {
  print join( "",
    "** Phoneme to Number Tester **\n",
    "usage: p2nT worker file(s)\n",
    "  worker is program to be tested\n",
    "  . input is .phn file on standard in\n",
    "  . output is one recognized word per line\n",
    "  file(s) is a list of test files\n",
    "example: p2nT p2n ~don/corpus/*\n",
  );
  exit;
}

# The first argument is the program under test (the student's program);
# whatever remains in @ARGV is the list of test files.
( $worker, @ARGV ) = @ARGV;

# foreach file mentioned on ARGV do the conversion

# normalize TEXT
#   Reduce a transcript to the canonical form used for comparison:
#   lowercase letters, apostrophes and single spaces only, with exactly
#   one leading and one trailing space (an empty transcript gives " ").
#   Returns the normalized string; an undefined TEXT is treated as "".
sub normalize { # text
  my ( $txt ) = @_;
  $txt = "" unless defined $txt;   # avoid undef warnings under -w
  $txt = " $txt ";                 # guarantee the framing spaces
  $txt =~ s/[.][a-z]+//g;          # drop a dot followed by lowercase letters
  $txt =~ s/[<][a-z]+[>]//g;       # drop <lowercase> tags
  # Newlines, tabs and CRs separate words; turn them into spaces first so
  # the character filter below does not glue neighbouring words together.
  $txt =~ s/\s/ /g;
  $txt =~ s/[^a-z' ]//g;           # keep only letters, apostrophes, spaces
  $txt =~ s/ +/ /g;                # collapse runs of spaces
  return $txt;
}

# _shell_quote TEXT
#   Return TEXT wrapped in single quotes so the shell passes it through
#   as one literal word (embedded single quotes are escaped).
sub _shell_quote { # text
  my ( $text ) = @_;
  $text =~ s/'/'\\''/g;
  return "'$text'";
}

# _slurp PATH
#   Return the whole content of PATH as one string.  When the file cannot
#   be opened, a warning goes to stderr and "" is returned.
sub _slurp { # path
  my ( $path ) = @_;
  my $fh;
  if ( !open( $fh, '<', $path ) ) {
    warn "p2nT: cannot read $path: $!\n";
    return "";
  }
  local $/;   # no record separator: read the file in one go
  my $content = <$fh>;
  close $fh;
  return defined $content ? $content : "";
}

# consider FILE
#   Test the worker on the corpus entry FILE belongs to.  The entry's base
#   name is FILE without a trailing .phn/.txt/.wrd/.wav; each base name is
#   tested only once no matter how many of its files are passed in.
#   Runs "$worker < BASE.phn", joins its non-comment output lines with
#   spaces, and compares that with the normalized content of BASE.txt.
#   Prints an "okay" line on a match, otherwise an "err" report with the
#   phonemes, the expected text and the worker's output.
#   Reads $worker; updates %done, $count and $okay.
sub consider {
  my ( $file ) = @_;
  my $base = $file;
  # The dot is escaped so only a real extension is removed.
  $base =~ s/\.phn$//;
  $base =~ s/\.txt$//;
  $base =~ s/\.wrd$//;
  $base =~ s/\.wav$//;
  # Test for existence, not truth: the first entry is stored as 0.
  return if ( exists $done{$base} );
  $done{$base} = $count++;
  # print "doing $base\n";
  # let the test program do its translation; the file name is quoted so
  # spaces or shell metacharacters in it are taken literally
  my $phn_arg = _shell_quote( "$base.phn" );
  my $res0 = `$worker < $phn_arg`;
  $res0 = "" unless defined $res0;   # backticks give undef if nothing ran
  # ignore output lines that start with # (comments, intermediate results)
  my $res1 = " ";
  foreach my $line ( split /\n/, $res0 ) {
    next if ( $line =~ /^#/ );
    $res1 .= "$line ";
  }
  # find out what the human transcriber did
  my $txt1 = normalize( _slurp( "$base.txt" ) );
  if ( $res1 eq $txt1 ) { print "okay $base ($res1)\n"; $okay++; return }
  print "err  $base\n";
  # provide the phonemes for convenient comparison
  my $phons = " ";
  foreach my $line ( split /\n/, _slurp( "$base.phn" ) ) {
    $line =~ s/[\r\n]+//;
    next if ( $line =~ /MillisecondsPerFrame:/ );
    next if ( $line =~ /END OF HEADER/ );
    # data lines are "<number> <number> <label>"; only the label is shown
    if ( $line =~ /^(\d+) (\d+) (.*)/ ) {
      $phons .= "$3 ";
    }
    else {
      print "weird: ($line)\n";
    }
  }
  print "  #  phn: ($phons)\n";
  print "  #  tru: ($txt1)\n";
  print "  #  stu: ($res1)\n";
  print "###\n$res0\n###\n";
}

# Run every test file named on the command line, then report the score.
$okay = 0; $count = 0;
foreach my $arg (@ARGV) {
  # A path that already exists (e.g. expanded by the shell) is used as is:
  # glob() would split it at spaces and reinterpret metacharacters.
  # Anything else is treated as a pattern and expanded here.
  foreach my $file ( -e $arg ? ( $arg ) : glob( $arg ) ) { consider( $file ) }
}

# Percentage of entries whose output matched; 0 when nothing was tested.
$ave = 0; if ( $count ) { $ave = 100 * $okay / $count }

printf "score: %d/%d (%.1f%%)\n", $okay, $count, $ave;