#!/usr/bin/perl -w
# p2n Testbed
#
# Runs a worker program over a set of test utterances and scores it.
#   p2n input  is "phonemes" (a .phn file) on standard input
#   p2n output is one recognized word per line on stdout
#
# For every distinct base name found among the file arguments
# (foo.phn, foo.txt, foo.wrd and foo.wav all share the base "foo"),
# the worker is fed foo.phn and its output is compared against the
# normalized human transcription in foo.txt.

use strict;
use warnings;

if ( @ARGV < 2 ) {
    print "** Phoneme to Number Tester **\n";
    print "usage: p2nT worker file(s)\n";
    print " worker is program to be tested\n";
    print " . input is .phn file on standard in\n";
    print " . output is one recognized word per line\n";
    print " file(s) is a list of test files\n";
    print "example: p2nT p2n ~don/corpus/*\n";
    exit;
}

my $worker = shift @ARGV;    # the student's program (may include arguments)

my %done;                    # base names already tested
my $count = 0;               # number of distinct utterances tested
my $okay  = 0;               # number of utterances the worker got right

# normalize ( text ) -> text
# Reduce a transcription to the canonical form used for comparison:
# a leading and trailing blank, words separated by single blanks.
#   - tokens of the form ".abc" and "<abc>" are removed
#   - every character other than a-z, apostrophe and blank is removed
#     (this includes newlines, digits and punctuation)
# NOTE(review): uppercase letters are deleted rather than lowercased;
# presumably the transcriptions are already lowercase -- confirm.
sub normalize {
    my ($txt) = @_;
    $txt = " $txt ";
    $txt =~ s/[.][a-z]+//g;
    $txt =~ s/[<][a-z]+[>]//g;
    $txt =~ s/[^a-z' ]//g;
    $txt =~ s/ +/ /g;
    return $txt;
}

# _shell_quote ( word ) -> word
# Single-quote a word for a POSIX shell so that file names containing
# blanks or metacharacters survive interpolation into a command line.
sub _shell_quote {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# _slurp ( path ) -> contents
# Return the whole file as one string.  An unreadable file yields a
# warning on stderr and an empty string, so one missing file does not
# abort the whole test run.
sub _slurp {
    my ($path) = @_;
    my $fh;
    if ( !open $fh, '<', $path ) {
        warn "cannot read $path: $!\n";
        return '';
    }
    local $/;    # slurp mode
    my $content = <$fh>;
    close $fh;
    return defined $content ? $content : '';
}

# consider ( file )
# Test the utterance that the given file belongs to, unless that
# utterance was already tested.  Updates %done, $count and $okay and
# prints either an "okay" line or an "err" report to stdout.
sub consider {
    my ($file) = @_;

    # Strip the known extensions; the dot is escaped so that only a
    # literal ".phn" etc. is removed.
    my $base = $file;
    $base =~ s/\.phn$//;
    $base =~ s/\.txt$//;
    $base =~ s/\.wrd$//;
    $base =~ s/\.wav$//;

    # Use exists, not truth: the first utterance is stored with the
    # value 0 and would otherwise be tested (and counted) again.
    return if exists $done{$base};
    $done{$base} = $count++;

    # print "doing $base\n";

    # let the test program do its translation
    my $phn_arg = _shell_quote("$base.phn");
    my $res0    = `$worker < $phn_arg`;
    $res0 = '' unless defined $res0;

    # ignore output lines that start with # (comments, intermediate results)
    my $res1 = " ";
    foreach my $line ( split /\n/, $res0 ) {
        next if $line =~ /^#/;
        $res1 .= "$line ";
    }

    # find out what the human transcriber did
    my $txt1 = normalize( _slurp("$base.txt") );

    if ( $res1 eq $txt1 ) {
        print "okay $base ($res1)\n";
        $okay++;
        return;
    }

    print "err $base\n";

    # provide the phonemes for convenient comparison
    my $phons = " ";
    foreach my $line ( split /\n/, _slurp("$base.phn") ) {
        $line =~ s/[\r\n]+//;
        next if $line =~ /MillisecondsPerFrame:/;
        next if $line =~ /END OF HEADER/;
        if ( $line !~ /^(\d+) (\d+) (.*)/ ) {
            print "weird: ($line)\n";
            next;
        }
        $phons .= "$3 ";    # third field is the phoneme label
    }
    print " # phn: ($phons)\n";
    print " # tru: ($txt1)\n";
    print " # stu: ($res1)\n";
    print "###\n$res0\n###\n";
    return;
}

# foreach file mentioned on ARGV do the conversion
foreach my $arg (@ARGV) {

    # A path that exists is used literally (glob would split it on
    # blanks); anything else is expanded as a pattern, e.g. ~don/corpus/*
    my @files = -e $arg ? ($arg) : glob $arg;
    foreach my $file (@files) {
        consider($file);
    }
}

my $ave = 0;
if ($count) { $ave = 100 * $okay / $count }
printf "score: %d/%d (%.1f%%)\n", $okay, $count, $ave;