#! /usr/bin/perl -w
# p2n Testbed
#
# Runs a worker program over a list of test files and scores its output
# against the human transcription.
#   . the worker receives <base>.phn on standard input
#   . the worker's stdout (minus lines starting with '#') is its answer
#   . the reference answer comes from <base>.txt
# Both answers are reduced to the number vocabulary below before comparing.

use strict;
use warnings;

if ( @ARGV < 2 ) {
    print "** Phoneme to Number Tester **\n";
    print "usage: nDriver worker file(s)\n";
    print " worker is program to be tested\n";
    print " . input is .phn file on standard in\n";
    print " . output is one recognized word per line\n";
    print " file(s) is a list of test files\n";
    print "example: nDriver Don1 ~don/corpus/*\n";
    exit;
}

my $worker = shift @ARGV;    # the student's program (run through the shell)

# Set to 1 to be asked, after each mismatch, whether to move the utterance's
# files to numbers/err/.  Off by default, as in the original script.
my $ASK_KEEP_OR_MOVE = 0;

my %vocab;       # word => 1 for every word that takes part in the comparison
my %done;        # base name => 1 once a file set has been considered
my $okay  = 0;   # number of utterances the worker got right
my $count = 0;   # number of utterances scored

# important words
# (the list must be wrapped in real parentheses: using qw() itself as the
#  foreach parentheses is a syntax error since perl 5.18)
foreach my $word (
    qw(
    a and double eight eighteen eighteenth eighth eightieth eighty eleven
    eleventh fifteen fifteenth fifth fiftieth fifty first five fortieth
    forty four fourteen fourteenth fourth half hundred hundredth nine
    nineteen nineteenth ninetieth ninety ninth oh one second seven
    seventeen seventeenth seventh seventieth seventy six sixteen sixteenth
    sixth sixtieth sixty ten tenth third thirteen thirteenth thirtieth
    thirty thousand three triple twelfth twelve twentieth twenty two zero
    )
  )
{
    $vocab{$word} = 1;
}

# # maybe words
# foreach my $word (qw(
#     dash hyphen number
#     area code
#     north south east west
#     avenue road street
#     o'clock
# )) { $vocab{$word} = 1 }

# slurp ( path ) -> whole file contents as one string.
# Returns "" (after a warning on stderr) when the file cannot be opened.
sub slurp {
    my ($path) = @_;
    my $fh;
    if ( !open( $fh, '<', $path ) ) {
        warn "cannot read $path: $!\n";
        return "";
    }
    local $/;    # no record separator: read everything in one go
    my $content = <$fh>;
    close $fh;
    return defined $content ? $content : "";
}

# shell_quote ( string ) -> the string wrapped in single quotes so that the
# shell passes it through literally (embedded single quotes are escaped).
sub shell_quote {
    my ($str) = @_;
    $str =~ s/'/'\\''/g;
    return "'$str'";
}

# normalize ( text ) -> " word word word " or "".
# Newlines become spaces, ".xx" markers are deleted, <xx> tags become spaces.
# If anything other than a-z, apostrophe or space is left, the utterance is
# rejected and "" is returned.  Otherwise the result holds only the words
# present in %vocab, each followed by one space, after a leading space
# (so an utterance with no vocabulary words yields " ").
sub normalize {
    my ($txt) = @_;
    $txt =~ s/\n/ /g;
    $txt = " $txt ";
    $txt =~ s/[.][a-z]+//g;    # .bn .ls

    # NOTE(review): the original had one more substitution here, commented
    # "(background speech)", whose pattern text was lost: it read `s// /g`
    # with no terminating semicolon and did not compile.  Presumably it
    # targeted one specific <..> tag; any lowercase <tag> is replaced by
    # the generic substitution below -- confirm against the original source.
    $txt =~ s/<[a-z]+>/ /g;    # replace <[a-z]+> with space

    # delete any utterance that (still) has < > [ ] * etc.
    return "" if $txt =~ /[^a-z' ]/;

    $txt =~ s/ +/ /g;
    my $out = " ";
    foreach my $w ( split / +/, $txt ) {
        $out .= "$w " if defined $vocab{$w};
    }
    return $out;               # format is: " word word word "
}

# phonemes ( base ) -> " p1 p2 ... " taken from <base>.phn.
# Header lines (MillisecondsPerFrame:, END OF HEADER) are skipped; every
# other line must look like "<digits> <digits> <label>", else it is reported
# on stdout as weird and skipped.
sub phonemes {
    my ($base) = @_;
    my $phons = " ";
    foreach my $line ( split /\n/, slurp("$base.phn") ) {
        $line =~ s/[\r\n]+//;
        next if $line =~ /MillisecondsPerFrame:/;
        next if $line =~ /END OF HEADER/;
        if ( $line !~ /^(\d+) (\d+) (.*)/ ) {
            print "weird: ($line)\n";
            next;
        }
        $phons .= "$3 ";
    }
    return $phons;
}

# consider ( file ) -- score one utterance.
# The file name is reduced to its base (a .phn/.txt/.wrd/.wav suffix is
# removed), so the sibling files of one utterance are handled only once.
# Updates $count / $okay and prints one result line, plus diagnostics on a
# mismatch.
sub consider {
    my ($file) = @_;

    my $base = $file;
    # strip the known suffixes, in the same order as always; the dot is
    # escaped so that only a literal ".phn" etc. is removed
    foreach my $ext (qw( phn txt wrd wav )) {
        $base =~ s/\.$ext$//;
    }

    return if -e "$base.p2nX";    # marked as impossible
    return if $done{$base};
    $done{$base} = 1;

    # print "doing $base\n";

    # find out what the human transcriber did
    my $truth = normalize( slurp("$base.txt") );
    if ( $truth eq "" ) {
        # Unusable transcription: not scored.  The phoneme file is still
        # scanned so that malformed lines keep being reported as "weird".
        my $phons = phonemes($base);
        # print " # phn: ($phons) SKIP\n";
        return;
    }

    # let the test program do its translation
    my $phn_path = shell_quote("$base.phn");
    my $res0     = `$worker < $phn_path`;
    $res0 = "" unless defined $res0;

    # ignore output lines that start with # (debug lines)
    my $res1 = " ";
    foreach my $line ( split /\n/, $res0 ) {
        next if $line =~ /^#/;
        $res1 .= "$line ";
    }
    $res1 = normalize($res1);

    $count++;    # count this one

    if ( $res1 eq $truth ) {
        $okay++;
        my $ave = $count ? 100 * $okay / $count : 0;
        my $score = sprintf "%4d/%-4d (%.1f%%)", $okay, $count, $ave;
        print "$score okay $base ($res1)\n";
        return;
    }

    print "err $base\n";

    # provide the phonemes for convenient comparison
    my $phons = phonemes($base);
    print " # phn: ($phons)\n";
    print " # tru: ($truth)\n";
    print " # stu: ($res1)\n";
    print "###\n$res0\n###\n";

    return unless $ASK_KEEP_OR_MOVE;    # unless you want Keep/Move

    print "Keep or Move [Km]: ";
    my $ans = <STDIN>;
    $ans = "" unless defined $ans;      # end of input counts as "keep"
    chomp $ans;
    if ( $ans eq "m" ) {
        print "moving $base to numbers/err/\n";
        my $quoted = shell_quote($base);
        print `mv $quoted.* numbers/err`;
    }
    return;
}

# foreach file mentioned on ARGV do the conversion
foreach my $arg (@ARGV) {
    foreach my $file ( glob $arg ) {
        consider($file);
    }
}

my $ave = $count ? 100 * $okay / $count : 0;
printf "score: $okay/$count (%.1f%%)\n", $ave;