use strict;
use warnings;

# Transliterates ASCII phonetic spellings found inside <trans ...>...</trans>
# sections of the input (files named on the command line, or STDIN).
#
# Flags (leading arguments that start with "-"):
#   -IPA    render sections with the IPA table (default)
#   -K      render sections with the K table
#   -K2     render sections with the K2 table
#   -SAMPA  emit "pho" lines (phone, duration, pitch targets) using the
#           SAMPA table instead of the transliterated document
#
# NOTE: there is deliberately no "use utf8" here: the table values are
# treated as raw bytes and printed back out unchanged.
#
# The settings below stay package variables (our) because trans() reads
# them as globals.

our $option_pho   = 0;        # Print pho output
our $option_rest  = 1;        # Print the text around the sections as well
our $option_trans = "IPA";
our $long_mult    = 1.25;     # How much longer to make long vowels

# Consume every leading "-flag" argument; unrecognised flags are dropped
# silently, exactly as before.
while (@ARGV && substr($ARGV[0], 0, 1) eq "-") {
    my $flag = shift @ARGV;
    if    ($flag eq "-K")     { $option_trans = "K"; }
    elsif ($flag eq "-K2")    { $option_trans = "K2"; }
    elsif ($flag eq "-IPA")   { $option_trans = "IPA"; }
    elsif ($flag eq "-SAMPA") { $option_pho   = 1; }
}

our %IPA_trans = (
    9  => "ŋ",
    th => "θ",    dh => "ð",    sh => "ʃ",    zh => "ʒ",
    hh => "h",    ch => "tʃ",   jh => "dʒ",
    y  => "j",    yh => "ɣ",    gh => "x",    wh => "wh",
    iy => "i",    ih => "ɪ",    eh => "ɛ",    ae => "æ",
    ax => "ə",    ah => "ʌ",    aa => "ɑ",    ao => "ɔ",
    oh => "o",    uh => "ʊ",    uw => "u",    er => "ɚ",
    em => "m",    en => "n",    el => "l",
    ay => "aɪ",   ey => "eɪ",   ow => "əʊ",   aw => "aʊ",
    oy => "ɔɪ",   ia => "ɪə",   ea => "eə",   ua => "ʊə",
    ei => "əɪ",   iw => "[iw]", ew => "[ew]", uy => "[uy]",
);

our %K_trans = (
    9  => "ŋ",
    th => "θ",    dh => "ð",    sh => "ʃ",    zh => "ʒ",
    hh => "h",    ch => "tʃ",   jh => "dʒ",
    y  => "j",    yh => "x",    gh => "x",    wh => "hw",
    iy => "i:",   ih => "i",    eh => "e",    ae => "a",
    ax => "ə",    ah => "ʌ",    aa => "ɑ",    ao => "ɔ:",
    oh => "o",    uh => "u",    uw => "u:",   er => "ə:",
    em => "m",    en => "n",    el => "l",
    ay => "æi",   ey => "e:",   ow => "ɔu",   aw => "ɑu",
    oy => "ɔi",   ia => "ia",   ea => "ea",   ua => "ua",
    ei => "əɪ",   iw => "iu",   ew => "ɛu",   uy => "ui",
);

our %K2_trans = (
    9  => "ŋ",
    th => "θ",    dh => "ð",    sh => "ʃ",    zh => "ʒ",
    hh => "h",    ch => "tʃ",   jh => "dʒ",
    y  => "j",    yh => "x",    gh => "x",    wh => "hw",
    iy => "i",    ih => "ɪ",    eh => "ɛ",    ae => "æ",
    ax => "ə",    ah => "ʌ",    aa => "ɑ",    ao => "ɔ",
    oh => "o",    uh => "ʊ",    uw => "u",    er => "ɜ:",
    em => "m",    en => "n",    el => "l",
    ay => "æi",   ey => "e",    ow => "əʊ",   aw => "ɑu",
    oy => "ɔi",   ia => "ia",   ea => "ea",   ua => "ua",
    ei => "əɪ",   iw => "iu",   ew => "ɛu",   uy => "ui",
);

# A value containing a space expands to several phones in pho output.
our %SAMPA_trans = (
    9  => "N",
    th => "T",    dh => "D",    sh => "S",    zh => "Z",
    hh => "h",    ch => "tS",   jh => "dZ",
    y  => "j",
    yh => "j",      # "G",
    gh => "k_h",    # "x",
    wh => "h w",    # "wh",
    iy => "i",    ih => "I",    eh => "E",    ae => "{",
    ax => "\@",   ah => "V",    aa => "A",    ao => "O",
    oh => "\@U",    # "o",
    uh => "U",    uw => "u",    er => "r=",
    em => "\@ m", en => "\@ n", el => "\@ l",
    ay => "AI",     # "aI",
    ey => "EI",   ow => "\@U",  aw => "aU",   oy => "OI",
    ia => "i \@", ea => "E \@", ua => "U \@", ei => "\@ I",
    iw => "I u",  ew => "E u",  uy => "U i",
);

# Active table consulted by trans(); pho output always starts from SAMPA
# and suppresses the surrounding document text.
our %trans;
if ($option_pho) {
    %trans       = %SAMPA_trans;
    $option_rest = 0;
}
elsif ($option_trans eq "K") {
    %trans       = %K_trans;
    $option_rest = 1;
}
elsif ($option_trans eq "K2") {
    %trans       = %K2_trans;
    $option_rest = 1;
}
elsif ($option_trans eq "IPA") {
    %trans       = %IPA_trans;
    $option_rest = 1;
}

# Durations and pitch values written to pho lines, chosen by the mark that
# precedes a phone: "'" selects long, "," selects mid, space/newline/"."
# selects short.
our $dur_short   = 75;
our $dur_long    = 200;
our $dur_mid     = 100;
our $pitch_short = 100;
our $pitch_mid   = 102;
our $pitch_long  = 103;

# Pitch-target specs for the section currently being rendered, one entry
# per vowel phone; filled by the main loop, consumed by trans().
our @f0;

# trans($text)
#
# Transliterates one section of ASCII phonetic spelling.
#
# Reads the globals %trans, @f0, $option_pho, $long_mult, $dur_* and
# $pitch_*.
#
# Returns:
#   - when $option_pho is false: the text with each phone code replaced by
#     its %trans entry (codes without an entry become lower-case again),
#     and, if the text has no <br>, "<br>" inserted before every newline;
#   - when $option_pho is true: pho lines "<phone> <duration> [<pitch
#     targets>]", bracketed by "_ 10 50 100" silence lines.
#
# Defined ahead of the main loop so the ($) prototype is known at the call.
sub trans($) {
    my $str = defined $_[0] ? $_[0] : "";

    # Drop parenthesised remarks.
    $str =~ s/\([^\)]+\)//g;

    # Tokenise: wrap every phone code in [UPPER-CASE] brackets.  Each
    # "1 while" pass rewrites only the leftmost match and then rescans from
    # the start; upper-casing is what keeps a token from matching again.
    $str =~ s/hh([^h])/[HH]$1/g;
    1 while $str =~ s/(.h)/"[" . uc($1) . "]"/e;          # th, sh, zh, ...
    1 while $str =~ s/([aeiou].)/"[" . uc($1) . "]"/e;    # two-letter vowels
    $str =~ s/9/[9]/g;
    1 while $str =~ s/([pbtdkgmnfvszlwyr])/"[" . uc($1) . "]"/e;

    # Parallel arrays, one slot per output phone.
    my (@out, @dur, @phone, @pitch);

    # Current prosody; it persists from phone to phone until another
    # leading mark changes it.
    my $dur   = $dur_short;
    my $pitch = $pitch_short;

    while ($str =~ /([\'\,\. \n]*)\[([A-Z0-9]+)\]([^\[]*)/) {
        my ($lead, $code, $tail) = ($1, lc($2), $3);

        # Remember the matched span now: the tests below run more regexes.
        my $from = $-[0];
        my $len  = $+[0] - $-[0];

        if ($lead =~ /\'/) {
            $dur   = $dur_long;
            $pitch = $pitch_long;
        }
        elsif ($lead =~ /\,/) {
            $dur   = $dur_mid;
            $pitch = $pitch_mid;
        }
        elsif ($lead =~ /[ \n\.]/) {
            $dur   = $dur_short;
            $pitch = $pitch_short;
        }

        # Extend natural duration
        # NOTE(review): this scales the persistent $dur, so the lengthening
        # carries over (and compounds) until the next leading mark resets
        # it -- confirm that is intended.
        if ($tail =~ /\:/) {
            $dur *= $long_mult;
        }

        my $sym = $trans{$code} ? $trans{$code} : $code;

        # Replace "[CODE]" by its transliteration, keeping the text around it.
        substr($str, $from, $len, "$lead$sym$tail");

        # If there is more than one phone, push each separately and share
        # the duration between them; non-vowels always get the short one.
        my @parts = split(/\s+/, $sym);
        my $total = ($code =~ /[aeiouAEIOU]/) ? $dur : $dur_short;
        for my $part (@parts) {
            push @out,   $part;
            push @phone, $code;
            push @pitch, "50 $pitch";
            push @dur,   $total / scalar(@parts);
        }

        # Every "|" after the phone adds one pause.
        while ($tail =~ /\|/g) {
            push @out,   "_";
            push @dur,   $dur_long;
            push @phone, "_";
            push @pitch, "50 $pitch";
        }
    }

    # NOTE(review): the "<br>" tokens were missing from the received text
    # (a literal line break stood in their place, making this a no-op);
    # restored on the assumption that the output is HTML -- confirm.
    if ($str !~ /<br>/) {
        $str =~ s/\n/<br>\n/g;
    }

    # Put in the pitches, if available
    my $next_f0 = 0;
    for my $i (0 .. $#out) {
        if ($phone[$i] =~ /[aeiou]/) {
            if ($next_f0 <= $#f0) {
                my $spec = $f0[$next_f0];
                $spec =~ s/[^0-9,]//g;
                my @targets = split(/\,/, $spec);

                # Spread the targets over the phone, in percent of its
                # duration.
                my $step = int(100 / (scalar(@targets) + 1));
                my $at   = $step / 2;
                $pitch[$i] = "";
                for my $hz (@targets) {
                    $pitch[$i] .= " $at $hz";
                    $at += $step;
                }
                $next_f0++;
            }
        }
        # NOTE(review): without brackets this matches only the literal
        # string "ptkfscx", so it never fires and every non-vowel ends up
        # without pitch targets.  A character class was probably meant;
        # left as is because changing it alters the pho output.
        elsif ($phone[$i] =~ /ptkfscx/) {
            $pitch[$i] = "50 $pitch_short";
        }
        else {
            $pitch[$i] = "";
        }
    }

    # Adjust durations of the syllables prior to a pause: the nearest
    # preceding vowel is raised to at least the mid duration.
    for my $i (1 .. $#out) {
        next unless $out[$i] =~ /^\_/;
        for (my $j = $i - 1; $j >= 0; $j--) {
            if ($phone[$j] =~ /[aeiou]/) {
                $dur[$j] = $dur_mid if ($dur[$j] < $dur_mid);
                last;
            }
        }
    }

    my $out = "_ 10 50 100\n";
    for my $i (0 .. $#out) {
        if ($pitch[$i]) {
            $out .= "$out[$i] $dur[$i] $pitch[$i]\n";
        }
        else {
            $out .= "$out[$i] $dur[$i]\n";
        }
    }
    $out .= "_ 10 50 100\n";
    $out =~ s/ +/ /g;

    return $option_pho ? $out : $str;
}

if ($option_pho) {
    print ";;T=1.1\n";
    print "_ 300 5 100\n";
}

# Read everything at once
my $all = join("", <>);

# Split up into translated sections
# NOTE(review): the opening "<trans ...>" part of this pattern was missing
# from the received text (only one capture group was left although both $1
# and $2 were read); restored from the attribute checks below -- confirm.
my @sections;
while ($all =~ m{<trans\b([^>]*)>(.*?)</trans>}is) {
    my ($attrs, $body) = ($1, $2);
    my $from = $-[0];
    my $len  = $+[0] - $-[0];

    # A section may choose its own table.  K2 is tested before K because
    # /ortho=K/ also matches "ortho=K2".
    # NOTE(review): this replaces the SAMPA table in pho mode as well.
    if ($attrs =~ /ortho=IPA/i) {
        %trans = %IPA_trans;
    }
    elsif ($attrs =~ /ortho=K2/i) {
        %trans = %K2_trans;
    }
    elsif ($attrs =~ /ortho=K/i) {
        %trans = %K_trans;
    }

    if ($option_pho) {
        $body =~ s/\<[^\>]*\>//g;

        # Collect this section's bracketed pitch annotations.  The list is
        # restarted per section because trans() consumes it from index 0 on
        # every call, and /g is required so the loop advances past each
        # annotation instead of re-matching the first one forever.
        @f0 = ();
        while ($body =~ /\[([^\]]+)\]/g) {
            my $spec = $1;
            $spec =~ s/[^0-9;,]+//;
            push @f0, split(/[\;]+/, $spec);
        }
    }
    $body =~ s/\[.*?\]//sg;

    my $rendered = trans($body);
    push @sections, $rendered;

    # Put the rendering in place of the whole <trans>...</trans> element.
    substr($all, $from, $len, $rendered);
}

if ($option_rest) {
    print $all;
}
else {
    print for @sections;
}