# Run-time options (package globals, also read by the rest of the script).
$option_pho   = 0;      # 1: print pho output instead of the marked-up text
$option_rest  = 1;      # 1: print the whole document, not just the translations
$option_trans = "IPA";  # Name of the transcription table: "IPA", "K" or "K2"
$long_mult    = 1.25;   # How much longer to make long vowels

# parse_options()
#
# Consume the leading "-..." switches from @ARGV and record them in the
# option globals above.  Recognised switches:
#   -IPA    use the IPA table (the default)
#   -K      use the K table
#   -K2     use the K2 table
#   -SAMPA  print pho output
# Any other switch is dropped without complaint.  Parsing stops at the first
# argument that does not start with "-"; those arguments stay in @ARGV.
sub parse_options
{
    # Test @ARGV first so that an empty argument list is never indexed.
    while (@ARGV && substr($ARGV[0], 0, 1) eq "-")
    {
        my $switch = shift @ARGV;
        if    ($switch eq "-K")     { $option_trans = "K"; }
        elsif ($switch eq "-K2")    { $option_trans = "K2"; }
        elsif ($switch eq "-IPA")   { $option_trans = "IPA"; }
        elsif ($switch eq "-SAMPA") { $option_pho = 1; }
    }
}

parse_options();
# Translation tables: ASCII phone code => output symbol(s).
# All four tables share the same 39 keys.  A value containing spaces is split
# into several phones when pho output is produced.

# Table selected by -IPA (the default) and by ortho=IPA.
%IPA_trans = (
    # consonants
    '9'  => 'ŋ',
    'th' => 'θ',
    'dh' => 'ð',
    'sh' => 'ʃ',
    'zh' => 'ʒ',
    'hh' => 'h',
    'ch' => 'tʃ',
    'jh' => 'dʒ',
    'y'  => 'j',
    'yh' => 'ɣ',
    'gh' => 'x',
    'wh' => 'wh',
    # simple vowels
    'iy' => 'i',
    'ih' => 'ɪ',
    'eh' => 'ɛ',
    'ae' => 'æ',
    'ax' => 'ə',
    'ah' => 'ʌ',
    'aa' => 'ɑ',
    'ao' => 'ɔ',
    'oh' => 'o',
    'uh' => 'ʊ',
    'uw' => 'u',
    'er' => 'ɚ',
    # presumably syllabic consonants
    'em' => 'm',
    'en' => 'n',
    'el' => 'l',
    # vowel combinations
    'ay' => 'aɪ',
    'ey' => 'eɪ',
    'ow' => 'əʊ',
    'aw' => 'aʊ',
    'oy' => 'ɔɪ',
    'ia' => 'ɪə',
    'ea' => 'eə',
    'ua' => 'ʊə',
    'ei' => 'əɪ',
    'iw' => '[iw]',
    'ew' => '[ew]',
    'uy' => '[uy]',
);

# Table selected by -K and by ortho=K.
%K_trans = (
    # consonants
    '9'  => 'ŋ',
    'th' => 'θ',
    'dh' => 'ð',
    'sh' => 'ʃ',
    'zh' => 'ʒ',
    'hh' => 'h',
    'ch' => 'tʃ',
    'jh' => 'dʒ',
    'y'  => 'j',
    'yh' => 'x',
    'gh' => 'x',
    'wh' => 'hw',
    # simple vowels
    'iy' => 'i:',
    'ih' => 'i',
    'eh' => 'e',
    'ae' => 'a',
    'ax' => 'ə',
    'ah' => 'ʌ',
    'aa' => 'ɑ',
    'ao' => 'ɔ:',
    'oh' => 'o',
    'uh' => 'u',
    'uw' => 'u:',
    'er' => 'ə:',
    # presumably syllabic consonants
    'em' => 'm',
    'en' => 'n',
    'el' => 'l',
    # vowel combinations
    'ay' => 'æi',
    'ey' => 'e:',
    'ow' => 'ɔu',
    'aw' => 'ɑu',
    'oy' => 'ɔi',
    'ia' => 'ia',
    'ea' => 'ea',
    'ua' => 'ua',
    'ei' => 'əɪ',
    'iw' => 'iu',
    'ew' => 'ɛu',
    'uy' => 'ui',
);

# Table selected by ortho=K2 (and when $option_trans is "K2").
%K2_trans = (
    # consonants
    '9'  => 'ŋ',
    'th' => 'θ',
    'dh' => 'ð',
    'sh' => 'ʃ',
    'zh' => 'ʒ',
    'hh' => 'h',
    'ch' => 'tʃ',
    'jh' => 'dʒ',
    'y'  => 'j',
    'yh' => 'x',
    'gh' => 'x',
    'wh' => 'hw',
    # simple vowels
    'iy' => 'i',
    'ih' => 'ɪ',
    'eh' => 'ɛ',
    'ae' => 'æ',
    'ax' => 'ə',
    'ah' => 'ʌ',
    'aa' => 'ɑ',
    'ao' => 'ɔ',
    'oh' => 'o',
    'uh' => 'ʊ',
    'uw' => 'u',
    'er' => 'ɜ:',
    # presumably syllabic consonants
    'em' => 'm',
    'en' => 'n',
    'el' => 'l',
    # vowel combinations
    'ay' => 'æi',
    'ey' => 'e',
    'ow' => 'əʊ',
    'aw' => 'ɑu',
    'oy' => 'ɔi',
    'ia' => 'ia',
    'ea' => 'ea',
    'ua' => 'ua',
    'ei' => 'əɪ',
    'iw' => 'iu',
    'ew' => 'ɛu',
    'uy' => 'ui',
);

# Table used for pho output (-SAMPA).  The trailing comments keep the
# alternative symbols noted by the original author.
%SAMPA_trans = (
    # consonants
    '9'  => 'N',
    'th' => 'T',
    'dh' => 'D',
    'sh' => 'S',
    'zh' => 'Z',
    'hh' => 'h',
    'ch' => 'tS',
    'jh' => 'dZ',
    'y'  => 'j',
    'yh' => 'j',      # "G",
    'gh' => 'k_h',    # "x",
    'wh' => 'h w',    # "wh",
    # simple vowels
    'iy' => 'i',
    'ih' => 'I',
    'eh' => 'E',
    'ae' => '{',
    'ax' => '@',
    'ah' => 'V',
    'aa' => 'A',
    'ao' => 'O',
    'oh' => '@U',     # "o",
    'uh' => 'U',
    'uw' => 'u',
    'er' => 'r=',
    # presumably syllabic consonants, written as two phones
    'em' => '@ m',
    'en' => '@ n',
    'el' => '@ l',
    # vowel combinations
    'ay' => 'AI',     # "aI",
    'ey' => 'EI',
    'ow' => '@U',
    'aw' => 'aU',
    'oy' => 'OI',
    'ia' => 'i @',
    'ea' => 'E @',
    'ua' => 'U @',
    'ei' => '@ I',
    'iw' => 'I u',
    'ew' => 'E u',
    'uy' => 'U i',
);
# Phone durations and pitch targets for the three settings used by trans().
# NOTE(review): units are not stated here -- presumably milliseconds and Hz.
($dur_short,   $dur_mid,   $dur_long)   = (75,  100, 200);
($pitch_short, $pitch_mid, $pitch_long) = (100, 102, 103);

# Pick the active translation table.  Pho output always uses the SAMPA table
# and prints only the translated sections; otherwise the table named by
# $option_trans is used and the whole document is printed.  An unknown
# table name leaves %trans and $option_rest untouched.
my %table_for = (
    "K"   => \%K_trans,
    "K2"  => \%K2_trans,
    "IPA" => \%IPA_trans,
);

if ($option_pho)
{
    %trans       = %SAMPA_trans;
    $option_rest = 0;
}
elsif (my $table = $table_for{$option_trans})
{
    %trans       = %$table;
    $option_rest = 1;
}

# Pho output starts with a fixed two-line header.
print ";;T=1.1\n", "_ 300 5 100\n" if ($option_pho);
# Read everything at once (every file named on the command line, or STDIN).
$all = join("", <>);

# Split up into translated sections.  Each <trans ...>...</trans> element is
# replaced inside $all by its translation; the translations are also collected
# in @trans so that they can be printed on their own.
# NOTE(review): the opening "<trans(.*?)>" part of the pattern was missing,
# although the loop reads both $1 (attributes) and $2 (body) -- confirm that
# this is the intended markup.
@trans = ();
while ($all =~ /<trans(.*?)>(.*?)<\/trans>/is)
{
    # Save offsets and captures before any other regex overwrites them.
    my ($start, $end)  = ($-[0], $+[0]);
    my ($attrs, $body) = ($1, $2);

    # An ortho= attribute switches the display table from this section on.
    # K2 is tested before K because /ortho=K/ also matches "ortho=K2".
    # Pho output keeps the SAMPA table whatever the attribute says.
    unless ($option_pho)
    {
        if ($attrs =~ /ortho=IPA/i)
        {
            %trans = %IPA_trans;
        }
        elsif ($attrs =~ /ortho=K2/i)
        {
            %trans = %K2_trans;
        }
        elsif ($attrs =~ /ortho=K/i)
        {
            %trans = %K_trans;
        }
    }

    if ($option_pho)
    {
        # Drop embedded tags, then collect the pitch annotations written as
        # [v,v;v,v;...]: one ";"-separated entry per vowel.
        $body =~ s/\<[^\>]*\>//g;

        # trans() starts reading @f0 at index 0 on every call, so the list
        # must hold this section's values only.
        @f0 = ();

        # /g is required: without it the loop would match the first
        # annotation over and over.
        while ($body =~ /\[([^\]]+)\]/g)
        {
            my $spec = $1;
            $spec =~ s/[^0-9;,]+//g;
            push @f0, split(/[\;]+/, $spec);
        }
    }

    # The annotations are not part of the transcription itself.
    $body =~ s/\[.*?\]//sg;

    my $translated = trans($body);
    push @trans, $translated;

    # Splice the translation in place of the whole <trans> element.
    substr($all, $start, $end - $start, $translated);
}

if ($option_rest)
{
    print $all;
}
else
{
    print join("", @trans);
}
# trans($text)
#
# Translate one transcription section written in the ASCII phone notation.
#
# Notation handled here:
#   - a phone is "hh", any character followed by "h", a vowel letter followed
#     by one more character, "9", or one of the single consonant letters;
#   - "'" in front of a phone selects the long duration/pitch setting, ","
#     the mid one, and a space, newline or "." the short one; the setting
#     stays in force until the next such marker;
#   - ":" after a phone multiplies the current duration by $long_mult;
#   - every "|" after a phone adds a pause ("_") of $dur_long;
#   - "(...)" remarks are removed.
#
# Globals read: %trans, $option_pho, $long_mult, $dur_short/_mid/_long,
# $pitch_short/_mid/_long and @f0 (one "v,v,..." pitch entry per vowel).
#
# Returns, when $option_pho is set, the pho lines "phone duration [pos pitch
# ...]" enclosed by two "_ 10 50 100" lines; otherwise the text with every
# phone replaced by its %trans entry (phones without an entry are kept in
# lower case).
sub trans($)
{
    my $str = defined $_[0] ? $_[0] : "";

    # --- Step 1: wrap every phone in [UPPERCASE] brackets ------------------
    # The loops rescan from the start after each replacement; upper-casing
    # the match is what keeps it from being matched again.
    $str =~ s/\([^\)]+\)//g;
    $str =~ s/hh([^h])/[HH]$1/g;
    while ($str =~ /(.h)/) { $str = "$`\[" . uc($1) . "]$'"; }
    while ($str =~ /([aeiou].)/) { $str = "$`\[" . uc($1) . "]$'"; }
    $str =~ s/9/[9]/g;
    while ($str =~ /([pbtdkgmnfvszlwyr])/) { $str = "$`\[" . uc($1) . "]$'"; }

    # --- Step 2: translate phone by phone, left to right -------------------
    my @out   = ();             # output symbol of each phone
    my @dur   = ();             # its duration
    my @phone = ();             # the ASCII code it came from ("_" for a pause)
    my @pitch = ();             # its "position pitch" pairs
    my $dur   = $dur_short;
    my $pitch = $pitch_short;

    while ($str =~ /([\'\,\. \n]*)\[([A-Z0-9]+)\]([^\[]*)/)
    {
        # Copy everything out of the match variables before the tests below
        # run further regexes.
        my ($head, $tail)   = ($`, $');
        my ($bef, $x, $aft) = ($1, lc($2), $3);

        if ($bef =~ /\'/)
        {
            $dur   = $dur_long;
            $pitch = $pitch_long;
        }
        elsif ($bef =~ /\,/)
        {
            $dur   = $dur_mid;
            $pitch = $pitch_mid;
        }
        elsif ($bef =~ /[ \n\.]/)
        {
            $dur   = $dur_short;
            $pitch = $pitch_short;
        }

        # Extend natural duration
        $dur *= $long_mult if ($aft =~ /\:/);

        my $y = $trans{$x} ? $trans{$x} : $x;
        $str = "$head$bef$y$aft$tail";

        # If there is more than one phone, push each separately and share
        # the duration between them.  Only vowels use the current duration;
        # everything else is always short.
        my @parts = split(/\s+/, $y);
        my $whole = ($x =~ /[aeiouAEIOU]/) ? $dur : $dur_short;
        for my $part (@parts)
        {
            push @out,   $part;
            push @phone, $x;
            push @pitch, "50 $pitch";
            push @dur,   $whole / scalar(@parts);
        }

        while ($aft =~ /\|/g)
        {
            push @out,   "_";
            push @dur,   $dur_long;
            push @phone, "_";
            push @pitch, "50 $pitch";
        }
    }

    # Mark line ends for display unless the text already carries <br> tags.
    # NOTE(review): the "<br>" literal was reconstructed from a garbled
    # pattern that could never match -- confirm the intended tag.
    if ($str !~ /<br>/) { $str =~ s/\n/<br>\n/g; }

    # The remaining steps only shape the pho output.
    return $str unless $option_pho;

    # --- Step 3: put in the pitches, if available --------------------------
    my $next_f0 = 0;
    for my $i (0 .. $#out)
    {
        if ($phone[$i] =~ /[aeiou]/)
        {
            # Vowels take the next @f0 entry; once the entries run out the
            # default "50 <pitch>" stays.
            next if ($next_f0 > $#f0);
            my $spec = $f0[$next_f0++];
            $spec =~ s/[^0-9,]//g;
            my @targets = split(/\,/, $spec);

            # Spread the targets evenly over the phone (positions in percent).
            my $step = int(100 / ($#targets + 2));
            my $pos  = $step / 2;
            $pitch[$i] = "";
            for my $target (@targets)
            {
                $pitch[$i] .= " $pos $target";
                $pos += $step;
            }
        }
        elsif ($phone[$i] =~ /[ptkfscx]/)
        {
            # A character class is needed here: the bare /ptkfscx/ only
            # matched that exact seven-letter string, i.e. never.
            $pitch[$i] = "50 $pitch_short";
        }
        else
        {
            $pitch[$i] = "";
        }
    }

    # --- Step 4: adjust durations of the syllables prior to a pause --------
    for my $i (1 .. $#out)
    {
        next unless ($out[$i] =~ /^\_/);
        for (my $k = $i - 1; $k >= 0; $k--)
        {
            next unless ($phone[$k] =~ /[aeiou]/);
            $dur[$k] = $dur_mid if ($dur[$k] < $dur_mid);
            last;
        }
    }

    # --- Step 5: assemble the pho lines ------------------------------------
    my $pho = "_ 10 50 100\n";
    for my $i (0 .. $#out)
    {
        $pho .= $pitch[$i] ? "$out[$i] $dur[$i] $pitch[$i]\n"
                           : "$out[$i] $dur[$i]\n";
    }
    $pho .= "_ 10 50 100\n";
    $pho =~ s/ +/ /g;
    return $pho;
}