SamSharp/Transitions.cs

205 lines
7.6 KiB
C#

using System;
using static SAMSharp.Sam;
using static SAMSharp.Renderer;
namespace SAMSharp
{
class Transitions
{
// CREATE TRANSITIONS
//
// Linear transitions are now created to smoothly connect each
// phoneme. This transition is spread between the ending frames
// of the old phoneme (outBlendLength), and the beginning frames
// of the new phoneme (inBlendLength).
//
// To determine how many frames to use, the two phonemes are
// compared using the blendRank[] table. The phoneme with the
// larger blendRank value supplies both lengths. In case of a tie,
// the out-blend length of each phoneme is used:
//
// if blendRank[phoneme1] == blendRank[phoneme2]
// // use lengths from each phoneme
// outBlendFrames = outBlendLength[phoneme1]
// inBlendFrames = outBlendLength[phoneme2]
// else if blendRank[phoneme1] > blendRank[phoneme2]
// // use lengths from first phoneme
// outBlendFrames = outBlendLength[phoneme1]
// inBlendFrames = inBlendLength[phoneme1]
// else
// // use lengths from the second phoneme
// // note that in and out are swapped around!
// outBlendFrames = inBlendLength[phoneme2]
// inBlendFrames = outBlendLength[phoneme2]
//
// Blend lengths can't be less than zero.
//
// For most of the parameters, SAM interpolates over the range of the last
// outBlendFrames-1 and the first inBlendFrames.
//
// The exception to this is the Pitch[] parameter, which is interpolated
// from the center of the current phoneme to the center of the next
// phoneme.
// The Read/Write helpers below were written because of different table positions.
// mem[47] = ...
// 168=pitches
// 169=frequency1
// 170=frequency2
// 171=frequency3
// 172=amplitude1
// 173=amplitude2
// 174=amplitude3
// Returns entry Y of the table selected by id p.
//
// p: table id, 168 (pitches) through 174 (amplitude3).
// Y: index into the selected table.
// Any other id yields 0.
static byte Read(byte p, byte Y)
{
    byte result = 0; // value reported for an unknown table id

    switch (p)
    {
        case 168:
            result = pitches[Y];
            break;
        case 169:
            result = frequency1[Y];
            break;
        case 170:
            result = frequency2[Y];
            break;
        case 171:
            result = frequency3[Y];
            break;
        case 172:
            result = amplitude1[Y];
            break;
        case 173:
            result = amplitude2[Y];
            break;
        case 174:
            result = amplitude3[Y];
            break;
    }

    return result;
}
// Stores value at entry Y of the table selected by id p.
//
// p:     table id, 168 (pitches) through 174 (amplitude3).
// Y:     index into the selected table.
// value: byte to store.
// Any other id is ignored and nothing is written.
static void Write(byte p, byte Y, byte value)
{
    switch (p)
    {
        case 168:
            pitches[Y] = value;
            break;
        case 169:
            frequency1[Y] = value;
            break;
        case 170:
            frequency2[Y] = value;
            break;
        case 171:
            frequency3[Y] = value;
            break;
        case 172:
            amplitude1[Y] = value;
            break;
        case 173:
            amplitude2[Y] = value;
            break;
        case 174:
            amplitude3[Y] = value;
            break;
        default:
            // unknown table id: nothing to do
            break;
    }
}
// linearly interpolate values
// Linearly ramps one parameter table over a run of frames.
//
// width: number of frames the ramp spans; width - 1 frames are written.
// table: table id as understood by Read/Write (168..174).
// frame: frame the ramp starts from. It is only read; writing begins at
//        frame + 1.
// mem53: total change to spread across the ramp. Callers build it as
//        (char)(target - start), so a negative change arrives as a large
//        unsigned value and is decoded below.
static void interpolate(byte width, byte table, byte frame, char mem53)
{
    // No frames to spread the change over; this also keeps the division and
    // modulo below from dividing by zero.
    if (width == 0)
    {
        return;
    }

    // The byte arithmetic in this method relies on wrap-around, so make that
    // explicit instead of depending on the project's overflow-check setting.
    unchecked
    {
        // char is an unsigned 16-bit type in C#, so "mem53 < 0" is never
        // true. Reinterpreting the same 16 bits as a signed short restores
        // the sign of the byte difference the caller computed.
        int delta = (short)mem53;

        bool sign = delta < 0;
        byte remainder = (byte)(Math.Abs(delta) % width);
        // For a negative delta this wraps (e.g. -1 becomes 255), which makes
        // the byte additions below act as subtractions.
        byte div = (byte)(delta / width);
        byte error = 0;
        byte pos = width;
        byte val = (byte)(Read(table, frame) + div);

        while (--pos != 0)
        {
            error += remainder;
            if (error >= width)
            {
                // accumulated a whole integer error, so adjust output
                error -= width;
                if (sign)
                {
                    val--;
                }
                else if (val != 0)
                {
                    // if input is 0, we always leave it alone
                    val++;
                }
            }

            Write(table, ++frame, val); // write updated value to the next frame
            val += div;
        }
    }
}
// Ramps the pitch table between the midpoints of two neighbouring phonemes.
// Unlike the other tables, the pitch ramp is measured from the middle of the
// current phoneme to the middle of the next one.
//
// pos:    index of the current phoneme in phonemeLengthOutput.
// mem49:  frame index the two half-lengths are measured from.
// phase3: starting frame handed on to interpolate().
static void interpolate_pitch(byte pos, byte mem49, byte phase3)
{
    // half the length of the current phoneme and of the one after it
    byte halfCurrent = (byte)(phonemeLengthOutput[pos] / 2);
    byte halfNext = (byte)(phonemeLengthOutput[pos + 1] / 2);

    // the ramp covers both halves
    byte span = (byte)(halfCurrent + halfNext);

    // pitch at the far end of the ramp, then at its near end
    int endPitch = pitches[halfNext + mem49];
    int startPitch = pitches[mem49 - halfCurrent];
    char delta = (char)(endPitch - startPitch);

    // 168 is the table id of pitches[]
    interpolate(span, 168, phase3, delta);
}
// Walks the phoneme output list and, at every boundary between two phonemes,
// interpolates the pitch, frequency and amplitude tables across it.
//
// Returns the running frame total plus the length of the phoneme the walk
// stopped on, truncated to a byte.
public static byte CreateTransitions()
{
    byte frameTotal = 0; // running sum of phoneme lengths, kept as a byte
    byte index = 0;      // position in the phoneme output list

    for (;;)
    {
        byte current = phonemeIndexOutput[index];
        byte following = phonemeIndexOutput[index + 1];

        // 255 is the end token: no further boundary to blend
        if (following == 255)
        {
            break;
        }

        // look up the blend rank of both phonemes
        byte followingRank = blendRank[following];
        byte currentRank = blendRank[current];

        byte framesBefore; // blend frames taken before the boundary
        byte framesAfter;  // blend frames taken after the boundary
        if (currentRank > followingRank)
        {
            // current phoneme has the larger rank value: both lengths come
            // from it (out length before the boundary, in length after)
            framesBefore = outBlendLength[current];
            framesAfter = inBlendLength[current];
        }
        else if (currentRank < followingRank)
        {
            // next phoneme has the larger rank value: both lengths come
            // from it (in length before the boundary, out length after)
            framesBefore = inBlendLength[following];
            framesAfter = outBlendLength[following];
        }
        else
        {
            // equal ranks: each phoneme contributes its own out length
            framesBefore = outBlendLength[current];
            framesAfter = outBlendLength[following];
        }

        frameTotal += phonemeLengthOutput[index];

        byte rampEnd = (byte)(frameTotal + framesAfter);
        byte rampStart = (byte)(frameTotal - framesBefore);
        byte rampLength = (byte)(framesBefore + framesAfter);

        // Bit 7 of (rampLength - 2) is clear only for lengths 2..129, so
        // shorter and longer transitions are skipped.
        bool shouldBlend = ((rampLength - 2) & 0x80) == 0;
        if (shouldBlend)
        {
            interpolate_pitch(index, frameTotal, rampStart);

            // table ids 169..174: frequency1-3 followed by amplitude1-3
            for (byte table = 169; table < 175; table++)
            {
                char delta = (char)(Read(table, rampEnd) - Read(table, rampStart));
                interpolate(rampLength, table, rampStart, delta);
            }
        }

        ++index;
    }

    // add the length of the phoneme the loop stopped on
    return (byte)(frameTotal + phonemeLengthOutput[index]);
}
}
}