204 lines
7.6 KiB
C#
204 lines
7.6 KiB
C#
using System;
|
|
|
|
using static SAMSharp.Sam;
|
|
using static SAMSharp.Renderer;
|
|
|
|
namespace SAMSharp
|
|
{
|
|
class Transitions
|
|
{
|
|
// CREATE TRANSITIONS
|
|
//
|
|
// Linear transitions are now created to smoothly connect each
|
|
// phoneme. This transition is spread between the ending frames
|
|
// of the old phoneme (outBlendLength), and the beginning frames
|
|
// of the new phoneme (inBlendLength).
|
|
//
|
|
// To determine how many frames to use, the two phonemes are
// compared using the blendRank[] table. The phoneme with the
// larger score supplies both lengths. In case of a tie, the out
// blend length of each phoneme is used:
//
//      if blendRank[phoneme1] == blendRank[phoneme2]
//          // use the out blend length from each phoneme
//          outBlendFrames = outBlendLength[phoneme1]
//          inBlendFrames = outBlendLength[phoneme2]
//      else if blendRank[phoneme1] < blendRank[phoneme2]
//          // use lengths from the second phoneme
//          // note that in and out are swapped around!
//          outBlendFrames = inBlendLength[phoneme2]
//          inBlendFrames = outBlendLength[phoneme2]
//      else
//          // use lengths from the first phoneme
//          outBlendFrames = outBlendLength[phoneme1]
//          inBlendFrames = inBlendLength[phoneme1]
|
|
//
|
|
// Blend lengths can't be less than zero.
|
|
//
|
|
// For most of the parameters, SAM interpolates over the range of the last
|
|
// outBlendFrames-1 and the first inBlendFrames.
|
|
//
|
|
// The exception to this is the Pitch[] parameter, which interpolates the
|
|
// pitch from the center of the current phoneme to the center of the next
|
|
// phoneme.
|
|
|
|
//written by me because of different table positions.
|
|
// mem[47] = ...
|
|
// 168=pitches
|
|
// 169=frequency1
|
|
// 170=frequency2
|
|
// 171=frequency3
|
|
// 172=amplitude1
|
|
// 173=amplitude2
|
|
// 174=amplitude3
|
|
/// <summary>
/// Reads one value from the parameter table selected by <paramref name="p"/>.
/// </summary>
/// <param name="p">
/// Table id: 168 = pitches, 169..171 = frequency1..3, 172..174 = amplitude1..3.
/// </param>
/// <param name="Y">Index into the selected table.</param>
/// <returns>
/// The value stored at <paramref name="Y"/>, or 0 when <paramref name="p"/>
/// is not one of the known table ids.
/// </returns>
static byte Read(byte p, byte Y)
{
    if (p == 168) return pitches[Y];
    if (p == 169) return frequency1[Y];
    if (p == 170) return frequency2[Y];
    if (p == 171) return frequency3[Y];
    if (p == 172) return amplitude1[Y];
    if (p == 173) return amplitude2[Y];
    if (p == 174) return amplitude3[Y];

    // Unknown table id: nothing to read.
    return 0;
}
|
|
|
|
/// <summary>
/// Stores <paramref name="value"/> in the parameter table selected by
/// <paramref name="p"/>.
/// </summary>
/// <param name="p">
/// Table id: 168 = pitches, 169..171 = frequency1..3, 172..174 = amplitude1..3.
/// Any other id is silently ignored.
/// </param>
/// <param name="Y">Index into the selected table.</param>
/// <param name="value">Value to store.</param>
static void Write(byte p, byte Y, byte value)
{
    if (p == 168) pitches[Y] = value;
    else if (p == 169) frequency1[Y] = value;
    else if (p == 170) frequency2[Y] = value;
    else if (p == 171) frequency3[Y] = value;
    else if (p == 172) amplitude1[Y] = value;
    else if (p == 173) amplitude2[Y] = value;
    else if (p == 174) amplitude3[Y] = value;
    // Unknown table id: nothing is written.
}
|
|
|
|
|
|
/// <summary>
/// Linearly interpolates one parameter table across a transition.
/// Starting from the value stored at <paramref name="frame"/>, the next
/// <c>width - 1</c> frames of the table are overwritten with values that
/// step towards <c>start + mem53</c>, distributing the division remainder
/// evenly across the frames.
/// </summary>
/// <param name="width">Number of frames the transition spans. Zero is a no-op.</param>
/// <param name="table">Table id understood by Read/Write (168..174).</param>
/// <param name="frame">Index of the frame that holds the starting value.</param>
/// <param name="mem53">
/// Difference between the end and the start value. Callers produce it by casting
/// an int difference to <c>char</c>; only its low 8 bits are used, interpreted as
/// a signed value.
/// </param>
static void interpolate(byte width, byte table, byte frame, char mem53)
{
    // A zero-width transition has no frames to fill, and the modulo and
    // division below would otherwise throw DivideByZeroException.
    if (width == 0) return;

    // All arithmetic here relies on 8-bit wrap-around, so make that explicit
    // instead of depending on the project's checked/unchecked setting.
    unchecked
    {
        // C# char is an unsigned 16-bit type, so "mem53 < 0" can never be true
        // and a negative difference shows up as a huge positive number.
        // Reinterpret the low 8 bits as a signed byte to recover the real delta.
        sbyte delta = (sbyte)mem53;

        bool sign = delta < 0;
        // Widen to int first: Math.Abs(sbyte) throws for -128.
        byte remainder = (byte)(Math.Abs((int)delta) % width);
        // Integer step per frame; a negative step wraps into a byte on purpose.
        byte div = (byte)(delta / width);

        byte error = 0;
        byte pos = width;
        byte val = (byte)(Read(table, frame) + div);

        while (--pos != 0)
        {
            error += remainder;
            if (error >= width)
            {
                // accumulated a whole integer error, so adjust output
                error -= width;
                if (sign) val--;
                else if (val != 0) val++; // if input is 0, we always leave it alone
            }
            Write(table, ++frame, val); // Write updated value back to next frame.
            val += div;
        }
    }
}
|
|
|
|
/// <summary>
/// Interpolates the pitches[] table (table id 168) for one phoneme boundary.
/// Unlike the other tables, the pitch difference is taken between the midpoint
/// of the current phoneme and the midpoint of the next one.
/// </summary>
/// <param name="pos">Index of the current phoneme in phonemeLengthOutput.</param>
/// <param name="mem49">Frame position the two half-widths are measured from.</param>
/// <param name="phase3">Frame at which the interpolation starts writing.</param>
static void interpolate_pitch(byte pos, byte mem49, byte phase3)
{
    // Half the length of the current phoneme and of the one that follows it.
    byte halfCurrent = (byte)(phonemeLengthOutput[pos] / 2);
    byte halfNext = (byte)(phonemeLengthOutput[pos + 1] / 2);

    // The transition spans both halves.
    byte span = (byte)(halfCurrent + halfNext);

    // Pitch sampled half a phoneme after and half a phoneme before mem49.
    int endIndex = halfNext + mem49;
    int startIndex = mem49 - halfCurrent;
    char delta = (char)(pitches[endIndex] - pitches[startIndex]);

    interpolate(span, 168, phase3, delta);
}
|
|
|
|
|
|
/// <summary>
/// Walks every pair of consecutive phonemes in phonemeIndexOutput and, for each
/// boundary, interpolates the pitch, frequency and amplitude tables across the
/// frames on either side of it. The walk stops when the next phoneme is the
/// end token (255).
/// </summary>
/// <returns>
/// The running sum of phonemeLengthOutput for the phonemes processed, plus the
/// length of the phoneme at the final position, truncated to a byte.
/// </returns>
public static byte CreateTransitions()
{
    byte boundary = 0; // running sum of phoneme lengths (8-bit)
    byte pos = 0;

    for (; ; ++pos)
    {
        byte current = phonemeIndexOutput[pos];
        byte next = phonemeIndexOutput[pos + 1];

        if (next == 255) break; // 255 == end_token

        // Look up the blend rank of both phonemes.
        byte nextRank = blendRank[next];
        byte currentRank = blendRank[current];

        // Frames blended before the boundary (outFrames) and after it (inFrames).
        byte outFrames;
        byte inFrames;
        if (currentRank < nextRank)
        {
            // Both lengths come from the next phoneme, in/out swapped.
            outFrames = inBlendLength[next];
            inFrames = outBlendLength[next];
        }
        else if (currentRank > nextRank)
        {
            // Both lengths come from the current phoneme.
            outFrames = outBlendLength[current];
            inFrames = inBlendLength[current];
        }
        else
        {
            // Same rank: take the out blend length of each phoneme.
            outFrames = outBlendLength[current];
            inFrames = outBlendLength[next];
        }

        boundary += phonemeLengthOutput[pos];

        byte lastFrame = (byte)(boundary + inFrames);
        byte firstFrame = (byte)(boundary - outFrames);
        byte span = (byte)(outFrames + inFrames);

        // Nothing to blend unless bit 7 of (span - 2) is clear.
        if (((span - 2) & 128) != 0) continue;

        // Table 168 (pitches) uses its own midpoint-to-midpoint interpolation.
        interpolate_pitch(pos, boundary, firstFrame);

        // Tables 169..174: frequency1..3 followed by amplitude1..3.
        for (byte table = 169; table < 175; table++)
        {
            char delta = (char)(Read(table, lastFrame) - Read(table, firstFrame));
            interpolate(span, table, firstFrame, delta);
        }
    }

    // add the length of this phoneme
    return (byte)(boundary + phonemeLengthOutput[pos]);
}
|
|
}
|
|
}
|