SamSharp/Transitions.cs

using System;

using static SAMSharp.Sam;
using static SAMSharp.Renderer;

namespace SAMSharp
{
    /// <summary>
    /// Builds the linear transitions that smooth the per-frame pitch, frequency
    /// and amplitude tables across the boundary between consecutive phonemes.
    /// </summary>
    class Transitions
    {
        // CREATE TRANSITIONS
        //
        // Linear transitions are created to smoothly connect each phoneme to the
        // one that follows it. A transition is spread over the ending frames of
        // the current phoneme (phase1) and the beginning frames of the next
        // phoneme (phase2).
        //
        // To determine how many frames to use, the two phonemes are compared
        // using the blendRank[] table. The phoneme with the LARGER blendRank
        // value supplies both lengths. In case of a tie, each phoneme
        // contributes its own out blend length:
        //
        //      if blendRank[phoneme] == blendRank[next_phoneme]
        //          // use the out blend length of each phoneme
        //          phase1 = outBlendLength[phoneme]
        //          phase2 = outBlendLength[next_phoneme]
        //      else if blendRank[phoneme] < blendRank[next_phoneme]
        //          // use lengths from the next phoneme
        //          phase1 = inBlendLength[next_phoneme]
        //          phase2 = outBlendLength[next_phoneme]
        //      else
        //          // use lengths from the current phoneme
        //          // note that in and out are swapped around!
        //          phase1 = outBlendLength[phoneme]
        //          phase2 = inBlendLength[phoneme]
        //
        // A transition is only generated when phase1 + phase2 is at least 2.
        //
        // For the frequency and amplitude tables, the frames strictly between
        // (end - phase1) and (end + phase2) are rewritten, where "end" is the
        // running frame count at the end of the current phoneme.
        //
        // The exception is pitches[], which is interpolated over a span whose
        // width is half the current phoneme plus half the next phoneme.

        // Numeric ids used to select a frame table in Read/Write.
        private const byte PitchesTable = 168;
        private const byte Frequency1Table = 169;
        private const byte Frequency2Table = 170;
        private const byte Frequency3Table = 171;
        private const byte Amplitude1Table = 172;
        private const byte Amplitude2Table = 173;
        private const byte Amplitude3Table = 174;

        /// <summary>
        /// Reads one frame value from the table selected by a numeric id.
        /// </summary>
        /// <param name="p">Table id (168..174).</param>
        /// <param name="Y">Frame index within the table.</param>
        /// <returns>The stored value, or 0 when the id is not a known table.</returns>
        private static byte Read(byte p, byte Y)
        {
            switch (p)
            {
                case PitchesTable: return pitches[Y];
                case Frequency1Table: return frequency1[Y];
                case Frequency2Table: return frequency2[Y];
                case Frequency3Table: return frequency3[Y];
                case Amplitude1Table: return amplitude1[Y];
                case Amplitude2Table: return amplitude2[Y];
                case Amplitude3Table: return amplitude3[Y];
                default:
                    return 0;
            }
        }

        /// <summary>
        /// Writes one frame value into the table selected by a numeric id.
        /// Unknown ids are ignored.
        /// </summary>
        /// <param name="p">Table id (168..174).</param>
        /// <param name="Y">Frame index within the table.</param>
        /// <param name="value">Value to store.</param>
        private static void Write(byte p, byte Y, byte value)
        {
            switch (p)
            {
                case PitchesTable: pitches[Y] = value; return;
                case Frequency1Table: frequency1[Y] = value; return;
                case Frequency2Table: frequency2[Y] = value; return;
                case Frequency3Table: frequency3[Y] = value; return;
                case Amplitude1Table: amplitude1[Y] = value; return;
                case Amplitude2Table: amplitude2[Y] = value; return;
                case Amplitude3Table: amplitude3[Y] = value; return;
                default:
                    return;
            }
        }

        /// <summary>
        /// Linearly interpolates a table, starting from the value at
        /// <paramref name="frame"/> and rewriting the following
        /// <paramref name="width"/> - 1 frames.
        /// </summary>
        /// <param name="width">Number of frames the change is spread over.</param>
        /// <param name="table">Table id understood by Read/Write.</param>
        /// <param name="frame">Index of the starting frame; it is read, not written.</param>
        /// <param name="delta">
        /// Signed 8-bit difference between the end value and the start value.
        /// It must be a signed type: the sign selects whether the accumulated
        /// rounding error is applied upwards or downwards.
        /// </param>
        private static void Interpolate(byte width, byte table, byte frame, sbyte delta)
        {
            // A zero-frame span has nothing to fill in and would divide by zero
            // below. This can happen for the pitch span when both neighbouring
            // phonemes are shorter than two frames.
            if (width == 0)
            {
                return;
            }

            // All arithmetic here is deliberately 8-bit and wraps around.
            unchecked
            {
                bool falling = delta < 0;

                // Widen before Math.Abs: Math.Abs(sbyte) throws for -128.
                byte remainder = (byte)(Math.Abs((int)delta) % width);

                // Truncating signed division. A negative step is kept as its
                // 8-bit two's-complement value so that byte addition wraps to
                // the intended result.
                byte step = (byte)(delta / width);

                byte error = 0;
                byte val = (byte)(Read(table, frame) + step);

                for (byte remaining = (byte)(width - 1); remaining != 0; remaining--)
                {
                    error += remainder;
                    if (error >= width)
                    {
                        // accumulated a whole integer error, so adjust output
                        error -= width;
                        if (falling)
                        {
                            val--;
                        }
                        else if (val != 0)
                        {
                            // if the value is 0, we always leave it alone
                            val++;
                        }
                    }

                    Write(table, ++frame, val); // write updated value to the next frame
                    val += step;
                }
            }
        }

        /// <summary>
        /// Interpolates pitches[] across a phoneme boundary. Unlike the other
        /// tables, the span runs from the middle of the current phoneme to the
        /// middle of the next phoneme.
        /// </summary>
        /// <param name="pos">Index of the current phoneme in the output lists.</param>
        /// <param name="mem49">Running frame count at the end of the current phoneme.</param>
        /// <param name="phase3">Frame at which the interpolation starts.</param>
        private static void InterpolatePitch(byte pos, byte mem49, byte phase3)
        {
            // half the width of the current and next phoneme
            byte curWidth = (byte)(phonemeLengthOutput[pos] / 2);
            byte nextWidth = (byte)(phonemeLengthOutput[pos + 1] / 2);

            // sum the values
            byte width = (byte)(curWidth + nextWidth);

            // Signed 8-bit difference between the two mid-phoneme pitches.
            sbyte pitchDelta = unchecked((sbyte)(pitches[nextWidth + mem49] - pitches[mem49 - curWidth]));
            Interpolate(width, PitchesTable, phase3, pitchDelta);
        }

        /// <summary>
        /// Walks the phoneme output list and interpolates the pitch, frequency
        /// and amplitude tables across every phoneme boundary.
        /// </summary>
        /// <returns>
        /// The accumulated frame count including the last phoneme, truncated to
        /// 8 bits.
        /// </returns>
        public static byte CreateTransitions()
        {
            // Frame counters are deliberately 8-bit and wrap around.
            unchecked
            {
                byte mem49 = 0; // running frame count at the end of the current phoneme
                byte pos = 0;
                while (true)
                {
                    byte phase1;
                    byte phase2;

                    byte phoneme = phonemeIndexOutput[pos];
                    byte next_phoneme = phonemeIndexOutput[pos + 1];

                    if (next_phoneme == 255) break; // 255 == end_token

                    // get the ranking of each phoneme
                    byte next_rank = blendRank[next_phoneme];
                    byte rank = blendRank[phoneme];

                    // compare the ranks - the phoneme with the larger rank value
                    // supplies both blend lengths
                    if (rank == next_rank)
                    {
                        // same rank, so use the out blend length of each phoneme
                        phase1 = outBlendLength[phoneme];
                        phase2 = outBlendLength[next_phoneme];
                    }
                    else if (rank < next_rank)
                    {
                        // next phoneme has the larger rank, so use its blend lengths
                        phase1 = inBlendLength[next_phoneme];
                        phase2 = outBlendLength[next_phoneme];
                    }
                    else
                    {
                        // current phoneme has the larger rank, so use its blend
                        // lengths; note the out/in are swapped
                        phase1 = outBlendLength[phoneme];
                        phase2 = inBlendLength[phoneme];
                    }

                    mem49 += phonemeLengthOutput[pos];

                    byte speedcounter = (byte)(mem49 + phase2); // frame the transition ends on
                    byte phase3 = (byte)(mem49 - phase1);       // frame the transition starts on
                    byte transition = (byte)(phase1 + phase2);  // total transition length

                    // Skip transitions shorter than two frames. The bit-7 test
                    // also skips totals of 130 or more.
                    if (((transition - 2) & 128) == 0)
                    {
                        InterpolatePitch(pos, mem49, phase3);

                        // frequency1..3 and amplitude1..3 share the same span
                        for (byte table = Frequency1Table; table <= Amplitude3Table; table++)
                        {
                            // Signed 8-bit difference between end and start values.
                            sbyte value = (sbyte)(Read(table, speedcounter) - Read(table, phase3));
                            Interpolate(transition, table, phase3, value);
                        }
                    }
                    ++pos;
                }

                // add the length of the last phoneme
                return (byte)(mem49 + phonemeLengthOutput[pos]);
            }
        }
    }
}