using System;
using static SAMSharp.Sam;
using static SAMSharp.Renderer;

namespace SAMSharp
{
    class Transitions
    {
        // CREATE TRANSITIONS
        //
        // Linear transitions are created to smoothly connect each phoneme.
        // A transition is spread between the ending frames of the old phoneme
        // (phase1 frames) and the beginning frames of the new phoneme
        // (phase2 frames).
        //
        // To determine how many frames to use, the two phonemes are compared
        // using the blendRank[] table. In case of a tie, a blend of each is
        // used; otherwise both lengths are taken from a single phoneme:
        //
        //      if blendRank[phoneme1] == blendRank[phoneme2]
        //          // use the out blend length from each phoneme
        //          phase1 = outBlendLength[phoneme1]
        //          phase2 = outBlendLength[phoneme2]
        //      else if blendRank[phoneme1] < blendRank[phoneme2]
        //          // use lengths from the second phoneme
        //          phase1 = inBlendLength[phoneme2]
        //          phase2 = outBlendLength[phoneme2]
        //      else
        //          // use lengths from the first phoneme
        //          // note that in and out are swapped around!
        //          phase1 = outBlendLength[phoneme1]
        //          phase2 = inBlendLength[phoneme1]
        //
        // Blend lengths can't be less than zero.
        //
        // For most of the parameters, SAM interpolates over the range of the
        // last phase1 frames of the old phoneme and the first phase2 frames of
        // the new one.
        //
        // The exception to this is the pitches[] parameter, which is
        // interpolated from the center of the current phoneme to the center of
        // the next phoneme.

        // written by me because of different table positions.
        // mem[47] = ...
        // 168=pitches
        // 169=frequency1
        // 170=frequency2
        // 171=frequency3
        // 172=amplitude1
        // 173=amplitude2
        // 174=amplitude3

        /// <summary>
        /// Reads entry <paramref name="Y"/> of the table selected by
        /// <paramref name="p"/> (168..174, see the list above).
        /// </summary>
        /// <returns>The table entry, or 0 when <paramref name="p"/> is not a known table id.</returns>
        static byte Read(byte p, byte Y)
        {
            switch (p)
            {
                case 168: return pitches[Y];
                case 169: return frequency1[Y];
                case 170: return frequency2[Y];
                case 171: return frequency3[Y];
                case 172: return amplitude1[Y];
                case 173: return amplitude2[Y];
                case 174: return amplitude3[Y];
                default: return 0;
            }
        }

        /// <summary>
        /// Stores <paramref name="value"/> into entry <paramref name="Y"/> of the
        /// table selected by <paramref name="p"/> (168..174). Unknown table ids
        /// are ignored.
        /// </summary>
        static void Write(byte p, byte Y, byte value)
        {
            switch (p)
            {
                case 168: pitches[Y] = value; return;
                case 169: frequency1[Y] = value; return;
                case 170: frequency2[Y] = value; return;
                case 171: frequency3[Y] = value; return;
                case 172: amplitude1[Y] = value; return;
                case 173: amplitude2[Y] = value; return;
                case 174: amplitude3[Y] = value; return;
                default: return;
            }
        }

        /// <summary>
        /// Linearly interpolates a table: starting from the value stored at
        /// <paramref name="frame"/>, writes <c>width - 1</c> values into the
        /// following frames, distributing the signed change
        /// <paramref name="mem53"/> over <paramref name="width"/> steps.
        /// </summary>
        /// <param name="width">Number of steps the change is spread over.</param>
        /// <param name="table">Table id understood by Read/Write (168..174).</param>
        /// <param name="frame">Index of the frame holding the start value; writing begins at the next frame.</param>
        /// <param name="mem53">
        /// Signed 8-bit difference between end and start value. This must be a
        /// signed type: a C# <c>char</c> is an unsigned 16-bit value, so negative
        /// differences would never be seen as negative.
        /// </param>
        static void interpolate(byte width, byte table, byte frame, sbyte mem53)
        {
            // Nothing to interpolate over; also avoids dividing by zero below.
            if (width == 0)
            {
                return;
            }

            // All arithmetic here is deliberately 8-bit wrap-around.
            unchecked
            {
                bool sign = mem53 < 0;

                // Widen before Abs so that -128 does not overflow.
                byte remainder = (byte)(Math.Abs((int)mem53) % width);

                // Integer step per frame; a negative step wraps to its
                // two's-complement byte so that adding it subtracts.
                byte div = (byte)(mem53 / width);

                byte error = 0;
                byte pos = width;
                byte val = (byte)(Read(table, frame) + div);

                while (--pos != 0)
                {
                    error += remainder;
                    if (error >= width)
                    {
                        // accumulated a whole integer error, so adjust output
                        error -= width;
                        if (sign)
                        {
                            val--;
                        }
                        else if (val != 0)
                        {
                            // if input is 0, we always leave it alone
                            val++;
                        }
                    }

                    // Write updated value back to next frame.
                    Write(table, ++frame, val);
                    val += div;
                }
            }
        }

        /// <summary>
        /// Interpolates pitches[] for the transition that starts at
        /// <paramref name="phase3"/>.
        /// </summary>
        /// <param name="pos">Index of the current phoneme in phonemeLengthOutput.</param>
        /// <param name="mem49">Frame position accumulated by CreateTransitions, including the current phoneme's length.</param>
        /// <param name="phase3">Frame at which the interpolated values start.</param>
        static void interpolate_pitch(byte pos, byte mem49, byte phase3)
        {
            // unlike the other values, the pitches[] interpolates from
            // the middle of the current phoneme to the middle of the
            // next phoneme

            // half the width of the current and next phoneme
            byte cur_width = (byte)(phonemeLengthOutput[pos] / 2);
            byte next_width = (byte)(phonemeLengthOutput[pos + 1] / 2);

            // sum the values
            byte width = (byte)(cur_width + next_width);

            // NOTE(review): both indices are computed as int and are not wrapped
            // to 8 bits; confirm pitches[] covers next_width + mem49 and that
            // mem49 >= cur_width for every input.
            // The difference is reduced to a signed 8-bit value.
            sbyte pitch = unchecked((sbyte)(pitches[next_width + mem49] - pitches[mem49 - cur_width]));

            interpolate(width, 168, phase3, pitch);
        }

        /// <summary>
        /// Walks phonemeIndexOutput until the entry after the current one is the
        /// end token (255) and, for each pair of neighbouring phonemes,
        /// interpolates the pitch, frequency and amplitude tables across the
        /// boundary between them.
        /// </summary>
        /// <returns>
        /// The accumulated length of all phonemes before the last one plus the
        /// length of the last phoneme, truncated to a byte.
        /// </returns>
        public static byte CreateTransitions()
        {
            byte mem49 = 0;
            byte pos = 0;

            while (true)
            {
                byte next_rank;
                byte rank;
                byte speedcounter;
                byte phase1;
                byte phase2;
                byte phase3;
                byte transition;

                byte phoneme = phonemeIndexOutput[pos];
                byte next_phoneme = phonemeIndexOutput[pos + 1];

                if (next_phoneme == 255)
                {
                    break; // 255 == end_token
                }

                // get the ranking of each phoneme
                next_rank = blendRank[next_phoneme];
                rank = blendRank[phoneme];

                // compare the ranks to decide whose blend lengths to use
                if (rank == next_rank)
                {
                    // same rank, so use out blend lengths from each phoneme
                    phase1 = outBlendLength[phoneme];
                    phase2 = outBlendLength[next_phoneme];
                }
                else if (rank < next_rank)
                {
                    // next phoneme has the larger rank, so use its blend lengths
                    phase1 = inBlendLength[next_phoneme];
                    phase2 = outBlendLength[next_phoneme];
                }
                else
                {
                    // current phoneme has the larger rank, so use its blend lengths
                    // note the out/in are swapped
                    phase1 = outBlendLength[phoneme];
                    phase2 = inBlendLength[phoneme];
                }

                mem49 += phonemeLengthOutput[pos];

                speedcounter = (byte)(mem49 + phase2);
                phase3 = (byte)(mem49 - phase1);
                transition = (byte)(phase1 + phase2); // total transition?

                // Only transitions of 2..129 frames are interpolated.
                if (((transition - 2) & 128) == 0)
                {
                    byte table = 169;
                    interpolate_pitch(pos, mem49, phase3);

                    while (table < 175)
                    {
                        // tables:
                        // 168 pitches[]
                        // 169 frequency1
                        // 170 frequency2
                        // 171 frequency3
                        // 172 amplitude1
                        // 173 amplitude2
                        // 174 amplitude3

                        // Signed 8-bit change from the start to the end of the transition.
                        sbyte value = unchecked((sbyte)(Read(table, speedcounter) - Read(table, phase3)));
                        interpolate(transition, table, phase3, value);
                        table++;
                    }
                }

                ++pos;
            }

            // add the length of this phoneme
            return (byte)(mem49 + phonemeLengthOutput[pos]);
        }
    }
}