#include "timestretcher.h" #include "common.h" #include "correlation.h" #include "effect.h" #include "interpolation.h" #include "linalg.h" #include "mengumath.h" #include "vecdeque.h" #include #include #include #include #include #include #include #include #include using namespace Mengu; using namespace dsp; void TimeStretcher::set_stretch_factor(const float &scale) { _stretch_factor = scale; } Effect::InputDomain TimeStretcher::get_input_domain() { return InputDomain::Time; } std::vector TimeStretcher::get_property_descs() const { return { EffectPropDesc { .type = EffectPropType::Slider, .name = "stretch_factor", .desc = "Scales the length of pushed signals by this amount", .slider_data = { .min_value = 0.5, .max_value = 2, .scale = Exp, } } }; }; void TimeStretcher::set_property(uint32_t id, EffectPropPayload data) { switch (id) { case 0: if (data.type == Slider) { set_stretch_factor(data.value); } break; }; } EffectPropPayload TimeStretcher::get_property(uint32_t id) const { switch (id) { case 0: return EffectPropPayload { .type = Slider, .value = _stretch_factor, }; }; return EffectPropPayload {}; } static float _wrap_phase(float phase) { return fposmod(phase + MATH_PI, MATH_TAU) - MATH_PI; } // difference between 2 unwrapped phases. since, phases are preiodic, picks the closest one to the estimate static float _phase_diff(float next, float prev, float est = 0.0f) { return _wrap_phase(next - prev - est) + est; } PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(bool preserve_formants) { _preserve_formants = preserve_formants; reset(); } void PhaseVocoderTimeStretcher::push_signal(const Complex *input, const uint32_t &size) { _raw_buffer.extend_back(input, size); } uint32_t PhaseVocoderTimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) { while (n_transformed_ready() < size && _raw_buffer.size() >= WindowSize) { std::array sample; _raw_buffer.to_array(sample.data(), WindowSize); _load_new_freq_window(sample); std::array curr_freqs; std::copy(_lpc.get_freq_spectrum().cbegin(), _lpc.get_freq_spectrum().cbegin() + WindowSize / 2, curr_freqs.begin()); float analysis_hop_sizef = SynthesisHopSize / _stretch_factor; _stretched_sample_truncated += std::modf(analysis_hop_sizef, &analysis_hop_sizef); uint32_t analysis_hop_size = (uint32_t) analysis_hop_sizef; if (_stretched_sample_truncated) { analysis_hop_size += 1; _stretched_sample_truncated -= 1; } std::array amplitudes = _calc_scaled_magnitudes(); std::array new_phases = _calc_scaled_phases(curr_freqs, analysis_hop_size); // std::array new_samples = _calc_new_samples(amplitudes.data(), curr_raw_phases.data()); std::array new_samples = _calc_new_samples(sample, amplitudes.data(), new_phases.data()); mix_and_extend(_transformed_buffer, new_samples, WindowSize - SynthesisHopSize, hann_window); // _transformed_buffer.extend_back(new_samples.data(), WindowSize); std::transform(new_phases.cbegin(), new_phases.cend(), _last_scaled_phases.begin(), [] (float unwrapped) { return _wrap_phase(unwrapped); } ); _raw_buffer.pop_front_many(nullptr, analysis_hop_size); } return _transformed_buffer.pop_front_many(output, MIN(n_transformed_ready(), size)); } uint32_t PhaseVocoderTimeStretcher::n_transformed_ready() const { return MAX(SynthesisHopSize, _transformed_buffer.size()) - (SynthesisHopSize); } void PhaseVocoderTimeStretcher::reset() { _prev_raw_mag2s.fill(0.0f); _prev_raw_phases.fill(0.0f); _last_scaled_phases.fill(0.0); _raw_buffer.resize(0); _transformed_buffer.resize(SynthesisHopSize, 0); _stretched_sample_truncated = 0.0; } // void PhaseVocoderTimeStretcher::set_stretch_factor(const float &stretch_factor) { // _stretch_factor = stretch_factor; // // _last_scaled_phases.fill(0.0f); // }; std::array PhaseVocoderTimeStretcher::_calc_scaled_magnitudes() { std::array mags; if (_preserve_formants) { const std::array &envelope = _lpc.get_envelope(); const std::array &residuals = _lpc.get_residuals(); for (uint32_t i = 0; i < WindowSize / 2; i++) { const uint32_t stretched_ind = i * _stretch_factor; if (stretched_ind < WindowSize / 2 && std::isfinite(residuals[i] * envelope[stretched_ind])) { mags[i] = residuals[i] * envelope[stretched_ind]; } else { mags[i] = 0.0f; } } } else { const std::array &freqs = _lpc.get_freq_spectrum(); for (uint32_t i = 0; i < WindowSize / 2; i++) { mags[i] = sqrt(std::norm(freqs[i])); } } return mags; } std::array PhaseVocoderTimeStretcher::_calc_scaled_phases( const std::array &curr_freqs, const uint32_t hopsize) { std::array curr_mag2s; std::array curr_phases; std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_mag2s.begin(), [] (Complex freq) { return std::norm(freq); } ); std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_phases.begin(), [] (Complex freq) { return std::arg(freq); } ); std::array phase_deltas; float stretch_factor = (float) SynthesisHopSize / hopsize; for (int i = 0; i < WindowSize / 2; i++) { const float est = i * MATH_TAU * ((float) hopsize / WindowSize);// / stretch_factor; phase_deltas[i] = _phase_diff(curr_phases[i], _prev_raw_phases[i], est); } std::array new_phases; for (int i = 0; i < WindowSize / 2; i++) { new_phases[i] = _last_scaled_phases[i] + (phase_deltas[i] * stretch_factor); } _replace_prev_freqs(curr_mag2s, curr_phases); return new_phases; } std::array PhaseVocoderTimeStretcher::_load_new_freq_window(const std::array &sample) { _lpc.load_sample(sample.data()); std::array new_freqs; for (uint32_t i = 0; i < WindowSize / 2; i++) { new_freqs[i] = _lpc.get_freq_spectrum()[i]; } return new_freqs; } void PhaseVocoderTimeStretcher::_replace_prev_freqs(const std::array &curr_mag2s, const std::array &curr_phases) { std::copy(curr_mag2s.cbegin(), curr_mag2s.cend(), _prev_raw_mag2s.begin()); std::copy(curr_phases.cbegin(), curr_phases.cend(), _prev_raw_phases.begin()); } std::array PhaseVocoderTimeStretcher::_calc_new_samples( const std::array &raw_samples, const float *amplitudes, const float *phases) { std::array freqs = {0.0}; for (uint32_t i = 0; i < WindowSize / 2; i++) { freqs[i] = 2.0f * std::polar(amplitudes[i], phases[i]); } std::array new_samples; _lpc.get_fft().inverse_transform(freqs.data(), new_samples.data()); // Make shifted as loud as raw samples _loudness_norm.normalize(new_samples.data(), raw_samples.data(), new_samples.data()); return new_samples; } PhaseVocoderDoneRightTimeStretcher::PhaseVocoderDoneRightTimeStretcher(bool preserve_formants) : PhaseVocoderTimeStretcher(preserve_formants) {} std::array PhaseVocoderDoneRightTimeStretcher::_calc_scaled_phases( const std::array &curr_freqs, const uint32_t hopsize) { std::array curr_mag2s; std::array curr_phases; std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_mag2s.begin(), [] (Complex freq) { return std::norm(freq); } ); std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_phases.begin(), [] (Complex freq) { return std::arg(freq); } ); float stretch_factor = (float) SynthesisHopSize / hopsize; std::array time_phase_deltas; for (int i = 0; i < WindowSize / 2; i++) { const float est = i * MATH_TAU * ((float) hopsize / WindowSize); time_phase_deltas[i] = _phase_diff(curr_phases[i], _prev_raw_phases[i], est); } std::array freq_phase_deltas; for (uint32_t i = 1; i < WindowSize / 2 - 1; i++) { const float up_delta = _wrap_phase(curr_phases[i + 1] - curr_phases[i]); const float down_delta = _wrap_phase(curr_phases[i] - curr_phases[i - 1]); freq_phase_deltas[i] = 0.5f * (up_delta + down_delta); } freq_phase_deltas[0] = _wrap_phase(curr_phases[1] - curr_phases[0]); freq_phase_deltas[WindowSize / 2 - 1] = _wrap_phase(curr_phases[WindowSize / 2 - 1] - curr_phases[WindowSize / 2 - 2]); std::array new_phases = _propagate_phase_gradients ( time_phase_deltas, freq_phase_deltas, _last_scaled_phases, _prev_raw_mag2s, curr_mag2s, stretch_factor, 1e-3f ); // for (int i = 0; i < WindowSize / 2; i++) { // new_phases[i] = _last_scaled_phases[i] + (time_phase_deltas[i] * stretch_factor); // } _replace_prev_freqs(curr_mag2s, curr_phases); return new_phases; } std::array PhaseVocoderDoneRightTimeStretcher::_propagate_phase_gradients( const std::array &phase_time_deltas, const std::array &phase_freq_deltas, const std::array &last_stretched_phases, const std::array &prev_freq_mags, const std::array &next_freq_mags, const float stretch_factor, const float tolerance) { // sort indexes to the next frequencies based on the magnitude of that bin, in descending order // also, bins with magnitude under the threshold do not propagate in the frequency domain float max_mag = 0.0f; for (uint32_t i = 0; i < WindowSize / 2; i++) { max_mag = MAX(MAX(max_mag, prev_freq_mags[i]), next_freq_mags[i]); } const float abs_tol = max_mag * (tolerance * tolerance); // Used to store indexs to each frequencing in the propagation_queue struct FreqBin { enum { Prev, Next, }; uint8_t frame; uint32_t bin; }; const auto freq_bin_cmp = [&prev_freq_mags, &next_freq_mags] (FreqBin a, FreqBin b) { float a_mag = (a.frame == FreqBin::Prev) ? prev_freq_mags[a.bin] : next_freq_mags[a.bin]; float b_mag = (b.frame == FreqBin::Prev) ? prev_freq_mags[b.bin] : next_freq_mags[b.bin]; return a_mag < b_mag; }; // Max Heap of the frequency bins to propagate. (Listen I REALLY don't want allocate dynamically) std::array propagation_queue; uint32_t propagation_queue_size = WindowSize / 2; for (uint32_t i = 0; i < WindowSize / 2; i++) { propagation_queue[i] = FreqBin { .frame = FreqBin::Prev, .bin = i }; } std::make_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp); // Set of frequency bins to propagate to bool can_recieve_propagation[WindowSize / 2]; uint32_t n_can_recieve_propagation = WindowSize / 2; for (uint32_t i = 0; i < WindowSize / 2; i++) { if ((next_freq_mags[i] < abs_tol)) { can_recieve_propagation[i] = false; n_can_recieve_propagation -= 1; } else { can_recieve_propagation[i] = true; } } // perform propagation in all dimension std::array new_phases {0}; while (n_can_recieve_propagation > 0) { std::pop_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp); FreqBin next_bin = propagation_queue[propagation_queue_size - 1]; propagation_queue_size -= 1; const uint32_t freq_ind = next_bin.bin; if (next_bin.frame == FreqBin::Prev) { if (can_recieve_propagation[freq_ind]) { new_phases[freq_ind] = last_stretched_phases[freq_ind] + (phase_time_deltas[freq_ind] * stretch_factor); //remove from set can_recieve_propagation[freq_ind] = false; n_can_recieve_propagation -= 1; // push to the heap propagation_queue[propagation_queue_size] = FreqBin {.frame = FreqBin::Next, .bin = freq_ind}; propagation_queue_size += 1; std::push_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp); } } else { if (freq_ind > 0 && can_recieve_propagation[freq_ind - 1]) { const uint32_t freq_down = freq_ind - 1; new_phases[freq_down] = new_phases[freq_ind] - (0.5 * (phase_freq_deltas[freq_down] + phase_freq_deltas[freq_ind]) * stretch_factor); //remove from set can_recieve_propagation[freq_down] = false; n_can_recieve_propagation -= 1; // push to the heap propagation_queue[propagation_queue_size] = FreqBin {.frame = FreqBin::Next, .bin = freq_down}; propagation_queue_size += 1; std::push_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp); } if ((freq_ind < WindowSize / 2 - 1) && can_recieve_propagation[freq_ind + 1]) { const uint32_t freq_up = freq_ind + 1; new_phases[freq_up] = new_phases[freq_ind] + (0.5 * (phase_freq_deltas[freq_up] + phase_freq_deltas[freq_ind]) * stretch_factor); //remove from set can_recieve_propagation[freq_up] = false; n_can_recieve_propagation -= 1; // push to the heap propagation_queue[propagation_queue_size] = FreqBin {.frame = FreqBin::Next, .bin = freq_up}; propagation_queue_size += 1; std::push_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp); } } } // std::array new_phases {0}; // auto freq_ind_cmp = [next_freq_mags] (uint32_t a, uint32_t b) { return next_freq_mags[a] > next_freq_mags[b]; }; // std::array freq_inds; // for (uint32_t i = 0; i < WindowSize / 2; i++) { freq_inds[i] = i;} // std::sort(freq_inds.begin(), freq_inds.end(), freq_ind_cmp); // std::array prop_source_mag {0.0f}; // for (uint32_t i = 0; i < WindowSize / 2; i++) { // new_phases[i] = last_stretched_phases[i] + phase_time_deltas[i] * stretch_factor; // prop_source_mag[i] = prev_freq_mags[i]; // } // for (const uint32_t freq_ind: freq_inds) { // if (freq_ind > 0 && prop_source_mag[freq_ind - 1] < next_freq_mags[freq_ind]) { // const uint32_t freq_down = freq_ind - 1; // new_phases[freq_down] = new_phases[freq_ind] - 0.5 * stretch_factor * (phase_freq_deltas[freq_down] + phase_freq_deltas[freq_ind]); // prop_source_mag[freq_down] = next_freq_mags[freq_ind]; // } // if (freq_ind < WindowSize / 2 && prop_source_mag[freq_ind + 1] < next_freq_mags[freq_ind]) { // const uint32_t freq_up = freq_ind + 1; // new_phases[freq_up] = new_phases[freq_ind] - 0.5 * stretch_factor * (phase_freq_deltas[freq_up] + phase_freq_deltas[freq_ind]); // prop_source_mag[freq_up] = next_freq_mags[freq_ind]; // } // prop_source_mag[freq_ind] = 2e10; // } return new_phases; } OLATimeStretcher::OLATimeStretcher(uint32_t w_size) { _window_size = w_size; _overlap = _window_size / 5; _selection_window = _window_size / 2; _transformed_buffer.resize(_window_size); } template static void mix_into_extend_by_pointer(const Complex *new_data, T &output, const uint32_t &window_size, const uint32_t &overlap_size) { uint32_t i = 0; for (; i < overlap_size; i++) { const float w = hann_window((float) i / overlap_size); const uint32_t output_ind = output.size() - overlap_size + i; output[output_ind] = lerp(new_data[i], output[output_ind], w); } for (; i < window_size; i++) { output.push_back(new_data[i]); } } void OLATimeStretcher::push_signal(const Complex *input, const uint32_t &size) { _raw_buffer.extend_back(input, size); } uint32_t OLATimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) { // Do stretchy // Theoretical interval between samples per process const uint32_t sample_skip = (_window_size - _overlap) / _stretch_factor; // based on the example given by https://www.surina.net/article/time-and-pitch-scaling.html const uint32_t length_for_process = MAX(sample_skip, _window_size + _selection_window); while (_raw_buffer.size() > length_for_process) { std::vector new_data(_window_size + _selection_window); _raw_buffer.to_array(new_data.data(), _window_size + _selection_window); std::vector prev_tail(_overlap); _transformed_buffer.pop_back_many(prev_tail.data(), _overlap); // find best start for overlap uint32_t overlap_ind = find_max_correlation_quad(prev_tail.data(), new_data.data(), _overlap, _selection_window); mix_into_extend_by_pointer(new_data.data() + overlap_ind, prev_tail, _window_size, _overlap); for (auto v: prev_tail) { _transformed_buffer.push_back(v); } _raw_buffer.pop_front_many(nullptr, sample_skip); } uint32_t n = _transformed_buffer.pop_front_many(output, MIN(size, n_transformed_ready())); for (uint32_t i = n; i < size; i++ ) { output[i] = 0; } return n; } uint32_t OLATimeStretcher::n_transformed_ready() const { return _transformed_buffer.size() - _overlap; } void OLATimeStretcher::reset() { _raw_buffer.resize(0); _transformed_buffer.resize(_window_size, 0); } WSOLATimeStretcher::WSOLATimeStretcher() { // _transformed_buffer.resize(MaxBackWindowOverlap); } void WSOLATimeStretcher::push_signal(const Complex *input, const uint32_t &size) { _raw_buffer.extend_back(input, size); // perform the stretchy // if (_raw_buffer.size() > SampleProcSize) { // std::array samples; // _raw_buffer.to_array(samples.data(), SampleProcSize); // // do the stretchy // uint32_t frames_used = _stretch_sample_and_add(samples.data()); // // delete everything used // _raw_buffer.pop_front_many(nullptr, frames_used); // _last_overlap_start -= frames_used; // _next_overlap_start -= frames_used; // } } uint32_t WSOLATimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) { // lazily perform the stretchy while ((_raw_buffer.size() > SampleProcSize) && (n_transformed_ready() < size)) { std::array samples; _raw_buffer.to_array(samples.data(), SampleProcSize); // do the stretchy uint32_t frames_used = _stretch_sample_and_add(samples.data()); // delete everything used _raw_buffer.pop_front_many(nullptr, frames_used); _last_overlap_start -= frames_used; _next_overlap_start -= frames_used; } uint32_t n = _transformed_buffer.pop_front_many(output, MIN(size, n_transformed_ready())); for (uint32_t i = n; i < size; i++) { output[i] = 0; } return n; } uint32_t WSOLATimeStretcher::n_transformed_ready() const { return _transformed_buffer.size(); } void WSOLATimeStretcher::reset() { _raw_buffer.resize(0); _transformed_buffer.resize(0); } uint32_t WSOLATimeStretcher::_stretch_sample_and_add(const Complex *sample) { // base overlap const uint32_t overlap_size = WindowSize / 4; // search forward for better overlap point const uint32_t search_window = WindowSize / 5; const uint32_t flat_duration = WindowSize - 2 * overlap_size; // consider te amount of frames skiped through truncation double sample_skipd; double dropped_per_window = std::modf((WindowSize - overlap_size) / _stretch_factor, &sample_skipd); const uint32_t sample_skip = sample_skipd; while (_next_overlap_start + WindowSize < SampleProcSize) { // Find insertion that best fits the new sample uint32_t prev_not_overlapped = find_max_correlation( sample + _next_overlap_start, sample + _last_overlap_start, overlap_size, search_window ); uint32_t actual_last_overlap = _last_overlap_start + prev_not_overlapped; const uint32_t actually_overlapped = overlap_size - prev_not_overlapped; Complex overlap_buffer[SampleProcSize / 2]; overlap_add( sample + actual_last_overlap, sample + _next_overlap_start, overlap_buffer, actually_overlapped, hamming_window ); // append new data _transformed_buffer.extend_back(sample + _last_overlap_start, prev_not_overlapped); _transformed_buffer.extend_back(overlap_buffer, actually_overlapped); // take 2 * prev_not_overlapped from both sides of the flat duration _transformed_buffer.extend_back(sample + _next_overlap_start + actually_overlapped, flat_duration); // set for next cycle _last_overlap_start = _next_overlap_start + actually_overlapped + flat_duration; _next_overlap_start = _next_overlap_start + sample_skip; _stretched_sample_truncated += dropped_per_window; if (_stretched_sample_truncated > 1.0) { _next_overlap_start += 1; _stretched_sample_truncated -= 1.0; } } return MIN(_last_overlap_start, _next_overlap_start); } PSOLATimeStretcher::PSOLATimeStretcher() { _transformed_buffer.resize(MaxBackWindowOverlap); } void PSOLATimeStretcher::push_signal(const Complex *input, const uint32_t &size) { _raw_buffer.extend_back(input, size); } uint32_t PSOLATimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) { // lazily perform the stretchy while ((_raw_buffer.size() > SampleProcSize) && (size > n_transformed_ready())) { std::array samples; std::array windowed_samples; _raw_buffer.to_array(samples.data(), SampleProcSize); _raw_buffer.to_array(windowed_samples.data(), SampleProcSize); window_ends(windowed_samples.data(), SampleProcSize, SampleProcSize / 10, hann_window); int est_freq = _est_fund_frequency(windowed_samples.data()); uint32_t est_period = (1.0 / est_freq) * SampleProcSize / 2; std::vector est_peaks = _find_upcoming_peaks(samples.data(), est_period); _stretch_peaks_and_add(samples.data(), est_peaks); // delete everything used _raw_buffer.pop_front_many(nullptr, est_peaks.back() + 1); // +1 because est_peaks are the INDEX of the peaks, not the num of frames used } uint32_t n = _transformed_buffer.pop_front_many(output, MIN(size, n_transformed_ready())); for (uint32_t i = n; i < size; i++) { output[i] = 0; } return n; } uint32_t PSOLATimeStretcher::n_transformed_ready() const { return _transformed_buffer.size() - MaxBackWindowOverlap; } void PSOLATimeStretcher::reset() { _transformed_buffer.resize(MaxBackWindowOverlap, 0); _raw_buffer.resize(0); } int PSOLATimeStretcher::_est_fund_frequency(const Complex *samples) { // Todo: move all this to a PitchDetecter object or something _lpc.load_sample(samples); const std::array residuals = _lpc.get_residuals(); // find candidate from residual peaks (only use positive half of the spectrum) std::vector srhs = calc_srhs(residuals.data(), residuals.size() / 2, MinFreqInd, MaxFreqInd, 10); float max_srhs = -10e32; int max_pitch_ind = MaxFreqInd; for (int i = 0; i < srhs.size(); i++) { if (srhs[i] > max_srhs) { max_pitch_ind = MinFreqInd + i; max_srhs = srhs[i]; } } return max_pitch_ind; } std::vector PSOLATimeStretcher::_find_upcoming_peaks(const Complex *samples, const uint32_t est_period) { // assume that peaks are around est_period apart, but give some sllack as pitches change slightly const uint32_t search_start = 0.8 * est_period; const uint32_t search_end = 1.2 * est_period; std::vector peaks; uint32_t last_peak = 0; while ((last_peak + est_period) < SampleProcSize) { uint32_t peak = last_peak + search_start; float peak_size = 0.0f; for (uint32_t i = last_peak + search_start; i < MIN(SampleProcSize, last_peak + search_end); i++) { if (samples[i].real() > peak_size) { peak_size = samples[i].real(); peak = i; } } peaks.push_back(peak); last_peak = peak; } return peaks; } // // overlap and extend without applying a window function first // // overlap_size may be larger than new_data.size(), in which case only new_data.size() is added. the offset is the same template static void _mix_and_extend_no_window(T &array, const NewT new_data, const uint32_t &overlap_size) { uint32_t i = 0; for(; i < MIN(overlap_size, new_data.size()); i++) { uint32_t ind = array.size() - overlap_size + i; array[ind] = array[ind] + new_data[i]; } for (; i < new_data.size(); i++) { array.push_back(new_data[i]); } } void PSOLATimeStretcher::_stretch_peaks_and_add(const Complex *samples, const std::vector &est_peaks) { // accumulate by collecting the left halves and right halves if each peak sepeartly std::vector> right_windows; std::vector> left_windows; uint32_t last_peak = 0; for (uint32_t next_peak: est_peaks) { std::vector left_window; std::vector right_window; for (uint32_t i = last_peak; i < next_peak; i++) { float w = (float) (i - last_peak) / (next_peak - last_peak); w = hann_window(w); left_window.emplace_back(w * samples[i]); right_window.emplace_back((1.0f - w) * samples[i]); } left_windows.push_back(std::move(left_window)); right_windows.push_back(std::move(right_window)); last_peak = next_peak; } // arrange the peaks together last_peak = 0; for (uint32_t i = 0; i < left_windows.size(); i++) { const std::vector left_window = std::move(left_windows[i]); const std::vector right_window = std::move(right_windows[i]); // use overlapsize of previous period to make the right window continuous with the previous left window _mix_and_extend_no_window(_transformed_buffer, right_window, _next_right_window_overlap); // calculate the frames overlap taking the truncated part of previous processes into account int curr_period = est_peaks[i] - last_peak; double overlap_sized; _stretched_sample_truncated += std::abs(std::modf((2.0 - _stretch_factor) * curr_period, &overlap_sized)); uint32_t overlap_size = std::abs(overlap_sized); if (_stretched_sample_truncated > 1.0f) { overlap_size += 1; _stretched_sample_truncated -= 1.0f; } if (_stretch_factor >= 1.0f) { if (_stretch_factor > 2.0f) { // pad the gap between each side with 0s uint32_t padding_size = std::move(overlap_size); for (uint32_t j = 0; j < padding_size; j++) { _transformed_buffer.push_back(0); } _transformed_buffer.extend_back(left_window.data(), left_window.size()); } else { // some parts are overlapped // mix it with the left window of the next _mix_and_extend_no_window(_transformed_buffer, left_window, overlap_size); } // the end of the _transform buffer is the end of the left window, which is equivalenting to setting both of these values to 0 _next_right_window_overlap = 0; } else { // since the left peak ends before the right peak ends, we have to overlap both the right and left peaks into the transformed data _mix_and_extend_no_window(_transformed_buffer, left_window, overlap_size); _next_right_window_overlap = overlap_size - curr_period; } last_peak = est_peaks[i]; } }