// ladspa-pitchshift/timestretcher.cpp
// (repository listing: 810 lines, 30 KiB, C++)

#include "timestretcher.h"
#include "common.h"
#include "correlation.h"
#include "effect.h"
#include "interpolation.h"
#include "linalg.h"
#include "mengumath.h"
#include "vecdeque.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <complex>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>
using namespace Mengu;
using namespace dsp;
void TimeStretcher::set_stretch_factor(const float &scale) {
_stretch_factor = scale;
}
// Reports InputDomain::Time as the input domain of this effect.
Effect::InputDomain TimeStretcher::get_input_domain() {
    const InputDomain domain = InputDomain::Time;
    return domain;
}
std::vector<EffectPropDesc> TimeStretcher::get_property_descs() const {
return {
EffectPropDesc {
.type = EffectPropType::Slider,
.name = "stretch_factor",
.desc = "Scales the length of pushed signals by this amount",
.slider_data = {
.min_value = 0.5,
.max_value = 2,
.scale = Exp,
}
}
};
};
// Applies a property payload. Only id 0 (the stretch factor) is recognised,
// and only when the payload is a Slider; anything else is ignored.
void TimeStretcher::set_property(uint32_t id, EffectPropPayload data) {
    if (id != 0) {
        return;
    }
    if (data.type == Slider) {
        set_stretch_factor(data.value);
    }
}
// Returns the current value of a property. Id 0 yields the stretch factor as
// a Slider payload; every other id yields a default-constructed payload.
EffectPropPayload TimeStretcher::get_property(uint32_t id) const {
    if (id == 0) {
        return EffectPropPayload {
            .type = Slider,
            .value = _stretch_factor,
        };
    }
    return EffectPropPayload {};
}
// Shifts the phase by pi, reduces it modulo tau with fposmod, and shifts it
// back. Presumably fposmod is a non-negative floating-point modulo, which
// would put the result in [-pi, pi) - confirm against mengumath.h.
static float _wrap_phase(float phase) {
    const auto shifted = phase + MATH_PI;
    return fposmod(shifted, MATH_TAU) - MATH_PI;
}
// Difference between two phases. Since phases are periodic, the candidate
// closest to the estimate `est` is picked: the deviation from the estimate is
// wrapped, then the estimate is added back.
static float _phase_diff(float next, float prev, float est = 0.0f) {
    const float deviation = next - prev - est;
    return _wrap_phase(deviation) + est;
}
// Records whether magnitudes should be formant-preserving (see
// _calc_scaled_magnitudes) and puts all buffers/history into their reset state.
PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(bool preserve_formants) {
    this->_preserve_formants = preserve_formants;
    this->reset();
}
void PhaseVocoderTimeStretcher::push_signal(const Complex *input, const uint32_t &size) {
_raw_buffer.extend_back(input, size);
}
// Writes up to `size` stretched samples into `output` and returns how many
// were actually written.
//
// Windows are processed lazily: while fewer than `size` samples are ready and
// at least WindowSize raw samples are buffered, one analysis window is
// transformed, re-synthesised with scaled phases and overlap-added onto the
// transformed buffer. The raw buffer then advances by the analysis hop
// (SynthesisHopSize / _stretch_factor).
//
// The analysis hop is generally fractional. Its integer part is used directly
// and the fractional part is accumulated in _stretched_sample_truncated; one
// extra sample is consumed only once a whole sample has been accumulated.
// (Previously the accumulator was tested for "non-zero", which rounded every
// fractional hop up and let the accumulator drift negative without bound.)
uint32_t PhaseVocoderTimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) {
    while (n_transformed_ready() < size && _raw_buffer.size() >= WindowSize) {
        // Copy (without consuming) the next analysis window.
        std::array<Complex, WindowSize> sample;
        _raw_buffer.to_array(sample.data(), WindowSize);

        // Analyse the window; the returned array is the lower half of the spectrum.
        const std::array<Complex, WindowSize / 2> curr_freqs = _load_new_freq_window(sample);

        // Split the analysis hop into whole samples and a carried fraction.
        const float exact_hop = SynthesisHopSize / _stretch_factor;
        float whole_hop = 0.0f;
        _stretched_sample_truncated += std::modf(exact_hop, &whole_hop);
        uint32_t analysis_hop_size = static_cast<uint32_t>(whole_hop);
        if (_stretched_sample_truncated >= 1.0f) {
            analysis_hop_size += 1;
            _stretched_sample_truncated -= 1.0f;
        }

        std::array<float, WindowSize / 2> amplitudes = _calc_scaled_magnitudes();
        std::array<float, WindowSize / 2> new_phases = _calc_scaled_phases(curr_freqs, analysis_hop_size);

        std::array<Complex, WindowSize> new_samples = _calc_new_samples(sample, amplitudes.data(), new_phases.data());
        mix_and_extend(_transformed_buffer, new_samples, WindowSize - SynthesisHopSize, hann_window);

        // Remember the synthesis phases (wrapped) as the base for the next window.
        std::transform(new_phases.cbegin(), new_phases.cend(), _last_scaled_phases.begin(),
            [] (float unwrapped) { return _wrap_phase(unwrapped); }
        );

        _raw_buffer.pop_front_many(nullptr, analysis_hop_size);
    }
    return _transformed_buffer.pop_front_many(output, MIN(n_transformed_ready(), size));
}
// Number of transformed samples that may be popped. The last SynthesisHopSize
// samples of the transformed buffer are always held back; if the buffer holds
// no more than that, nothing is ready.
uint32_t PhaseVocoderTimeStretcher::n_transformed_ready() const {
    const auto buffered = _transformed_buffer.size();
    if (buffered <= SynthesisHopSize) {
        return 0;
    }
    return buffered - SynthesisHopSize;
}
// Returns the stretcher to its initial state: empty raw buffer, a transformed
// buffer holding SynthesisHopSize zeros, zeroed analysis/synthesis history and
// no carried fractional hop.
void PhaseVocoderTimeStretcher::reset() {
    // sample buffers
    _raw_buffer.resize(0);
    _transformed_buffer.resize(SynthesisHopSize, 0);

    // per-bin history of the previous window
    _prev_raw_mag2s.fill(0.0f);
    _prev_raw_phases.fill(0.0f);
    _last_scaled_phases.fill(0.0);

    // fractional part of the analysis hop carried between windows
    _stretched_sample_truncated = 0.0;
}
// void PhaseVocoderTimeStretcher::set_stretch_factor(const float &stretch_factor) {
// _stretch_factor = stretch_factor;
// // _last_scaled_phases.fill(0.0f);
// };
// Computes the synthesis magnitude of every bin in the lower half of the
// spectrum of the window currently loaded into _lpc.
//
// Without formant preservation this is simply the magnitude of each bin.
// With formant preservation each bin's residual is multiplied by the envelope
// sampled at index (i * _stretch_factor); bins whose envelope index falls
// outside the lower half, or whose product is not finite, are set to 0.
std::array<float, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderTimeStretcher::_calc_scaled_magnitudes() {
    std::array<float, WindowSize / 2> mags;

    if (!_preserve_formants) {
        const std::array<Complex, WindowSize> &freqs = _lpc.get_freq_spectrum();
        for (uint32_t bin = 0; bin < WindowSize / 2; bin++) {
            mags[bin] = sqrt(std::norm(freqs[bin]));
        }
        return mags;
    }

    const std::array<float, WindowSize> &envelope = _lpc.get_envelope();
    const std::array<float, WindowSize> &residuals = _lpc.get_residuals();
    for (uint32_t bin = 0; bin < WindowSize / 2; bin++) {
        const uint32_t stretched_ind = bin * _stretch_factor;
        mags[bin] = 0.0f;
        if (stretched_ind < WindowSize / 2) {
            const float scaled = residuals[bin] * envelope[stretched_ind];
            if (std::isfinite(scaled)) {
                mags[bin] = scaled;
            }
        }
    }
    return mags;
}
std::array<float, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderTimeStretcher::_calc_scaled_phases(
const std::array<Complex, WindowSize / 2> &curr_freqs,
const uint32_t hopsize) {
std::array<float, WindowSize / 2> curr_mag2s;
std::array<float, WindowSize / 2> curr_phases;
std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_mag2s.begin(),
[] (Complex freq) { return std::norm(freq); }
);
std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_phases.begin(),
[] (Complex freq) { return std::arg(freq); }
);
std::array<float, WindowSize / 2> phase_deltas;
float stretch_factor = (float) SynthesisHopSize / hopsize;
for (int i = 0; i < WindowSize / 2; i++) {
const float est = i * MATH_TAU * ((float) hopsize / WindowSize);// / stretch_factor;
phase_deltas[i] = _phase_diff(curr_phases[i], _prev_raw_phases[i], est);
}
std::array<float, WindowSize / 2> new_phases;
for (int i = 0; i < WindowSize / 2; i++) {
new_phases[i] = _last_scaled_phases[i] + (phase_deltas[i] * stretch_factor);
}
_replace_prev_freqs(curr_mag2s, curr_phases);
return new_phases;
}
std::array<Complex, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderTimeStretcher::_load_new_freq_window(const std::array<Complex, WindowSize> &sample) {
_lpc.load_sample(sample.data());
std::array<Complex, WindowSize / 2> new_freqs;
for (uint32_t i = 0; i < WindowSize / 2; i++) {
new_freqs[i] = _lpc.get_freq_spectrum()[i];
}
return new_freqs;
}
void PhaseVocoderTimeStretcher::_replace_prev_freqs(const std::array<float, WindowSize / 2> &curr_mag2s, const std::array<float, WindowSize / 2> &curr_phases) {
std::copy(curr_mag2s.cbegin(), curr_mag2s.cend(), _prev_raw_mag2s.begin());
std::copy(curr_phases.cbegin(), curr_phases.cend(), _prev_raw_phases.begin());
}
std::array<Complex, PhaseVocoderTimeStretcher::WindowSize> PhaseVocoderTimeStretcher::_calc_new_samples(
const std::array<Complex, WindowSize> &raw_samples,
const float *amplitudes,
const float *phases) {
std::array<Complex, WindowSize> freqs = {0.0};
for (uint32_t i = 0; i < WindowSize / 2; i++) {
freqs[i] = 2.0f * std::polar(amplitudes[i], phases[i]);
}
std::array<Complex, WindowSize> new_samples;
_lpc.get_fft().inverse_transform(freqs.data(), new_samples.data());
// Make shifted as loud as raw samples
_loudness_norm.normalize(new_samples.data(), raw_samples.data(), new_samples.data());
return new_samples;
}
// Forwards `preserve_formants` to the plain phase-vocoder base; this variant
// only differs in how synthesis phases are computed.
PhaseVocoderDoneRightTimeStretcher::PhaseVocoderDoneRightTimeStretcher(bool preserve_formants)
    : PhaseVocoderTimeStretcher(preserve_formants) {
}
std::array<float, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderDoneRightTimeStretcher::_calc_scaled_phases(
const std::array<Complex, WindowSize / 2> &curr_freqs,
const uint32_t hopsize) {
std::array<float, WindowSize / 2> curr_mag2s;
std::array<float, WindowSize / 2> curr_phases;
std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_mag2s.begin(),
[] (Complex freq) { return std::norm(freq); }
);
std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_phases.begin(),
[] (Complex freq) { return std::arg(freq); }
);
float stretch_factor = (float) SynthesisHopSize / hopsize;
std::array<float, WindowSize / 2> time_phase_deltas;
for (int i = 0; i < WindowSize / 2; i++) {
const float est = i * MATH_TAU * ((float) hopsize / WindowSize);
time_phase_deltas[i] = _phase_diff(curr_phases[i], _prev_raw_phases[i], est);
}
std::array<float, WindowSize / 2> freq_phase_deltas;
for (uint32_t i = 1; i < WindowSize / 2 - 1; i++) {
const float up_delta = _wrap_phase(curr_phases[i + 1] - curr_phases[i]);
const float down_delta = _wrap_phase(curr_phases[i] - curr_phases[i - 1]);
freq_phase_deltas[i] = 0.5f * (up_delta + down_delta);
}
freq_phase_deltas[0] = _wrap_phase(curr_phases[1] - curr_phases[0]);
freq_phase_deltas[WindowSize / 2 - 1] = _wrap_phase(curr_phases[WindowSize / 2 - 1] - curr_phases[WindowSize / 2 - 2]);
std::array<float, WindowSize / 2> new_phases = _propagate_phase_gradients (
time_phase_deltas,
freq_phase_deltas,
_last_scaled_phases,
_prev_raw_mag2s,
curr_mag2s,
stretch_factor,
1e-3f
);
// for (int i = 0; i < WindowSize / 2; i++) {
// new_phases[i] = _last_scaled_phases[i] + (time_phase_deltas[i] * stretch_factor);
// }
_replace_prev_freqs(curr_mag2s, curr_phases);
return new_phases;
}
std::array<float, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderDoneRightTimeStretcher::_propagate_phase_gradients(
const std::array<float, WindowSize / 2> &phase_time_deltas,
const std::array<float, WindowSize / 2> &phase_freq_deltas,
const std::array<float, WindowSize / 2> &last_stretched_phases,
const std::array<float, WindowSize / 2> &prev_freq_mags,
const std::array<float, WindowSize / 2> &next_freq_mags,
const float stretch_factor,
const float tolerance) {
// sort indexes to the next frequencies based on the magnitude of that bin, in descending order
// also, bins with magnitude under the threshold do not propagate in the frequency domain
float max_mag = 0.0f;
for (uint32_t i = 0; i < WindowSize / 2; i++) {
max_mag = MAX(MAX(max_mag, prev_freq_mags[i]), next_freq_mags[i]);
}
const float abs_tol = max_mag * (tolerance * tolerance);
// Used to store indexs to each frequencing in the propagation_queue
struct FreqBin {
enum {
Prev,
Next,
};
uint8_t frame;
uint32_t bin;
};
const auto freq_bin_cmp = [&prev_freq_mags, &next_freq_mags] (FreqBin a, FreqBin b) {
float a_mag = (a.frame == FreqBin::Prev) ? prev_freq_mags[a.bin] : next_freq_mags[a.bin];
float b_mag = (b.frame == FreqBin::Prev) ? prev_freq_mags[b.bin] : next_freq_mags[b.bin];
return a_mag < b_mag;
};
// Max Heap of the frequency bins to propagate. (Listen I REALLY don't want allocate dynamically)
std::array<FreqBin, WindowSize * 3> propagation_queue;
uint32_t propagation_queue_size = WindowSize / 2;
for (uint32_t i = 0; i < WindowSize / 2; i++) {
propagation_queue[i] = FreqBin { .frame = FreqBin::Prev, .bin = i };
}
std::make_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
// Set of frequency bins to propagate to
bool can_recieve_propagation[WindowSize / 2];
uint32_t n_can_recieve_propagation = WindowSize / 2;
for (uint32_t i = 0; i < WindowSize / 2; i++) {
if ((next_freq_mags[i] < abs_tol)) {
can_recieve_propagation[i] = false;
n_can_recieve_propagation -= 1;
}
else {
can_recieve_propagation[i] = true;
}
}
// perform propagation in all dimension
std::array<float, WindowSize / 2> new_phases {0};
while (n_can_recieve_propagation > 0) {
std::pop_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
FreqBin next_bin = propagation_queue[propagation_queue_size - 1];
propagation_queue_size -= 1;
const uint32_t freq_ind = next_bin.bin;
if (next_bin.frame == FreqBin::Prev) {
if (can_recieve_propagation[freq_ind]) {
new_phases[freq_ind] = last_stretched_phases[freq_ind] + (phase_time_deltas[freq_ind] * stretch_factor);
//remove from set
can_recieve_propagation[freq_ind] = false;
n_can_recieve_propagation -= 1;
// push to the heap
propagation_queue[propagation_queue_size] = FreqBin {.frame = FreqBin::Next, .bin = freq_ind};
propagation_queue_size += 1;
std::push_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
}
}
else {
if (freq_ind > 0 && can_recieve_propagation[freq_ind - 1]) {
const uint32_t freq_down = freq_ind - 1;
new_phases[freq_down] = new_phases[freq_ind] - (0.5 * (phase_freq_deltas[freq_down] + phase_freq_deltas[freq_ind]) * stretch_factor);
//remove from set
can_recieve_propagation[freq_down] = false;
n_can_recieve_propagation -= 1;
// push to the heap
propagation_queue[propagation_queue_size] = FreqBin {.frame = FreqBin::Next, .bin = freq_down};
propagation_queue_size += 1;
std::push_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
}
if ((freq_ind < WindowSize / 2 - 1) && can_recieve_propagation[freq_ind + 1]) {
const uint32_t freq_up = freq_ind + 1;
new_phases[freq_up] = new_phases[freq_ind] + (0.5 * (phase_freq_deltas[freq_up] + phase_freq_deltas[freq_ind]) * stretch_factor);
//remove from set
can_recieve_propagation[freq_up] = false;
n_can_recieve_propagation -= 1;
// push to the heap
propagation_queue[propagation_queue_size] = FreqBin {.frame = FreqBin::Next, .bin = freq_up};
propagation_queue_size += 1;
std::push_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
}
}
}
// std::array<float, WindowSize / 2> new_phases {0};
// auto freq_ind_cmp = [next_freq_mags] (uint32_t a, uint32_t b) { return next_freq_mags[a] > next_freq_mags[b]; };
// std::array<uint32_t, WindowSize / 2> freq_inds;
// for (uint32_t i = 0; i < WindowSize / 2; i++) { freq_inds[i] = i;}
// std::sort(freq_inds.begin(), freq_inds.end(), freq_ind_cmp);
// std::array<float, WindowSize / 2> prop_source_mag {0.0f};
// for (uint32_t i = 0; i < WindowSize / 2; i++) {
// new_phases[i] = last_stretched_phases[i] + phase_time_deltas[i] * stretch_factor;
// prop_source_mag[i] = prev_freq_mags[i];
// }
// for (const uint32_t freq_ind: freq_inds) {
// if (freq_ind > 0 && prop_source_mag[freq_ind - 1] < next_freq_mags[freq_ind]) {
// const uint32_t freq_down = freq_ind - 1;
// new_phases[freq_down] = new_phases[freq_ind] - 0.5 * stretch_factor * (phase_freq_deltas[freq_down] + phase_freq_deltas[freq_ind]);
// prop_source_mag[freq_down] = next_freq_mags[freq_ind];
// }
// if (freq_ind < WindowSize / 2 && prop_source_mag[freq_ind + 1] < next_freq_mags[freq_ind]) {
// const uint32_t freq_up = freq_ind + 1;
// new_phases[freq_up] = new_phases[freq_ind] - 0.5 * stretch_factor * (phase_freq_deltas[freq_up] + phase_freq_deltas[freq_ind]);
// prop_source_mag[freq_up] = next_freq_mags[freq_ind];
// }
// prop_source_mag[freq_ind] = 2e10;
// }
return new_phases;
}
// Configures the stretcher for windows of `w_size` samples: the overlap is a
// fifth and the search range half of the window. The transformed buffer is
// sized to one window up front.
OLATimeStretcher::OLATimeStretcher(uint32_t w_size) {
    this->_window_size = w_size;
    this->_overlap = this->_window_size / 5;
    this->_selection_window = this->_window_size / 2;
    this->_transformed_buffer.resize(this->_window_size);
}
// Cross-fades the first `overlap_size` samples of `new_data` into the last
// `overlap_size` elements of `output`, then appends the remaining samples of
// the window (up to `window_size`) to `output`.
//
// The blend weight for overlap position k is hann_window(k / overlap_size).
// `output` must already hold at least `overlap_size` elements.
template<class T>
static void mix_into_extend_by_pointer(const Complex *new_data,
        T &output,
        const uint32_t &window_size,
        const uint32_t &overlap_size) {
    // output does not grow during the cross-fade, so the start index is fixed
    const uint32_t overlap_begin = output.size() - overlap_size;
    for (uint32_t k = 0; k < overlap_size; k++) {
        const float w = hann_window((float) k / overlap_size);
        output[overlap_begin + k] = lerp(new_data[k], output[overlap_begin + k], w);
    }
    for (uint32_t k = overlap_size; k < window_size; k++) {
        output.push_back(new_data[k]);
    }
}
void OLATimeStretcher::push_signal(const Complex *input, const uint32_t &size) {
_raw_buffer.extend_back(input, size);
}
// Stretches all buffered input that can be processed, then writes up to `size`
// samples into `output`. Returns the number of real samples written; the rest
// of `output` (up to `size`) is zero-filled.
//
// Based on the example given by https://www.surina.net/article/time-and-pitch-scaling.html
// Each step takes the tail of the already-produced output, finds the offset in
// the upcoming input that correlates best with it, cross-fades the input in at
// that offset and then advances the input by the nominal skip.
uint32_t OLATimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) {
    // nominal distance between the starts of consecutive input windows
    const uint32_t sample_skip = (_window_size - _overlap) / _stretch_factor;
    // one window plus the range searched for the best overlap position
    const auto segment_len = _window_size + _selection_window;
    const uint32_t length_for_process = MAX(sample_skip, segment_len);

    while (_raw_buffer.size() > length_for_process) {
        std::vector<Complex> candidate(segment_len);
        _raw_buffer.to_array(candidate.data(), segment_len);

        // take back the tail of the output so it can be blended with the new window
        std::vector<Complex> tail(_overlap);
        _transformed_buffer.pop_back_many(tail.data(), _overlap);

        const uint32_t best_offset = find_max_correlation_quad(tail.data(), candidate.data(), _overlap, _selection_window);
        mix_into_extend_by_pointer(candidate.data() + best_offset, tail, _window_size, _overlap);

        for (auto blended : tail) {
            _transformed_buffer.push_back(blended);
        }
        _raw_buffer.pop_front_many(nullptr, sample_skip);
    }

    const uint32_t n = _transformed_buffer.pop_front_many(output, MIN(size, n_transformed_ready()));
    uint32_t fill = n;
    while (fill < size) {
        output[fill] = 0;
        fill++;
    }
    return n;
}
// Number of transformed samples that may be popped. The last `_overlap`
// samples are held back because the next window is cross-faded into them.
// The result is clamped at 0 so that a buffer shorter than the overlap cannot
// make the unsigned subtraction wrap around to a huge count.
uint32_t OLATimeStretcher::n_transformed_ready() const {
    const uint32_t buffered = _transformed_buffer.size();
    return (buffered > _overlap) ? buffered - _overlap : 0;
}
void OLATimeStretcher::reset() {
_raw_buffer.resize(0);
_transformed_buffer.resize(_window_size, 0);
}
// Nothing to set up here: the transformed buffer is not pre-sized.
WSOLATimeStretcher::WSOLATimeStretcher() {
}
void WSOLATimeStretcher::push_signal(const Complex *input, const uint32_t &size) {
_raw_buffer.extend_back(input, size);
// perform the stretchy
// if (_raw_buffer.size() > SampleProcSize) {
// std::array<Complex, SampleProcSize> samples;
// _raw_buffer.to_array(samples.data(), SampleProcSize);
// // do the stretchy
// uint32_t frames_used = _stretch_sample_and_add(samples.data());
// // delete everything used
// _raw_buffer.pop_front_many(nullptr, frames_used);
// _last_overlap_start -= frames_used;
// _next_overlap_start -= frames_used;
// }
}
// Writes up to `size` stretched samples into `output`; returns the number of
// real samples written and zero-fills the remainder of `output`.
//
// Input is stretched lazily: chunks of SampleProcSize samples are processed
// only while more than one chunk is buffered and fewer than `size` samples are
// ready. After each chunk the consumed frames are dropped from the raw buffer
// and both overlap cursors are shifted back by the same amount so they keep
// pointing at the same samples.
uint32_t WSOLATimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) {
    for (;;) {
        if (!(_raw_buffer.size() > SampleProcSize)) {
            break;
        }
        if (!(n_transformed_ready() < size)) {
            break;
        }

        std::array<Complex, SampleProcSize> chunk;
        _raw_buffer.to_array(chunk.data(), SampleProcSize);

        const uint32_t frames_used = _stretch_sample_and_add(chunk.data());

        _raw_buffer.pop_front_many(nullptr, frames_used);
        _last_overlap_start -= frames_used;
        _next_overlap_start -= frames_used;
    }

    const uint32_t n = _transformed_buffer.pop_front_many(output, MIN(size, n_transformed_ready()));
    uint32_t fill = n;
    while (fill < size) {
        output[fill] = 0;
        fill++;
    }
    return n;
}
// Every sample in the transformed buffer may be popped; nothing is held back.
uint32_t WSOLATimeStretcher::n_transformed_ready() const {
    const uint32_t buffered = _transformed_buffer.size();
    return buffered;
}
void WSOLATimeStretcher::reset() {
_raw_buffer.resize(0);
_transformed_buffer.resize(0);
}
uint32_t WSOLATimeStretcher::_stretch_sample_and_add(const Complex *sample) {
// base overlap
const uint32_t overlap_size = WindowSize / 4;
// search forward for better overlap point
const uint32_t search_window = WindowSize / 5;
const uint32_t flat_duration = WindowSize - 2 * overlap_size;
// consider te amount of frames skiped through truncation
double sample_skipd;
double dropped_per_window = std::modf((WindowSize - overlap_size) / _stretch_factor, &sample_skipd);
const uint32_t sample_skip = sample_skipd;
while (_next_overlap_start + WindowSize < SampleProcSize) {
// Find insertion that best fits the new sample
uint32_t prev_not_overlapped = find_max_correlation(
sample + _next_overlap_start,
sample + _last_overlap_start,
overlap_size,
search_window
);
uint32_t actual_last_overlap = _last_overlap_start + prev_not_overlapped;
const uint32_t actually_overlapped = overlap_size - prev_not_overlapped;
Complex overlap_buffer[SampleProcSize / 2];
overlap_add(
sample + actual_last_overlap,
sample + _next_overlap_start,
overlap_buffer,
actually_overlapped,
hamming_window
);
// append new data
_transformed_buffer.extend_back(sample + _last_overlap_start, prev_not_overlapped);
_transformed_buffer.extend_back(overlap_buffer, actually_overlapped);
// take 2 * prev_not_overlapped from both sides of the flat duration
_transformed_buffer.extend_back(sample + _next_overlap_start + actually_overlapped, flat_duration);
// set for next cycle
_last_overlap_start = _next_overlap_start + actually_overlapped + flat_duration;
_next_overlap_start = _next_overlap_start + sample_skip;
_stretched_sample_truncated += dropped_per_window;
if (_stretched_sample_truncated > 1.0) {
_next_overlap_start += 1;
_stretched_sample_truncated -= 1.0;
}
}
return MIN(_last_overlap_start, _next_overlap_start);
}
// Pre-sizes the transformed buffer to MaxBackWindowOverlap elements, the
// amount n_transformed_ready always holds back.
PSOLATimeStretcher::PSOLATimeStretcher() {
    this->_transformed_buffer.resize(MaxBackWindowOverlap);
}
void PSOLATimeStretcher::push_signal(const Complex *input, const uint32_t &size) {
_raw_buffer.extend_back(input, size);
}
// Writes up to `size` stretched samples into `output`; returns the number of
// real samples written and zero-fills the remainder of `output`.
//
// Input is stretched lazily, one chunk of SampleProcSize samples at a time:
// the fundamental frequency is estimated on an end-windowed copy of the chunk,
// converted to a period, the pitch peaks are located on the unwindowed chunk,
// and the peak-to-peak segments are stretched into the transformed buffer.
// Input up to and including the last peak is then discarded.
// NOTE(review): est_peaks.back() assumes at least one peak was found, and the
// period computation assumes est_freq != 0 - confirm both hold for all inputs.
uint32_t PSOLATimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) {
    while ((_raw_buffer.size() > SampleProcSize) && (size > n_transformed_ready())) {
        std::array<Complex, SampleProcSize> samples;
        _raw_buffer.to_array(samples.data(), SampleProcSize);

        // pitch estimation works on a copy whose ends are tapered with a Hann window
        std::array<Complex, SampleProcSize> windowed_samples;
        _raw_buffer.to_array(windowed_samples.data(), SampleProcSize);
        window_ends(windowed_samples.data(), SampleProcSize, SampleProcSize / 10, hann_window);

        const int est_freq = _est_fund_frequency(windowed_samples.data());
        const uint32_t est_period = (1.0 / est_freq) * SampleProcSize / 2;

        const std::vector<uint32_t> est_peaks = _find_upcoming_peaks(samples.data(), est_period);
        _stretch_peaks_and_add(samples.data(), est_peaks);

        // +1 because est_peaks holds the INDEX of each peak, not the number of frames used
        _raw_buffer.pop_front_many(nullptr, est_peaks.back() + 1);
    }

    const uint32_t n = _transformed_buffer.pop_front_many(output, MIN(size, n_transformed_ready()));
    uint32_t fill = n;
    while (fill < size) {
        output[fill] = 0;
        fill++;
    }
    return n;
}
// Number of transformed samples that may be popped. The last
// MaxBackWindowOverlap samples are held back because later windows are still
// mixed into them. The result is clamped at 0 so that a buffer shorter than
// that cannot make the unsigned subtraction wrap around to a huge count.
uint32_t PSOLATimeStretcher::n_transformed_ready() const {
    const uint32_t buffered = _transformed_buffer.size();
    return (buffered > MaxBackWindowOverlap) ? buffered - MaxBackWindowOverlap : 0;
}
void PSOLATimeStretcher::reset() {
_transformed_buffer.resize(MaxBackWindowOverlap, 0);
_raw_buffer.resize(0);
}
// Estimates the fundamental frequency of `samples` as a frequency-bin index.
//
// The samples are loaded into _lpc and calc_srhs is evaluated on the first
// half of the residuals for candidates between MinFreqInd and MaxFreqInd. The
// candidate with the highest score wins (the first one on ties); if no score
// exceeds the initial floor, MaxFreqInd is returned.
// Todo: move all this to a PitchDetecter object or something
int PSOLATimeStretcher::_est_fund_frequency(const Complex *samples) {
    _lpc.load_sample(samples);
    const std::array<float, SampleProcSize> residuals = _lpc.get_residuals();

    // only the positive half of the spectrum is used
    const std::vector<float> srhs = calc_srhs(residuals.data(), residuals.size() / 2, MinFreqInd, MaxFreqInd, 10);

    float best_score = -10e32;
    int best_ind = MaxFreqInd;
    int candidate_ind = MinFreqInd;
    for (const float score : srhs) {
        if (score > best_score) {
            best_ind = candidate_ind;
            best_score = score;
        }
        candidate_ind += 1;
    }
    return best_ind;
}
// Locates successive pitch peaks in a chunk of SampleProcSize samples.
//
// Peaks are assumed to be roughly `est_period` apart, with some slack because
// pitch drifts slightly: starting from the previous peak (initially index 0)
// the largest positive real sample between 0.8 and 1.2 periods ahead is taken
// as the next peak. If no sample in that range is positive, the start of the
// range is used. Scanning stops once less than one period remains in the chunk.
//
// The period and the search start are clamped to at least 1 sample. Without
// that, a period of 0 or 1 truncates the search start to 0, the "next" peak can
// coincide with the previous one, and the loop would never advance while
// `peaks` grows without bound. Periods of 2 or more behave exactly as before.
std::vector<uint32_t> PSOLATimeStretcher::_find_upcoming_peaks(const Complex *samples, const uint32_t est_period) {
    const uint32_t period = std::max<uint32_t>(est_period, 1);
    const uint32_t search_start = std::max<uint32_t>(0.8 * period, 1);
    const uint32_t search_end = 1.2 * period;

    std::vector<uint32_t> peaks;
    uint32_t last_peak = 0;
    while ((last_peak + period) < SampleProcSize) {
        // search_start <= period, so this default stays inside the chunk
        uint32_t peak = last_peak + search_start;
        float peak_size = 0.0f;
        for (uint32_t i = last_peak + search_start; i < MIN(SampleProcSize, last_peak + search_end); i++) {
            if (samples[i].real() > peak_size) {
                peak_size = samples[i].real();
                peak = i;
            }
        }
        peaks.push_back(peak);
        last_peak = peak;
    }
    return peaks;
}
// Overlap-adds `new_data` onto the end of `array` without applying a window
// function first.
//
// The first `overlap_size` elements of `new_data` are added element-wise to the
// last `overlap_size` elements of `array`; whatever remains of `new_data` is
// appended. `overlap_size` may be larger than new_data.size(), in which case
// only new_data.size() elements are mixed in; the offset into `array` is the
// same either way.
//
// `array` must hold at least `overlap_size` elements. `new_data` is taken by
// const reference so that the caller's container is not copied on every call.
template<class T, class NewT>
static void _mix_and_extend_no_window(T &array, const NewT &new_data, const uint32_t &overlap_size) {
    const uint32_t new_size = new_data.size();
    const uint32_t mixed = std::min(overlap_size, new_size);

    // array does not grow while mixing, so the start of the overlap is fixed
    const uint32_t overlap_begin = array.size() - overlap_size;
    for (uint32_t i = 0; i < mixed; i++) {
        array[overlap_begin + i] = array[overlap_begin + i] + new_data[i];
    }
    for (uint32_t i = mixed; i < new_size; i++) {
        array.push_back(new_data[i]);
    }
}
void PSOLATimeStretcher::_stretch_peaks_and_add(const Complex *samples, const std::vector<uint32_t> &est_peaks) {
// accumulate by collecting the left halves and right halves if each peak sepeartly
std::vector<std::vector<Complex>> right_windows;
std::vector<std::vector<Complex>> left_windows;
uint32_t last_peak = 0;
for (uint32_t next_peak: est_peaks) {
std::vector<Complex> left_window;
std::vector<Complex> right_window;
for (uint32_t i = last_peak; i < next_peak; i++) {
float w = (float) (i - last_peak) / (next_peak - last_peak);
w = hann_window(w);
left_window.emplace_back(w * samples[i]);
right_window.emplace_back((1.0f - w) * samples[i]);
}
left_windows.push_back(std::move(left_window));
right_windows.push_back(std::move(right_window));
last_peak = next_peak;
}
// arrange the peaks together
last_peak = 0;
for (uint32_t i = 0; i < left_windows.size(); i++) {
const std::vector<Complex> left_window = std::move(left_windows[i]);
const std::vector<Complex> right_window = std::move(right_windows[i]);
// use overlapsize of previous period to make the right window continuous with the previous left window
_mix_and_extend_no_window(_transformed_buffer, right_window, _next_right_window_overlap);
// calculate the frames overlap taking the truncated part of previous processes into account
int curr_period = est_peaks[i] - last_peak;
double overlap_sized;
_stretched_sample_truncated += std::abs(std::modf((2.0 - _stretch_factor) * curr_period, &overlap_sized));
uint32_t overlap_size = std::abs(overlap_sized);
if (_stretched_sample_truncated > 1.0f) {
overlap_size += 1;
_stretched_sample_truncated -= 1.0f;
}
if (_stretch_factor >= 1.0f) {
if (_stretch_factor > 2.0f) {
// pad the gap between each side with 0s
uint32_t padding_size = std::move(overlap_size);
for (uint32_t j = 0; j < padding_size; j++) {
_transformed_buffer.push_back(0);
}
_transformed_buffer.extend_back(left_window.data(), left_window.size());
}
else {
// some parts are overlapped
// mix it with the left window of the next
_mix_and_extend_no_window(_transformed_buffer, left_window, overlap_size);
}
// the end of the _transform buffer is the end of the left window, which is equivalenting to setting both of these values to 0
_next_right_window_overlap = 0;
}
else {
// since the left peak ends before the right peak ends, we have to overlap both the right and left peaks into the transformed data
_mix_and_extend_no_window(_transformed_buffer, left_window, overlap_size);
_next_right_window_overlap = overlap_size - curr_period;
}
last_peak = est_peaks[i];
}
}