init

2023-10-29 07:15:42 +06:00 · 2023-10-29 07:15:42 +06:00 · c7398da895
commit c7398da895
32 changed files with 97059 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+build/
--- a/49
+++ b/49
@ -0,0 +1,49 @@
+TARGET_EXEC := final_program.so
+
+BUILD_DIR := ./build
+SRC_DIRS := .
+
+# Find all the C and C++ files we want to compile
+# Note the single quotes around the * expressions. The shell will incorrectly expand these otherwise, but we want to send the * directly to the find command.
+SRCS := $(shell find $(SRC_DIRS) -name '*.cpp' -or -name '*.c' -or -name '*.s')
+
+# Prepends BUILD_DIR and appends .o to every src file
+# As an example, ./your_dir/hello.cpp turns into ./build/./your_dir/hello.cpp.o
+OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
+
+# String substitution (suffix version without %).
+# As an example, ./build/hello.cpp.o turns into ./build/hello.cpp.d
+DEPS := $(OBJS:.o=.d)
+
+# Every folder in ./src will need to be passed to GCC so that it can find header files
+INC_DIRS := $(shell find $(SRC_DIRS) -path .git -prune -type d)
+# Add a prefix to INC_DIRS. So moduleA would become -ImoduleA. GCC understands this -I flag
+INC_FLAGS := $(addprefix -I,$(INC_DIRS))
+
+# The -MMD and -MP flags together generate Makefiles for us!
+# These files will have .d instead of .o as the output.
+CPPFLAGS := $(INC_FLAGS) -MMD -MP -fPIC 
+
+# The final build step.
+$(BUILD_DIR)/$(TARGET_EXEC): $(OBJS)
+	$(CXX) -shared $(OBJS) -o $@ $(LDFLAGS)
+
+# Build step for C source
+$(BUILD_DIR)/%.c.o: %.c
+	mkdir -p $(dir $@)
+	$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
+
+# Build step for C++ source
+$(BUILD_DIR)/%.cpp.o: %.cpp
+	mkdir -p $(dir $@)
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@
+
+
+.PHONY: clean
+clean:
+	rm -r $(BUILD_DIR)
+
+# Include the .d makefiles. The - at the front suppresses the errors of missing
+# Makefiles. Initially, all the .d files will be missing, and we don't want those
+# errors to show up.
+-include $(DEPS)
--- a/common.h
+++ b/common.h
@ -0,0 +1,23 @@
+#ifndef MENGA_DSP_COMMON
+#define MENGA_DSP_COMMON
+
+#include <complex>
+
+
+typedef std::complex<float> Complex;
+
+namespace Mengu {
+
+// returns a Complex number with the same norm as x but with the angle phase
+template <typename T>
+std::complex<T> with_phase(const std::complex<T> &x, T phase);
+
+// A complex number with the same phase as x, but with distance from the origin amp
+template <typename T>
+std::complex<T> with_amp(const std::complex<T> &x, T amp) {
+    return x / std::sqrt(std::norm(x)) * amp;
+}
+
+}
+
+#endif
--- a/correlation.cpp
+++ b/correlation.cpp
@ -0,0 +1,92 @@
+#include "correlation.h"
+#include "common.h"
+#include <iostream>
+#include "mengumath.h"
+#include <cstdint>
+
+float Mengu::dsp::correlation(const Complex *s1, const Complex *s2, const int length, const int n) {
+    float total = 0;
+    for (uint32_t i = 0; i < length; i++) {
+        total += s1[i].real() * s2[n + i].real();
+    }
+
+    return total;
+}
+
+float Mengu::dsp::autocorrelation(const Complex *s, const int length, const int n) {
+    return correlation(s, s, length, n);
+}
+
+int Mengu::dsp::find_max_correlation(const Complex *s1, const Complex *s2, const int length, const int search_window_size) {
+    float max_corr = correlation(s1, s2, length, 0);
+    int max_lag = 0;
+
+    for (int i = 1; i < search_window_size; i++) {
+        float corr = correlation(s1, s2, length, i);
+
+        if (corr > max_corr) {
+            max_corr = corr;
+            max_lag = i;
+        }
+    }
+
+    return max_lag;
+}
+
+int Mengu::dsp::find_max_correlation_quad(const Complex *s1, const Complex *s2, const int length, const int search_window_size) {
+    float max_corr = -1e10;
+    int max_lag = 0;
+
+    float *scaled_s1 = new float[length];
+
+    for (int i = 0; i < length; i++) {
+        scaled_s1[i] = s1[i].real() * i * (length - i);
+    }
+
+    // do max iteration
+    for (int i = 0; i < search_window_size; i++) {
+        float corr = 0.0f; 
+        
+        for (int j = 0; j < length; j++) {
+            corr += scaled_s1[j] * s2[i + j].real();
+        }
+
+        if (corr > max_corr) {
+            max_corr = corr;
+            max_lag = i;
+        }
+    }
+
+    delete[] scaled_s1;
+
+    return max_lag;
+}
+
+std::vector<float> Mengu::dsp::calc_srhs(const float *envelope,
+                                         const int &size,
+                                         const int &min_freq_ind,
+                                         const int &max_freq_ind,
+                                         const int &n_harm,
+                                         const int &step) {
+    std::vector<float> output;
+    for (int freq_ind = min_freq_ind; freq_ind < max_freq_ind; freq_ind += step) {
+        output.push_back(calc_srh(envelope, size, freq_ind, n_harm));
+    }
+    return output;
+}
+
+float Mengu::dsp::calc_srh(const float *envelope, const int &size, const int &freq_ind, const int &n_harm) {
+    const int pos_n_harm = MIN(n_harm, size / freq_ind);
+    float pos_interference = 0;
+    for (int k = 1; k < pos_n_harm; k++) {
+        pos_interference += envelope[freq_ind * k];
+    }
+
+    const int neg_n_harm = MIN(n_harm, (int) ((float) size / freq_ind + 0.5));
+    float neg_interference = 0;
+    for (int k = 2; k < neg_n_harm; k++) {
+        neg_interference += envelope[(int) (freq_ind * k - 0.5)];
+    }
+
+    return pos_interference - neg_interference;
+}
--- a/correlation.h
+++ b/correlation.h
@ -0,0 +1,177 @@
+/**
+ * @file correlation.h
+ * @author 9exa
+ * @brief Correlation/Convolution functions between signals
+ * @version 0.1
+ * @date 2023-04-30
+ * 
+ * @copyright Copyright (c) 2023
+ */
+
+#ifndef MENGA_CORRELATION
+#define MENGA_CORRELATION
+
+#include "common.h"
+#include "fft.h"
+#include "linalg.h"
+#include "mengumath.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <complex>
+#include <cstdint>
+#include <vector>
+
+namespace Mengu {
+namespace dsp {
+
+// The (real finite non-circular) cross-correlation of two signals of equal length, on the offset n
+// basically just a dot product
+float correlation(const Complex *s1, const Complex *s2, const int length, const int n);
+
+// The (real finite non-circular) cross-correlation of of a signal on itself, on the offset n
+float autocorrelation(const Complex *s, const int length, const int n);
+
+// Find the offset/lag that corresponds to the max cross-correlation between s1 and s2 explored up to length
+// s2 is assumed to be at least length + search_window_size long
+int find_max_correlation(const Complex *s1, const Complex *s2, const int length, const int search_window_size);
+
+// Max correlation where portions toward the center are weighted more
+int find_max_correlation_quad(const Complex *s1, const Complex *s2, const int length, const int search_window_size);
+
+// find the sr harmonics of (the positive half of) a frequency amplitude spectrum
+std::vector<float> calc_srhs(const float *envelope,
+                             const int &size,
+                             const int &min_freq_ind,
+                             const int &max_freq_ind,
+                             const int &n_harm = 8,
+                             const int &step = 1);
+
+// the srh of only one frequency
+float calc_srh(const float *envelope, const int &size, const int &freq_ind, const int &n_harm);
+
+// performs and stores results of LinearPredictiveCoding. expects a fixed process size so it can be put on the stack
+template<uint32_t SampleSize, uint32_t NParams>
+class LPC {
+public:
+    LPC(): 
+        _fft(SampleSize),
+        // _b(NParams + 1),
+        // _autocovariance_slice(NParams + 1) {
+        _b{0},
+        _autocovariance_slice{0} {
+        _b[0] = 1;
+    }
+
+    // perform LPC on a sample and set up the intermediate variables
+    void load_sample(const Complex *sample) {
+        _fft.transform(sample, _freq_spectrum.data());
+
+        // assumes that sample are all real numbers; In general-purpose dsp this will cause bugs
+
+        // multiplication in the frequency domain is convolution (reversed correlation) in the real domain
+        std::array<Complex, SampleSize> freq_squared;
+        std::array<Complex, SampleSize> autocovariance_comp;
+        std::transform(
+            _freq_spectrum.cbegin(),
+            _freq_spectrum.cend(),
+            freq_squared.begin(),
+            [] (Complex f) { return std::norm(f); }
+        );
+        _fft.inverse_transform(freq_squared.data(), autocovariance_comp.data());
+        std::transform(
+            autocovariance_comp.cbegin(),
+            autocovariance_comp.cend(),
+            _autocovariance.begin(),
+            [] (Complex c) { return c.real(); }
+        );
+
+        std::copy(
+            _autocovariance.cbegin(),
+            _autocovariance.cbegin() + NParams + 1,
+            _autocovariance_slice.begin()
+        );
+
+        std::array<float, NParams + 1> a = solve_sym_toeplitz(_autocovariance_slice, _b);
+        std::array<Complex, SampleSize> a_comp{0};
+        const float a0 = a[0];
+        std::transform(a.cbegin(), a.cend(), a_comp.begin(),
+            [a0] (float f) { return Complex(f / a0); }
+        );
+        
+        std::array<Complex, SampleSize> A;
+        _fft.transform(a_comp.data(), A.data());
+
+        // calc envelope
+        std::transform(A.cbegin(), A.cend(), _envelope.begin(),
+            // try to prevent infs
+            [] (Complex c) { return 1.0f / (sqrt(std::norm(c))); }
+        );
+
+        // calce residuals
+        for (uint32_t i = 0; i < SampleSize; i++) {
+            _residuals[i] = std::sqrt(std::norm(_freq_spectrum[i] * A[i]));
+        }
+        // std::transform(_freq_spectrum.cbegin(), _freq_spectrum.cend(), A.cbegin(), _residuals.begin(),
+        //     [] (Complex x_val, Complex a_val) { return std::sqrt(std::norm(x_val * a_val)); }
+        // );
+    }
+    
+    // The dft of the loaded samples
+    const std::array<Complex, SampleSize> &get_freq_spectrum() const {
+        return _freq_spectrum;
+    }
+    
+    // The correlation of the signal with itself
+    const std::array<float, SampleSize> &get_autocovariance() const {
+        return _autocovariance;
+    }
+    
+    // Normalized copy of autocovariance
+    std::array<float, SampleSize> get_autocorrelation() const {
+        std::array<float, SampleSize> autocorrelation;
+        float max_cov = *std::max_element(_autocovariance);
+        std::transform(
+            _autocovariance.cbegin(),
+            _autocovariance.cend(),
+            autocorrelation.begin(),
+            [max_cov] (float cov) {return cov / max_cov;}
+        );
+    }
+
+    // Envelope of the frequency spectrum
+    const std::array<float, SampleSize> &get_envelope() const {
+        return _envelope;
+    }
+
+    // LCP residuals of the frequency
+    const std::array<float, SampleSize> &get_residuals() const {
+        return _residuals;
+    }
+
+    // useful for inversion
+    const FFT &get_fft() const {
+        return _fft;
+    }
+
+private:
+    // results to be getted
+    std::array<Complex, SampleSize> _freq_spectrum;
+    std::array<float, SampleSize> _autocovariance;
+    std::array<float, SampleSize> _envelope;
+    std::array<float, SampleSize> _residuals; 
+
+    // intermediates
+    FFT _fft;
+    std::array<float, NParams + 1> _b;
+    std::array<float, NParams + 1> _autocovariance_slice;
+    // std::vector<float> _b;
+    // std::vector<float> _autocovariance_slice;
+    
+};
+
+}
+}
+
+#endif
--- a/cyclequeue.h
+++ b/cyclequeue.h
@ -0,0 +1,263 @@
+/**
+ * @file cyclevector.h
+ * @author 9exa
+ * @brief An user-determined-size array where pushing an item removes one from the other end. 
+ *  Useful for not having to reallocate memory for rapidly appended contiguious data (i.e. sampling)
+ * @version 0.1
+ * @date 2023-02-21
+ * 
+ * @copyright Copyright (c) 2023
+ * 
+ */
+
+#ifndef MENGA_CYCLE_QUEUE
+#define MENGA_CYCLE_QUEUE
+
+#include <cstdint>
+#include <vector>
+#include <cstring>
+#include <iostream>
+#include "mengumath.h"
+
+namespace Mengu {
+
+template <class T>
+class CycleQueue {
+private:
+    T *_data = nullptr;
+    uint32_t _size = 0;
+    // what part of the data array is the front of the queue and first to be replaced on a push_back()
+    uint32_t _front = 0; 
+    uint32_t _capacity = 0;
+
+    inline uint32_t start_inc_down1() {
+        // assert(size != 0)
+        // equiv to posmod (_front + _size - 1) % _size but faster?????
+        return _front == 0 ? _size - 1 : _front - 1;
+    }
+
+public:
+    CycleQueue() {}
+    CycleQueue(uint32_t size) {
+        if (size > 0) {
+            resize(size);
+        }
+    }
+    // implement "copy" so they don't share the same data array
+    CycleQueue(const CycleQueue &from) {
+        _capacity = from._capacity;
+        _data = new T[_capacity];
+        std::cout << "from something\n";
+        memcpy(_data, from._data, _capacity * sizeof(T));
+
+        _size = from._size;
+        _front = from._front;
+    }
+
+    ~CycleQueue() {
+        delete[] _data;
+    }
+
+    inline uint32_t size() const {
+        return _size;
+    }
+
+
+    inline void push_back(const T &x) {
+        if (_size == 0) return;
+        _data[_front] = x;
+        _front = (_front + 1) % _size;        
+    }
+
+    inline void push_front(const T &x) {
+        if (_size == 0) return;
+        _front = (_front == 0) ? _size - 1 : _front - 1;
+        _data[_front] = x;
+    }
+
+    void resize(const uint32_t &new_size) {
+        if (_capacity < new_size) {
+            uint32_t new_cap = MAX(_capacity, 1);
+            while (new_cap < new_size) {
+                new_cap = new_cap << 1; // if you leave out the new_cap = the optimizer just skips this loop. 
+                                // Which makes this infinite loop bug hard to spot
+
+            }
+            reserve(new_cap);
+        } else if (new_size < _size) {
+            //move element of an array such that those at the front are removed
+            uint32_t shift;
+            uint32_t i;
+            if (_front <= new_size) {
+                // shift elements after _front down
+                shift = _size - new_size;
+                i = _front;
+            }
+            else {
+                // shift elements before _front up
+                shift = _front - new_size;
+                i = 0;
+                _front = 0;
+            }
+            for (; i < new_size; i++) {
+                _data[i] = _data[i + shift];
+            }
+
+            // set the deleted slots to be ready for future resizes
+            for (; i < _size; i++) {
+                _data[i] = T();
+            }
+        }
+        else { // (new_size > size) but reserve() has yet to initialize values
+            uint32_t i = _size;
+            for (; i < _front; i++) {
+                _data[(i + _size) % new_size] = _data[i];
+                _data[i] = T();
+            }
+        }
+        _size = new_size;
+    }
+
+    inline uint32_t capacity() const {
+        return _capacity;
+    }
+
+    const T *data() const {
+        return _data;
+    }
+
+    void reserve(const uint32_t &new_cap) {
+        if (_capacity < new_cap) {
+            T *new_data = new T[new_cap];
+            // copy and initialise new array
+            uint32_t i = 0;
+            if (_data != nullptr) {
+                for (; i < _capacity; i++) {
+                    new_data[i] = std::move(_data[i]);
+                }
+                delete[] _data;
+
+            }
+            
+            for (; i< new_cap; i++) {
+                new_data[i] = T();
+            }
+
+            _data = new_data;
+            _capacity = new_cap;
+            if (_size > _capacity) {
+                resize(_capacity);
+            }
+        }
+    }
+
+    // rotates the data array so that ir begins with _friont
+    void make_contiguous() {
+        T *new_data = new T[_capacity];
+
+        uint32_t i = 0;
+
+        if (_data != nullptr) {
+            for (; i < _size; i++) {
+                new_data[i] = std::move(_data[(i + _front) % _size]);
+            }
+            delete[] _data;
+        }
+
+        for (; i< _capacity; i++) {
+            new_data[i] = T();
+        }
+
+        _data = new_data;
+
+    }
+
+    void set(int i, const T &item) {
+        if (i > _size) {
+            std::cout << "tried to set item " << i << "on CycleQueue of size i. Out of range"<< std::endl;
+        }
+        _data[posmod(i +_front, _size)] = item;
+    }
+
+    const T get(int i) const {
+        if (i > _size) {
+            std::cout << "tried to get item " << i << "on CycleQueue of size i. Out of range"<< std::endl;
+        }
+        return _data[posmod(i + _front, _size)];
+    }
+
+    // just moves the front of the Ccle queue by an amount
+    void rotate(int by) {
+        _front = posmod(_front + by, _size);
+    }
+
+    //// Operators
+    inline T &operator[](int i) {
+        return _data[posmod(i + _front, _size)];
+    }
+
+    inline const T &operator[](int i) const {
+        // std::cout << (i + _front) % _size << std::endl;
+        return _data[posmod(i + _front, _size)];
+    }
+
+    CycleQueue &operator=(const CycleQueue &from) {
+        if (from._size != _size) {
+            resize(from._size);
+        }
+        _front = from._front;
+        memcpy(_data, from._data, _size);
+
+        return *this;
+    };
+
+
+
+    //// Conversions
+    // converts the first 'size' items into a contiguous array. -1 does the whole queue
+    T* to_array(T *out, int size = -1) const {
+        if (size == -1) {
+            size = _size;
+        }
+        for (uint32_t i = 0; i < size; i++) {
+            out[i] = _data[(i + _front) % _size];
+        }
+
+        return out;
+    }
+
+    // converts the first 'size' items into a vector. -1 does the whole queue
+    std::vector<T> to_vector(int size = -1) const {
+        if (size == -1) {
+            size = _size;
+        }
+
+        std::vector<T> out;
+        out.resize(size);
+        for (uint32_t i = 0; i < size; i++) {
+            out[i] = _data[(i + _front) % _size];
+        }
+
+        return out;
+    }
+};
+
+template<typename T>
+std::string to_string(const CycleQueue<T> &cq) {
+    using std::to_string;
+    std::string out_string;
+
+    if (cq.size() == 0) {
+        return out_string;
+    }
+
+    out_string.reserve(to_string(cq[0]).size() * cq.size());
+    for (uint32_t i = 0; i < cq.size(); i++) {
+        out_string += to_string(cq[i]);
+    }
+    return out_string;
+}
+
+};
+
+#endif
--- a/effect.cpp
+++ b/effect.cpp
@ -0,0 +1,31 @@
+#include "effect.h"
+#include "mengumath.h"
+
+
+Mengu::dsp::EffectChain::EffectChain(uint32_t buffer_size) {
+    _buffer_size = Mengu::last_pow_2(buffer_size);
+    _input_buffer.resize(_buffer_size);
+    _transformed_buffer.resize(_buffer_size);
+}
+
+Mengu::dsp::EffectChain::~EffectChain() {
+    for (auto effect: _effects) {
+        
+    }
+}
+
+void Mengu::dsp::EffectChain::push_signal(const Complex *input, const uint32_t &size) {
+    for (uint32_t i = 0; i < size; i++) {
+        _input_buffer.push_back(input[i]);
+    }
+}
+
+void Mengu::dsp::EffectChain::pop_transformed_signal(Complex *output, const uint32_t &size) {
+    for (uint32_t i = _buffer_size - size; i < _buffer_size; i++) {
+        output[i] = _transformed_buffer[i];
+    }
+}
+
+void Mengu::dsp::EffectChain::append_effect(Effect *effect) {
+    _effects.push_back(effect);
+}
--- a/effect.h
+++ b/effect.h
@ -0,0 +1,113 @@
+#ifndef MENGA_EFFECT
+#define MENGA_EFFECT
+
+#include <cstdint>
+#include "common.h"
+#include "cyclequeue.h"
+#include <vector>
+
+namespace Mengu {
+namespace dsp {
+
+// Types of Properties that an Effect has and how they can be edited by gui
+// If this was Rust (a better language) this would all be one enum
+enum EffectPropType {
+    Toggle, // Edited With a ToggleButton
+    Slider, // Edited with A Slider. A min and max value must be declared. Stores real number
+    Knob, // Edited with A Knob. A min and max value must be declared. Stores real number
+    Counter, // Edited with a counter. A step size must be declared. Stores real numbers, but often casted into an int
+};
+
+enum EffectPropContScale {
+    Linear,
+    Exp,
+};
+
+// Description for the editable properties of an Effect. 
+struct EffectPropDesc {
+    EffectPropType type;
+    const char *name;
+    const char *desc;
+    union {
+        struct {
+            float min_value;
+            float max_value;
+            float step_size;
+            EffectPropContScale scale;
+        } slider_data; // use by slider, knob and counter
+    };
+};
+
+// Data used to get and set EffectProperty data
+struct EffectPropPayload {
+    EffectPropType type;
+    union {
+        bool on; // Used by Toggle
+        float value; // used by slider, knob, and counter
+    };
+};
+
+
+// 
+
+// object that takes in the next value signal (time or frequency domain)
+// and can be queried for the next value in the process signal.
+class Effect {
+public:
+    virtual ~Effect() = default;
+    // tells an EffectChain what type of input the effect expects
+    enum InputDomain {
+        Time = 0,
+        Spectral = 1,
+        Frequency = 1
+    };    
+    virtual InputDomain get_input_domain() = 0;
+
+    // push new value of signal
+    virtual void push_signal(const Complex *input, const uint32_t &size) = 0;
+    // Last value of transformed signal
+    virtual uint32_t pop_transformed_signal(Complex *output, const uint32_t &size) = 0;
+    // number of samples that can be output given the current pushed signals of the Effect
+    virtual uint32_t n_transformed_ready() const = 0;
+    // resets state of effect to make it reading to take in a new sample
+    virtual void reset() = 0;
+    // The properties that this Effect exposes to be changed by GUI. 
+    // The index that they are put in is considered the props id
+    virtual std::vector<EffectPropDesc> get_property_descs() const = 0;
+    // Sets a property with the specified id the value declared in the payload
+    virtual void set_property(uint32_t id, EffectPropPayload data) = 0;
+    // Gets the value of a property with the specified id
+    virtual EffectPropPayload get_property(uint32_t id) const = 0;
+};
+
+// Represents a series of effects chained consequtivly. Processed on demand
+class EffectChain {
+public:
+    EffectChain(uint32_t buffer_size);
+    ~EffectChain();
+
+    // push a new signal. Unlike an Effect the pushed signal can be an arbitrary size as it is stored in a ringbuffer
+    void push_signal(const Complex *input, const uint32_t &size);
+
+    // Last values of transformed signal
+    void pop_transformed_signal(Complex *output, const uint32_t &size);
+
+    // add an Effect
+    void append_effect(Effect *effect);
+
+    // apply all effects
+    void process();
+
+private:
+    uint32_t _buffer_size;
+    CycleQueue<Complex> _input_buffer;
+    std::vector<Complex> _transformed_buffer;
+    std::vector<Effect *> _effects;
+};
+
+}
+}
+
+
+
+#endif
--- a/fastmath.h
+++ b/fastmath.h
@ -0,0 +1,41 @@
+#ifndef MENGA_FAST_MATH
+#define MENGA_FAST_MATH
+#include "mengumath.h"
+#include <cmath>
+
+namespace Mengu {
+namespace dsp {
+// PADE approximations. Only accurate on certain ranges
+// use between -pi to +pi
+template<typename FloatType>
+FloatType sin(FloatType x) {
+    x = std::fmod(x + MATH_PI, MATH_TAU) - MATH_PI;
+    FloatType x2 = x * x;
+    FloatType numerator = x * ((11 * x2) * x2 + 2520);
+    FloatType denominator = 60 * x2 + 2520;
+    return numerator / denominator;
+}
+
+// use between -pi to +pi
+template<typename FloatType>
+FloatType cos(FloatType x) {
+    x = std::fmod(x + MATH_PI, MATH_TAU) - MATH_PI;
+    FloatType x2 = x * x;
+    FloatType numerator = 131040 + x2 * (62160 + x2 * (3814 - x2 * 59));
+    FloatType denominator = 131040 + x2 * (3360 + x2 * 34);
+    return numerator / denominator;
+}
+template<typename FloatType>
+FloatType tan(FloatType x) {
+    x = std::fmod(x + MATH_PI, MATH_TAU) - MATH_PI;
+    FloatType x2 = x * x;
+    FloatType numerator = x * (-135135 + x2 * (17325 + x2 * (-378 + x2)));
+    FloatType denominator = -135135 + x2 * (62370 + x2 * (-3150 + 28 * x2));
+    return numerator / denominator;
+}
+
+} // namespace dsp
+} // namespace Mengu
+
+
+#endif
--- a/fft.cpp
+++ b/fft.cpp
@ -0,0 +1,155 @@
+#include "fft.h"
+#include "common.h"
+#include "fastmath.h"
+#include "mengumath.h"
+#include <cstdint>
+#include <vector>
+
+
+
+using namespace Mengu;
+
+// helpers
+
+static void conjugate_arr(Complex *arr, const uint32_t a_size) {
+    for (uint32_t i = 0; i < a_size; i++) {
+        arr[i].imag(-arr[i].imag());
+    }
+}
+
+std::vector<Complex> dsp::SFT::perform(const std::vector<Complex> &input) {
+    const size_t outsize = input.size();
+    const Complex mitau(0, -MATH_TAU);
+
+    Complex *es = new Complex[outsize];
+    for (size_t j = 0; j < outsize; j++) {
+        float n = (float) j / outsize;
+        es[j] = std::exp(mitau * n);
+        // std::cout << es[j] << std::endl;
+    }
+
+    std::vector<Complex> outvec;
+    outvec.resize(outsize);
+    for (size_t k = 0; k < outsize; k++) {
+        for (size_t n = 0; n < outsize; n++) {
+            outvec[k] += input[n] * es[n * k % outsize];
+        }
+        outvec[k] /= outsize;
+    }
+
+    delete[] es;
+
+    return outvec;
+}
+
+dsp::FFT::FFT(uint32_t size) {
+    _fft_size = is_pow_2(size) ? size : next_pow_2(size);
+    _size = size;
+    _es = new complex<float>[_fft_size];
+    for (uint32_t i = 0; i < _fft_size; i++) {
+        _es[i] = std::polar(1.0f, -(float) MATH_TAU * i / _fft_size);
+    }
+    _inp_vec = new Complex[_fft_size];
+    _out_vec = new Complex[_fft_size];
+}
+
+dsp::FFT::~FFT() {
+    delete[] _es;
+    delete[] _inp_vec;
+    delete[] _out_vec;
+}
+
+void dsp::FFT::transform(const Complex *input, Complex *output) const {
+    for (uint32_t i = 0; i < _size; i++) {
+        _inp_vec[i] = input[i];
+    }
+    for (uint32_t i = _size; i < _fft_size; i++) {
+        _inp_vec[i] = Complex(0.0f);
+    }
+
+    _transform_rec(_inp_vec, _out_vec, _fft_size, 1);
+
+    for (uint32_t i = 0; i < _size; i++) {
+        output[i] = _out_vec[i] / sqrtf((float) _fft_size);
+    }
+}
+
+void dsp::FFT::transform(const CycleQueue<Complex>&input, Complex *output) const {
+    // _transform_rec(input, 0, output, _size, 1);
+    input.to_array(_inp_vec);
+
+    // zero pad the  input
+    for (uint32_t i = input.size(); i < _fft_size; i++) {
+        _inp_vec[i] = 0.0f;
+    }
+    
+
+    _transform_rec(_inp_vec, _out_vec, _fft_size, 1);
+    for (uint32_t i = 0; i < _size / 2; i++) {
+        output[i] = _out_vec[i] / sqrtf((float) _fft_size);
+    }
+}
+
+void dsp::FFT::inverse_transform(const Complex *input, Complex *output) const {
+
+    for (uint32_t i = 0; i < _size; i++) {
+        _inp_vec[i] = input[i];
+    }
+    // zero pad input buffer
+    for (uint32_t i = _size; i < _fft_size; i++) {
+        _inp_vec[i] = 0.0f;
+    }
+
+
+    conjugate_arr(_inp_vec, _size);
+    _transform_rec(_inp_vec, _out_vec, _fft_size, 1);
+    conjugate_arr(_out_vec, _size);
+
+    for (uint32_t i = 0; i < _size; i++) {
+        // only inverse-transforming first half of the freq spectrum. so multiply by 2
+        output[i] = _out_vec[i] / sqrtf((float) _fft_size);
+    }
+}
+
+void dsp::FFT::_transform_rec(const Complex *input, Complex *output, const uint32_t N, const uint32_t stride) const {
+    // recursive implementaion of radix-2 fft
+    // Thanks wikipedia
+    if (N == 1) {
+        output[0] = input[0];
+        return;
+    }
+
+    _transform_rec(input, output, N / 2, 2 * stride);
+    _transform_rec(input + stride, output + N / 2, N / 2, 2 * stride);
+    for (uint32_t k = 0; k < N / 2; k++) {
+        const Complex e = _es[k * stride];
+        const Complex p = output[k]; // kth even
+        const Complex q = e * output[k + N / 2]; //kth odd
+        output[k] = p + q;
+        output[k + N / 2] = p - q;
+    }
+}
+
+void dsp::FFT::_transform_rec(const CycleQueue<Complex> &input, uint32_t inp_ind, Complex *output, const uint32_t N, const uint32_t stride) const{
+    // recursive implementaion of radix-2 fft
+    // Thanks wikipedia
+    if (N == 1) {
+        output[0] = input[inp_ind];
+        return;
+    }
+
+    _transform_rec(input, inp_ind, output, N / 2, 2 * stride);
+    _transform_rec(input, inp_ind + stride, output + N / 2, N / 2, 2 * stride);
+
+    for (uint32_t k = 0; k < N / 2; k++) {
+        const Complex e = _es[k * stride];
+        const Complex p = output[k]; // kth even
+        const Complex q = e * output[k + N / 2]; //kth odd
+        output[k] = p + q;
+        output[k + N / 2] = p - q;
+    }
+}
+
+const Complex *dsp::FFT::get_es() const {
+    return _es;
+}
--- a/fft.h
+++ b/fft.h
@ -0,0 +1,83 @@
+#ifndef MENGA_FFT
+#define MENGA_FFT
+
+#include <valarray>
+#include <vector>
+#include "common.h"
+#include "cyclequeue.h"
+
+typedef std::complex<float> Complex;
+typedef std::valarray<Complex> CArray;
+
+namespace Mengu {
+namespace dsp {
+using namespace std;
+
+class SFT {
+    // Slow O(N^2) Fourier Transform
+public:
+    // SFT();
+    // out-of-place ft 
+    vector<Complex> perform(const vector<Complex> &input);
+private:
+    vector<Complex> _es;
+    // vector<Complex> _sines;
+};
+
+class FFTBuffer; // foward declaration
+
+class FFT {
+public:
+    // Fast Fourier transform that uses lookup tables and performs onto an established array
+    // Only works for arrays of a declared size. Designed to be cached and use many times
+    // To use it for different sample rates/lengths, create a new FFT of a different size
+
+    // The size needs to be a power of 2 in order for this to work properly
+    FFT(uint32_t size);
+    ~FFT();
+
+
+    // Both arrays must be at least as long as _size
+    void transform(const Complex *input, Complex *output) const;
+    void transform(const CycleQueue<Complex> &input, Complex *output) const; // to avoid having to reallocate array
+    
+    //unlike 'transform' this edits *input to avoid unnecissary memory allocation
+    void inverse_transform(const Complex *input, Complex *output) const;
+
+    const Complex *get_es() const;
+
+    uint32_t size() const { return _size; }
+
+private:
+    Complex *_es;
+    uint32_t _size;
+    uint32_t _fft_size; // size of arrays used in fft computations. must be a power of 2
+
+    // cache buffers used in intermediate calculation
+    Complex *_inp_vec;
+    Complex *_out_vec;
+
+    void _transform_rec(const Complex *input, Complex *output, const uint32_t N, const uint32_t stride) const;
+    void _transform_rec(const CycleQueue<Complex> &input, uint32_t inp_ind, Complex *output, const uint32_t N, const uint32_t stride) const;
+
+    friend class FFTBuffer;
+public:
+    
+
+};
+
+class FFTBuffer {
+    // class designed to do update a n FFT of a signal in real time
+public:
+    void push_signal(const Complex *x, const uint32_t &size) {}
+
+    // copies the last 'size'
+    void pop_transformed_signal(const Complex *output, const uint32_t &size) {}
+private:
+    CycleQueue<Complex> _buffer;
+};
+
+};
+};
+
+#endif
--- a/filter.cpp
+++ b/filter.cpp
@ -0,0 +1,31 @@
+#include "filter.h"
+#include "common.h"
+#include <complex>
+#include <cstdint>
+
+using namespace Mengu;
+using namespace dsp;
+
+Complex Mengu::dsp::quad_filter_trans(Complex z, float a1, float a2, float b0, float b1, float b2) {
+    Complex z1 = std::conj(z); //z^-1
+    Complex z2 = z1 * z1;
+    return (b0 + b1 * z1 + b2 * z2) / (1.0f + a1 * z1 + a2 * z2);
+}
+
+BiquadFilter::BiquadFilter(float pa1, float pa2, float pb0, float pb1, float pb2) :
+    a1(pa1), a2(pa2), b0(pb0), b1(pb1), b2(pb2) {}
+
+void BiquadFilter::transform(const float *input, float *output, uint32_t size) {
+    for (uint32_t i = 0; i < size; i++) {
+        float m = input[i] - a1 * _last_ms[_last_offset] - a2 * _last_ms[(_last_offset + 1) % 2];
+        output[i] = b0 * m + b1 * _last_ms[_last_offset] + b2 * _last_ms[(_last_offset + 1) % 2];
+
+        // push the last m
+        _last_offset = (_last_offset + 1) % 2;
+        _last_ms[_last_offset] = m;
+    }
+}
+
+void BiquadFilter::reset() {
+    _last_ms[0] = _last_ms[1] = 0.0f; 
+}
--- a/filter.h
+++ b/filter.h
@ -0,0 +1,44 @@
+/**
+ * @file filter.h
+ * @author your name (you@domain.com)
+ * @brief Filter functions
+ * @version 0.1
+ * @date 2023-06-10
+ * 
+ * @copyright Copyright (c) 2023
+ * 
+ */
+#ifndef MENGU_FILTER
+#define MENGU_FILTER
+
+#include "common.h"
+#include <cstdint>
+namespace Mengu {
+namespace dsp {
+// the transfer function for a quadratic filter with denominator coefficients a1, a2 and numerator cofficients b0, b1, b2
+// assumes z is on a unit circle
+Complex quad_filter_trans(Complex z, float a1, float a2, float b0, float b1, float b2);
+
+
+class BiquadFilter {
+public:
+    float a1;
+    float a2;
+    float b0;
+    float b1;
+    float b2;
+    
+    BiquadFilter(float pa1, float pa2, float pb0, float pb1, float pb2);
+
+    void transform(const float *input, float *output, uint32_t size);
+    void reset();
+    
+private:
+    // store the last 2 raw inputs and intermediaries
+    float _last_ms[2] {0};
+    uint32_t _last_offset = 0;
+};
+
+}
+}
+#endif
--- a/formantshifter.cpp
+++ b/formantshifter.cpp
@ -0,0 +1,147 @@
+
+
+#include "formantshifter.h"
+#include "common.h"
+#include "effect.h"
+#include "interpolation.h"
+#include "loudness.h"
+#include "mengumath.h"
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <complex>
+#include <cstdint>
+
+using namespace Mengu;
+using namespace dsp;
+
+
+
+LPCFormantShifter::LPCFormantShifter() {
+    _transformed_buffer.resize(OverlapSize);
+}
+
+Effect::InputDomain LPCFormantShifter::get_input_domain() {
+    return InputDomain::Time;
+};
+
+
+// push new value of signal
+void LPCFormantShifter::push_signal(const Complex *input, const uint32_t &size) {
+    _raw_buffer.extend_back(input, size);
+}
+
+// Last value of transformed signal
+uint32_t LPCFormantShifter::pop_transformed_signal(Complex *output, const uint32_t &size) {    
+    std::array<Complex, ProcSize> freq_shifted {0};
+    std::array<Complex, ProcSize> samples {0};
+    std::array<Complex, ProcSize> shifted_samples {0};
+
+    while (_raw_buffer.size() >= ProcSize && _transformed_buffer.size() < size + OverlapSize) {
+        
+        // Load sample segment. 0 unused frames
+        _raw_buffer.to_array(samples.data(), ProcSize);
+
+        // do the shifty
+        _lpc.load_sample(samples.data());
+        _shift_by_env(
+            _lpc.get_freq_spectrum().data(), 
+            freq_shifted.data(), 
+            _lpc.get_envelope().data(),
+            _shift_factor
+        );
+        _lpc.get_fft().inverse_transform(freq_shifted.data(), shifted_samples.data());
+
+        // Make downward shifts not quieter and upward shifts not louder
+        // Automatically adjusts for the fact that only half of the frequency spectrum is used
+        _loudness_norm.normalize(shifted_samples.data(), samples.data(), shifted_samples.data());
+
+        // copy to output
+        mix_and_extend(_transformed_buffer, shifted_samples, OverlapSize, hamming_window);
+
+        _raw_buffer.pop_front_many(nullptr, HopSize);
+
+    }
+
+    if (_transformed_buffer.size() < size + OverlapSize) {
+        // Not enough samples to output anything
+        return 0;
+    }
+    else {
+        return _transformed_buffer.pop_front_many(output, size);
+    }
+}
+
+// number of samples that can be output given the current pushed signals of the Effect
+uint32_t LPCFormantShifter::n_transformed_ready() const {
+    return _raw_buffer.size();
+};
+
+// resets state of effect to make it reading to take in a new sample
+void LPCFormantShifter::reset() {
+    _raw_sample_filter.reset();
+    _shifted_sample_filter.reset();
+}
+
+// The properties that this Effect exposes to be changed by GUI. 
+// The index that they are put in is considered the props id
+std::vector<EffectPropDesc> LPCFormantShifter::get_property_descs() const {
+    return {
+        EffectPropDesc {
+            .type = EffectPropType::Slider,
+            .name = "Formant Shift",
+            .desc = "Scales the formant of pushed signals by this amount",
+            .slider_data = {
+                .min_value = 0.5,
+                .max_value = 2,
+                .scale = Exp,
+            }
+        }
+    };
+}
+
+// Sets a property with the specified id the value declared in the payload
+void LPCFormantShifter::set_property(uint32_t id, EffectPropPayload data) {
+    switch (id) {
+        case 0:
+            if (data.type == Slider) {
+                _shift_factor = data.value;
+            }
+            break;
+        default:
+            break;
+    }
+}
+
+
+// Gets the value of a property with the specified id
+EffectPropPayload LPCFormantShifter::get_property(uint32_t id) const {
+    return EffectPropPayload {
+        .type = Slider,
+        .value = _shift_factor,
+    };
+};
+
+// rescale an array in the freqency domain by the shape of an envelope if it were to be shifted up or down
+void LPCFormantShifter::_shift_by_env(const Complex *input, 
+                          Complex *output, 
+                          const float *envelope, 
+                          const float shift_factor) {
+    for (uint32_t i = 0; i < ProcSize / 2; i++) {
+        uint32_t shifted_ind =  i / shift_factor;
+        if (shifted_ind < ProcSize / 2) {
+            float correction = envelope[shifted_ind] / envelope[i];
+            if (!std::isfinite(correction)) {
+                output[i] = Complex(0.0f);
+            }
+            else {
+                output[i] = correction * input[i];
+            }
+        }
+        else {
+            // output[i] = input[size - 1];
+            output[i] = Complex(0.0f);
+        }
+    }
+}
+
--- a/formantshifter.h
+++ b/formantshifter.h
@ -0,0 +1,75 @@
+/**
+ * @file formantshifter.h
+ * @author 9exa
+ * @brief Shifts the formants of a signal in real time.
+ * @date 2023-06-08 
+ */
+
+#ifndef MENGU_FORMANT_SHIFTER
+#define MENGU_FORMANT_SHIFTER
+
+#include "common.h"
+#include "correlation.h"
+#include "effect.h"
+#include "loudness.h"
+#include "vecdeque.h"
+#include <cstdint>
+
+namespace Mengu {
+namespace dsp {
+
+// shifts formants using lpc envelope estimation
+class LPCFormantShifter: public Effect {
+public:
+    LPCFormantShifter();
+    // tells an EffectChain what type of input the effect expects
+    virtual InputDomain get_input_domain() override;
+
+    // push new value of signal
+    virtual void push_signal(const Complex *input, const uint32_t &size) override;
+
+    // Last value of transformed signal
+    virtual uint32_t pop_transformed_signal(Complex *output, const uint32_t &size) override;
+
+    // number of samples that can be output given the current pushed signals of the Effect
+    virtual uint32_t n_transformed_ready() const override;
+    
+    // resets state of effect to make it reading to take in a new sample
+    virtual void reset() override;
+
+    // The properties that this Effect exposes to be changed by GUI. 
+    // The index that they are put in is considered the props id
+    virtual std::vector<EffectPropDesc> get_property_descs() const override;
+    
+    // Sets a property with the specified id the value declared in the payload
+    virtual void set_property(uint32_t id, EffectPropPayload data) override;
+
+    // Gets the value of a property with the specified id
+    virtual EffectPropPayload get_property(uint32_t id) const override;
+private:
+    VecDeque<Complex> _raw_buffer;
+    VecDeque<Complex> _transformed_buffer;
+
+    static constexpr uint32_t ProcSize = 1 << 11;
+    static constexpr uint32_t HopSize = ProcSize * 4 / 5;
+    static constexpr uint32_t OverlapSize = ProcSize - HopSize;
+
+    LPC<ProcSize, 60> _lpc;
+
+    void _shift_by_env(const Complex *input, 
+                          Complex *output, 
+                          const float *envelope, 
+                          const float shift_factor);
+
+    float _shift_factor = 1.0f;
+
+    // Amplifies the formant_shifted samples so they have the same LUFS loudness as the raw_sample
+    LoudnessNormalizer<Complex, ProcSize, 2> _loudness_norm;
+
+    LUFSFilter _raw_sample_filter;
+    LUFSFilter _shifted_sample_filter;
+};
+
+}
+}
+#endif
--- a/interpolation.h
+++ b/interpolation.h
@ -0,0 +1,96 @@
+#ifndef MENGA_INTERPOLATION
+#define MENGA_INTERPOLATION
+
+#include "mengumath.h"
+#include "fastmath.h"
+#include <cmath>
+#include <cstdint>
+// Helper functions for various interpolation and windowing methods
+
+namespace Mengu {
+namespace dsp{
+
+// The hann function
+inline float hann(float a0, float x) {
+    return a0 - (1.0 - a0) * std::cos(MATH_TAU * x);
+    // equals
+    // a - (1-a) (c2 -s2)
+    // a (1 - c2 + s2) - (c2 - s2)
+    // a (2s2) - 2s2 - 1
+    // (a - 1) 2 s2
+}
+
+// root of the hann function designed to be used once each before and after processing
+inline float hann_root(float a0, float x) {
+    return std::sqrt(2.0 *(a0 - 1)) * std::sin(MATH_PI * x);
+}
+
+// the hann function, centered around 0
+// inline float hann_centered(float a0, float x) {
+//     return hann(a0, (x + 0.5f));
+// }
+
+
+// windows are 1 at 1, 0 at 0
+inline float hann_window(float x) {
+    return hann(0.5, 0.5f *  x);
+}
+
+inline float hamming_window(float x) {
+    return hann((float)25 / 46, 0.5f * x);
+}
+
+// windows are 1 at 1, 0 at 0
+inline float hann_window_root(float x) {
+    return std::sin(0.5 * MATH_PI * x);
+}
+
+// does windowing to both ends of a sequence, in place
+template<class T>
+inline void window_ends(T* a, uint32_t a_size, uint32_t window_size, float window_f (float)) {
+    window_size = MIN(a_size / 2, window_size);
+    for (uint32_t i = 0; i < window_size; i++) {
+        const float w = window_f((float) i / window_size);
+
+        a[i] *= w;
+        a[a_size - 1 - i] *= w;
+    }
+}
+
+// added the tail and head of an array after applying a window function to bothe sides
+template<class T>
+inline void overlap_add(const T *prev, const T *next, T *output, uint32_t size, float window_f (float)) {
+    for (uint32_t i = 0; i < size; i++) {
+        const float w = window_f((float) i / size);
+
+        output[i] = w * next[i] + (1- w) * prev[i];
+    }
+}
+
+// overlap and extend after applying a window function first
+// overlap_size may be larger than new_data.size(), in which case only new_data.size() is added. the offset is the same
+template<class T, class NewT>
+inline void mix_and_extend(T &array, const NewT &new_data, const uint32_t &overlap_size, float window_f (float)) {
+    
+    uint32_t i = 0;
+
+    for(; i < MIN(overlap_size, new_data.size()); i++) {
+        float w = (float) i / overlap_size;
+        float w1 = window_f(w);
+        float w2 = window_f(1.0-w);
+        w2 = 1.0f - w1;
+        uint32_t ind = array.size() - overlap_size + i;
+        array[ind] = array[ind] * w2 + new_data[i] * w1;
+    }
+
+    for (; i < new_data.size(); i++) {
+        array.push_back(new_data[i]);
+    }
+
+}
+
+}
+}
+
+
+#endif
--- a/linalg.cpp
+++ b/linalg.cpp
@ -0,0 +1,50 @@
+#include "linalg.h"
+#include <algorithm>
+#include <array>
+#include <vector>
+
+
+
+std::vector<float> Mengu::dsp::solve_sym_toeplitz(const std::vector<float> &cols, const std::vector<float> &y) {
+
+    // good ol dynamic programming to repeatedly do each step of levinson_recursion consequtively
+    std::vector<float> backward_vec = {1.0f / cols.at(0)}; // y.size  assumed small, so use vec
+    backward_vec.reserve(y.size());
+
+    std::vector<float> forward_vec(1);
+    
+    float error = 0.0f;
+
+    std::vector<float> result = {y.at(0) / cols.at(0)};
+    result.reserve(y.size());
+
+    for (int n = 1; n < y.size(); n++) {
+        // calculate the current error and backward vec
+        std::copy(backward_vec.crbegin(), backward_vec.crend(), forward_vec.begin());
+        error = dot(cols.data() + 1, backward_vec.data(), n);
+
+        float denom = 1.0f / (1 - error * error);
+
+        scalar_mul_inplace(denom, backward_vec.data(), backward_vec.size());
+        backward_vec.emplace(backward_vec.begin(), 0);
+
+
+        scalar_mul_inplace(-error * denom, forward_vec.data(), forward_vec.size());
+        forward_vec.push_back(0);
+
+        vec_add_inplace(forward_vec, backward_vec);
+
+
+        // calculate this iterations result
+        auto cols_iter = cols.crbegin() + (cols.size() - n - 1); // the elements n to 1
+        float result_error = dot(cols_iter, cols.crend() - 1, result.cbegin());
+
+        std::vector<float> scaled_backward_vec = scalar_mul(y[n] - result_error, backward_vec);
+        result.push_back(0);
+
+        vec_add_inplace(scaled_backward_vec, result);
+    }
+
+    return result;
+}
+
--- a/linalg.h
+++ b/linalg.h
@ -0,0 +1,148 @@
+/**
+ * @file linalg.h
+ * @author 9exa
+ * @brief objects and algorithms to solve linear equations
+ * @version 0.1
+ * @date 2023-05-04
+ * 
+ * @copyright Copyright (c) 2023
+ * 
+ */
+#ifndef MENGA_LINALG
+#define MENGA_LINALG
+
+
+#include <array>
+#include <cstddef>
+#include <vector>
+#include <iostream>
+
+namespace Mengu {
+namespace dsp {
+
+//2-D matrix
+
+// dot_product
+
+// c-style dot product
+template<typename T>
+inline T dot(const T *a, const T *b, const int size) {
+    T total = T();
+    for (int i = 0; i < size; i++) {
+        total += a[i] * b[i];
+    }
+
+    return total;
+}
+
+// do product on iterator
+template<class InputIt1, class InputIt2>
+inline float dot(InputIt1 afirst, InputIt1 alast, InputIt2 bfirst) {
+    float total = 0.0f;
+    while (afirst != alast) {
+        total += *afirst * *bfirst;
+
+        afirst++;
+        bfirst++;
+    }
+
+    return total;
+}
+
+template<typename T>
+std::vector<T> scalar_mul(const T lambda, const std::vector<T> &a) {
+    std::vector<T> scaled;
+    for (T x: a) {
+        scaled.push_back(lambda * x);
+    }
+    return scaled;
+}
+
+template<typename T, size_t N>
+std::array<T, N> scalar_mul(const T lambda, const T *a, size_t size) {
+    std::array<T, N> scaled;
+    for (size_t i = 0; i < size; i++) {
+        scaled[i] = lambda * a[i];
+    }
+    return scaled;
+}
+
+template<typename T>
+void scalar_mul_inplace(const T lambda, T *a, const int size) {
+    for (int i = 0; i < size; i++) {
+            a[i] = lambda * a[i];
+        }
+}
+
+// add b to a inplace
+template<typename T>
+void vec_add_inplace(const std::vector<T> &b, std::vector<T> &a) {
+    for (int i = 0; i < a.size(); i++) {
+        a[i] += b.at(i);
+    }
+}
+template<typename T>
+void vec_add_inplace(const T *b, T *a, const int size) {
+    for (int i = 0; i < size; i++) {
+        a[i] += b[i];
+    }
+}
+
+// Solves a system of equations where the matrix is a symmetric TOEplitz matrix,
+// The matrix is represented as a list of its column values
+std::vector<float> solve_sym_toeplitz(const std::vector<float> &cols, const std::vector<float> &y);
+
+
+template <typename T, size_t N>
+std::array<T, N> solve_sym_toeplitz(const std::array<T, N> &cols, const std::array<T, N> &y) {
+    // good ol dynamic programming to repeatedly do each step of levinson_recursion consequtively
+    std::array<T, N> backward_vec;
+    std::array<T, N> forward_vec;
+    backward_vec[backward_vec.size() - 1] = forward_vec[0] = 1.0 / cols.at(0);
+    
+    float error = 0.0f;
+
+    std::array<T, N> result;
+    result[0] = {y.at(0) / cols.at(0)};
+
+    for (int n = 1; n < N; n++) {
+        // calculate the current error and backward vec
+        std::copy(backward_vec.crbegin(), backward_vec.crbegin() + n, forward_vec.begin());
+        error = dot(cols.data() + 1, backward_vec.data() + (N - n), n);
+
+        float denom = 1.0f / (1 - error * error);
+
+        scalar_mul_inplace(denom, backward_vec.data() + (N - n), n);
+        backward_vec[N - 1 - n] = T();
+
+
+        scalar_mul_inplace(-error * denom, forward_vec.data(), n);
+        forward_vec[n] = T();
+
+        vec_add_inplace(forward_vec.data(), backward_vec.data() + (N - n - 1), n + 1);
+
+
+        // calculate this iterations result
+        auto cols_iter = cols.crbegin() + (cols.size() - n - 1); // the elements n to 0
+        float result_error = dot(cols_iter, cols.crend() - 1, result.cbegin());
+
+        std::array<T, N> scaled_backward_vec = scalar_mul<T, N>(
+            y[n] - result_error, 
+            backward_vec.data() + (N - n - 1), 
+            n + 1);
+        result[n] = T();
+
+        vec_add_inplace(scaled_backward_vec.data(), result.data(), n + 1);
+    }
+
+    return result;
+}
+
+
+}
+}
+
+//
+
+
+#endif
--- a/loudness.cpp
+++ b/loudness.cpp
@ -0,0 +1,67 @@
+#include "loudness.h"
+#include "filter.h"
+#include "common.h"
+#include "mengumath.h"
+#include <complex>
+#include <cstdint>
+
+// Coefficients for LUFS_freq filteters
+// High shelf filter stage
+#define S1_A1 -1.69065929318241
+#define S1_A2 0.73248077421585
+#define S1_B0 1.53512485958697
+#define S1_B1 -2.69169618940638
+#define S1_B2 1.19839281085285
+// High pass Filter stage
+#define S2_A1 -1.99004745483398
+#define S2_A2 0.99007225036621
+#define S2_B0 1.0
+#define S2_B1 -2.0
+#define S2_B2 1.0
+
+#define LUFS_DEFAULT_SAMPLE_RATE 48000
+
+using namespace Mengu;
+using namespace dsp;
+
+float Mengu::dsp::LUFS_freq(Complex *freqs, uint32_t size, uint32_t sample_rate) {
+    // Adjust to the custum sample rate
+    float sample_rate_correction = (float) sample_rate / LUFS_DEFAULT_SAMPLE_RATE * 2;
+
+    float total_amp2 = 0.0f; // total squared amplitude of each freq bin
+    for (uint32_t i = 0; i < size; i++) {
+        // argument for the transfer functions
+        float omega = (float) MATH_PI * i / size * sample_rate_correction;
+        Complex z = std::polar(1.0f, omega);
+
+        // filtered frequency
+        Complex y = freqs[i] * quad_filter_trans(z, S1_A1, S1_A2, S1_B0, S1_B1, S1_B2);
+        y = y * quad_filter_trans(z, S2_A1, S2_A2, S2_B0, S2_B1, S2_B2);
+
+        total_amp2 += std::norm(y);
+    }
+
+    return -0.691 + 10 * Mengu::log10(total_amp2 / size);
+
+}
+
+Complex Mengu::dsp::LUFS_filter_transfer(float freq) {
+    float omega = (float) MATH_PI  * freq / LUFS_DEFAULT_SAMPLE_RATE * 2;
+    Complex z = std::polar(1.0f, omega);
+    Complex y = quad_filter_trans(z, S1_A1, S1_A2, S1_B0, S1_B1, S1_B2);
+    return y * quad_filter_trans(z, S2_A1, S2_A2, S2_B0, S2_B1, S2_B2);
+}
+
+LUFSFilter::LUFSFilter(): 
+    _high_shelf_filter(S1_A1, S1_A2, S1_B0, S1_B1, S1_B2),
+    _high_pass_filter(S2_A1, S2_A2, S2_B0, S2_B1, S2_B2) {}
+
+void LUFSFilter::transform(const float *input, float *output, uint32_t size) {
+    _high_shelf_filter.transform(input, output, size);
+    _high_pass_filter.transform(output, output, size);
+}
+
+void LUFSFilter::reset() {
+    _high_shelf_filter.reset();
+    _high_pass_filter.reset();
+}
--- a/loudness.h
+++ b/loudness.h
@ -0,0 +1,105 @@
+/**
+ * @file loudness.h
+ * @author your name (you@domain.com)
+ * @brief Functions for calculating loudness in a signal
+ * @version 0.1
+ * @date 2023-06-10
+ * 
+ * @copyright Copyright (c) 2023
+ * 
+ */
+
+#ifndef MENGU_LOUDNESS
+#define MENGU_LOUDNESS
+
+#include "common.h"
+#include "filter.h"
+#include <cstdint>
+#include <stdint.h>
+
+namespace Mengu {
+namespace dsp {
+
+// Loudness Units relative to Full Scale of a sample in the frequency domain
+// first (positive) half of the frequency spectrum only
+// Only supports 1 channel
+float LUFS_freq(Complex *freqs, uint32_t size, uint32_t sample_rate = 48000);
+
+// The value of the transfer function associated with the described frequency bin
+Complex LUFS_filter_transfer(float freq);
+
+// Performs the filter step associated with LUFS
+class LUFSFilter {
+public:
+    LUFSFilter();
+
+    void transform(const float *input, float *output, uint32_t size);
+
+    void reset();
+
+private:
+    BiquadFilter _high_shelf_filter; // Stage 1 filter
+    BiquadFilter _high_pass_filter; // Stage 2 filter
+};
+
+// Scales a raw sample to have the same Loudness (in LUFS) as a reference sample
+// The raw sample and reference sample are assumed to come from their own persistant samples
+template<typename T, uint32_t N, int DefCorrection = 1>
+class LoudnessNormalizer {
+public:
+    void normalize(const T *raw_sample, const T *reference_sample, T *output) {
+        float filtered_raw[N];
+        float filtered_reference[N];
+        for (uint32_t i = 0; i < N; i++) {
+            filtered_raw[i] = _as_float(raw_sample[i]);
+            filtered_reference[i] = _as_float(reference_sample[i]); 
+        }
+
+        // perform filter
+        _raw_sample_filter.transform(filtered_raw, filtered_raw, N);
+        _reference_sample_filter.transform(filtered_reference, filtered_reference, N);
+
+        // Get the (unormalized) power of each filtered sample
+        float raw_power = 0.0f;
+        float reference_power = 0.0f;
+        for(uint32_t i = 0; i < N; i++) {
+            raw_power += filtered_raw[i] * filtered_raw[i];
+            reference_power += filtered_reference[i] * filtered_reference[i];
+        }
+        float correction = sqrt(reference_power / raw_power);
+        // std::cout << correction << ", " << raw_power << ", " << shifted_power << std::endl;
+        if (!std::isfinite(correction)) {
+            // just correct for 1/2 freq spectrum sampling if correction is not real
+            for (uint32_t i = 0; i < N; i++) {
+                output[i] = raw_sample[i] * (float) DefCorrection;
+            }
+        }
+        else {
+            // otherwise scale the shifted by the correction
+            for (uint32_t i = 0; i < N; i++) {
+                output[i] = raw_sample[i] * correction;
+            }
+        }
+
+    }
+    // resets memory on previous raw and reference samples
+    void reset() {
+        _raw_sample_filter.reset();
+        _reference_sample_filter.reset();
+    }
+private:
+    LUFSFilter _raw_sample_filter;
+    LUFSFilter _reference_sample_filter;
+
+
+    static float _as_float(T v) {
+        return Complex(v).real();
+    }
+};
+
+
+}
+}
+
+
+#endif
--- a/lv2.h
+++ b/lv2.h
@ -0,0 +1,470 @@
+// Copyright 2006-2020 David Robillard <d@drobilla.net>
+// Copyright 2006-2012 Steve Harris <steve@plugin.org.uk>
+// Copyright 2000-2002 Richard W.E. Furse, Paul Barton-Davis, Stefan Westerfeld.
+// SPDX-License-Identifier: ISC
+
+#ifndef LV2_H_INCLUDED
+#define LV2_H_INCLUDED
+
+/**
+   @defgroup lv2 LV2
+
+   The LV2 specification.
+
+   @{
+*/
+
+/**
+   @defgroup lv2core LV2 Core
+
+   Core LV2 specification.
+
+   See <http://lv2plug.in/ns/lv2core> for details.
+
+   @{
+*/
+
+#include <stdint.h>
+
+// clang-format off
+
+#define LV2_CORE_URI    "http://lv2plug.in/ns/lv2core"  ///< http://lv2plug.in/ns/lv2core
+#define LV2_CORE_PREFIX LV2_CORE_URI "#"                ///< http://lv2plug.in/ns/lv2core#
+
+#define LV2_CORE__AllpassPlugin      LV2_CORE_PREFIX "AllpassPlugin"       ///< http://lv2plug.in/ns/lv2core#AllpassPlugin
+#define LV2_CORE__AmplifierPlugin    LV2_CORE_PREFIX "AmplifierPlugin"     ///< http://lv2plug.in/ns/lv2core#AmplifierPlugin
+#define LV2_CORE__AnalyserPlugin     LV2_CORE_PREFIX "AnalyserPlugin"      ///< http://lv2plug.in/ns/lv2core#AnalyserPlugin
+#define LV2_CORE__AudioPort          LV2_CORE_PREFIX "AudioPort"           ///< http://lv2plug.in/ns/lv2core#AudioPort
+#define LV2_CORE__BandpassPlugin     LV2_CORE_PREFIX "BandpassPlugin"      ///< http://lv2plug.in/ns/lv2core#BandpassPlugin
+#define LV2_CORE__CVPort             LV2_CORE_PREFIX "CVPort"              ///< http://lv2plug.in/ns/lv2core#CVPort
+#define LV2_CORE__ChorusPlugin       LV2_CORE_PREFIX "ChorusPlugin"        ///< http://lv2plug.in/ns/lv2core#ChorusPlugin
+#define LV2_CORE__CombPlugin         LV2_CORE_PREFIX "CombPlugin"          ///< http://lv2plug.in/ns/lv2core#CombPlugin
+#define LV2_CORE__CompressorPlugin   LV2_CORE_PREFIX "CompressorPlugin"    ///< http://lv2plug.in/ns/lv2core#CompressorPlugin
+#define LV2_CORE__ConstantPlugin     LV2_CORE_PREFIX "ConstantPlugin"      ///< http://lv2plug.in/ns/lv2core#ConstantPlugin
+#define LV2_CORE__ControlPort        LV2_CORE_PREFIX "ControlPort"         ///< http://lv2plug.in/ns/lv2core#ControlPort
+#define LV2_CORE__ConverterPlugin    LV2_CORE_PREFIX "ConverterPlugin"     ///< http://lv2plug.in/ns/lv2core#ConverterPlugin
+#define LV2_CORE__DelayPlugin        LV2_CORE_PREFIX "DelayPlugin"         ///< http://lv2plug.in/ns/lv2core#DelayPlugin
+#define LV2_CORE__DistortionPlugin   LV2_CORE_PREFIX "DistortionPlugin"    ///< http://lv2plug.in/ns/lv2core#DistortionPlugin
+#define LV2_CORE__DynamicsPlugin     LV2_CORE_PREFIX "DynamicsPlugin"      ///< http://lv2plug.in/ns/lv2core#DynamicsPlugin
+#define LV2_CORE__EQPlugin           LV2_CORE_PREFIX "EQPlugin"            ///< http://lv2plug.in/ns/lv2core#EQPlugin
+#define LV2_CORE__EnvelopePlugin     LV2_CORE_PREFIX "EnvelopePlugin"      ///< http://lv2plug.in/ns/lv2core#EnvelopePlugin
+#define LV2_CORE__ExpanderPlugin     LV2_CORE_PREFIX "ExpanderPlugin"      ///< http://lv2plug.in/ns/lv2core#ExpanderPlugin
+#define LV2_CORE__ExtensionData      LV2_CORE_PREFIX "ExtensionData"       ///< http://lv2plug.in/ns/lv2core#ExtensionData
+#define LV2_CORE__Feature            LV2_CORE_PREFIX "Feature"             ///< http://lv2plug.in/ns/lv2core#Feature
+#define LV2_CORE__FilterPlugin       LV2_CORE_PREFIX "FilterPlugin"        ///< http://lv2plug.in/ns/lv2core#FilterPlugin
+#define LV2_CORE__FlangerPlugin      LV2_CORE_PREFIX "FlangerPlugin"       ///< http://lv2plug.in/ns/lv2core#FlangerPlugin
+#define LV2_CORE__FunctionPlugin     LV2_CORE_PREFIX "FunctionPlugin"      ///< http://lv2plug.in/ns/lv2core#FunctionPlugin
+#define LV2_CORE__GatePlugin         LV2_CORE_PREFIX "GatePlugin"          ///< http://lv2plug.in/ns/lv2core#GatePlugin
+#define LV2_CORE__GeneratorPlugin    LV2_CORE_PREFIX "GeneratorPlugin"     ///< http://lv2plug.in/ns/lv2core#GeneratorPlugin
+#define LV2_CORE__HighpassPlugin     LV2_CORE_PREFIX "HighpassPlugin"      ///< http://lv2plug.in/ns/lv2core#HighpassPlugin
+#define LV2_CORE__InputPort          LV2_CORE_PREFIX "InputPort"           ///< http://lv2plug.in/ns/lv2core#InputPort
+#define LV2_CORE__InstrumentPlugin   LV2_CORE_PREFIX "InstrumentPlugin"    ///< http://lv2plug.in/ns/lv2core#InstrumentPlugin
+#define LV2_CORE__LimiterPlugin      LV2_CORE_PREFIX "LimiterPlugin"       ///< http://lv2plug.in/ns/lv2core#LimiterPlugin
+#define LV2_CORE__LowpassPlugin      LV2_CORE_PREFIX "LowpassPlugin"       ///< http://lv2plug.in/ns/lv2core#LowpassPlugin
+#define LV2_CORE__MixerPlugin        LV2_CORE_PREFIX "MixerPlugin"         ///< http://lv2plug.in/ns/lv2core#MixerPlugin
+#define LV2_CORE__ModulatorPlugin    LV2_CORE_PREFIX "ModulatorPlugin"     ///< http://lv2plug.in/ns/lv2core#ModulatorPlugin
+#define LV2_CORE__MultiEQPlugin      LV2_CORE_PREFIX "MultiEQPlugin"       ///< http://lv2plug.in/ns/lv2core#MultiEQPlugin
+#define LV2_CORE__OscillatorPlugin   LV2_CORE_PREFIX "OscillatorPlugin"    ///< http://lv2plug.in/ns/lv2core#OscillatorPlugin
+#define LV2_CORE__OutputPort         LV2_CORE_PREFIX "OutputPort"          ///< http://lv2plug.in/ns/lv2core#OutputPort
+#define LV2_CORE__ParaEQPlugin       LV2_CORE_PREFIX "ParaEQPlugin"        ///< http://lv2plug.in/ns/lv2core#ParaEQPlugin
+#define LV2_CORE__PhaserPlugin       LV2_CORE_PREFIX "PhaserPlugin"        ///< http://lv2plug.in/ns/lv2core#PhaserPlugin
+#define LV2_CORE__PitchPlugin        LV2_CORE_PREFIX "PitchPlugin"         ///< http://lv2plug.in/ns/lv2core#PitchPlugin
+#define LV2_CORE__Plugin             LV2_CORE_PREFIX "Plugin"              ///< http://lv2plug.in/ns/lv2core#Plugin
+#define LV2_CORE__PluginBase         LV2_CORE_PREFIX "PluginBase"          ///< http://lv2plug.in/ns/lv2core#PluginBase
+#define LV2_CORE__Point              LV2_CORE_PREFIX "Point"               ///< http://lv2plug.in/ns/lv2core#Point
+#define LV2_CORE__Port               LV2_CORE_PREFIX "Port"                ///< http://lv2plug.in/ns/lv2core#Port
+#define LV2_CORE__PortProperty       LV2_CORE_PREFIX "PortProperty"        ///< http://lv2plug.in/ns/lv2core#PortProperty
+#define LV2_CORE__Resource           LV2_CORE_PREFIX "Resource"            ///< http://lv2plug.in/ns/lv2core#Resource
+#define LV2_CORE__ReverbPlugin       LV2_CORE_PREFIX "ReverbPlugin"        ///< http://lv2plug.in/ns/lv2core#ReverbPlugin
+#define LV2_CORE__ScalePoint         LV2_CORE_PREFIX "ScalePoint"          ///< http://lv2plug.in/ns/lv2core#ScalePoint
+#define LV2_CORE__SimulatorPlugin    LV2_CORE_PREFIX "SimulatorPlugin"     ///< http://lv2plug.in/ns/lv2core#SimulatorPlugin
+#define LV2_CORE__SpatialPlugin      LV2_CORE_PREFIX "SpatialPlugin"       ///< http://lv2plug.in/ns/lv2core#SpatialPlugin
+#define LV2_CORE__Specification      LV2_CORE_PREFIX "Specification"       ///< http://lv2plug.in/ns/lv2core#Specification
+#define LV2_CORE__SpectralPlugin     LV2_CORE_PREFIX "SpectralPlugin"      ///< http://lv2plug.in/ns/lv2core#SpectralPlugin
+#define LV2_CORE__UtilityPlugin      LV2_CORE_PREFIX "UtilityPlugin"       ///< http://lv2plug.in/ns/lv2core#UtilityPlugin
+#define LV2_CORE__WaveshaperPlugin   LV2_CORE_PREFIX "WaveshaperPlugin"    ///< http://lv2plug.in/ns/lv2core#WaveshaperPlugin
+#define LV2_CORE__appliesTo          LV2_CORE_PREFIX "appliesTo"           ///< http://lv2plug.in/ns/lv2core#appliesTo
+#define LV2_CORE__binary             LV2_CORE_PREFIX "binary"              ///< http://lv2plug.in/ns/lv2core#binary
+#define LV2_CORE__connectionOptional LV2_CORE_PREFIX "connectionOptional"  ///< http://lv2plug.in/ns/lv2core#connectionOptional
+#define LV2_CORE__control            LV2_CORE_PREFIX "control"             ///< http://lv2plug.in/ns/lv2core#control
+#define LV2_CORE__default            LV2_CORE_PREFIX "default"             ///< http://lv2plug.in/ns/lv2core#default
+#define LV2_CORE__designation        LV2_CORE_PREFIX "designation"         ///< http://lv2plug.in/ns/lv2core#designation
+#define LV2_CORE__documentation      LV2_CORE_PREFIX "documentation"       ///< http://lv2plug.in/ns/lv2core#documentation
+#define LV2_CORE__enabled            LV2_CORE_PREFIX "enabled"             ///< http://lv2plug.in/ns/lv2core#enabled
+#define LV2_CORE__enumeration        LV2_CORE_PREFIX "enumeration"         ///< http://lv2plug.in/ns/lv2core#enumeration
+#define LV2_CORE__extensionData      LV2_CORE_PREFIX "extensionData"       ///< http://lv2plug.in/ns/lv2core#extensionData
+#define LV2_CORE__freeWheeling       LV2_CORE_PREFIX "freeWheeling"        ///< http://lv2plug.in/ns/lv2core#freeWheeling
+#define LV2_CORE__hardRTCapable      LV2_CORE_PREFIX "hardRTCapable"       ///< http://lv2plug.in/ns/lv2core#hardRTCapable
+#define LV2_CORE__inPlaceBroken      LV2_CORE_PREFIX "inPlaceBroken"       ///< http://lv2plug.in/ns/lv2core#inPlaceBroken
+#define LV2_CORE__index              LV2_CORE_PREFIX "index"               ///< http://lv2plug.in/ns/lv2core#index
+#define LV2_CORE__integer            LV2_CORE_PREFIX "integer"             ///< http://lv2plug.in/ns/lv2core#integer
+#define LV2_CORE__isLive             LV2_CORE_PREFIX "isLive"              ///< http://lv2plug.in/ns/lv2core#isLive
+#define LV2_CORE__latency            LV2_CORE_PREFIX "latency"             ///< http://lv2plug.in/ns/lv2core#latency
+#define LV2_CORE__maximum            LV2_CORE_PREFIX "maximum"             ///< http://lv2plug.in/ns/lv2core#maximum
+#define LV2_CORE__microVersion       LV2_CORE_PREFIX "microVersion"        ///< http://lv2plug.in/ns/lv2core#microVersion
+#define LV2_CORE__minimum            LV2_CORE_PREFIX "minimum"             ///< http://lv2plug.in/ns/lv2core#minimum
+#define LV2_CORE__minorVersion       LV2_CORE_PREFIX "minorVersion"        ///< http://lv2plug.in/ns/lv2core#minorVersion
+#define LV2_CORE__name               LV2_CORE_PREFIX "name"                ///< http://lv2plug.in/ns/lv2core#name
+#define LV2_CORE__optionalFeature    LV2_CORE_PREFIX "optionalFeature"     ///< http://lv2plug.in/ns/lv2core#optionalFeature
+#define LV2_CORE__port               LV2_CORE_PREFIX "port"                ///< http://lv2plug.in/ns/lv2core#port
+#define LV2_CORE__portProperty       LV2_CORE_PREFIX "portProperty"        ///< http://lv2plug.in/ns/lv2core#portProperty
+#define LV2_CORE__project            LV2_CORE_PREFIX "project"             ///< http://lv2plug.in/ns/lv2core#project
+#define LV2_CORE__prototype          LV2_CORE_PREFIX "prototype"           ///< http://lv2plug.in/ns/lv2core#prototype
+#define LV2_CORE__reportsLatency     LV2_CORE_PREFIX "reportsLatency"      ///< http://lv2plug.in/ns/lv2core#reportsLatency
+#define LV2_CORE__requiredFeature    LV2_CORE_PREFIX "requiredFeature"     ///< http://lv2plug.in/ns/lv2core#requiredFeature
+#define LV2_CORE__sampleRate         LV2_CORE_PREFIX "sampleRate"          ///< http://lv2plug.in/ns/lv2core#sampleRate
+#define LV2_CORE__scalePoint         LV2_CORE_PREFIX "scalePoint"          ///< http://lv2plug.in/ns/lv2core#scalePoint
+#define LV2_CORE__symbol             LV2_CORE_PREFIX "symbol"              ///< http://lv2plug.in/ns/lv2core#symbol
+#define LV2_CORE__toggled            LV2_CORE_PREFIX "toggled"             ///< http://lv2plug.in/ns/lv2core#toggled
+
+// clang-format on
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+   Plugin Instance Handle.
+
+   This is a handle for one particular instance of a plugin.  It is valid to
+   compare to NULL (or 0 for C++) but otherwise the host MUST NOT attempt to
+   interpret it.
+*/
+typedef void* LV2_Handle;
+
+/**
+   Feature.
+
+   Features allow hosts to make additional functionality available to plugins
+   without requiring modification to the LV2 API.  Extensions may define new
+   features and specify the `URI` and `data` to be used if necessary.
+   Some features, such as lv2:isLive, do not require the host to pass data.
+*/
+typedef struct {
+  /**
+     A globally unique, case-sensitive identifier (URI) for this feature.
+
+     This MUST be a valid URI string as defined by RFC 3986.
+  */
+  const char* URI;
+
+  /**
+     Pointer to arbitrary data.
+
+     The format of this data is defined by the extension which describes the
+     feature with the given `URI`.
+  */
+  void* data;
+} LV2_Feature;
+
+/**
+   Plugin Descriptor.
+
+   This structure provides the core functions necessary to instantiate and use
+   a plugin.
+*/
+typedef struct LV2_Descriptor {
+  /**
+     A globally unique, case-sensitive identifier for this plugin.
+
+     This MUST be a valid URI string as defined by RFC 3986.  All plugins with
+     the same URI MUST be compatible to some degree, see
+     http://lv2plug.in/ns/lv2core for details.
+  */
+  const char* URI;
+
+  /**
+     Instantiate the plugin.
+
+     Note that instance initialisation should generally occur in activate()
+     rather than here. If a host calls instantiate(), it MUST call cleanup()
+     at some point in the future.
+
+     @param descriptor Descriptor of the plugin to instantiate.
+
+     @param sample_rate Sample rate, in Hz, for the new plugin instance.
+
+     @param bundle_path Path to the LV2 bundle which contains this plugin
+     binary. It MUST include the trailing directory separator so that simply
+     appending a filename will yield the path to that file in the bundle.
+
+     @param features A NULL terminated array of LV2_Feature structs which
+     represent the features the host supports. Plugins may refuse to
+     instantiate if required features are not found here. However, hosts MUST
+     NOT use this as a discovery mechanism: instead, use the RDF data to
+     determine which features are required and do not attempt to instantiate
+     unsupported plugins at all. This parameter MUST NOT be NULL, i.e. a host
+     that supports no features MUST pass a single element array containing
+     NULL.
+
+     @return A handle for the new plugin instance, or NULL if instantiation
+     has failed.
+  */
+  LV2_Handle (*instantiate)(const struct LV2_Descriptor* descriptor,
+                            double                       sample_rate,
+                            const char*                  bundle_path,
+                            const LV2_Feature* const*    features);
+
+  /**
+     Connect a port on a plugin instance to a memory location.
+
+     Plugin writers should be aware that the host may elect to use the same
+     buffer for more than one port and even use the same buffer for both
+     input and output (see lv2:inPlaceBroken in lv2.ttl).
+
+     If the plugin has the feature lv2:hardRTCapable then there are various
+     things that the plugin MUST NOT do within the connect_port() function;
+     see lv2core.ttl for details.
+
+     connect_port() MUST be called at least once for each port before run()
+     is called, unless that port is lv2:connectionOptional. The plugin must
+     pay careful attention to the block size passed to run() since the block
+     allocated may only just be large enough to contain the data, and is not
+     guaranteed to remain constant between run() calls.
+
+     connect_port() may be called more than once for a plugin instance to
+     allow the host to change the buffers that the plugin is reading or
+     writing. These calls may be made before or after activate() or
+     deactivate() calls.
+
+     @param instance Plugin instance containing the port.
+
+     @param port Index of the port to connect. The host MUST NOT try to
+     connect a port index that is not defined in the plugin's RDF data. If
+     it does, the plugin's behaviour is undefined (a crash is likely).
+
+     @param data_location Pointer to data of the type defined by the port
+     type in the plugin's RDF data (for example, an array of float for an
+     lv2:AudioPort). This pointer must be stored by the plugin instance and
+     used to read/write data when run() is called. Data present at the time
+     of the connect_port() call MUST NOT be considered meaningful.
+  */
+  void (*connect_port)(LV2_Handle instance, uint32_t port, void* data_location);
+
+  /**
+     Initialise a plugin instance and activate it for use.
+
+     This is separated from instantiate() to aid real-time support and so
+     that hosts can reinitialise a plugin instance by calling deactivate()
+     and then activate(). In this case the plugin instance MUST reset all
+     state information dependent on the history of the plugin instance except
+     for any data locations provided by connect_port(). If there is nothing
+     for activate() to do then this field may be NULL.
+
+     When present, hosts MUST call this function once before run() is called
+     for the first time. This call SHOULD be made as close to the run() call
+     as possible and indicates to real-time plugins that they are now live,
+     however plugins MUST NOT rely on a prompt call to run() after
+     activate().
+
+     The host MUST NOT call activate() again until deactivate() has been
+     called first. If a host calls activate(), it MUST call deactivate() at
+     some point in the future. Note that connect_port() may be called before
+     or after activate().
+  */
+  void (*activate)(LV2_Handle instance);
+
+  /**
+     Run a plugin instance for a block.
+
+     Note that if an activate() function exists then it must be called before
+     run(). If deactivate() is called for a plugin instance then run() may
+     not be called until activate() has been called again.
+
+     If the plugin has the feature lv2:hardRTCapable then there are various
+     things that the plugin MUST NOT do within the run() function (see
+     lv2core.ttl for details).
+
+     As a special case, when `sample_count` is 0, the plugin should update
+     any output ports that represent a single instant in time (for example,
+     control ports, but not audio ports). This is particularly useful for
+     latent plugins, which should update their latency output port so hosts
+     can pre-roll plugins to compute latency. Plugins MUST NOT crash when
+     `sample_count` is 0.
+
+     @param instance Instance to be run.
+
+     @param sample_count The block size (in samples) for which the plugin
+     instance must run.
+  */
+  void (*run)(LV2_Handle instance, uint32_t sample_count);
+
+  /**
+     Deactivate a plugin instance (counterpart to activate()).
+
+     Hosts MUST deactivate all activated instances after they have been run()
+     for the last time. This call SHOULD be made as close to the last run()
+     call as possible and indicates to real-time plugins that they are no
+     longer live, however plugins MUST NOT rely on prompt deactivation. If
+     there is nothing for deactivate() to do then this field may be NULL
+
+     Deactivation is not similar to pausing since the plugin instance will be
+     reinitialised by activate(). However, deactivate() itself MUST NOT fully
+     reset plugin state. For example, the host may deactivate a plugin, then
+     store its state (using some extension to do so).
+
+     Hosts MUST NOT call deactivate() unless activate() was previously
+     called. Note that connect_port() may be called before or after
+     deactivate().
+  */
+  void (*deactivate)(LV2_Handle instance);
+
+  /**
+     Clean up a plugin instance (counterpart to instantiate()).
+
+     Once an instance of a plugin has been finished with it must be deleted
+     using this function. The instance handle passed ceases to be valid after
+     this call.
+
+     If activate() was called for a plugin instance then a corresponding call
+     to deactivate() MUST be made before cleanup() is called. Hosts MUST NOT
+     call cleanup() unless instantiate() was previously called.
+  */
+  void (*cleanup)(LV2_Handle instance);
+
+  /**
+     Return additional plugin data defined by some extension.
+
+     A typical use of this facility is to return a struct containing function
+     pointers to extend the LV2_Descriptor API.
+
+     The actual type and meaning of the returned object MUST be specified
+     precisely by the extension. This function MUST return NULL for any
+     unsupported URI. If a plugin does not support any extension data, this
+     field may be NULL.
+
+     The host is never responsible for freeing the returned value.
+  */
+  const void* (*extension_data)(const char* uri);
+} LV2_Descriptor;
+
+/**
+   Helper macro needed for LV2_SYMBOL_EXPORT when using C++.
+*/
+#ifdef __cplusplus
+#  define LV2_SYMBOL_EXTERN extern "C"
+#else
+#  define LV2_SYMBOL_EXTERN
+#endif
+
+/**
+   Put this (LV2_SYMBOL_EXPORT) before any functions that are to be loaded
+   by the host as a symbol from the dynamic library.
+*/
+#ifdef _WIN32
+#  define LV2_SYMBOL_EXPORT LV2_SYMBOL_EXTERN __declspec(dllexport)
+#else
+#  define LV2_SYMBOL_EXPORT \
+    LV2_SYMBOL_EXTERN __attribute__((visibility("default")))
+#endif
+
+/**
+   Prototype for plugin accessor function.
+
+   Plugins are discovered by hosts using RDF data (not by loading libraries).
+   See http://lv2plug.in for details on the discovery process, though most
+   hosts should use an existing library to implement this functionality.
+
+   This is the simple plugin discovery API, suitable for most statically
+   defined plugins.  Advanced plugins that need access to their bundle during
+   discovery can use lv2_lib_descriptor() instead.  Plugin libraries MUST
+   include a function called "lv2_descriptor" or "lv2_lib_descriptor" with
+   C-style linkage, but SHOULD provide "lv2_descriptor" wherever possible.
+
+   When it is time to load a plugin (designated by its URI), the host loads the
+   plugin's library, gets the lv2_descriptor() function from it, and uses this
+   function to find the LV2_Descriptor for the desired plugin.  Plugins are
+   accessed by index using values from 0 upwards.  This function MUST return
+   NULL for out of range indices, so the host can enumerate plugins by
+   increasing `index` until NULL is returned.
+
+   Note that `index` has no meaning, hosts MUST NOT depend on it remaining
+   consistent between loads of the plugin library.
+*/
+LV2_SYMBOL_EXPORT
+const LV2_Descriptor*
+lv2_descriptor(uint32_t index);
+
+/**
+   Type of the lv2_descriptor() function in a library (old discovery API).
+*/
+typedef const LV2_Descriptor* (*LV2_Descriptor_Function)(uint32_t index);
+
+/**
+   Handle for a library descriptor.
+*/
+typedef void* LV2_Lib_Handle;
+
+/**
+   Descriptor for a plugin library.
+
+   To access a plugin library, the host creates an LV2_Lib_Descriptor via the
+   lv2_lib_descriptor() function in the shared object.
+*/
+typedef struct {
+  /**
+     Opaque library data which must be passed as the first parameter to all
+     the methods of this struct.
+  */
+  LV2_Lib_Handle handle;
+
+  /**
+     The total size of this struct.  This allows for this struct to be
+     expanded in the future if necessary.  This MUST be set by the library to
+     sizeof(LV2_Lib_Descriptor).  The host MUST NOT access any fields of this
+     struct beyond get_plugin() unless this field indicates they are present.
+  */
+  uint32_t size;
+
+  /**
+     Destroy this library descriptor and free all related resources.
+  */
+  void (*cleanup)(LV2_Lib_Handle handle);
+
+  /**
+     Plugin accessor.
+
+     Plugins are accessed by index using values from 0 upwards.  Out of range
+     indices MUST result in this function returning NULL, so the host can
+     enumerate plugins by increasing `index` until NULL is returned.
+  */
+  const LV2_Descriptor* (*get_plugin)(LV2_Lib_Handle handle, uint32_t index);
+} LV2_Lib_Descriptor;
+
+/**
+   Prototype for library accessor function.
+
+   This is the more advanced discovery API, which allows plugin libraries to
+   access their bundles during discovery, which makes it possible for plugins to
+   be dynamically defined by files in their bundle.  This API also has an
+   explicit cleanup function, removing any need for non-portable shared library
+   destructors.  Simple plugins that do not require these features may use
+   lv2_descriptor() instead.
+
+   This is the entry point for a plugin library.  Hosts load this symbol from
+   the library and call this function to obtain a library descriptor which can
+   be used to access all the contained plugins.  The returned object must not
+   be destroyed (using LV2_Lib_Descriptor::cleanup()) until all plugins loaded
+   from that library have been destroyed.
+*/
+LV2_SYMBOL_EXPORT
+const LV2_Lib_Descriptor*
+lv2_lib_descriptor(const char* bundle_path, const LV2_Feature* const* features);
+
+/**
+   Type of the lv2_lib_descriptor() function in an LV2 library.
+*/
+typedef const LV2_Lib_Descriptor* (*LV2_Lib_Descriptor_Function)(
+  const char*               bundle_path,
+  const LV2_Feature* const* features);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+/**
+   @}
+   @}
+*/
+
+#endif /* LV2_H_INCLUDED */
--- a/mengubah_lv2.cpp
+++ b/mengubah_lv2.cpp
@ -0,0 +1,187 @@
+#include <cstdint>
+#include "lv2.h"
+#include <array>
+
+#include "common.h"
+#include "effect.h"
+#include "pitchshifter.h"
+#include "formantshifter.h"
+#include "timestretcher.h"
+
+
+
+using namespace Mengu;
+using namespace dsp;
+
+struct PluginHandler {
+    std::array<PitchShifter *, 3> pitch_shifters;
+    std::array<Effect *, 2> formant_shifters;
+    const float *in_buffer;
+    float *out_buffer;
+    const float *pitch_shifter_ind;
+    const float *pitch_shift;
+    const float *formant_shifter_ind;
+    const float *formant_shift;
+};
+
+enum PitchShiftInd {
+    WSOLAPitch = 0,
+    PhaseVocoderPitch = 1,
+    PhaseVocoderDoneRightPitch = 2,
+};
+enum FormantShiftInd {
+    LPCFormant = 0,
+    PSOLAFormant = 1,
+};
+
+/* internal core methods */
+static LV2_Handle instantiate (const struct LV2_Descriptor *descriptor, double sample_rate, const char *bundle_path, const LV2_Feature *const *features) {
+    PluginHandler *plugin = new PluginHandler;
+    plugin->pitch_shifters = {
+        new TimeStretchPitchShifter(new WSOLATimeStretcher(), 1),
+        new TimeStretchPitchShifter(new PhaseVocoderTimeStretcher(), 1),
+        new TimeStretchPitchShifter(new PhaseVocoderDoneRightTimeStretcher(), 1),
+    };
+    plugin->formant_shifters = {
+        new LPCFormantShifter(),
+        new TimeStretchPitchShifter(new PSOLATimeStretcher(), 1),
+    };
+
+    return plugin;
+}
+
+static void connect_port (LV2_Handle instance, uint32_t port, void *data_location) {
+    PluginHandler* plugin = (PluginHandler*) instance;
+    if (plugin == nullptr) return;
+
+    switch (port)
+    {
+    case 0:
+        plugin->in_buffer = (const float*) data_location;
+        break;
+    
+    case 1:
+        plugin->out_buffer = (float*) data_location;
+        break;
+
+    case 2:
+        plugin->pitch_shifter_ind = (const float*) data_location;
+
+    case 3:
+        plugin->pitch_shift = (const float*) data_location;
+        break;
+    
+    case 4:
+        plugin->formant_shifter_ind = (const float*) data_location;
+        break;
+
+    case 5:
+        plugin->formant_shift = (const float*) data_location;
+        break;
+    
+    default:
+        break;
+    }
+}
+
+static void activate (LV2_Handle instance)
+{
+    /* not needed here */
+}
+
+static void run (LV2_Handle instance, uint32_t sample_count)
+{
+    PluginHandler* plugin = (PluginHandler*) instance;
+    if (plugin == nullptr) return;
+    if ((!plugin->in_buffer) || (!plugin->out_buffer) || (!plugin->pitch_shift)) return;
+
+    // apply effects
+    Effect *pitch_shifter = plugin->pitch_shifters[static_cast<PitchShiftInd>(*plugin->pitch_shifter_ind)];
+    Effect *formant_shifter = plugin->formant_shifters[static_cast<FormantShiftInd>(*plugin->formant_shifter_ind)];
+
+    pitch_shifter->set_property(0, EffectPropPayload {
+        .type = Slider,
+        .value = *plugin->pitch_shift,
+    });
+    formant_shifter->set_property(0, EffectPropPayload {
+        .type = Slider,
+        .value = *plugin->formant_shift,
+    });
+
+    static constexpr uint32_t ProcSize = 1 << 10;
+    Complex cbuffer[ProcSize];
+    uint32_t num_processed = 0;
+
+    while (num_processed < sample_count) {
+        uint32_t num_this_pass = MIN(ProcSize, sample_count - num_processed);
+        for (uint32_t i = 0; i < num_this_pass; i++) {
+            cbuffer[i] = (Complex) plugin->in_buffer[num_processed + i];
+        }
+
+        pitch_shifter->push_signal(cbuffer, num_this_pass);
+        pitch_shifter->pop_transformed_signal(cbuffer, num_this_pass);
+        
+        formant_shifter->push_signal(cbuffer, num_this_pass);
+        formant_shifter->pop_transformed_signal(cbuffer, num_this_pass);
+
+        
+
+        for (uint32_t i = 0; i < num_this_pass; i++) {
+            plugin->out_buffer[num_processed + i] = cbuffer[i].real();
+        }
+
+        num_processed += num_this_pass;
+    }
+    
+
+    
+
+    
+}
+
+static void deactivate (LV2_Handle instance)
+{
+    /* not needed here */
+}
+
+static void cleanup (LV2_Handle instance) {
+    PluginHandler *plugin = (PluginHandler *) instance;
+    if (plugin == nullptr) {
+        return;
+    }
+
+    for (auto pitch_shifter: plugin->pitch_shifters) {
+        delete pitch_shifter;
+    }
+
+    for (auto formant_shifter: plugin->formant_shifters) {
+        delete formant_shifter;
+    }
+
+    delete plugin;
+}
+
+static const void* extension_data (const char *uri)
+{
+    return nullptr;
+}
+
+/* descriptor */
+static LV2_Descriptor const descriptor =
+{
+    "https://github.com/9exa/Mengubah/Mengubah",
+    instantiate,
+    connect_port,
+    activate /* or NULL */,
+    run,
+    deactivate /* or NULL */,
+    cleanup,
+    extension_data /* or NULL */
+};
+
+/* interface */
+LV2_SYMBOL_EXPORT const LV2_Descriptor* lv2_descriptor (uint32_t index)
+{
+    if (index == 0) return &descriptor;
+    else return NULL;
+}
--- a/mengumath.h
+++ b/mengumath.h
@ -0,0 +1,164 @@
+#ifndef MENGA_MATH
+#define MENGA_MATH
+
+#include <cmath>
+#include <cstdint>
+
+
+namespace Mengu {
+// Copied from Godot
+
+#define MATH_PI 3.1415926535897932
+#define MATH_TAU 6.283185307179586
+
+// Make room for our constexpr's below by overriding potential system-specific macros.
+#undef ABS
+#undef SIGN
+#undef MIN
+#undef MAX
+#undef CLAMP
+
+// Generic ABS function, for math uses please use Math::abs.
+template <typename T>
+constexpr T ABS(T m_v) {
+	return m_v < 0 ? -m_v : m_v;
+}
+
+template <typename T>
+constexpr const T SIGN(const T m_v) {
+	return m_v == 0 ? 0.0f : (m_v < 0 ? -1.0f : +1.0f);
+}
+
+template <typename T, typename T2>
+constexpr auto MIN(const T m_a, const T2 m_b) {
+	return m_a < m_b ? m_a : m_b;
+}
+
+template <typename T, typename T2>
+constexpr auto MAX(const T m_a, const T2 m_b) {
+	return m_a > m_b ? m_a : m_b;
+}
+
+template <typename T, typename T2, typename T3>
+constexpr auto CLAMP(const T m_a, const T2 m_min, const T3 m_max) {
+	return m_a < m_min ? m_min : (m_a > m_max ? m_max : m_a);
+}
+
+//// ##### 
+
+template <typename T>
+inline T min(T a, T b) {
+	return a < b ? a : b;
+}
+
+template <typename T>
+inline T max(T a, T b) {
+	return a > b ? a : b;
+}
+
+template <typename T>
+inline T sign(T x) {
+	return static_cast<T>(SIGN(x));
+}
+
+template <typename T>
+inline T abs(T x) {
+	return std::abs(x);
+}
+
+template <typename T>
+inline T pow(T x, T e) {
+	return std::pow(x, e);
+}
+
+template <typename T>
+inline T exp2(T x) {
+	return std::exp2(x);
+}
+
+template <typename T>
+inline T log2(T x) {
+	return std::log2(x);
+}
+
+template <typename T>
+inline T log10(T x) {
+	return std::log10(x);
+}
+
+template <typename T>
+inline T logb(T base, T x) {
+	return std::log2(x) / std::log2(base);
+}
+
+
+template <typename T>
+inline T sqrt(T x) {
+	return std::sqrt(x);
+}
+
+template <typename T>
+inline T lerp(T min_v, T max_v, float w) {
+    return min_v + w * (max_v - min_v);
+}
+
+inline float inverse_lerp(float min_v, float max_v, float v) {
+    return (v - min_v) / (max_v - min_v);
+}
+
+// linearly interpelates between the shortest path between 2 angles. Does NOT gaurentee th output be within [-pi, pi]
+inline float lerp_angle(float min_v, float max_v, float w) {
+	if ((max_v - min_v) > MATH_PI) { min_v += MATH_TAU; }
+	else if ((min_v - max_v) > MATH_PI) { max_v += MATH_TAU; }
+
+	return lerp(min_v, max_v, w);
+}
+
+template <typename T>
+inline T modf(T x, T *iptr) {
+	return std::modf(x, iptr);
+}
+
+inline int posmod(int x, int y) {
+    int v = x % y;
+    if (v < 0) {
+        v += y;
+    }
+    return v;
+}
+
+inline float fposmod(float x, float y) {
+	float v = std::fmod(x, y);
+    if (v * y < 0.0f) {
+        v += y;
+    }
+    return v;
+}
+
+inline bool is_pow_2(uint32_t x) {
+	return x != 0 && ((x - 1) & x) == 0;
+}
+
+inline uint32_t last_pow_2(uint32_t x) {
+	x |= (x >> 1);
+	x |= (x >> 2);
+	x |= (x >> 4);
+	x |= (x >> 8);
+	x |= (x >> 16);
+	return x - (x >> 1);
+}
+
+inline uint32_t next_pow_2(uint32_t x) {
+	x |= (x >> 1);
+	x |= (x >> 2);
+	x |= (x >> 4);
+	x |= (x >> 8);
+	x |= (x >> 16);
+	return (x << 1) & ~x;
+}
+
+
+
+};
+
+#endif
--- a/miniaudio.h
+++ b/miniaudio.h
--- a/pitchshifter.cpp
+++ b/pitchshifter.cpp
@ -0,0 +1,291 @@
+#include "common.h"
+#include "effect.h"
+#include "timestretcher.h"
+#include "sampling.h"
+#include "fft.h"
+#include "interpolation.h"
+#include "vecdeque.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <complex>
+#include <cstdint>
+#include "pitchshifter.h"
+#include "singletons.h"
+#include <iterator>
+#include "mengumath.h"
+#include <iostream>
+#include <vector>
+
+using namespace Mengu;
+using namespace dsp;
+
+
+std::vector<EffectPropDesc> PitchShifter::get_property_descs() const {
+    return {
+        EffectPropDesc {
+            .type = EffectPropType::Slider,
+            .name = "Pitch Shift",
+            .desc = "Scales the pitch of pushed signals by this amount",
+            .slider_data = {
+                .min_value = 0.5,
+                .max_value = 2,
+                .scale = Exp,
+            }
+        }
+    };
+}
+
+void PitchShifter::set_property(uint32_t id, EffectPropPayload data) {
+    if (id == 0) {
+        if (data.type == Slider) {
+            set_shift_factor(data.value);
+        }
+    }
+}
+
+EffectPropPayload PitchShifter::get_property(uint32_t id) const {
+    if (id == 0) {
+        return EffectPropPayload {
+            .type = Slider,
+            .value = _shift_factor,
+        };
+    }
+
+    return EffectPropPayload {
+        .type = Slider,
+        .value = 0.0f,
+    };
+}
+
+PhaseVocoderPitchShifterV2::PhaseVocoderPitchShifterV2() {
+    _transformed_buffer.resize(ProcSize);
+}
+
+PhaseVocoderPitchShifterV2::~PhaseVocoderPitchShifterV2() {}
+
+void PhaseVocoderPitchShifterV2::push_signal(const Complex *input, const uint32_t &size) {
+    _raw_buffer.extend_back(input, size);
+}
+
+uint32_t PhaseVocoderPitchShifterV2::pop_transformed_signal(Complex *output, const uint32_t &size) {
+    while ((_transformed_buffer.size() < size + OverlapSize) && (_raw_buffer.size() >= ProcSize)) {
+        std::array<Complex, ProcSize> samples;
+        _raw_buffer.to_array(samples.data(), ProcSize);
+
+
+
+        _lpc.load_sample(samples.data());
+        const std::array<float, ProcSize> &envelope = _lpc.get_envelope();
+        const std::array<float, ProcSize> &residuals = _lpc.get_residuals();
+        const std::array<Complex, ProcSize> &frequencies = _lpc.get_freq_spectrum();
+
+        
+        // std::array<float, ProcSize / 2> log_envelope{};
+        std::array<float, ProcSize / 2> log_residuals{};
+        std::transform(residuals.cbegin(), residuals.cend(), log_residuals.begin(), [] (float f) {
+            return log2(f);
+        });
+
+        std::array<float, ProcSize / 2> args{};
+        std::transform(frequencies.cbegin(), frequencies.cend(), args.begin(), [] (Complex freq) {
+            // shifted to be positive to make lerping easier
+            return std::arg(freq);
+        });
+        /*
+        // resample in the log-frequency domain
+        std::array<Complex, ProcSize / 2> new_freq{};
+        if (_shift_factor > 1.0f) {
+            linear_resample_no_filter(log_residuals.data(), log_residuals.data(), ProcSize, _shift_factor, -2e5f);
+            linear_resample_no_filter(args.data(), args.data(), ProcSize, _shift_factor, 0.0f);
+        }
+        else {
+            linear_resample_no_filter(log_residuals.data(), log_residuals.data(), ProcSize, _shift_factor, -2e5f);
+            linear_resample_no_filter(args.data(), args.data(), ProcSize, _shift_factor, 0.0f);
+            // 0 the rest
+            for (uint32_t i = ProcSize * _shift_factor; i < ProcSize; i++) {
+                log_residuals[i] = -2e5f;
+                args[i] = 0.0f;
+            }
+        }
+
+        std::transform(log_residuals.cbegin(), log_residuals.cend(), new_freq.begin(), [] (float f) {
+            return exp2(f);
+        });
+
+        std::transform(new_freq.cbegin(), new_freq.cend(), envelope.cbegin(), new_freq.begin(),
+            [] (Complex resid, float env) {
+                return resid * env;
+            }
+        );
+        
+
+        // rescale the frequencies
+        float max_base_freq = 0.0f;
+        float max_new_freq = 0.0f;
+        for (uint32_t i = 0; i < ProcSize / 2; i++) {
+            float freq_amp = std::norm(frequencies[i]);
+            if (max_base_freq < freq_amp) { max_base_freq = freq_amp; }
+            if (max_new_freq < new_freq[i].real()) { max_new_freq = new_freq[i].real(); }
+        }
+        max_base_freq = std::sqrt(max_base_freq); // rooting delayed until after the loop
+        std::transform(new_freq.cbegin(), new_freq.cend(), new_freq.begin(),
+            [max_base_freq, max_new_freq] (Complex freq) {
+                return freq * max_base_freq / max_new_freq;
+            }
+        );
+
+        // adjust phases
+        for (uint32_t i = 0; i < ProcSize / 2; i++) {
+            // new_freq[i] = std::polar(std::sqrt(std::norm(frequencies[i])), std::arg(frequencies[i]));
+            new_freq[i] = std::polar(new_freq[i].real(), args[i]);
+            // new_freq[i] = std::polar(new_freq[i].real(), (float) -MATH_TAU * (_samples_processed * i) / ProcSize);
+        }
+        */
+
+        
+        std::array<Complex, ProcSize / 2> new_freq{};
+        std::array<float, ProcSize / 2> freq_mags{};
+        std::transform(frequencies.cbegin(), frequencies.cend(), freq_mags.begin(), [] (Complex c) {
+            return std::sqrt(std::norm(c));
+        });
+        if (_shift_factor > 1.0f) {
+            linear_resample_no_filter(frequencies.data(), new_freq.data(), ProcSize / 2, _shift_factor);
+            linear_resample_no_filter(freq_mags.data(), freq_mags.data(), ProcSize / 2, _shift_factor);
+            // linear_resample_no_filter(args.data(), args.data(), ProcSize, _shift_factor);
+        }
+        else {
+            linear_resample_no_filter(frequencies.data(), new_freq.data(), ProcSize / 2, _shift_factor);
+            linear_resample_no_filter(freq_mags.data(), freq_mags.data(), ProcSize / 2, _shift_factor);
+            // linear_resample_no_filter(args.data(), args.data(), ProcSize, _shift_factor);
+            // 0 the rest
+            for (uint32_t i = ProcSize * _shift_factor; i < ProcSize; i++) {
+                freq_mags[i] = 0.0f;
+                // args[i] = 0.0f;
+            }
+        }
+        // std::transform(freq_mags.cbegin(), freq_mags.cend(), new_freq.cbegin(), new_freq.begin(),
+        //     [] (float mag, Complex old_freq) { return old_freq / std::sqrt(std::norm(old_freq)) * mag; }
+        // );
+        
+        // scale by formants
+        // float envelope_max = -2e5;
+        // float new_freq_max = -2e5;
+        // float scaled_freq_max = -2e5;
+        // for (uint32_t i = 0; i < ProcSize / 2; i++) {
+        //     if (envelope_max < envelope[i]) { envelope_max = envelope[i]; }
+        //     if (new_freq_max < std::norm(new_freq[i])) { new_freq_max = std::norm(new_freq[i]); }
+        // }
+        // new_freq_max = std::sqrt(new_freq_max);
+        // for (uint32_t i = 0; i < ProcSize / 2; i++) {
+        //     new_freq[i] = new_freq[i] * envelope[i] / envelope_max;
+        //     if (scaled_freq_max < std::norm(new_freq[i])) { scaled_freq_max = std::norm(new_freq[i]); }
+        // }
+        // scaled_freq_max = std::sqrt(scaled_freq_max);
+        // for (uint32_t i = 0; i < ProcSize / 2; i++) {
+        //     new_freq[i] *= new_freq_max / scaled_freq_max;
+        // }
+        
+
+        _lpc.get_fft().inverse_transform(new_freq.data(), samples.data());
+
+        mix_and_extend(_transformed_buffer, samples, OverlapSize, hann_window);
+        
+        _samples_processed += ProcSize - OverlapSize;
+        _raw_buffer.pop_front_many(nullptr, ProcSize - OverlapSize);
+    }
+
+    uint32_t n = _transformed_buffer.pop_front_many(output, size);
+    return n;
+}
+
+uint32_t PhaseVocoderPitchShifterV2::n_transformed_ready() const {
+    return _transformed_buffer.size() - ProcSize;
+}
+
+void PhaseVocoderPitchShifterV2::reset() {
+    _transformed_buffer.resize(ProcSize, Complex(0.0f));
+}
+
+TimeStretchPitchShifter::TimeStretchPitchShifter(TimeStretcher *stretcher, uint32_t nchannels):
+    _stretcher(stretcher),
+    _resampler(nchannels, 1.0f) {
+    _stretcher->set_stretch_factor(1.0f);
+    _shift_factor = 1.0f;
+
+    // Complex zeros[MinResampleInputSize] = {Complex()};
+    // _stretcher.push_signal(zeros, MinResampleInputSize);
+    // _pitch_shifting_stretcher.push_signal(zeros, MinResampleInputSize);
+    // _raw_buffer.resize(MinResampleInputSize * 2, 0);
+}
+
+
+TimeStretchPitchShifter::~TimeStretchPitchShifter() {
+    delete _stretcher;
+}
+
+void TimeStretchPitchShifter::push_signal(const Complex *input, const uint32_t &size) {
+    // _raw_buffer.extend_back(input, size);
+    _stretcher->push_signal(input, size);
+    // _pitch_shifting_stretcher.push_signal(input, size);
+
+    
+}
+
+uint32_t TimeStretchPitchShifter::pop_transformed_signal(Complex *output, const uint32_t &size) {
+    // do transform, eagerly
+    // resample the time-stretch pitch shifted samples
+    // while (_pitch_shifting_stretcher.n_transformed_ready() >= MinResampleInputSize) {
+    bool can_still_process = true;
+    while (can_still_process && n_transformed_ready() < size) {
+        const uint32_t desired_stretched_size = size * _shift_factor;
+        std::vector<Complex> stretched(desired_stretched_size);
+
+        const uint32_t actually_stretched = _stretcher->pop_transformed_signal(stretched.data(), desired_stretched_size);
+        stretched.resize(actually_stretched);
+        
+        can_still_process = actually_stretched > 0;
+
+        std::vector<Complex> unstretched = _resampler.resample(stretched);
+
+        _transformed_buffer.extend_back(unstretched.data(), unstretched.size());
+    }
+    uint32_t n = _transformed_buffer.pop_front_many(output, size);
+    // std::cout << "n " << n << std::endl; 
+
+    // compensate for drift
+    if (n_transformed_ready() > IncreaseResampleThreshold) {
+        static constexpr float ResampleHalflife = IncreaseResampleThreshold * 1.5;
+        float resample_factor = exp2(-(int)(n_transformed_ready() - IncreaseResampleThreshold) / ResampleHalflife);
+        _stretcher->set_stretch_factor(_shift_factor * resample_factor);
+    }
+
+    else if (n_transformed_ready() < StandardResampleThreshold) {
+        _stretcher->set_stretch_factor(_shift_factor);
+    }
+
+    for (uint32_t i = n; i < size; i++) {
+        output[i] = 0;
+    }
+    return n;
+}
+
+uint32_t TimeStretchPitchShifter::n_transformed_ready() const {
+    return _transformed_buffer.size();
+}
+
+void TimeStretchPitchShifter::reset() {
+    _raw_buffer.resize(0);
+    _transformed_buffer.resize(0);
+
+    _stretcher->reset();
+
+}
+
+void TimeStretchPitchShifter::set_shift_factor(const float &shift_factor) {
+    _shift_factor = shift_factor;
+    _stretcher->set_stretch_factor(shift_factor);
+
+    _resampler.set_stretch_factor(shift_factor);
+}
--- a/pitchshifter.h
+++ b/pitchshifter.h
@ -0,0 +1,137 @@
+/*
+* (Real time) Pitch shifting object implementations
+*/
+#ifndef MENGA_PITCH_SHIFTER
+#define MENGA_PITCH_SHIFTER
+
+#include "correlation.h"
+#include "effect.h"
+#include "sampling.h"
+#include "timestretcher.h"
+#include "fft.h"
+#include <cstdint>
+#include "common.h"
+#include "cyclequeue.h"
+#include "vecdeque.h"
+#include <vector>
+
+namespace Mengu {
+namespace dsp {
+
+class PitchShifter: public Effect {
+public:
+
+    ~PitchShifter() {}
+    virtual InputDomain get_input_domain() override {
+        return InputDomain::Time;
+    }
+
+    virtual void set_shift_factor(const float &factor) {
+        _shift_factor = factor;
+    };
+
+    virtual std::vector<EffectPropDesc> get_property_descs() const override;
+    
+    virtual void set_property(uint32_t id, EffectPropPayload data) override;
+
+    virtual EffectPropPayload get_property(uint32_t id) const override;
+protected:
+    float _shift_factor = 1.0f;
+};
+
+// Shifter in the frequency domain that uses LPC to estimate formant preservation. 
+class PhaseVocoderPitchShifterV2: public PitchShifter {
+public:
+    PhaseVocoderPitchShifterV2();
+    ~PhaseVocoderPitchShifterV2();
+
+    virtual void push_signal(const Complex *input, const uint32_t &size) override;
+    virtual uint32_t pop_transformed_signal(Complex *output, const uint32_t &size) override;
+
+    virtual uint32_t n_transformed_ready() const override;
+
+    virtual void reset() override;
+private:
+    // raw time-domain data
+    VecDeque<Complex> _raw_buffer;
+    // time domain data after pitch_shift
+    VecDeque<Complex> _transformed_buffer;
+
+    static constexpr uint32_t ProcSize = 1 << 9;
+    static constexpr uint32_t OverlapSize = 1 << 6;
+
+    LPC<ProcSize, 30> _lpc;
+
+    uint32_t _samples_processed;
+};
+
+// Shifts by resampling a time stretcher
+class TimeStretchPitchShifter: public PitchShifter {
+public:
+    TimeStretchPitchShifter(TimeStretcher *stretcher, uint32_t nchannels);
+    ~TimeStretchPitchShifter();
+
+    virtual void push_signal(const Complex *input, const uint32_t &size) override;
+    virtual uint32_t pop_transformed_signal(Complex *output, const uint32_t &size) override;
+
+    virtual uint32_t n_transformed_ready() const override;
+
+    virtual void reset() override;
+    
+    virtual void set_shift_factor(const float &factor) override;
+
+protected:
+    float _formant_shift = 1.0f;
+private:
+    static constexpr uint32_t MinResampleInputSize = 1 << 10;
+    // Size of transformed buffer before we increase resampling to compensate for drift
+    static constexpr uint32_t IncreaseResampleThreshold = 5000;
+    static constexpr uint32_t StandardResampleThreshold = 3000;
+
+    // raw time-domain data
+    VecDeque<Complex> _raw_buffer;
+    // time domain data after pitch_shift
+    VecDeque<Complex> _transformed_buffer;
+    
+
+    LinearResampler _resampler;
+    TimeStretcher *_stretcher;
+
+
+};
+/*
+class PTimeStretchPitchShifter: public PitchShifter {
+public:
+    PTimeStretchPitchShifter(uint32_t nchannels);
+    ~PTimeStretchPitchShifter();
+
+    virtual void push_signal(const Complex *input, const uint32_t &size) override;
+    virtual uint32_t pop_transformed_signal(Complex *output, const uint32_t &size) override;
+
+    virtual uint32_t n_transformed_ready() override;
+
+    virtual void reset() override;
+    
+    virtual void set_shift_factor(const float &factor) override;
+
+private:
+    static constexpr uint32_t MinResampleInputSize = 1 << 10;
+    // Size of transformed buffer before we increase resampling to compensate for drift
+    static constexpr uint32_t IncreaseResampleThreshold = 10000;
+    static constexpr uint32_t StandardResampleThreshold = 4000;
+
+    // raw time-domain data
+    VecDeque<Complex> _raw_buffer;
+    // time domain data after pitch_shift
+    VecDeque<Complex> _transformed_buffer;
+
+    LinearResampler _resampler;
+    PSOLATimeStretcher _pitch_shifting_stretcher;
+
+};
+*/
+}; // namespace dsp
+}; // namespace Mengu
+
+
+#endif
--- a/sampling.cpp
+++ b/sampling.cpp
@ -0,0 +1,78 @@
+#include "sampling.h"
+#include "common.h"
+#include "mengumath.h"
+#include "miniaudio.h"
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <iterator>
+#include <math.h>
+#include <vector>
+#include <iostream>
+
+using namespace Mengu;
+using namespace dsp;
+
+constexpr uint32_t DefaultSampleRate = 44100;
+
+// void Mengu::dsp::linear_resample_no_filter(const float *input, float *output, uint32_t size, float shift_factor, float def_val)
+
+
+LinearResampler::LinearResampler(uint32_t nchannels, float stretch_factor) :
+    _stretch_factor(stretch_factor),
+    _nchannels(nchannels) {
+    
+    ma_linear_resampler_config resampler_config = ma_linear_resampler_config_init(
+        ma_format_f32,
+        nchannels,
+        DefaultSampleRate, 
+        DefaultSampleRate * stretch_factor
+    );
+
+    ma_result result = ma_linear_resampler_init(&resampler_config, nullptr, &_resampler);
+
+    if (result != MA_SUCCESS) {
+        std::string err_msg ("Could not create resampler");
+        err_msg += std::to_string(result);
+
+        throw std::runtime_error(err_msg);
+    }
+    
+}
+
+LinearResampler::~LinearResampler() {
+    ma_linear_resampler_uninit(&_resampler, nullptr);
+}
+
+void LinearResampler::set_stretch_factor(float stretch_factor) {
+    _stretch_factor = stretch_factor;
+    ma_linear_resampler_set_rate_ratio(&_resampler, stretch_factor);
+}
+
+std::vector<Complex> LinearResampler::resample(const std::vector<Complex> &samples) {
+    std::vector<float> fsamples;
+    std::transform(samples.cbegin(), samples.cend(), std::back_inserter(fsamples),
+        [] (Complex f) { return f.real(); }
+    );
+
+    ma_uint64 input_size = fsamples.size();
+    ma_uint64 output_size;
+    ma_linear_resampler_get_expected_output_frame_count(&_resampler, samples.size(), &output_size);
+    
+    std::vector<float> foutput(output_size);
+    ma_result result = ma_linear_resampler_process_pcm_frames(&_resampler, fsamples.data(), &input_size, foutput.data(), &output_size);
+
+    if (result != MA_SUCCESS) {
+        std::string err_msg ("Could not perform resample");
+        err_msg += std::to_string(result);
+
+        throw std::runtime_error(err_msg);
+    }
+
+    std::vector<Complex> output;
+    std::transform(foutput.cbegin(), foutput.cend(), std::back_inserter(output),
+        [] (float f) { return Complex(f); }
+    );
+    
+    return output;
+}
--- a/sampling.h
+++ b/sampling.h
@ -0,0 +1,85 @@
+/**
+ * @file sampling.h
+ * @author 9exa
+ * @brief Objects that resamples signal frames
+ * @version 0.1
+ * @date 2023-05-08
+ * 
+ * @copyright Copyright (c) 2023
+ * 
+ */
+#ifndef MENGA_SAMPLING
+#define MENGA_SAMPLING
+
+#include "miniaudio.h"
+#include "mengumath.h"
+#include "common.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace Mengu {
+namespace dsp {
+
+// Performs linear resampling without any filtering. Useful for resampling frequency domain directly
+template<class T>
+inline void linear_resample_no_filter(const T *input, 
+                                      T *output, 
+                                      uint32_t size, 
+                                      float shift_factor, 
+                                      T interp(T a, T b, float w) = &lerp,
+                                      T def_val = T()) {
+    float shift_rep = 1 / shift_factor;
+    // interpelate the positiobs in the input
+    if (shift_factor > 1.0f) {
+        // iterate from the back of the array to make the resample work in-place
+        for (uint32_t j = 0; j < size; j++) {
+            float inp_index = j * shift_rep;
+            uint32_t lower = uint32_t(inp_index);
+            float remainder = inp_index - lower;
+
+            output[j] = interp(input[lower], input[lower + 1], remainder);
+        }
+    }
+    else {
+        int j = size - 1;
+        int resample_start = (int) (size * shift_factor);
+        while (j >= resample_start) {
+            output[j] = def_val;
+            j -= 1;
+        }
+        while (j >= 0) {
+            int inp_index = j * shift_rep;
+            int lower = int(inp_index);
+            float remainder = inp_index - lower;
+
+            output[j] = interp(input[lower], input[lower + 1], remainder);
+            j -= 1;
+        }
+    }
+}
+
+// Just a wrapper around miniaudios resampler API, which uses linear resampling 
+// with a 4th order lowpass filter by default
+class LinearResampler {
+public:
+    LinearResampler(uint32_t nchannels, float stretch_factor);
+    ~LinearResampler();
+
+    void set_stretch_factor(float stretch_factor);
+
+    std::vector<Complex> resample(const std::vector<Complex> &samples);
+
+private:
+    ma_linear_resampler _resampler;
+
+    float _stretch_factor;
+    uint32_t _nchannels;
+
+
+};
+
+}
+}
+
+#endif
--- a/singletons.h
+++ b/singletons.h
@ -0,0 +1,42 @@
+/*
+    Store large objects used by multiple dsp objects in one place (i.e. FFT Transforms)
+
+*/ 
+#ifndef MENGA_SINGLETONS
+#define MENGA_SINGLETONS
+
+#include <unordered_map>
+#include "common.h"
+#include "fft.h"
+
+namespace Mengu {
+namespace dsp {
+
+struct Singletons {
+private:
+    static Singletons *_singleton;
+    std::unordered_map<uint32_t, FFT> _ffts;
+public:
+    static Singletons *get_singleton() {
+        if (_singleton = nullptr) {
+            _singleton = new Singletons();
+        }
+        return _singleton;
+    }
+    // getters are gaurenteed to return a valid object. If none exists one will be created
+
+    // ffts initialized to process predetermined length of signal
+    const FFT &get_fft(uint32_t size) {
+        
+        if (_ffts.find(size) != _ffts.end()) {
+            _ffts.emplace(size, FFT(size));
+        }
+        return _ffts.at(size);
+    }
+};
+
+
+}
+}
+
+#endif
--- a/timestretcher.cpp
+++ b/timestretcher.cpp
@ -0,0 +1,809 @@
+#include "timestretcher.h"
+#include "common.h"
+#include "correlation.h"
+#include "effect.h"
+#include "interpolation.h"
+#include "linalg.h"
+#include "mengumath.h"
+#include "vecdeque.h"
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <complex>
+#include <cstdint>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+using namespace Mengu;
+using namespace dsp;
+
+void TimeStretcher::set_stretch_factor(const float &scale) {
+    _stretch_factor = scale;
+}
+
+Effect::InputDomain TimeStretcher::get_input_domain() {
+    return InputDomain::Time;
+}
+
+std::vector<EffectPropDesc> TimeStretcher::get_property_descs() const {
+    return {
+        EffectPropDesc {
+            .type = EffectPropType::Slider,
+            .name = "stretch_factor",
+            .desc = "Scales the length of pushed signals by this amount",
+            .slider_data = {
+                .min_value = 0.5,
+                .max_value = 2,
+                .scale = Exp,
+            }
+        }
+    };
+};
+
+void TimeStretcher::set_property(uint32_t id, EffectPropPayload data) {
+    switch (id) {
+        case 0:
+            if (data.type == Slider) {
+                set_stretch_factor(data.value);
+            }
+            break;
+    };
+}
+
+EffectPropPayload TimeStretcher::get_property(uint32_t id) const {
+    switch (id) {
+        case 0:
+            return EffectPropPayload {
+                .type = Slider,
+                .value = _stretch_factor,
+            };
+            
+    };
+    return EffectPropPayload {};
+}
+
+
+static float _wrap_phase(float phase) {
+    return fposmod(phase + MATH_PI, MATH_TAU) - MATH_PI;
+}
+
+// difference between 2 unwrapped phases. since, phases are preiodic, picks the closest one to the estimate
+static float _phase_diff(float next, float prev, float est = 0.0f) {
+    return _wrap_phase(next - prev - est) + est;
+}
+
+PhaseVocoderTimeStretcher::PhaseVocoderTimeStretcher(bool preserve_formants) {
+    _preserve_formants = preserve_formants;
+    reset();
+}
+
+void PhaseVocoderTimeStretcher::push_signal(const Complex *input, const uint32_t &size) {
+    _raw_buffer.extend_back(input, size);
+}
+
+uint32_t PhaseVocoderTimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) {
+    while (n_transformed_ready() < size && _raw_buffer.size() >= WindowSize) {
+        std::array<Complex, WindowSize> sample;
+        _raw_buffer.to_array(sample.data(), WindowSize);
+        _load_new_freq_window(sample);
+
+        std::array<Complex, WindowSize / 2> curr_freqs;
+        std::copy(_lpc.get_freq_spectrum().cbegin(), _lpc.get_freq_spectrum().cbegin() + WindowSize / 2, curr_freqs.begin());
+
+        float analysis_hop_sizef = SynthesisHopSize / _stretch_factor;
+        _stretched_sample_truncated += std::modf(analysis_hop_sizef, &analysis_hop_sizef);
+        uint32_t analysis_hop_size = (uint32_t) analysis_hop_sizef;
+        if (_stretched_sample_truncated) {
+            analysis_hop_size += 1;
+            _stretched_sample_truncated -= 1;
+        }
+        
+        std::array<float, WindowSize / 2> amplitudes = _calc_scaled_magnitudes();
+        std::array<float, WindowSize / 2> new_phases = _calc_scaled_phases(curr_freqs, analysis_hop_size);
+
+        // std::array<Complex, WindowSize> new_samples = _calc_new_samples(amplitudes.data(), curr_raw_phases.data()); 
+        std::array<Complex, WindowSize> new_samples = _calc_new_samples(sample, amplitudes.data(), new_phases.data()); 
+        
+        mix_and_extend(_transformed_buffer, new_samples, WindowSize - SynthesisHopSize, hann_window);
+        // _transformed_buffer.extend_back(new_samples.data(), WindowSize);
+
+        std::transform(new_phases.cbegin(), new_phases.cend(), _last_scaled_phases.begin(), 
+            [] (float unwrapped) { return _wrap_phase(unwrapped); }
+        );
+
+        _raw_buffer.pop_front_many(nullptr, analysis_hop_size);
+
+    }
+
+    return _transformed_buffer.pop_front_many(output, MIN(n_transformed_ready(), size));
+}
+
+uint32_t PhaseVocoderTimeStretcher::n_transformed_ready() const {
+    return MAX(SynthesisHopSize, _transformed_buffer.size()) - (SynthesisHopSize);
+}
+
+void PhaseVocoderTimeStretcher::reset() {
+    _prev_raw_mag2s.fill(0.0f);
+    _prev_raw_phases.fill(0.0f);
+
+    _last_scaled_phases.fill(0.0);
+
+    _raw_buffer.resize(0);
+    _transformed_buffer.resize(SynthesisHopSize, 0);
+    _stretched_sample_truncated = 0.0;
+}
+
+// void PhaseVocoderTimeStretcher::set_stretch_factor(const float &stretch_factor) {
+//     _stretch_factor = stretch_factor;
+//     // _last_scaled_phases.fill(0.0f);
+// };
+
+std::array<float, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderTimeStretcher::_calc_scaled_magnitudes() {
+    std::array<float, WindowSize / 2> mags;
+    
+    if (_preserve_formants) {
+        const std::array<float, WindowSize> &envelope = _lpc.get_envelope();
+        const std::array<float, WindowSize> &residuals = _lpc.get_residuals();
+
+        for (uint32_t i = 0; i < WindowSize / 2; i++) {
+            const uint32_t stretched_ind = i * _stretch_factor;
+            if (stretched_ind < WindowSize / 2 && std::isfinite(residuals[i] * envelope[stretched_ind])) {
+                mags[i] = residuals[i] * envelope[stretched_ind];
+            }
+            else {
+                mags[i] = 0.0f;
+            }
+        }
+    }
+    else {
+        const std::array<Complex, WindowSize> &freqs = _lpc.get_freq_spectrum();
+        for (uint32_t i = 0; i < WindowSize / 2; i++) {
+            mags[i] = sqrt(std::norm(freqs[i]));
+        }
+    }
+    
+    return mags;
+}
+
+std::array<float, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderTimeStretcher::_calc_scaled_phases(
+        const std::array<Complex, WindowSize / 2> &curr_freqs,
+        const uint32_t hopsize) {
+    std::array<float, WindowSize / 2> curr_mag2s;
+    std::array<float, WindowSize / 2> curr_phases;
+    std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_mag2s.begin(), 
+        [] (Complex freq) { return std::norm(freq); }
+    );
+    std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_phases.begin(), 
+        [] (Complex freq) { return std::arg(freq); }
+    );
+
+
+    std::array<float, WindowSize / 2> phase_deltas;
+    float stretch_factor = (float) SynthesisHopSize / hopsize;
+    for (int i = 0; i < WindowSize / 2; i++) {
+        const float est = i * MATH_TAU * ((float) hopsize / WindowSize);// / stretch_factor;
+        phase_deltas[i] = _phase_diff(curr_phases[i], _prev_raw_phases[i], est);
+    }
+    std::array<float, WindowSize / 2> new_phases;
+    for (int i = 0; i < WindowSize / 2; i++) {
+        new_phases[i] = _last_scaled_phases[i] + (phase_deltas[i] * stretch_factor);
+    }
+
+    _replace_prev_freqs(curr_mag2s, curr_phases);
+
+    return new_phases;
+}
+
+std::array<Complex, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderTimeStretcher::_load_new_freq_window(const std::array<Complex, WindowSize> &sample) {
+    _lpc.load_sample(sample.data());
+    
+    std::array<Complex, WindowSize / 2> new_freqs;
+    for (uint32_t i = 0; i < WindowSize / 2; i++) {
+        new_freqs[i] = _lpc.get_freq_spectrum()[i];
+    }
+
+    return new_freqs;
+}
+
+void PhaseVocoderTimeStretcher::_replace_prev_freqs(const std::array<float, WindowSize / 2> &curr_mag2s, const std::array<float, WindowSize / 2> &curr_phases) {
+    std::copy(curr_mag2s.cbegin(), curr_mag2s.cend(), _prev_raw_mag2s.begin());
+    std::copy(curr_phases.cbegin(), curr_phases.cend(), _prev_raw_phases.begin());
+}
+
+std::array<Complex, PhaseVocoderTimeStretcher::WindowSize> PhaseVocoderTimeStretcher::_calc_new_samples(
+            const std::array<Complex, WindowSize> &raw_samples,
+            const float *amplitudes, 
+            const float *phases) {
+    std::array<Complex, WindowSize> freqs = {0.0};
+    for (uint32_t i = 0; i < WindowSize / 2; i++) {
+        freqs[i] = 2.0f * std::polar(amplitudes[i], phases[i]);
+    }
+
+    std::array<Complex, WindowSize> new_samples;
+    _lpc.get_fft().inverse_transform(freqs.data(), new_samples.data());
+
+    // Make shifted as loud as raw samples
+    _loudness_norm.normalize(new_samples.data(), raw_samples.data(), new_samples.data());
+
+    return new_samples;
+}
+
+PhaseVocoderDoneRightTimeStretcher::PhaseVocoderDoneRightTimeStretcher(bool preserve_formants) : PhaseVocoderTimeStretcher(preserve_formants) {}
+
+std::array<float, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderDoneRightTimeStretcher::_calc_scaled_phases(
+        const std::array<Complex, WindowSize / 2> &curr_freqs,
+        const uint32_t hopsize) {
+    std::array<float, WindowSize / 2> curr_mag2s;
+    std::array<float, WindowSize / 2> curr_phases;
+    std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_mag2s.begin(), 
+        [] (Complex freq) { return std::norm(freq); }
+    );
+    std::transform(curr_freqs.cbegin(), curr_freqs.cend(), curr_phases.begin(), 
+        [] (Complex freq) { return std::arg(freq); }
+    );
+
+    float stretch_factor = (float) SynthesisHopSize / hopsize;
+    std::array<float, WindowSize / 2> time_phase_deltas;
+    for (int i = 0; i < WindowSize / 2; i++) {
+        const float est = i * MATH_TAU * ((float) hopsize / WindowSize);
+        time_phase_deltas[i] = _phase_diff(curr_phases[i], _prev_raw_phases[i], est);
+    }
+
+    std::array<float, WindowSize / 2> freq_phase_deltas;
+    for (uint32_t i = 1; i < WindowSize / 2 - 1; i++) {
+        const float up_delta = _wrap_phase(curr_phases[i + 1] - curr_phases[i]);
+        const float down_delta = _wrap_phase(curr_phases[i] - curr_phases[i - 1]);
+        freq_phase_deltas[i] = 0.5f * (up_delta + down_delta);
+    }
+    freq_phase_deltas[0] = _wrap_phase(curr_phases[1] - curr_phases[0]);
+    freq_phase_deltas[WindowSize / 2 - 1] = _wrap_phase(curr_phases[WindowSize / 2 - 1] - curr_phases[WindowSize / 2 - 2]);
+
+    std::array<float, WindowSize / 2> new_phases = _propagate_phase_gradients (
+        time_phase_deltas,
+        freq_phase_deltas,
+        _last_scaled_phases,
+        _prev_raw_mag2s,
+        curr_mag2s,
+        stretch_factor,
+        1e-3f
+    );
+
+    // for (int i = 0; i < WindowSize / 2; i++) {
+    //     new_phases[i] = _last_scaled_phases[i] + (time_phase_deltas[i] * stretch_factor);
+    // }
+
+    _replace_prev_freqs(curr_mag2s, curr_phases);
+    return new_phases;
+}
+
+std::array<float, PhaseVocoderTimeStretcher::WindowSize / 2> PhaseVocoderDoneRightTimeStretcher::_propagate_phase_gradients(
+        const std::array<float, WindowSize / 2> &phase_time_deltas, 
+        const std::array<float, WindowSize / 2> &phase_freq_deltas, 
+        const std::array<float, WindowSize / 2> &last_stretched_phases, 
+        const std::array<float, WindowSize / 2> &prev_freq_mags,
+        const std::array<float, WindowSize / 2> &next_freq_mags,
+        const float stretch_factor,
+        const float tolerance) {
+    // sort indexes to the next frequencies based on the magnitude of that bin, in descending order
+    // also, bins with magnitude under the threshold do not propagate in the frequency domain
+    float max_mag = 0.0f;
+    for (uint32_t i = 0; i < WindowSize / 2; i++) {
+        max_mag = MAX(MAX(max_mag, prev_freq_mags[i]), next_freq_mags[i]);
+    }
+    const float abs_tol = max_mag * (tolerance * tolerance);
+
+    // Used to store indexs to each frequencing in the propagation_queue
+    struct FreqBin {
+        enum {
+            Prev,
+            Next,
+        };
+        uint8_t frame;
+        uint32_t bin; 
+    };
+    const auto freq_bin_cmp = [&prev_freq_mags, &next_freq_mags] (FreqBin a, FreqBin b) { 
+        float a_mag = (a.frame == FreqBin::Prev) ? prev_freq_mags[a.bin] : next_freq_mags[a.bin];
+        float b_mag = (b.frame == FreqBin::Prev) ? prev_freq_mags[b.bin] : next_freq_mags[b.bin];
+        return a_mag < b_mag;
+    };
+    
+    // Max Heap of the frequency bins to propagate. (Listen I REALLY don't want allocate dynamically)
+    std::array<FreqBin, WindowSize * 3> propagation_queue;
+    uint32_t propagation_queue_size = WindowSize / 2;
+    for (uint32_t i = 0; i < WindowSize / 2; i++) {
+        propagation_queue[i] = FreqBin { .frame = FreqBin::Prev, .bin = i };
+    }
+    std::make_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
+
+    // Set of frequency bins to propagate to
+    bool can_recieve_propagation[WindowSize / 2];
+    uint32_t n_can_recieve_propagation = WindowSize / 2;
+    for (uint32_t i = 0; i < WindowSize / 2; i++) {
+        if ((next_freq_mags[i] < abs_tol)) {
+            can_recieve_propagation[i] = false;
+            n_can_recieve_propagation -= 1;
+        }
+        else {
+            can_recieve_propagation[i] = true;
+        }
+    }
+
+    // perform propagation in all dimension
+    std::array<float, WindowSize / 2> new_phases {0};
+    while (n_can_recieve_propagation > 0) {
+        std::pop_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
+        FreqBin next_bin = propagation_queue[propagation_queue_size - 1];
+        propagation_queue_size -= 1;
+
+        const uint32_t freq_ind = next_bin.bin;
+        if (next_bin.frame == FreqBin::Prev) {
+            if (can_recieve_propagation[freq_ind]) {
+                new_phases[freq_ind] = last_stretched_phases[freq_ind] + (phase_time_deltas[freq_ind] * stretch_factor);
+                
+                //remove from set
+                can_recieve_propagation[freq_ind] = false;
+                n_can_recieve_propagation -= 1;
+
+                // push to the heap
+                propagation_queue[propagation_queue_size] = FreqBin {.frame = FreqBin::Next, .bin = freq_ind};
+                propagation_queue_size += 1;
+                std::push_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
+            }
+        }
+        else {
+            if (freq_ind > 0 && can_recieve_propagation[freq_ind - 1]) {
+                const uint32_t freq_down = freq_ind - 1;
+
+                new_phases[freq_down] = new_phases[freq_ind] - (0.5 * (phase_freq_deltas[freq_down] + phase_freq_deltas[freq_ind]) * stretch_factor);
+
+                //remove from set
+                can_recieve_propagation[freq_down] = false;
+                n_can_recieve_propagation -= 1;
+
+                // push to the heap
+                propagation_queue[propagation_queue_size] = FreqBin {.frame = FreqBin::Next, .bin = freq_down};
+                propagation_queue_size += 1;
+                std::push_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
+            }
+            if ((freq_ind < WindowSize / 2 - 1) && can_recieve_propagation[freq_ind + 1]) {
+                const uint32_t freq_up = freq_ind + 1;
+                new_phases[freq_up] = new_phases[freq_ind] + (0.5 * (phase_freq_deltas[freq_up] + phase_freq_deltas[freq_ind]) * stretch_factor);
+
+                //remove from set
+                can_recieve_propagation[freq_up] = false;
+                n_can_recieve_propagation -= 1;
+
+                // push to the heap
+                propagation_queue[propagation_queue_size] = FreqBin {.frame = FreqBin::Next, .bin = freq_up};
+                propagation_queue_size += 1;
+                std::push_heap(propagation_queue.begin(), propagation_queue.begin() + propagation_queue_size, freq_bin_cmp);
+            }
+        }
+    }
+
+
+    // std::array<float, WindowSize / 2> new_phases {0};
+    // auto freq_ind_cmp = [next_freq_mags] (uint32_t a, uint32_t b) { return next_freq_mags[a] > next_freq_mags[b]; };
+    // std::array<uint32_t, WindowSize / 2> freq_inds;
+    // for (uint32_t i = 0; i < WindowSize / 2; i++) { freq_inds[i] = i;}
+    // std::sort(freq_inds.begin(), freq_inds.end(), freq_ind_cmp);
+
+    // std::array<float, WindowSize / 2> prop_source_mag {0.0f};
+    // for (uint32_t i = 0; i < WindowSize / 2; i++) {
+    //     new_phases[i] = last_stretched_phases[i] + phase_time_deltas[i] * stretch_factor;
+    //     prop_source_mag[i] = prev_freq_mags[i];
+    // }
+    // for (const uint32_t freq_ind: freq_inds) {
+    //     if (freq_ind > 0 && prop_source_mag[freq_ind - 1] < next_freq_mags[freq_ind]) {
+    //         const uint32_t freq_down = freq_ind - 1;
+    //         new_phases[freq_down] = new_phases[freq_ind] - 0.5 * stretch_factor * (phase_freq_deltas[freq_down] + phase_freq_deltas[freq_ind]);
+    //         prop_source_mag[freq_down] = next_freq_mags[freq_ind];
+    //     }
+    //     if (freq_ind < WindowSize / 2 && prop_source_mag[freq_ind + 1] < next_freq_mags[freq_ind]) {
+    //         const uint32_t freq_up = freq_ind + 1;
+    //         new_phases[freq_up] = new_phases[freq_ind] - 0.5 * stretch_factor * (phase_freq_deltas[freq_up] + phase_freq_deltas[freq_ind]);
+    //         prop_source_mag[freq_up] = next_freq_mags[freq_ind];
+    //     }
+
+
+    //     prop_source_mag[freq_ind] = 2e10;
+    // }
+
+    return new_phases;   
+}
+
+OLATimeStretcher::OLATimeStretcher(uint32_t w_size) {
+    _window_size = w_size;
+    _overlap = _window_size / 5;
+    _selection_window = _window_size / 2;
+
+    _transformed_buffer.resize(_window_size);
+}
+
+template<class T>
+static void mix_into_extend_by_pointer(const Complex *new_data,
+                                       T &output,
+                                       const uint32_t &window_size,
+                                       const uint32_t &overlap_size) {
+    uint32_t i = 0;
+    
+    for (; i < overlap_size; i++) {
+        const float w = hann_window((float) i / overlap_size);
+        const uint32_t output_ind = output.size() - overlap_size + i;
+
+        output[output_ind] = lerp(new_data[i], output[output_ind], w);
+    }
+
+    for (; i < window_size; i++) {
+        output.push_back(new_data[i]);
+    }
+}
+
+void OLATimeStretcher::push_signal(const Complex *input, const uint32_t &size) {
+    _raw_buffer.extend_back(input, size);
+
+}
+
+
+uint32_t OLATimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) {
+    // Do stretchy
+    // Theoretical interval between samples per process 
+    const uint32_t sample_skip = (_window_size - _overlap) / _stretch_factor;
+
+    // based on the example given by https://www.surina.net/article/time-and-pitch-scaling.html
+    const uint32_t length_for_process = MAX(sample_skip, _window_size + _selection_window);
+
+    while (_raw_buffer.size() > length_for_process) {
+        std::vector<Complex> new_data(_window_size + _selection_window);
+        _raw_buffer.to_array(new_data.data(), _window_size + _selection_window);
+
+        std::vector<Complex> prev_tail(_overlap);
+        _transformed_buffer.pop_back_many(prev_tail.data(), _overlap);
+
+        // find best start for overlap
+        uint32_t overlap_ind = find_max_correlation_quad(prev_tail.data(), new_data.data(), _overlap, _selection_window);
+
+        mix_into_extend_by_pointer(new_data.data() + overlap_ind, prev_tail, _window_size, _overlap);
+
+        for (auto v: prev_tail) {
+            _transformed_buffer.push_back(v);
+        }
+
+        _raw_buffer.pop_front_many(nullptr, sample_skip);
+    }
+
+
+    uint32_t n = _transformed_buffer.pop_front_many(output, MIN(size, n_transformed_ready()));
+
+    for (uint32_t i = n; i < size; i++ ) {
+        output[i] = 0;
+    }
+
+    return n;
+}
+
+uint32_t OLATimeStretcher::n_transformed_ready() const {
+    return _transformed_buffer.size() - _overlap;
+}
+
+void OLATimeStretcher::reset() {
+    _raw_buffer.resize(0);
+    _transformed_buffer.resize(_window_size, 0);
+}
+
+WSOLATimeStretcher::WSOLATimeStretcher() {
+    // _transformed_buffer.resize(MaxBackWindowOverlap);
+}
+
+void WSOLATimeStretcher::push_signal(const Complex *input, const uint32_t &size) {
+    _raw_buffer.extend_back(input, size);
+
+    // perform the stretchy
+    // if (_raw_buffer.size() > SampleProcSize) {
+    //     std::array<Complex, SampleProcSize> samples;
+
+    //     _raw_buffer.to_array(samples.data(), SampleProcSize);
+
+    //     // do the stretchy
+    //     uint32_t frames_used = _stretch_sample_and_add(samples.data());
+
+    //     // delete everything used
+    //     _raw_buffer.pop_front_many(nullptr, frames_used);
+    //     _last_overlap_start -= frames_used;
+    //     _next_overlap_start -= frames_used;
+
+
+    // }
+}
+
+uint32_t WSOLATimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) {
+    // lazily perform the stretchy
+    while ((_raw_buffer.size() > SampleProcSize) && (n_transformed_ready() < size)) {
+        std::array<Complex, SampleProcSize> samples;
+
+        _raw_buffer.to_array(samples.data(), SampleProcSize);
+
+        // do the stretchy
+        uint32_t frames_used = _stretch_sample_and_add(samples.data());
+
+        // delete everything used
+        _raw_buffer.pop_front_many(nullptr, frames_used);
+        _last_overlap_start -= frames_used;
+        _next_overlap_start -= frames_used;
+    }
+
+    uint32_t n = _transformed_buffer.pop_front_many(output, MIN(size, n_transformed_ready()));
+    for (uint32_t i = n; i < size; i++) {
+        output[i] = 0;
+    }
+    return n;
+}
+
+uint32_t WSOLATimeStretcher::n_transformed_ready() const {
+    return _transformed_buffer.size();
+}
+
+void WSOLATimeStretcher::reset() {
+    _raw_buffer.resize(0);
+    _transformed_buffer.resize(0);
+
+}
+
+uint32_t WSOLATimeStretcher::_stretch_sample_and_add(const Complex *sample) {
+    // base overlap
+    const uint32_t overlap_size = WindowSize / 4;
+    // search forward for better overlap point
+    const uint32_t search_window = WindowSize / 5;
+    const uint32_t flat_duration = WindowSize - 2 * overlap_size;
+
+    // consider te amount of frames skiped through truncation
+    double sample_skipd;
+    double dropped_per_window = std::modf((WindowSize - overlap_size) / _stretch_factor, &sample_skipd);
+
+    const uint32_t sample_skip = sample_skipd;
+    
+    while (_next_overlap_start + WindowSize < SampleProcSize) {
+        // Find insertion that best fits the new sample
+        uint32_t prev_not_overlapped = find_max_correlation(
+            sample + _next_overlap_start, 
+            sample + _last_overlap_start, 
+            overlap_size, 
+            search_window
+        );
+
+        uint32_t actual_last_overlap = _last_overlap_start + prev_not_overlapped;
+        const uint32_t actually_overlapped = overlap_size - prev_not_overlapped;
+
+        Complex overlap_buffer[SampleProcSize / 2];
+        
+        overlap_add(
+            sample + actual_last_overlap, 
+            sample + _next_overlap_start,
+            overlap_buffer,
+            actually_overlapped,
+            hamming_window
+        );
+
+        // append new data
+        _transformed_buffer.extend_back(sample + _last_overlap_start, prev_not_overlapped);
+        _transformed_buffer.extend_back(overlap_buffer, actually_overlapped);
+        // take 2 * prev_not_overlapped from both sides of the flat duration
+        _transformed_buffer.extend_back(sample + _next_overlap_start + actually_overlapped, flat_duration);
+
+        // set for next cycle
+        _last_overlap_start = _next_overlap_start + actually_overlapped + flat_duration;
+        _next_overlap_start = _next_overlap_start + sample_skip;
+
+        _stretched_sample_truncated += dropped_per_window;
+
+        if (_stretched_sample_truncated > 1.0) {
+            _next_overlap_start += 1;
+            _stretched_sample_truncated -= 1.0;
+        }
+    }
+
+    return MIN(_last_overlap_start, _next_overlap_start);
+}
+
+PSOLATimeStretcher::PSOLATimeStretcher() {
+    _transformed_buffer.resize(MaxBackWindowOverlap);
+}
+
+
+void PSOLATimeStretcher::push_signal(const Complex *input, const uint32_t &size) {
+    _raw_buffer.extend_back(input, size);
+}
+
+uint32_t PSOLATimeStretcher::pop_transformed_signal(Complex *output, const uint32_t &size) {
+    // lazily perform the stretchy
+    while ((_raw_buffer.size() > SampleProcSize) && (size > n_transformed_ready())) {
+        std::array<Complex, SampleProcSize> samples;
+        std::array<Complex, SampleProcSize> windowed_samples;
+
+        _raw_buffer.to_array(samples.data(), SampleProcSize);
+        _raw_buffer.to_array(windowed_samples.data(), SampleProcSize);
+
+        window_ends(windowed_samples.data(), SampleProcSize, SampleProcSize / 10, hann_window);
+
+        int est_freq = _est_fund_frequency(windowed_samples.data());
+        
+        uint32_t est_period = (1.0 / est_freq) * SampleProcSize / 2;
+        
+        std::vector<uint32_t> est_peaks = _find_upcoming_peaks(samples.data(), est_period);
+
+        _stretch_peaks_and_add(samples.data(), est_peaks);
+
+        // delete everything used
+        _raw_buffer.pop_front_many(nullptr, est_peaks.back() + 1); // +1 because est_peaks are the INDEX of the peaks, not the num of frames used
+    }
+    
+    uint32_t n = _transformed_buffer.pop_front_many(output, MIN(size, n_transformed_ready()));
+
+    for (uint32_t i = n; i < size; i++) {
+        output[i] = 0;
+    }
+
+    return n;
+}
+
+uint32_t PSOLATimeStretcher::n_transformed_ready() const {
+    return _transformed_buffer.size() - MaxBackWindowOverlap;
+}
+
+void PSOLATimeStretcher::reset() {
+    _transformed_buffer.resize(MaxBackWindowOverlap, 0);
+    _raw_buffer.resize(0);
+
+}
+
+int PSOLATimeStretcher::_est_fund_frequency(const Complex *samples) {
+    // Todo: move all this to a PitchDetecter object or something
+    
+    _lpc.load_sample(samples);
+    const std::array<float, SampleProcSize> residuals = _lpc.get_residuals();
+
+    // find candidate from residual peaks (only use positive half of the spectrum)
+    std::vector<float> srhs = calc_srhs(residuals.data(), residuals.size() / 2, MinFreqInd, MaxFreqInd, 10);
+
+    float max_srhs = -10e32;
+    int max_pitch_ind = MaxFreqInd;
+    for (int i = 0; i < srhs.size(); i++) {
+        if (srhs[i] > max_srhs) {
+            max_pitch_ind = MinFreqInd + i;
+            max_srhs = srhs[i];
+        }
+    }
+
+    return max_pitch_ind;
+}
+
+std::vector<uint32_t> PSOLATimeStretcher::_find_upcoming_peaks(const Complex *samples, const uint32_t est_period) {
+    // assume that peaks are around est_period apart, but give some sllack as pitches change slightly
+    const uint32_t search_start = 0.8 * est_period;
+    const uint32_t search_end = 1.2 * est_period;
+
+    std::vector<uint32_t> peaks; 
+    uint32_t last_peak = 0;
+
+    while ((last_peak + est_period) < SampleProcSize) {
+        uint32_t peak = last_peak + search_start;
+        float peak_size = 0.0f;
+
+        for (uint32_t i = last_peak + search_start; i < MIN(SampleProcSize, last_peak + search_end); i++) {
+            if (samples[i].real() > peak_size) {
+                peak_size = samples[i].real();
+                peak = i;
+            }
+        }
+
+        peaks.push_back(peak);
+        last_peak = peak;
+    }
+    
+    return peaks;
+}
+
+// // overlap and extend without applying a window function first
+// // overlap_size may be larger than new_data.size(), in which case only new_data.size() is added. the offset is the same
+template<class T, class NewT>
+static void _mix_and_extend_no_window(T &array, const NewT new_data, const uint32_t &overlap_size) {
+    
+    uint32_t i = 0;
+
+    for(; i < MIN(overlap_size, new_data.size()); i++) {
+        uint32_t ind = array.size() - overlap_size + i;
+        array[ind] = array[ind] + new_data[i];
+    }
+
+    for (; i < new_data.size(); i++) {
+        array.push_back(new_data[i]);
+    }
+
+}
+
+void PSOLATimeStretcher::_stretch_peaks_and_add(const Complex *samples, const std::vector<uint32_t> &est_peaks) {
+
+    // accumulate by collecting the left halves and right halves if each peak sepeartly
+    std::vector<std::vector<Complex>> right_windows;
+    std::vector<std::vector<Complex>> left_windows;
+
+    uint32_t last_peak = 0;
+    for (uint32_t next_peak: est_peaks) {
+        std::vector<Complex> left_window;
+        std::vector<Complex> right_window;
+
+        for (uint32_t i = last_peak; i < next_peak; i++) {
+            float w = (float) (i - last_peak) / (next_peak - last_peak);
+            w = hann_window(w);
+
+            left_window.emplace_back(w * samples[i]);
+            right_window.emplace_back((1.0f - w) * samples[i]);
+        }
+
+        left_windows.push_back(std::move(left_window));
+        right_windows.push_back(std::move(right_window));
+
+        last_peak = next_peak;
+    }
+
+    // arrange the peaks together
+    last_peak = 0;
+
+    
+    for (uint32_t i = 0; i < left_windows.size(); i++) {
+        const std::vector<Complex> left_window = std::move(left_windows[i]);
+        const std::vector<Complex> right_window = std::move(right_windows[i]);
+
+        // use overlapsize of previous period to make the right window continuous with the previous left window
+        _mix_and_extend_no_window(_transformed_buffer, right_window, _next_right_window_overlap);
+
+
+        // calculate the frames overlap taking the truncated part of previous processes into account
+        int curr_period = est_peaks[i] - last_peak;
+        double overlap_sized;
+        _stretched_sample_truncated += std::abs(std::modf((2.0 - _stretch_factor) * curr_period, &overlap_sized));
+        
+        uint32_t overlap_size = std::abs(overlap_sized);
+        if (_stretched_sample_truncated > 1.0f) {
+            overlap_size += 1;
+            _stretched_sample_truncated -= 1.0f;
+        }
+
+
+        if (_stretch_factor >= 1.0f) {
+            if (_stretch_factor > 2.0f) {
+
+                // pad the gap between each side with 0s
+                uint32_t padding_size = std::move(overlap_size);
+            
+                for (uint32_t j = 0; j < padding_size; j++) {
+                    _transformed_buffer.push_back(0);
+                }
+
+                _transformed_buffer.extend_back(left_window.data(), left_window.size());
+            }
+            else {
+                // some parts are overlapped
+                // mix it with the left window of the next
+                _mix_and_extend_no_window(_transformed_buffer, left_window, overlap_size);
+
+            }
+
+            // the end of the _transform buffer is the end of the left window, which is equivalenting to setting both of these values to 0
+            _next_right_window_overlap = 0;
+        }
+        else {
+            // since the left peak ends before the right peak ends, we have to overlap both the right and left peaks into the transformed data
+            _mix_and_extend_no_window(_transformed_buffer, left_window, overlap_size);
+            
+            _next_right_window_overlap = overlap_size - curr_period;
+        }
+
+
+
+        last_peak = est_peaks[i];
+    }
+}
--- a/timestretcher.h
+++ b/timestretcher.h
@ -0,0 +1,226 @@
+#ifndef MENGA_TIME_STRETCHER
+#define MENGA_TIME_STRETCHER
+
+#include "common.h"
+#include "correlation.h"
+#include "effect.h"
+#include "fft.h"
+#include "loudness.h"
+#include "vecdeque.h"
+#include <array>
+#include <cstdint>
+#include <vector>
+
+namespace Mengu {
+namespace dsp {
+
+class TimeStretcher: public Effect {
+public:
+    virtual ~TimeStretcher() {}
+    virtual InputDomain get_input_domain() override;
+
+    virtual void set_stretch_factor(const float &scale);
+
+    virtual std::vector<EffectPropDesc> get_property_descs() const override;
+
+    virtual void set_property(uint32_t id, EffectPropPayload data) override;
+
+    virtual EffectPropPayload get_property(uint32_t id) const override;
+
+protected:
+    float _stretch_factor = 1.0f;
+
+
+    // keep track of resampled size error due to rounding errors
+    double _stretched_sample_truncated = 0.0;
+};
+
+// Classic timeshifter be scale the phases of frequency bins in the time dimension
+class PhaseVocoderTimeStretcher: public TimeStretcher {
+public:
+    PhaseVocoderTimeStretcher(bool _preserve_formants = false);
+
+    virtual void push_signal(const Complex *input, const uint32_t &size) override;
+    virtual uint32_t pop_transformed_signal(Complex *output, const uint32_t &size) override;
+
+    virtual uint32_t n_transformed_ready() const override;
+
+    // virtual void set_stretch_factor(const float &stretch_factor) override;
+
+    virtual void reset() override;
+protected:
+
+    static constexpr uint32_t WindowSize = 1 << 9;
+    static constexpr uint32_t SynthesisHopSize = 400;
+
+    static constexpr uint32_t NStoredWindows = 2;
+
+    std::array<float, WindowSize / 2> _prev_raw_mag2s;
+    std::array<float, WindowSize / 2> _prev_raw_phases;
+    // phases of the last transformed samples
+    std::array<float, WindowSize / 2> _last_scaled_phases;
+
+    VecDeque<Complex> _raw_buffer;
+    VecDeque<Complex> _transformed_buffer;
+
+    virtual std::array<float, WindowSize / 2> _calc_scaled_magnitudes();
+    // expects time_deltas to be scaled
+    virtual std::array<float, WindowSize / 2> _calc_scaled_phases(const std::array<Complex, WindowSize / 2> &curr_freqs, const uint32_t hopsize);
+
+    std::array<Complex, WindowSize / 2> _load_new_freq_window(const std::array<Complex, WindowSize> &sample);
+    void _replace_prev_freqs(const std::array<float, WindowSize / 2> &curr_mags, const std::array<float, WindowSize / 2> &curr_phases);
+private:
+
+    // FFT _fft;
+    // for finding the envelope and doing fft
+    LPC<WindowSize, 50> _lpc;
+
+    bool _preserve_formants = false;
+
+    // Used to make sure the percieved loudness of the sample is preserved
+    LoudnessNormalizer<Complex, WindowSize, 1> _loudness_norm;
+    
+    std::array<Complex, WindowSize> _calc_new_samples(const std::array<Complex, WindowSize> &raw_samples, const float *amplitudes, const float *phase_deltas);
+
+};
+
+// 'Phase vocoder done right' implementation
+class PhaseVocoderDoneRightTimeStretcher: public PhaseVocoderTimeStretcher {
+public:
+    PhaseVocoderDoneRightTimeStretcher(bool _preserve_formants = false);
+protected:
+    virtual std::array<float, WindowSize / 2> _calc_scaled_phases(const std::array<Complex, WindowSize / 2> &curr_phases, const uint32_t hopsize) override;
+private:
+    // Phase propagation algorithm as described in the paper
+    std::array<float, WindowSize / 2> _propagate_phase_gradients(const std::array<float, WindowSize / 2> &phase_time_deltas, 
+                                                             const std::array<float, WindowSize / 2> &phase_freq_deltas, 
+                                                             const std::array<float, WindowSize / 2> &last_stretched_phases, 
+                                                             const std::array<float, WindowSize / 2> &prev_mag2s,
+                                                             const std::array<float, WindowSize / 2> &next_mag2s,
+                                                             const float stretch_factor,
+                                                             const float tolerance = 1e-5f);
+};
+
+// Syncronised OverLap and Add time stretcher with fixed window size
+class OLATimeStretcher: public TimeStretcher {
+public:
+    OLATimeStretcher(uint32_t w_size);
+
+    static inline const float MinScale = 0.05;
+
+    // std::vector<Complex> stretch_and_overlap_window2(const Complex *input);
+
+    virtual void push_signal(const Complex *input, const uint32_t &size) override;
+    virtual uint32_t pop_transformed_signal(Complex *output, const uint32_t &size) override;
+
+    virtual uint32_t n_transformed_ready() const override;
+
+    virtual void reset() override;
+    
+private:
+    uint32_t _window_size;
+
+    uint32_t _overlap;
+    uint32_t _selection_window;
+    // uint32_t _sample_skip;
+
+    float _desired_extension = 0.0f;
+
+    VecDeque<Complex> _raw_buffer;
+    VecDeque<Complex> _transformed_buffer;
+
+};
+
+// timestrech where extensions are added to best match wave form with. Fixed window size. not fixed soutput size (may be slightly longer)
+class WSOLATimeStretcher: public TimeStretcher {
+public:
+    WSOLATimeStretcher();
+
+    virtual void push_signal(const Complex *input, const uint32_t &size) override;
+    virtual uint32_t pop_transformed_signal(Complex *output, const uint32_t &size) override;
+
+    virtual uint32_t n_transformed_ready() const override;
+    
+    virtual void reset() override;
+private:
+    VecDeque<Complex> _raw_buffer;
+    VecDeque<Complex> _transformed_buffer;
+
+    // length of the the input buffer must be before transforming and size of arrays in intermediate calculations. Should catch up to 1000hz
+    static constexpr uint32_t SampleProcSize = 1 << 11;
+
+    // length of a window
+    static constexpr uint32_t WindowSize = 1 << 9;
+
+    // stretches the sample, and adds it to the transform buffer, tje position of each window is based on the autocorrelation
+    // returns how many frames were used and can be discarded
+    uint32_t _stretch_sample_and_add(const Complex *sample);
+
+    // Basically, the beginning of the next overlap(_left_tail_start) can be before the beggining of the last overlap(_right_tail_start)
+    // and the size of the overlap can change on each process. So store both.
+    uint32_t _last_overlap_start = 0;
+    uint32_t _next_overlap_start = 0;
+
+
+};
+
+// Formant preserving, pitch changing time stretch
+// Inherits a lot of algorithmic functionality from SOLA
+class PSOLATimeStretcher: public TimeStretcher {
+public:
+    PSOLATimeStretcher();
+
+    virtual void push_signal(const Complex *input, const uint32_t &size) override;
+    virtual uint32_t pop_transformed_signal(Complex *output, const uint32_t &size) override;
+
+    virtual uint32_t n_transformed_ready() const override;
+
+    virtual void reset() override;
+private:
+
+    VecDeque<Complex> _raw_buffer;
+    VecDeque<Complex> _transformed_buffer;
+
+    static constexpr uint32_t SampleProcSize = 1 << 11;
+
+    // basically how accurate the reconstruction will be. Assumes ~44kHz input, good for operation in up to 16kHz
+    static constexpr uint32_t LPCSize = (uint32_t) 1.25 * 16;
+
+    static constexpr uint32_t MinFreqHz = 50;
+    static constexpr uint32_t MaxFreqHz = 800;
+
+    static constexpr uint32_t InputSampleRate = 44100;
+
+    static constexpr uint32_t MinFreqInd = MAX(MinFreqHz * SampleProcSize / InputSampleRate, 2); 
+    static constexpr uint32_t MaxFreqInd = MAX(MaxFreqHz * SampleProcSize / InputSampleRate, MinFreqInd * 2); 
+
+    // the amount of frames in _transformed_buffer that need to remain in case of overlapping future samples
+    static constexpr uint32_t MaxBackWindowOverlap = (1.0f / MinFreqHz * InputSampleRate) + 1;
+
+    // for finding fundemental frequency
+    // FFT _fft;
+    LPC<SampleProcSize, LPCSize> _lpc;
+
+    // returns the index of the fundemental frequency (pitch) in an fft of samples
+    int _est_fund_frequency(const Complex *sample);
+    // store the last estimated pitches. the median will be used
+    VecDeque<int> _last_periods;
+
+
+    //used to meld windows in the same sample of different length (peaks are not uniformly spaced)
+    uint32_t _next_right_window_overlap = 0;
+
+    // estimate the peaks in the upcoming sample
+    std::vector<uint32_t> _find_upcoming_peaks(const Complex *samples, const uint32_t est_period);
+
+    // stretches the sample, and adds it to the transform buffer;
+    void _stretch_peaks_and_add(const Complex *samples, const std::vector<uint32_t> &est_peaks);
+    
+
+
+};
+
+}
+}
+
+#endif
--- a/vecdeque.h
+++ b/vecdeque.h
@ -0,0 +1,243 @@
+/**
+ * @file vecdeque.h
+ * @author 9exa
+ * @brief An dynamically-sized array where elements can be added on either end. Useful for queues
+ *  Supposed to be like Rusts VecDeque, implemented with a ring_buffer
+ * @version 0.1
+ * @date 2023-04-22
+ * 
+ * @copyright Copyright (c) 2023
+ * 
+ */
+#ifndef MENGA_VECDEQUE
+#define MENGA_VECDEQUE
+
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include "mengumath.h"
+
+namespace Mengu {
+
+template<class T>
+class VecDeque {
+private:
+    T *_data = nullptr;
+
+    uint32_t _front = 0;
+    // uint32_t _end = 0;
+    uint32_t _size = 0;
+    uint32_t _capacity = 0;
+
+public:
+    VecDeque() {}
+    VecDeque(const VecDeque &from) {
+        _front = from._front;
+        _size = from._size;
+        _capacity = from._capacity;
+
+        if (_capacity > 0) {
+            _data = new T[_capacity];
+            for (uint32_t i = 0; i < _capacity; i++) {
+                _data[i] = from._data[i];
+            }
+        }
+        
+    }
+
+    ~VecDeque() {
+        if (_data != nullptr) {
+            delete[] _data;
+        }
+    }
+
+    inline uint32_t size() const {
+        return _size;
+    }
+
+    inline uint32_t capacity() const {
+        return _capacity;
+    }
+
+    inline const T *data() const {
+        return _data;
+    } 
+
+    void reserve(const uint32_t &new_cap) {
+        if (_capacity < new_cap) {
+            T *new_data = new T[new_cap];
+
+            // copy and initialise new array
+            uint32_t i = 0;
+            if (_data != nullptr) {
+                for (; i < _size; i++) {
+                    new_data[i] = std::move(_data[(_front + i) % _capacity]);
+                }
+                delete[] _data;
+                _front = 0;
+            }
+            _data = new_data;
+            _capacity = new_cap;
+            if (_size > _capacity) {
+                resize(_capacity);
+            }
+        }
+    }
+
+    void resize(const uint32_t &new_size) {
+        if (_capacity < new_size) {
+            uint32_t new_cap = MAX(_capacity, 1);
+            while (new_cap < new_size) {
+                new_cap = new_cap << 1; // if you leave out the new_cap = the optimizer just skips this loop. 
+                                // Which makes this infinite loop bug hard to spot
+
+            }
+            reserve(new_cap);
+        }
+        if (new_size > _size) {
+            // expand from back
+            if ( _size > 0) {
+                for (uint32_t i = _size; i < new_size; i++) {
+                    _data[(_front + i) % _capacity] = _data[(_front + _size - 1) % _capacity];
+                }
+            }
+            else {
+                for (uint32_t i = _size; i < new_size; i++) {
+                    _data[(_front + i) % _capacity] = T();
+                }
+            }
+            
+        }
+        _size = new_size;
+    }
+
+    void resize(const uint32_t &new_size, const T &x) {
+        resize(new_size);
+
+        for (uint32_t i = 0; i < new_size; i++) {
+            _data[(_front + i) % _capacity] = x;
+        }
+    }
+
+    inline void push_back(const T &x) {
+        resize(_size + 1);
+        _data[(_front + _size - 1) % _capacity] = x;
+    }
+
+    inline void push_front(const T &x) {
+        resize(_size + 1);
+        _front = (_front == 0) ? _capacity - 1 : _front - 1;
+        _data[_front] = x;
+    }
+
+    // adds elements to the end of the queue
+    inline void extend_back(const T *array, const uint32_t n) {
+        uint32_t old_size = _size;
+        resize(_size + n);
+
+        for (uint32_t i = 0; i < n; i++) {
+            _data[(_front + old_size + i) % _capacity] = array[i];
+        }
+    }
+    // moves at most n elements from the front of the queue to the array output. outputs memory must be validated elsewhere
+    inline uint32_t pop_front_many(T *output, uint32_t n) {
+        n = MIN(n, _size); 
+
+        if (output != nullptr) {
+            for (uint32_t i = 0; i < n; i++) {
+                output[i] = _data[(_front + i) % _capacity];
+            }
+        }
+
+        if (n != 0){
+            _front = (_front + n) % _capacity;
+
+            resize(_size - n);
+        }
+        
+
+        return n;
+    }
+
+    inline uint32_t pop_back_many(T *output, uint32_t n) {
+        n = MIN(n, _size);
+        if (output != nullptr) {
+            for (uint32_t i = 0; i < n; i++) {
+                output[i] = _data[(_front + _size - n + i) % _capacity];
+            }
+        }
+        resize(_size - n);
+
+        return n;
+    }
+
+    void make_contiguous() {
+        if (_size > 0 && _front > 0) {
+            T *new_data = new T[_capacity];
+            for (uint32_t i = 0; i < _size; i++) {
+                new_data[i] = std::move(_data[(_front + i) % _capacity]);
+            }
+
+            delete[] _data;
+
+            _data = new_data;
+        }
+    }
+
+    //// Operators
+    inline T &operator[](int i) {
+        if (_size == 0) {
+            resize(1);
+        }
+        return _data[(_front + posmod(i, _size)) % _capacity];
+    }
+
+    inline const T &operator[](int i) const {
+        if (_size == 0) {
+            resize(1);
+        }
+        return _data[(_front + posmod(i, _size)) % _capacity];
+    }
+
+    VecDeque &operator=(const VecDeque &from) {
+        resize(from._size);
+        _front = 0;
+
+        for (uint32_t i = 0; i < _size; i++) {
+            _data[i] = from._data[(from._front + _size % from._capacity)];
+        }
+
+        return *this;
+    };
+
+    // converts the first 'size' items into a contiguous array. -1 does the whole queue
+    uint32_t to_array(T *out, int size = -1) {
+        if (size == -1) {
+            size = _size;
+        }
+
+        size = MIN(size, _size);
+
+        for (uint32_t i = 0; i < size; i++) {
+            out[i] = _data[(i + _front) % _capacity];
+        }
+
+        return size;
+    }
+
+    // writes the last 'size' items into a contiguous array
+    uint32_t to_array_back(T *out, int size) {
+        size = MIN(size, _size);
+
+        for (uint32_t i = 0; i < size; i++) {
+            out[i] = _data[(i + _front + _size - size) % _capacity];
+        }
+
+        return size;
+    }
+
+};
+
+}
+
+#endif