Performance optimisations
Use local arrays/variables to avoid multiple dereferences. Merge outputs with delays to allow the compiler to optimise loops. lv2bench shows a nearly 5x CPU performance improvement.
master
parent
a0e5770e28
commit
14441af2c4
|
@ -10,9 +10,7 @@
|
|||
#define BUFFER_SECONDS 10
|
||||
|
||||
struct Tap {
|
||||
float *t_gain[TAPS];
|
||||
float *l_gain;
|
||||
float *r_gain;
|
||||
float *t_gain[TAPS + CHANNELS];
|
||||
float *gain;
|
||||
float *delay;
|
||||
};
|
||||
|
@ -24,12 +22,9 @@ struct PTap
|
|||
double sample_rate;
|
||||
size_t buffer_max;
|
||||
|
||||
Tap tap[TAPS];
|
||||
Tap l_out;
|
||||
Tap r_out;
|
||||
Tap tap[TAPS + CHANNELS];
|
||||
|
||||
float *buffers[TAPS]; ///< Tap audio buffers
|
||||
float *rp[TAPS]; ///< Read pointers
|
||||
float *wp[TAPS]; ///< Write pointers
|
||||
|
||||
float *in_l;
|
||||
|
@ -62,7 +57,6 @@ static LV2_Handle ptap_instantiate(
|
|||
return NULL;
|
||||
}
|
||||
|
||||
ptap->rp[i] = ptap->buffers[i];
|
||||
ptap->wp[i] = ptap->buffers[i];
|
||||
}
|
||||
|
||||
|
@ -90,24 +84,16 @@ static void ptap_connect_port(LV2_Handle lv2instance, uint32_t port, void *data)
|
|||
|
||||
int tap_index = port / CONTROLS_PER_TAP;
|
||||
|
||||
if (tap_index < TAPS) {
|
||||
if (tap_index < TAPS + CHANNELS) {
|
||||
tap = &ptap->tap[tap_index];
|
||||
} else if (tap_index - TAPS == 0) {
|
||||
tap = &ptap->l_out;
|
||||
} else if (tap_index - TAPS == 1) {
|
||||
tap = &ptap->r_out;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
int tap_port = port % CONTROLS_PER_TAP;
|
||||
|
||||
if (tap_port < TAPS) {
|
||||
if (tap_port < TAPS + CHANNELS) {
|
||||
tap->t_gain[tap_port] = fdata;
|
||||
} else if (tap_port - TAPS == 0) {
|
||||
tap->l_gain = fdata;
|
||||
} else if (tap_port - TAPS == 1) {
|
||||
tap->r_gain = fdata;
|
||||
} else if (tap_port - TAPS == 2) {
|
||||
tap->gain = fdata;
|
||||
} else if (tap_port - TAPS == 3) {
|
||||
|
@ -120,71 +106,65 @@ static void ptap_connect_port(LV2_Handle lv2instance, uint32_t port, void *data)
|
|||
static void ptap_run(LV2_Handle lv2instance, uint32_t sample_count)
|
||||
{
|
||||
PTap *ptap = (PTap *)lv2instance;
|
||||
Tap *tap;
|
||||
float *wp;
|
||||
|
||||
const float *readp[TAPS + CHANNELS];
|
||||
float gain[TAPS + CHANNELS];
|
||||
|
||||
/* Position read pointers behind write pointers */
|
||||
for (int i = 0; i < TAPS; i++) {
|
||||
int delay = *ptap->tap[i].delay * ptap->sample_rate;
|
||||
ptap->rp[i] = ptap->wp[i] - delay;
|
||||
if (ptap->rp[i] < ptap->buffers[i]) {
|
||||
ptap->rp[i] += ptap->buffer_max;
|
||||
/* Zero delay results in processing order dependencies. Tapiir doesn't permit it either... */
|
||||
if (delay < 1) delay = 1;
|
||||
readp[i] = ptap->wp[i] - delay;
|
||||
if (readp[i] < ptap->buffers[i]) {
|
||||
readp[i] += ptap->buffer_max;
|
||||
}
|
||||
gain[i] = *ptap->tap[i].gain;
|
||||
}
|
||||
|
||||
float *in_l = ptap->in_l;
|
||||
float *in_r = ptap->in_r;
|
||||
float *out_l = ptap->out_l;
|
||||
float *out_r = ptap->out_r;
|
||||
|
||||
readp[TAPS ] = ptap->in_l;
|
||||
readp[TAPS + 1] = ptap->in_r;
|
||||
gain[TAPS ] = *ptap->tap[TAPS ].gain;
|
||||
gain[TAPS + 1] = *ptap->tap[TAPS + 1].gain;
|
||||
|
||||
while (sample_count--) {
|
||||
for (int i = 0; i < TAPS; i++) {
|
||||
wp = ptap->wp[i];
|
||||
tap = &ptap->tap[i];
|
||||
float rp[TAPS + CHANNELS];
|
||||
float wp[TAPS + CHANNELS];
|
||||
|
||||
*wp = *in_l * *tap->l_gain;
|
||||
*wp += *in_r * *tap->r_gain;
|
||||
for (int j = 0; j < TAPS; j++) {
|
||||
*wp += *ptap->rp[j] * *tap->t_gain[j];
|
||||
for (int i = 0; i < TAPS + CHANNELS; i++) {
|
||||
rp[i] = *readp[i]++;
|
||||
}
|
||||
|
||||
for (int i = 0; i < TAPS + CHANNELS; i++) {
|
||||
const Tap *tap = &ptap->tap[i];
|
||||
|
||||
float sample = 0;
|
||||
for (int j = 0; j < TAPS + CHANNELS; j++) {
|
||||
sample += rp[j] * *tap->t_gain[j];
|
||||
}
|
||||
*wp *= *tap->gain;
|
||||
wp[i] = sample * gain[i];
|
||||
}
|
||||
|
||||
/* Write to left output */
|
||||
tap = &ptap->l_out;
|
||||
*out_l = *in_l * *tap->l_gain;
|
||||
*out_l += *in_r * *tap->r_gain;
|
||||
for (int j = 0; j < TAPS; j++) {
|
||||
*out_l += *ptap->rp[j] * *tap->t_gain[j];
|
||||
for (int i = 0; i < TAPS; i++) {
|
||||
*ptap->wp[i]++ = wp[i];
|
||||
}
|
||||
*out_l *= *tap->gain;
|
||||
|
||||
/* Write to right output */
|
||||
tap = &ptap->r_out;
|
||||
*out_r = *in_l * *tap->l_gain;
|
||||
*out_r += *in_r * *tap->r_gain;
|
||||
for (int j = 0; j < TAPS; j++) {
|
||||
*out_r += *ptap->rp[j] * *tap->t_gain[j];
|
||||
}
|
||||
*out_r *= *tap->gain;
|
||||
*out_l++ = wp[TAPS];
|
||||
*out_r++ = wp[TAPS + 1];
|
||||
|
||||
/* Progress read pointers */
|
||||
for (int i = 0; i < TAPS; i++) {
|
||||
ptap->wp[i]++;
|
||||
if (ptap->wp[i] >= ptap->buffers[i] + ptap->buffer_max) {
|
||||
ptap->wp[i] = ptap->buffers[i];
|
||||
}
|
||||
|
||||
ptap->rp[i]++;
|
||||
if (ptap->rp[i] >= ptap->buffers[i] + ptap->buffer_max) {
|
||||
ptap->rp[i] = ptap->buffers[i];
|
||||
if (readp[i] >= ptap->buffers[i] + ptap->buffer_max) {
|
||||
readp[i] = ptap->buffers[i];
|
||||
}
|
||||
}
|
||||
|
||||
in_l++;
|
||||
in_r++;
|
||||
out_l++;
|
||||
out_r++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue