Performance optimisations

Use local arrays/variables to avoid multiple dereferences.
Merge outputs with delays to allow compiler to optimise loops.
lv2bench shows a nearly 5x improvement in CPU performance.
master
Peter Nelson 2013-02-06 00:46:45 +00:00
parent a0e5770e28
commit 14441af2c4
1 changed file with 37 additions and 57 deletions

View File

@ -10,9 +10,7 @@
#define BUFFER_SECONDS 10 #define BUFFER_SECONDS 10
struct Tap { struct Tap {
float *t_gain[TAPS]; float *t_gain[TAPS + CHANNELS];
float *l_gain;
float *r_gain;
float *gain; float *gain;
float *delay; float *delay;
}; };
@ -24,12 +22,9 @@ struct PTap
double sample_rate; double sample_rate;
size_t buffer_max; size_t buffer_max;
Tap tap[TAPS]; Tap tap[TAPS + CHANNELS];
Tap l_out;
Tap r_out;
float *buffers[TAPS]; ///< Tap audio buffers float *buffers[TAPS]; ///< Tap audio buffers
float *rp[TAPS]; ///< Read pointers
float *wp[TAPS]; ///< Write pointers float *wp[TAPS]; ///< Write pointers
float *in_l; float *in_l;
@ -62,7 +57,6 @@ static LV2_Handle ptap_instantiate(
return NULL; return NULL;
} }
ptap->rp[i] = ptap->buffers[i];
ptap->wp[i] = ptap->buffers[i]; ptap->wp[i] = ptap->buffers[i];
} }
@ -90,24 +84,16 @@ static void ptap_connect_port(LV2_Handle lv2instance, uint32_t port, void *data)
int tap_index = port / CONTROLS_PER_TAP; int tap_index = port / CONTROLS_PER_TAP;
if (tap_index < TAPS) { if (tap_index < TAPS + CHANNELS) {
tap = &ptap->tap[tap_index]; tap = &ptap->tap[tap_index];
} else if (tap_index - TAPS == 0) {
tap = &ptap->l_out;
} else if (tap_index - TAPS == 1) {
tap = &ptap->r_out;
} else { } else {
return; return;
} }
int tap_port = port % CONTROLS_PER_TAP; int tap_port = port % CONTROLS_PER_TAP;
if (tap_port < TAPS) { if (tap_port < TAPS + CHANNELS) {
tap->t_gain[tap_port] = fdata; tap->t_gain[tap_port] = fdata;
} else if (tap_port - TAPS == 0) {
tap->l_gain = fdata;
} else if (tap_port - TAPS == 1) {
tap->r_gain = fdata;
} else if (tap_port - TAPS == 2) { } else if (tap_port - TAPS == 2) {
tap->gain = fdata; tap->gain = fdata;
} else if (tap_port - TAPS == 3) { } else if (tap_port - TAPS == 3) {
@ -120,71 +106,65 @@ static void ptap_connect_port(LV2_Handle lv2instance, uint32_t port, void *data)
static void ptap_run(LV2_Handle lv2instance, uint32_t sample_count) static void ptap_run(LV2_Handle lv2instance, uint32_t sample_count)
{ {
PTap *ptap = (PTap *)lv2instance; PTap *ptap = (PTap *)lv2instance;
Tap *tap;
float *wp; const float *readp[TAPS + CHANNELS];
float gain[TAPS + CHANNELS];
/* Position read pointers behind write pointers */ /* Position read pointers behind write pointers */
for (int i = 0; i < TAPS; i++) { for (int i = 0; i < TAPS; i++) {
int delay = *ptap->tap[i].delay * ptap->sample_rate; int delay = *ptap->tap[i].delay * ptap->sample_rate;
ptap->rp[i] = ptap->wp[i] - delay; /* Zero delay results in processing order dependencies. Tapiir doesn't permit it either... */
if (ptap->rp[i] < ptap->buffers[i]) { if (delay < 1) delay = 1;
ptap->rp[i] += ptap->buffer_max; readp[i] = ptap->wp[i] - delay;
if (readp[i] < ptap->buffers[i]) {
readp[i] += ptap->buffer_max;
} }
gain[i] = *ptap->tap[i].gain;
} }
float *in_l = ptap->in_l;
float *in_r = ptap->in_r;
float *out_l = ptap->out_l; float *out_l = ptap->out_l;
float *out_r = ptap->out_r; float *out_r = ptap->out_r;
readp[TAPS ] = ptap->in_l;
readp[TAPS + 1] = ptap->in_r;
gain[TAPS ] = *ptap->tap[TAPS ].gain;
gain[TAPS + 1] = *ptap->tap[TAPS + 1].gain;
while (sample_count--) { while (sample_count--) {
for (int i = 0; i < TAPS; i++) { float rp[TAPS + CHANNELS];
wp = ptap->wp[i]; float wp[TAPS + CHANNELS];
tap = &ptap->tap[i];
*wp = *in_l * *tap->l_gain; for (int i = 0; i < TAPS + CHANNELS; i++) {
*wp += *in_r * *tap->r_gain; rp[i] = *readp[i]++;
for (int j = 0; j < TAPS; j++) { }
*wp += *ptap->rp[j] * *tap->t_gain[j];
for (int i = 0; i < TAPS + CHANNELS; i++) {
const Tap *tap = &ptap->tap[i];
float sample = 0;
for (int j = 0; j < TAPS + CHANNELS; j++) {
sample += rp[j] * *tap->t_gain[j];
} }
*wp *= *tap->gain; wp[i] = sample * gain[i];
} }
/* Write to left output */ for (int i = 0; i < TAPS; i++) {
tap = &ptap->l_out; *ptap->wp[i]++ = wp[i];
*out_l = *in_l * *tap->l_gain;
*out_l += *in_r * *tap->r_gain;
for (int j = 0; j < TAPS; j++) {
*out_l += *ptap->rp[j] * *tap->t_gain[j];
} }
*out_l *= *tap->gain;
/* Write to right output */ *out_l++ = wp[TAPS];
tap = &ptap->r_out; *out_r++ = wp[TAPS + 1];
*out_r = *in_l * *tap->l_gain;
*out_r += *in_r * *tap->r_gain;
for (int j = 0; j < TAPS; j++) {
*out_r += *ptap->rp[j] * *tap->t_gain[j];
}
*out_r *= *tap->gain;
/* Progress read pointers */ /* Progress read pointers */
for (int i = 0; i < TAPS; i++) { for (int i = 0; i < TAPS; i++) {
ptap->wp[i]++;
if (ptap->wp[i] >= ptap->buffers[i] + ptap->buffer_max) { if (ptap->wp[i] >= ptap->buffers[i] + ptap->buffer_max) {
ptap->wp[i] = ptap->buffers[i]; ptap->wp[i] = ptap->buffers[i];
} }
ptap->rp[i]++; if (readp[i] >= ptap->buffers[i] + ptap->buffer_max) {
if (ptap->rp[i] >= ptap->buffers[i] + ptap->buffer_max) { readp[i] = ptap->buffers[i];
ptap->rp[i] = ptap->buffers[i];
} }
} }
in_l++;
in_r++;
out_l++;
out_r++;
} }
} }