diff --git a/pedalboard/BufferUtils.h b/pedalboard/BufferUtils.h index 13f70858..ccef6413 100644 --- a/pedalboard/BufferUtils.h +++ b/pedalboard/BufferUtils.h @@ -204,7 +204,7 @@ const juce::AudioBuffer convertPyArrayIntoJuceBuffer( } template -py::array_t copyJuceBufferIntoPyArray(const juce::AudioBuffer &juceBuffer, +py::array_t copyJuceBufferIntoPyArray(juce::AudioBuffer juceBuffer, ChannelLayout channelLayout, int offsetSamples, int ndim = 2) { unsigned int numChannels = juceBuffer.getNumChannels(); @@ -212,7 +212,32 @@ py::array_t copyJuceBufferIntoPyArray(const juce::AudioBuffer &juceBuffer, unsigned int outputSampleCount = std::max((int)numSamples - (int)offsetSamples, 0); - // TODO: Avoid the need to copy here if offsetSamples is 0! + // Zero-copy path for mono with no offset: move the JUCE buffer into a + // capsule and let NumPy point directly at its memory. Multichannel can't + // use this because JUCE allocates each channel separately and NumPy needs + // contiguous memory. + if (offsetSamples == 0 && numChannels == 1 && numSamples > 0) { + auto *buf = new juce::AudioBuffer(std::move(juceBuffer)); + py::capsule owner(buf, [](void *p) { + delete static_cast *>(p); + }); + T *data = buf->getWritePointer(0); + if (ndim == 2) { + switch (channelLayout) { + case ChannelLayout::NotInterleaved: + return py::array_t({(unsigned int)1, numSamples}, + {numSamples * sizeof(T), sizeof(T)}, data, owner); + case ChannelLayout::Interleaved: + return py::array_t({numSamples, (unsigned int)1}, + {sizeof(T), sizeof(T)}, data, owner); + default: + break; + } + } else { + return py::array_t({numSamples}, {sizeof(T)}, data, owner); + } + } + py::array_t outputArray; if (ndim == 2) { switch (channelLayout) { @@ -232,10 +257,6 @@ py::array_t copyJuceBufferIntoPyArray(const juce::AudioBuffer &juceBuffer, py::buffer_info outputInfo = outputArray.request(); - // Depending on the input channel layout, we need to copy data - // differently. 
This loop is duplicated here to move the if statement - // outside of the tight loop, as we don't need to re-check that the input - // channel is still the same on every iteration of the loop. T *outputBasePointer = static_cast(outputInfo.ptr); if (juceBuffer.getNumSamples() > 0) { @@ -243,7 +264,6 @@ py::array_t copyJuceBufferIntoPyArray(const juce::AudioBuffer &juceBuffer, case ChannelLayout::Interleaved: for (unsigned int i = 0; i < numChannels; i++) { const T *channelBuffer = juceBuffer.getReadPointer(i, offsetSamples); - // We're interleaving the data here, so we can't use copyFrom. for (unsigned int j = 0; j < outputSampleCount; j++) { outputBasePointer[j * numChannels + i] = channelBuffer[j]; } diff --git a/pedalboard/TimeStretch.h b/pedalboard/TimeStretch.h index ecdbd769..30ea9933 100644 --- a/pedalboard/TimeStretch.h +++ b/pedalboard/TimeStretch.h @@ -378,7 +378,7 @@ inline void init_time_stretch(py::module &m) { preserveFormants); } - return copyJuceBufferIntoPyArray(output, detectChannelLayout(input), 0); + return copyJuceBufferIntoPyArray(std::move(output), detectChannelLayout(input), 0); }, R"( Time-stretch (and optionally pitch-shift) a buffer of audio, changing its length. diff --git a/pedalboard/process.h b/pedalboard/process.h index 22e0e265..935a2454 100644 --- a/pedalboard/process.h +++ b/pedalboard/process.h @@ -270,7 +270,7 @@ processFloat32(const py::array_t inputArray, totalOutputLatencySamples = ioBuffer.getNumSamples() - samplesReturned; } - return copyJuceBufferIntoPyArray(ioBuffer, inputChannelLayout, + return copyJuceBufferIntoPyArray(std::move(ioBuffer), inputChannelLayout, totalOutputLatencySamples, inputArray.request().ndim); } diff --git a/tests/test_zero_copy_output.py b/tests/test_zero_copy_output.py new file mode 100644 index 00000000..69152e77 --- /dev/null +++ b/tests/test_zero_copy_output.py @@ -0,0 +1,46 @@ +#! 
/usr/bin/env python
+#
+# Copyright 2021 Spotify AB
+#
+# Licensed under the GNU Public License, Version 3.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.gnu.org/licenses/gpl-3.0.html
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+import pytest
+
+from pedalboard import Gain
+
+
+@pytest.mark.parametrize("sample_rate", [22050, 44100, 48000])
+def test_mono_output_not_copied(sample_rate):
+    """Mono output with no latency should reuse the buffer, not copy it."""
+    phase = 2 * np.pi * 440 * np.arange(sample_rate) / sample_rate
+    signal = np.sin(phase).astype(np.float32)
+    out = Gain(gain_db=0).process(signal, sample_rate)
+    assert out.flags["C_CONTIGUOUS"]
+    assert out.flags["WRITEABLE"]
+    # A capsule-backed array borrows its memory; a copied one would own it.
+    assert not out.flags["OWNDATA"]
+    np.testing.assert_allclose(out, signal, atol=1e-7)
+
+
+def test_mono_output_lifetime_independent():
+    """Outputs from earlier calls must stay intact after later process() calls."""
+    g = Gain(gain_db=0)
+    results = []
+    for freq in [440, 880, 1320]:
+        phase = 2 * np.pi * freq * np.arange(44100) / 44100
+        signal = np.sin(phase).astype(np.float32)
+        results.append((g.process(signal, 44100), signal))
+    for out, expected in results:
+        np.testing.assert_allclose(out, expected, atol=1e-7)