SDL: Refactored audio conversion to reduce copying

From edaab8ad9f93c2e25a54aa68817480dd6c2f7877 Mon Sep 17 00:00:00 2001
From: Brick <[EMAIL REDACTED]>
Date: Thu, 4 Apr 2024 19:22:29 +0100
Subject: [PATCH] Refactored audio conversion to reduce copying

More of the logic has been moved into SDL_AudioQueue,
allowing data to be converted directly from the input buffer.
---
 include/SDL3/SDL_audio.h              |   4 +-
 src/audio/SDL_audiocvt.c              | 214 ++++-----
 src/audio/SDL_audioqueue.c            | 619 ++++++++++++++++----------
 src/audio/SDL_audioqueue.h            |  26 +-
 src/audio/SDL_sysaudio.h              |   4 -
 test/testaudiostreamdynamicresample.c |   9 +-
 test/testautomation_audio.c           |   5 -
 7 files changed, 475 insertions(+), 406 deletions(-)

diff --git a/include/SDL3/SDL_audio.h b/include/SDL3/SDL_audio.h
index e77afcbfc1008..8569cb3f15203 100644
--- a/include/SDL3/SDL_audio.h
+++ b/include/SDL3/SDL_audio.h
@@ -789,13 +789,13 @@ extern DECLSPEC float SDLCALL SDL_GetAudioStreamFrequencyRatio(SDL_AudioStream *
 extern DECLSPEC int SDLCALL SDL_SetAudioStreamFrequencyRatio(SDL_AudioStream *stream, float ratio);
 
 /**
- * Add data to be converted/resampled to the stream.
+ * Add data to the stream.
  *
  * This data must match the format/channels/samplerate specified in the latest
  * call to SDL_SetAudioStreamFormat, or the format specified when creating the
  * stream if it hasn't been changed.
  *
- * Note that this call simply queues unconverted data for later. This is
+ * Note that this call simply copies the unconverted data for later. This is
  * different than SDL2, where data was converted during the Put call and the
  * Get call would just dequeue the previously-converted data.
  *
diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c
index e51a7daba023f..a7cc96dd63971 100644
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@@ -308,20 +308,10 @@ static int UpdateAudioStreamInputSpec(SDL_AudioStream *stream, const SDL_AudioSp
         return 0;
     }
 
-    const size_t history_buffer_allocation = SDL_GetResamplerHistoryFrames() * SDL_AUDIO_FRAMESIZE(*spec);
-    Uint8 *history_buffer = stream->history_buffer;
-
-    if (stream->history_buffer_allocation < history_buffer_allocation) {
-        history_buffer = (Uint8 *) SDL_aligned_alloc(SDL_SIMDGetAlignment(), history_buffer_allocation);
-        if (!history_buffer) {
-            return -1;
-        }
-        SDL_aligned_free(stream->history_buffer);
-        stream->history_buffer = history_buffer;
-        stream->history_buffer_allocation = history_buffer_allocation;
+    if (SDL_ResetAudioQueueHistory(stream->queue, SDL_GetResamplerHistoryFrames()) != 0) {
+        return -1;
     }
 
-    SDL_memset(history_buffer, SDL_GetSilenceValueForFormat(spec->format), history_buffer_allocation);
     SDL_copyp(&stream->input_spec, spec);
 
     return 0;
@@ -338,7 +328,7 @@ SDL_AudioStream *SDL_CreateAudioStream(const SDL_AudioSpec *src_spec, const SDL_
     }
 
     retval->freq_ratio = 1.0f;
-    retval->queue = SDL_CreateAudioQueue(4096);
+    retval->queue = SDL_CreateAudioQueue(8192);
 
     if (!retval->queue) {
         SDL_free(retval);
@@ -550,22 +540,12 @@ static int CheckAudioStreamIsFullySetup(SDL_AudioStream *stream)
     return 0;
 }
 
-int SDL_PutAudioStreamData(SDL_AudioStream *stream, const void *buf, int len)
+static int PutAudioStreamBuffer(SDL_AudioStream *stream, const void *buf, int len, SDL_ReleaseAudioBufferCallback callback, void* userdata)
 {
 #if DEBUG_AUDIOSTREAM
     SDL_Log("AUDIOSTREAM: wants to put %d bytes", len);
 #endif
 
-    if (!stream) {
-        return SDL_InvalidParamError("stream");
-    } else if (!buf) {
-        return SDL_InvalidParamError("buf");
-    } else if (len < 0) {
-        return SDL_InvalidParamError("len");
-    } else if (len == 0) {
-        return 0; // nothing to do.
-    }
-
     SDL_LockMutex(stream->lock);
 
     if (CheckAudioStreamIsFullySetup(stream) != 0) {
@@ -580,24 +560,13 @@ int SDL_PutAudioStreamData(SDL_AudioStream *stream, const void *buf, int len)
 
     SDL_AudioTrack* track = NULL;
 
-    // When copying in large amounts of data, try and do as much work as possible
-    // outside of the stream lock, otherwise the output device is likely to be starved.
-    const int large_input_thresh = 1024 * 1024;
-
-    if (len >= large_input_thresh) {
-        SDL_AudioSpec src_spec;
-        SDL_copyp(&src_spec, &stream->src_spec);
-
-        SDL_UnlockMutex(stream->lock);
-
-        size_t chunk_size = SDL_GetAudioQueueChunkSize(stream->queue);
-        track = SDL_CreateChunkedAudioTrack(&src_spec, (const Uint8 *)buf, len, chunk_size);
+    if (callback) {
+        track = SDL_CreateAudioTrack(stream->queue, &stream->src_spec, (Uint8 *)buf, len, len, callback, userdata);
 
         if (!track) {
+            SDL_UnlockMutex(stream->lock);
             return -1;
         }
-
-        SDL_LockMutex(stream->lock);
     }
 
     const int prev_available = stream->put_callback ? SDL_GetAudioStreamAvailable(stream) : 0;
@@ -611,7 +580,6 @@ int SDL_PutAudioStreamData(SDL_AudioStream *stream, const void *buf, int len)
     }
 
     if (retval == 0) {
-        stream->total_bytes_queued += len;
         if (stream->put_callback) {
             const int newavail = SDL_GetAudioStreamAvailable(stream) - prev_available;
             stream->put_callback(stream->put_callback_userdata, stream, newavail, newavail);
@@ -623,6 +591,49 @@ int SDL_PutAudioStreamData(SDL_AudioStream *stream, const void *buf, int len)
     return retval;
 }
 
+static void SDLCALL FreeAllocatedAudioBuffer(void *userdata, const void *buf, int len)
+{
+    SDL_free((void*) buf);
+}
+
+int SDL_PutAudioStreamData(SDL_AudioStream *stream, const void *buf, int len)
+{
+    if (!stream) {
+        return SDL_InvalidParamError("stream");
+    } else if (!buf) {
+        return SDL_InvalidParamError("buf");
+    } else if (len < 0) {
+        return SDL_InvalidParamError("len");
+    } else if (len == 0) {
+        return 0; // nothing to do.
+    }
+
+    // When copying in large amounts of data, try and do as much work as possible
+    // outside of the stream lock, otherwise the output device is likely to be starved.
+    const int large_input_thresh = 64 * 1024;
+
+    if (len >= large_input_thresh) {
+        void* data = SDL_malloc(len);
+
+        if (!data) {
+            return -1;
+        }
+
+        SDL_memcpy(data, buf, len);
+        buf = data;
+
+        int ret = PutAudioStreamBuffer(stream, buf, len, FreeAllocatedAudioBuffer, NULL);
+
+        if (ret < 0) {
+            SDL_free(data);
+        }
+
+        return ret;
+    }
+
+    return PutAudioStreamBuffer(stream, buf, len, NULL, NULL);
+}
+
 int SDL_FlushAudioStream(SDL_AudioStream *stream)
 {
     if (!stream) {
@@ -655,31 +666,6 @@ static Uint8 *EnsureAudioStreamWorkBufferSize(SDL_AudioStream *stream, size_t ne
     return ptr;
 }
 
-static void UpdateAudioStreamHistoryBuffer(SDL_AudioStream* stream,
-    Uint8* input_buffer, int input_bytes, Uint8* left_padding, int padding_bytes)
-{
-    const int history_buffer_frames = SDL_GetResamplerHistoryFrames();
-
-    // Even if we aren't currently resampling, we always need to update the history buffer
-    Uint8 *history_buffer = stream->history_buffer;
-    int history_bytes = history_buffer_frames * SDL_AUDIO_FRAMESIZE(stream->input_spec);
-
-    if (left_padding) {
-        // Fill in the left padding using the history buffer
-        SDL_assert(padding_bytes <= history_bytes);
-        SDL_memcpy(left_padding, history_buffer + history_bytes - padding_bytes, padding_bytes);
-    }
-
-    // Update the history buffer using the new input data
-    if (input_bytes >= history_bytes) {
-        SDL_memcpy(history_buffer, input_buffer + (input_bytes - history_bytes), history_bytes);
-    } else {
-        int preserve_bytes = history_bytes - input_bytes;
-        SDL_memmove(history_buffer, history_buffer + input_bytes, preserve_bytes);
-        SDL_memcpy(history_buffer + preserve_bytes, input_buffer, input_bytes);
-    }
-}
-
 static Sint64 NextAudioStreamIter(SDL_AudioStream* stream, void** inout_iter,
     Sint64* inout_resample_offset, SDL_AudioSpec* out_spec, SDL_bool* out_flushed)
 {
@@ -777,7 +763,6 @@ static int GetAudioStreamDataInternal(SDL_AudioStream *stream, void *buf, int ou
 
     const SDL_AudioFormat src_format = src_spec->format;
     const int src_channels = src_spec->channels;
-    const int src_frame_size = SDL_AUDIO_FRAMESIZE(*src_spec);
 
     const SDL_AudioFormat dst_format = dst_spec->format;
     const int dst_channels = dst_spec->channels;
@@ -793,34 +778,19 @@ static int GetAudioStreamDataInternal(SDL_AudioStream *stream, void *buf, int ou
 
     // Not resampling? It's an easy conversion (and maybe not even that!)
     if (resample_rate == 0) {
-        Uint8* input_buffer = NULL;
+        Uint8* work_buffer = NULL;
 
-        // If no conversion is happening, read straight into the output buffer.
-        // Note, this is just to avoid extra copies.
-        // Some other formats may fit directly into the output buffer, but i'd rather process data in a SIMD-aligned buffer.
-        if ((src_format == dst_format) && (src_channels == dst_channels)) {
-            input_buffer = (Uint8 *)buf;
-        } else {
-            input_buffer = EnsureAudioStreamWorkBufferSize(stream, output_frames * max_frame_size);
+        // Ensure we have enough scratch space for any conversions
+        if ((src_format != dst_format) || (src_channels != dst_channels)) {
+            work_buffer = EnsureAudioStreamWorkBufferSize(stream, output_frames * max_frame_size);
 
-            if (!input_buffer) {
+            if (!work_buffer) {
                 return -1;
             }
         }
 
-        const int input_bytes = output_frames * src_frame_size;
-        if (SDL_ReadFromAudioQueue(stream->queue, input_buffer, input_bytes) != 0) {
-            SDL_assert(!"Not enough data in queue (read)");
-        }
-
-        stream->total_bytes_queued -= input_bytes;
-
-        // Even if we aren't currently resampling, we always need to update the history buffer
-        UpdateAudioStreamHistoryBuffer(stream, input_buffer, input_bytes, NULL, 0);
-
-        // Convert the data, if necessary
-        if (buf != input_buffer) {
-            ConvertAudio(output_frames, input_buffer, src_format, src_channels, buf, dst_format, dst_channels, input_buffer);
+        if (SDL_ReadFromAudioQueue(stream->queue, buf, dst_format, dst_channels, 0, output_frames, 0, work_buffer) != buf) {
+            return SDL_SetError("Not enough data in queue");
         }
 
         return 0;
@@ -832,9 +802,10 @@ static int GetAudioStreamDataInternal(SDL_AudioStream *stream, void *buf, int ou
     // can require a different number of input_frames, depending on the resample_offset.
     // Infact, input_frames can sometimes even be zero when upsampling.
     const int input_frames = (int) SDL_GetResamplerInputFrames(output_frames, resample_rate, stream->resample_offset);
-    const int input_bytes = input_frames * src_frame_size;
 
-    const int resampler_padding_frames = SDL_GetResamplerPaddingFrames(resample_rate);
+    const int padding_frames = SDL_GetResamplerPaddingFrames(resample_rate);
+
+    const SDL_AudioFormat resample_format = SDL_AUDIO_F32;
 
     // If increasing channels, do it after resampling, since we'd just
     // do more work to resample duplicate channels. If we're decreasing, do
@@ -843,7 +814,7 @@ static int GetAudioStreamDataInternal(SDL_AudioStream *stream, void *buf, int ou
     const int resample_channels = SDL_min(src_channels, dst_channels);
 
     // The size of the frame used when resampling
-    const int resample_frame_size = resample_channels * sizeof(float);
+    const int resample_frame_size = SDL_AUDIO_BYTESIZE(resample_format) * resample_channels;
 
     // The main portion of the work_buffer can be used to store 3 things:
     // src_sample_frame_size * (left_padding+input_buffer+right_padding)
@@ -854,14 +825,14 @@ static int GetAudioStreamDataInternal(SDL_AudioStream *stream, void *buf, int ou
     //   resample_frame_size * output_frames
     //
     // Note, ConvertAudio requires (num_frames * max_sample_frame_size) of scratch space
-    const int work_buffer_frames = input_frames + (resampler_padding_frames * 2);
+    const int work_buffer_frames = input_frames + (padding_frames * 2);
     int work_buffer_capacity = work_buffer_frames * max_frame_size;
     int resample_buffer_offset = -1;
 
     // Check if we can resample directly into the output buffer.
     // Note, this is just to avoid extra copies.
     // Some other formats may fit directly into the output buffer, but i'd rather process data in a SIMD-aligned buffer.
-    if ((dst_format != SDL_AUDIO_F32) || (dst_channels != resample_channels)) {
+    if ((dst_format != resample_format) || (dst_channels != resample_channels)) {
         // Allocate space for converting the resampled output to the destination format
         int resample_convert_bytes = output_frames * max_frame_size;
         work_buffer_capacity = SDL_max(work_buffer_capacity, resample_convert_bytes);
@@ -883,45 +854,15 @@ static int GetAudioStreamDataInternal(SDL_AudioStream *stream, void *buf, int ou
         return -1;
     }
 
-    const int padding_bytes = resampler_padding_frames * src_frame_size;
-
-    Uint8* work_buffer_tail = work_buffer;
+    const Uint8* input_buffer = SDL_ReadFromAudioQueue(stream->queue,
+        NULL, resample_format, resample_channels,
+        padding_frames, input_frames, padding_frames, work_buffer);
 
-    // Split the work_buffer into [left_padding][input_buffer][right_padding]
-    Uint8* left_padding = work_buffer_tail;
-    work_buffer_tail += padding_bytes;
-
-    Uint8* input_buffer = work_buffer_tail;
-    work_buffer_tail += input_bytes;
-
-    Uint8* right_padding = work_buffer_tail;
-    work_buffer_tail += padding_bytes;
-
-    SDL_assert((work_buffer_tail - work_buffer) <= work_buffer_capacity);
-
-    // Now read unconverted data from the queue into the work buffer to fulfill the request.
-    if (SDL_ReadFromAudioQueue(stream->queue, input_buffer, input_bytes) != 0) {
-        SDL_assert(!"Not enough data in queue (resample read)");
+    if (!input_buffer) {
+        return SDL_SetError("Not enough data in queue (resample)");
     }
-    stream->total_bytes_queued -= input_bytes;
 
-    // Update the history buffer and fill in the left padding
-    UpdateAudioStreamHistoryBuffer(stream, input_buffer, input_bytes, left_padding, padding_bytes);
-
-    // Fill in the right padding by peeking into the input queue (missing data is filled with silence)
-    if (SDL_PeekIntoAudioQueue(stream->queue, right_padding, padding_bytes) != 0) {
-        SDL_assert(!"Not enough data in queue (resample peek)");
-    }
-
-    SDL_assert(work_buffer_frames == input_frames + (resampler_padding_frames * 2));
-
-    // Resampling! get the work buffer to float32 format, etc, in-place.
-    ConvertAudio(work_buffer_frames, work_buffer, src_format, src_channels, work_buffer, SDL_AUDIO_F32, resample_channels, NULL);
-
-    // Update the work_buffer pointers based on the new frame size
-    input_buffer = work_buffer + ((input_buffer - work_buffer) / src_frame_size * resample_frame_size);
-    work_buffer_tail = work_buffer + ((work_buffer_tail - work_buffer) / src_frame_size * resample_frame_size);
-    SDL_assert((work_buffer_tail - work_buffer) <= work_buffer_capacity);
+    input_buffer += padding_frames * resample_frame_size;
 
     // Decide where the resampled output goes
     void* resample_buffer = (resample_buffer_offset != -1) ? (work_buffer + resample_buffer_offset) : buf;
@@ -932,9 +873,7 @@ static int GetAudioStreamDataInternal(SDL_AudioStream *stream, void *buf, int ou
                   resample_rate, &stream->resample_offset);
 
     // Convert to the final format, if necessary
-    if (buf != resample_buffer) {
-        ConvertAudio(output_frames, resample_buffer, SDL_AUDIO_F32, resample_channels, buf, dst_format, dst_channels, work_buffer);
-    }
+    ConvertAudio(output_frames, resample_buffer, resample_format, resample_channels, buf, dst_format, dst_channels, work_buffer);
 
     return 0;
 }
@@ -1074,7 +1013,9 @@ int SDL_GetAudioStreamQueued(SDL_AudioStream *stream)
     }
 
     SDL_LockMutex(stream->lock);
-    const Uint64 total = stream->total_bytes_queued;
+
+    size_t total = SDL_GetAudioQueueQueued(stream->queue);
+
     SDL_UnlockMutex(stream->lock);
 
     // if this overflows an int, just clamp it to a maximum.
@@ -1092,7 +1033,6 @@ int SDL_ClearAudioStream(SDL_AudioStream *stream)
     SDL_ClearAudioQueue(stream->queue);
     SDL_zero(stream->input_spec);
     stream->resample_offset = 0;
-    stream->total_bytes_queued = 0;
 
     SDL_UnlockMutex(stream->lock);
     return 0;
@@ -1118,7 +1058,6 @@ void SDL_DestroyAudioStream(SDL_AudioStream *stream)
         SDL_UnbindAudioStream(stream);
     }
 
-    SDL_aligned_free(stream->history_buffer);
     SDL_aligned_free(stream->work_buffer);
     SDL_DestroyAudioQueue(stream->queue);
     SDL_DestroyMutex(stream->lock);
@@ -1126,6 +1065,11 @@ void SDL_DestroyAudioStream(SDL_AudioStream *stream)
     SDL_free(stream);
 }
 
+static void SDLCALL DontFreeThisAudioBuffer(void *userdata, const void *buf, int len)
+{
+    // We don't own the buffer, but know it will outlive the stream
+}
+
 int SDL_ConvertAudioSamples(const SDL_AudioSpec *src_spec, const Uint8 *src_data, int src_len,
                             const SDL_AudioSpec *dst_spec, Uint8 **dst_data, int *dst_len)
 {
@@ -1153,7 +1097,7 @@ int SDL_ConvertAudioSamples(const SDL_AudioSpec *src_spec, const Uint8 *src_data
 
     SDL_AudioStream *stream = SDL_CreateAudioStream(src_spec, dst_spec);
     if (stream) {
-        if ((SDL_PutAudioStreamData(stream, src_data, src_len) == 0) && (SDL_FlushAudioStream(stream) == 0)) {
+        if ((PutAudioStreamBuffer(stream, src_data, src_len, DontFreeThisAudioBuffer, NULL) == 0) && (SDL_FlushAudioStream(stream) == 0)) {
             dstlen = SDL_GetAudioStreamAvailable(stream);
             if (dstlen >= 0) {
                 dst = (Uint8 *)SDL_malloc(dstlen);
diff --git a/src/audio/SDL_audioqueue.c b/src/audio/SDL_audioqueue.c
index 923873676f1e5..520b7d72483fb 100644
--- a/src/audio/SDL_audioqueue.c
+++ b/src/audio/SDL_audioqueue.c
@@ -21,253 +21,131 @@
 #include "SDL_internal.h"
 
 #include "SDL_audioqueue.h"
+#include "SDL_sysaudio.h"
 
 #define AUDIO_SPECS_EQUAL(x, y) (((x).format == (y).format) && ((x).channels == (y).channels) && ((x).freq == (y).freq))
 
+typedef struct SDL_MemoryPool SDL_MemoryPool;
+
+struct SDL_MemoryPool
+{
+    void *free_blocks;
+    size_t block_size;
+    size_t num_free;
+    size_t max_free;
+};
+
 struct SDL_AudioTrack
 {
     SDL_AudioSpec spec;
     SDL_bool flushed;
     SDL_AudioTrack *next;
 
-    size_t (*avail)(void *ctx);
-    int (*write)(void *ctx, const Uint8 *buf, size_t len);
-    size_t (*read)(void *ctx, Uint8 *buf, size_t len, SDL_bool advance);
-    void (*destroy)(void *ctx);
+    void *userdata;
+    SDL_ReleaseAudioBufferCallback callback;
+
+    Uint8 *data;
+    size_t head;
+    size_t tail;
+    size_t capacity;
 };
 
 struct SDL_AudioQueue
 {
     SDL_AudioTrack *head;
     SDL_AudioTrack *tail;
-    size_t chunk_size;
-};
 
-typedef struct SDL_AudioChunk SDL_AudioChunk;
+    Uint8 *history_buffer;
+    size_t history_length;
+    size_t history_capacity;
 
-struct SDL_AudioChunk
-{
-    SDL_AudioChunk *next;
-    size_t head;
-    size_t tail;
-    Uint8 data[SDL_VARIABLE_LENGTH_ARRAY];
+    SDL_MemoryPool track_pool;
+    SDL_MemoryPool chunk_pool;
 };
 
-typedef struct SDL_ChunkedAudioTrack
-{
-    SDL_AudioTrack track;
-
-    size_t chunk_size;
-
-    SDL_AudioChunk *head;
-    SDL_AudioChunk *tail;
-    size_t queued_bytes;
-
-    SDL_AudioChunk *free_chunks;
-    size_t num_free_chunks;
-} SDL_ChunkedAudioTrack;
-
-static void DestroyAudioChunk(SDL_AudioChunk *chunk)
+// Allocate a brand-new block without first checking the pool for a free one
+static void *AllocNewMemoryPoolBlock(const SDL_MemoryPool *pool)
 {
-    SDL_free(chunk);
+    return SDL_malloc(pool->block_size);
 }
 
-static void DestroyAudioChunks(SDL_AudioChunk *chunk)
+// Return a block, reusing a free one from the pool before allocating a new one
+static void *AllocMemoryPoolBlock(SDL_MemoryPool *pool)
 {
-    while (chunk) {
-        SDL_AudioChunk *next = chunk->next;
-        DestroyAudioChunk(chunk);
-        chunk = next;
+    if (pool->num_free == 0) {
+        return AllocNewMemoryPoolBlock(pool);
     }
-}
 
-static void ResetAudioChunk(SDL_AudioChunk *chunk)
-{
-    chunk->next = NULL;
-    chunk->head = 0;
-    chunk->tail = 0;
+    void *block = pool->free_blocks;
+    pool->free_blocks = *(void **)block;
+    --pool->num_free;
+    return block;
 }
 
-static SDL_AudioChunk *CreateAudioChunk(size_t chunk_size)
+// Free a block, or add it to the pool if there's room
+static void FreeMemoryPoolBlock(SDL_MemoryPool *pool, void *block)
 {
-    SDL_AudioChunk *chunk = (SDL_AudioChunk *)SDL_malloc(sizeof(*chunk) + chunk_size);
-
-    if (!chunk) {
-        return NULL;
-    }
-
-    ResetAudioChunk(chunk);
-
-    return chunk;
-}
-
-static void DestroyAudioTrackChunk(SDL_ChunkedAudioTrack *track, SDL_AudioChunk *chunk)
-{
-    // Keeping a list of free chunks reduces memory allocations,
-    // But also increases the amount of work to perform when freeing the track.
-    const size_t max_free_bytes = 64 * 1024;
-
-    if (track->chunk_size * track->num_free_chunks < max_free_bytes) {
-        chunk->next = track->free_chunks;
-        track->free_chunks = chunk;
-        ++track->num_free_chunks;
+    if (pool->num_free < pool->max_free) {
+        *(void **)block = pool->free_blocks;
+        pool->free_blocks = block;
+        ++pool->num_free;
     } else {
-        DestroyAudioChunk(chunk);
+        SDL_free(block);
     }
 }
 
-static SDL_AudioChunk *CreateAudioTrackChunk(SDL_ChunkedAudioTrack *track)
+// Destroy a pool and all of its blocks
+static void DestroyMemoryPool(SDL_MemoryPool *pool)
 {
-    if (track->num_free_chunks > 0) {
-        SDL_AudioChunk *chunk = track->free_chunks;
-
-        track->free_chunks = chunk->next;
-        --track->num_free_chunks;
-
-        ResetAudioChunk(chunk);
-
-        return chunk;
+    void *block = pool->free_blocks;
+    pool->free_blocks = NULL;
+    pool->num_free = 0;
+
+    while (block) {
+        void *next = *(void **)block;
+        SDL_free(block);
+        block = next;
     }
-
-    return CreateAudioChunk(track->chunk_size);
 }
 
-static size_t AvailChunkedAudioTrack(void *ctx)
+// Keeping a list of free blocks reduces memory allocations,
+// but also increases the amount of work to perform when destroying the pool.
+static void InitMemoryPool(SDL_MemoryPool *pool, size_t block_size, size_t max_free)
 {
-    SDL_ChunkedAudioTrack *track = (SDL_ChunkedAudioTrack *)ctx;
+    SDL_zerop(pool);
 
-    return track->queued_bytes;
+    SDL_assert(block_size >= sizeof(void *));
+    pool->block_size = block_size;
+    pool->max_free = max_free;
 }
 
-static int WriteToChunkedAudioTrack(void *ctx, const Uint8 *data, size_t len)
+// Allocates a number of blocks and adds them to the pool
+static int ReserveMemoryPoolBlocks(SDL_MemoryPool *pool, size_t num_blocks)
 {
-    SDL_ChunkedAudioTrack *track = (SDL_ChunkedAudioTrack *)ctx;
-
-    SDL_AudioChunk *chunk = track->tail;
-
-    // Handle the first chunk
-    if (!chunk) {
-        chunk = CreateAudioTrackChunk(track);
+    for (; num_blocks; --num_blocks) {
+        void *block = AllocNewMemoryPoolBlock(pool);
 
-        if (!chunk) {
-            return -1;
-        }
-
-        SDL_assert((track->head == NULL) && (track->tail == NULL) && (track->queued_bytes == 0));
-        track->head = chunk;
-        track->tail = chunk;
-    }
-
-    size_t total = 0;
-    size_t old_tail = chunk->tail;
-    size_t chunk_size = track->chunk_size;
-
-    while (chunk) {
-        size_t to_write = chunk_size - chunk->tail;
-        to_write = SDL_min(to_write, len - total);
-        SDL_memcpy(&chunk->data[chunk->tail], &data[total], to_write);
-        total += to_write;
-
-        chunk->tail += to_write;
-
-        if (total == len) {
-            break;
+        if (block == NULL) {
+            return SDL_OutOfMemory();
         }
 
-        SDL_AudioChunk *next = CreateAudioTrackChunk(track);
-        chunk->next = next;
-        chunk = next;
+        *(void **)block = pool->free_blocks;
+        pool->free_blocks = block;
+        ++pool->num_free;
     }
 
-    // Roll back the changes if we couldn't write all the data
-    if (!chunk) {
-        chunk = track->tail;
-
-        SDL_AudioChunk *next = chunk->next;
-        chunk->next = NULL;
-        chunk->tail = old_tail;
-
-        DestroyAudioChunks(next);
-
-        return -1;
-    }
-
-    track->tail = chunk;
-    track->queued_bytes += total;
-
     return 0;
 }
 
-static size_t ReadFromChunkedAudioTrack(void *ctx, Uint8 *data, size_t len, SDL_bool advance)
-{
-    SDL_ChunkedAudioTrack *track = (SDL_ChunkedAudioTrack *)ctx;
-    SDL_AudioChunk *chunk = track->head;
-
-    size_t total = 0;
-    size_t head = 0;
-
-    while (chunk) {
-        head = chunk->head;
-
-        size_t to_read = chunk->tail - head;
-        to_read = SDL_min(to_read, len - total);
-        SDL_memcpy(&data[total], &chunk->data[head], to_read);
-        total += to_read;
-
-        SDL_AudioChunk *next = chunk->next;
-
-        if (total == len) {
-            head += to_read;
-            break;
-        }
-
-        if (advance) {
-            DestroyAudioTrackChunk(track, chunk);
-        }
-
-        chunk = next;
-    }
-
-    if (advance) {
-        if (chunk) {
-            chunk->head = head;
-            track->head = chunk;
-        } else {
-            track->head = NULL;
-            track->tail = NULL;
-        }
-
-        track->queued_bytes -= total;
-    }
-
-    return total;
-}
-
-static void DestroyChunkedAudioTrack(void *ctx)
-{
-    SDL_ChunkedAudioTrack *track = (SDL_ChunkedAudioTrack *)ctx;
-    DestroyAudioChunks(track->head);
-    DestroyAudioChunks(track->free_chunks);
-    SDL_free(track);
-}
-
-static SDL_AudioTrack *CreateChunkedAudioTrack(const SDL_AudioSpec *spec, size_t chunk_size)
+void SDL_DestroyAudioQueue(SDL_AudioQueue *queue)
 {
-    SDL_ChunkedAudioTrack *track = (SDL_ChunkedAudioTrack *)SDL_calloc(1, sizeof(*track));
-
-    if (!track) {
-        return NULL;
-    }
-
-    SDL_copyp(&track->track.spec, spec);
-    track->track.avail = AvailChunkedAudioTrack;
-    track->track.write = WriteToChunkedAudioTrack;
-    track->track.read = ReadFromChunkedAudioTrack;
-    track->track.destroy = DestroyChunkedAudioTrack;
+    SDL_ClearAudioQueue(queue);
 
-    track->chunk_size = chunk_size;
+    DestroyMemoryPool(&queue->track_pool);
+    DestroyMemoryPool(&queue->chunk_pool);
+    SDL_aligned_free(queue->history_buffer);
 
-    return &track->track;
+    SDL_free(queue);
 }
 
 SDL_AudioQueue *SDL_CreateAudioQueue(size_t chunk_size)
@@ -278,35 +156,42 @@ SDL_AudioQueue *SDL_CreateAudioQueue(size_t chunk_size)
         return NULL;
     }
 
-    queue->chunk_size = chunk_size;
+    InitMemoryPool(&queue->track_pool, sizeof(SDL_AudioTrack), 8);
+    InitMemoryPool(&queue->chunk_pool, chunk_size, 4);
+
+    if (ReserveMemoryPoolBlocks(&queue->track_pool, 2) != 0) {
+        SDL_DestroyAudioQueue(queue);
+        return NULL;
+    }
 
     return queue;
 }
 
-void SDL_DestroyAudioQueue(SDL_AudioQueue *queue)
+static void DestroyAudioTrack(SDL_AudioQueue *queue, SDL_AudioTrack *track)
 {
-    SDL_ClearAudioQueue(queue);
+    track->callback(track->userdata, track->data, (int)track->capacity);
 
-    SDL_free(queue);
+    FreeMemoryPoolBlock(&queue->track_pool, track);
 }
 
 void SDL_ClearAudioQueue(SDL_AudioQueue *queue)
 {
     SDL_AudioTrack *track = queue->head;
+
     queue->head = NULL;
     queue->tail = NULL;
+    queue->history_length = 0;
 
     while (track) {
         SDL_AudioTrack *next = track->next;
-        track->destroy(track);
+        DestroyAudioTrack(queue, track);
         track = next;
     }
 }
 
-static void SDL_FlushAudioTrack(SDL_AudioTrack *track)
+static void FlushAudioTrack(SDL_AudioTrack *track)
 {
     track->flushed = SDL_TRUE;
-    track->write = NULL;
 }
 
 void SDL_FlushAudioQueue(SDL_AudioQueue *queue)
@@ -314,7 +199,7 @@ void SDL_FlushAudioQueue(SDL_AudioQueue *queue)
     SDL_AudioTrack *track = queue->tail;
 
     if (track) {
-        SDL_FlushAudioTrack(track);
+        FlushAudioTrack(track);
     }
 }
 
@@ -326,7 +211,7 @@ void SDL_PopAudioQueueHead(SDL_AudioQueue *queue)
         SDL_bool flushed = track->flushed;
 
         SDL_AudioTrack *next = track->next;
-        track->destroy(track);
+        DestroyAudioTrack(queue, track);
         track = next;
 
         if (flushed) {
@@ -335,27 +220,59 @@ void SDL_PopAudioQueueHead(SDL_AudioQueue *queue)
     }
 
     queue->head = track;
+    queue->history_length = 0;
 
     if (!track) {
         queue->tail = NULL;
     }
 }
 
-size_t SDL_GetAudioQueueChunkSize(SDL_AudioQueue *queue)
+SDL_AudioTrack *SDL_CreateAudioTrack(
+    SDL_AudioQueue *queue, const SDL_AudioSpec *spec,
+    Uint8 *data, size_t len, size_t capacity,
+    SDL_ReleaseAudioBufferCallback callback, void *userdata)
 {
-    return queue->chunk_size;
+    SDL_AudioTrack *track = AllocMemoryPoolBlock(&queue->track_pool);
+
+    if (!track) {
+        return NULL;
+    }
+
+    SDL_zerop(track);
+    SDL_copyp(&track->spec, spec);
+
+    track->userdata = userdata;
+    track->callback = callback;
+    track->data = data;
+    track->head = 0;
+    track->tail = len;
+    track->capacity = capacity;
+
+    return track;
 }
 
-SDL_AudioTrack *SDL_CreateChunkedAudioTrack(const SDL_AudioSpec *spec, const Uint8 *data, size_t len, size_t chunk_size)
+static void SDLCALL FreeChunkedAudioBuffer(void *userdata, const void *buf, int len)
 {
-    SDL_AudioTrack *track = CreateChunkedAudioTrack(spec, chunk_size);
+    SDL_AudioQueue *queue = userdata;
 
-    if (!track) {
+    FreeMemoryPoolBlock(&queue->chunk_pool, (void *)buf);
+}
+
+static SDL_AudioTrack *CreateChunkedAudioTrack(SDL_AudioQueue *queue, const SDL_AudioSpec *spec)
+{
+    void *chunk = AllocMemoryPoolBlock(&queue->chunk_pool);
+
+    if (!chunk) {
         return NULL;
     }
 
-    if (track->write(track, data, len) != 0) {
-        track->destroy(track);
+    size_t capacity = queue->chunk_pool.block_size;
+    capacity -= capacity % SDL_AUDIO_FRAMESIZE(*spec);
+
+    SDL_AudioTrack *track = SDL_CreateAudioTrack(queue, spec, chunk, 0, capacity, FreeChunkedAudioBuffer, queue);
+
+    if (!track) {
+        FreeMemoryPoolBlock(&queue->chunk_pool, chunk);
         return NULL;
     }
 
@@ -369,7 +286,7 @@ void SDL_AddTrackToAudioQueue(SDL_AudioQueue *queue, SDL_AudioTrack *track)
     if (tail) {
         // If the spec has changed, make sure to flush the previous track
         if (!AUDIO_SPECS_EQUAL(tail->spec, track->spec)) {
-            SDL_FlushAudioTrack(tail);
+            FlushAudioTrack(tail);
         }
 
         tail->next = track;
@@ -380,6 +297,19 @@ void SDL_AddTrackToAudioQueue(SDL_AudioQueue *queue, SDL_AudioTrack *track)
     queue->tail = track;
 }
 
+static size_t WriteToAudioTrack(SDL_AudioTrack *track, const Uint8 *data, size_t len)
+{
+    if (track->flushed || track->tail >= track->capacity) {
+        return 0;
+    }
+
+    len = SDL_min(len, track->capacity - track->tail);
+    SDL_memcpy(&track->data[track->tail], data

(Patch may be truncated, please check the link at the top of this post.)