SDL_mixer: Added support for playing ADPCM music

From 311259b5e7f640b6b48a17f4295511e27092f92d Mon Sep 17 00:00:00 2001
From: Sam Lantinga <[EMAIL REDACTED]>
Date: Mon, 3 Jul 2023 12:37:15 -0700
Subject: [PATCH] Added support for playing ADPCM music

---
 src/codecs/music_wav.c | 801 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 761 insertions(+), 40 deletions(-)

diff --git a/src/codecs/music_wav.c b/src/codecs/music_wav.c
index 450ace61..01f79ab6 100644
--- a/src/codecs/music_wav.c
+++ b/src/codecs/music_wav.c
@@ -26,6 +26,48 @@
 #include "music_wav.h"
 #include "mp3utils.h"
 
+typedef struct ADPCM_DecoderState
+{
+    Uint32 channels;        /* Number of channels. */
+    size_t blocksize;       /* Size of an ADPCM block in bytes. */
+    size_t blockheadersize; /* Size of an ADPCM block header in bytes. */
+    size_t samplesperblock; /* Number of samples per channel in an ADPCM block. */
+    void *ddata;            /* Decoder data from initialization. */
+    void *cstate;           /* Decoding state for each channel. */
+
+    /* Current ADPCM block */
+    struct
+    {
+        Uint8 *data;
+        size_t size;
+        size_t pos;
+    } block;
+
+    /* Decoded 16-bit PCM data. */
+    struct
+    {
+        Sint16 *data;
+        size_t size;
+        size_t pos;
+        size_t read;
+    } output;
+
+} ADPCM_DecoderState;
+
+typedef struct MS_ADPCM_CoeffData
+{
+    Uint16 coeffcount;
+    Sint16 *coeff;
+    Sint16 aligndummy; /* Has to be last member. */
+} MS_ADPCM_CoeffData;
+
+typedef struct MS_ADPCM_ChannelState
+{
+    Uint16 delta;
+    Sint16 coeff1;
+    Sint16 coeff2;
+} MS_ADPCM_ChannelState;
+
 typedef struct {
     SDL_bool active;
     Uint32 start;
@@ -44,7 +86,9 @@ typedef struct {
     Sint64 stop;
     Sint64 samplesize;
     Uint8 *buffer;
+    size_t buffered;
     SDL_AudioStream *stream;
+    ADPCM_DecoderState adpcm_state;
     unsigned int numloops;
     WAVLoopPoint *loops;
     Mix_MusicMetaTags tags;
@@ -70,12 +114,16 @@ typedef struct {
 #define SMPL        0x6c706d73      /* "smpl" */
 #define LIST        0x5453494c      /* "LIST" */
 #define ID3_        0x20336469      /* "id3 " */
-#define PCM_CODE    1               /* WAVE_FORMAT_PCM */
-#define ADPCM_CODE  2               /* WAVE_FORMAT_ADPCM */
-#define FLOAT_CODE  3               /* WAVE_FORMAT_IEEE_FLOAT */
-#define ALAW_CODE   6               /* WAVE_FORMAT_ALAW */
-#define uLAW_CODE   7               /* WAVE_FORMAT_MULAW */
-#define EXT_CODE    0xFFFE          /* WAVE_FORMAT_EXTENSIBLE */
+#define UNKNOWN_CODE    0x0000
+#define PCM_CODE        0x0001      /* WAVE_FORMAT_PCM */
+#define MS_ADPCM_CODE   0x0002      /* WAVE_FORMAT_ADPCM */
+#define IEEE_FLOAT_CODE 0x0003      /* WAVE_FORMAT_IEEE_FLOAT */
+#define ALAW_CODE       0x0006      /* WAVE_FORMAT_ALAW */
+#define MULAW_CODE      0x0007      /* WAVE_FORMAT_MULAW */
+#define IMA_ADPCM_CODE  0x0011
+#define MPEG_CODE       0x0050
+#define MPEGLAYER3_CODE 0x0055
+#define EXTENSIBLE_CODE 0xFFFE
 #define WAVE_MONO   1
 #define WAVE_STEREO 2
 
@@ -406,6 +454,613 @@ static int fetch_float64le(void *context, int length)
     return length / 2;
 }
 
+static int MS_ADPCM_Init(ADPCM_DecoderState *state, const Uint8 *chunk_data, Uint32 chunk_length)
+{
+    const WaveFMTEx *fmt = (WaveFMTEx *)chunk_data;
+    const Uint16 channels = SDL_SwapLE16(fmt->format.channels);
+    const Uint16 blockalign = SDL_SwapLE16(fmt->format.blockalign);
+    const Uint16 bitspersample = SDL_SwapLE16(fmt->format.bitspersample);
+    const size_t blockheadersize = (size_t)channels * 7;
+    const size_t blockdatasize = (size_t)blockalign - blockheadersize;
+    const size_t blockframebitsize = (size_t)bitspersample * channels;
+    const size_t blockdatasamples = (blockdatasize * 8) / blockframebitsize;
+    const Sint16 presetcoeffs[14] = { 256, 0, 512, -256, 0, 0, 192, 64, 240, 0, 460, -208, 392, -232 };
+    Uint16 cbExtSize, samplesperblock;
+    size_t i, coeffcount;
+    MS_ADPCM_CoeffData *coeffdata;
+
+    /* Sanity checks. */
+
+    /* While it's clear how IMA ADPCM handles more than two channels, the nibble
+     * order of MS ADPCM makes it awkward. The Standards Update does not talk
+     * about supporting more than stereo anyway.
+     */
+    if (channels > 2) {
+        return Mix_SetError("Invalid number of channels");
+    }
+
+    if (bitspersample != 4) {
+        return Mix_SetError("Invalid MS ADPCM bits per sample of %u", (unsigned int)bitspersample);
+    }
+
+    /* The block size must be big enough to contain the block header. */
+    if (blockalign < blockheadersize) {
+        return Mix_SetError("Invalid MS ADPCM block size (nBlockAlign)");
+    }
+
+    /* There are wSamplesPerBlock, wNumCoef, and at least 7 coefficient pairs in
+     * the extended part of the header.
+     */
+    if (chunk_length < 22) {
+        return Mix_SetError("Could not read MS ADPCM format header");
+    }
+
+    cbExtSize = SDL_SwapLE16(fmt->cbSize);
+    samplesperblock = SDL_SwapLE16(fmt->Samples.samplesperblock);
+    /* Number of coefficient pairs. A pair has two 16-bit integers. */
+    coeffcount = chunk_data[20] | ((size_t)chunk_data[21] << 8);
+    /* bPredictor, the integer offset into the coefficients array, is only
+     * 8 bits. It can only address the first 256 coefficients. Let's limit
+     * the count number here.
+     */
+    if (coeffcount > 256) {
+        coeffcount = 256;
+    }
+
+    if (chunk_length < 22 + coeffcount * 4) {
+        return Mix_SetError("Could not read custom coefficients in MS ADPCM format header");
+    } else if (cbExtSize < 4 + coeffcount * 4) {
+        return Mix_SetError("Invalid MS ADPCM format header (too small)");
+    } else if (coeffcount < 7) {
+        return Mix_SetError("Missing required coefficients in MS ADPCM format header");
+    }
+
+    coeffdata = (MS_ADPCM_CoeffData *)SDL_malloc(sizeof(MS_ADPCM_CoeffData) + coeffcount * 4);
+    if (coeffdata == NULL) {
+        return SDL_OutOfMemory();
+    }
+    coeffdata->coeff = &coeffdata->aligndummy;
+    coeffdata->coeffcount = (Uint16)coeffcount;
+    state->ddata = coeffdata; /* Freed in cleanup. */
+
+    /* Copy the 16-bit pairs. */
+    for (i = 0; i < coeffcount * 2; i++) {
+        Sint32 c = chunk_data[22 + i * 2] | ((Sint32)chunk_data[23 + i * 2] << 8);
+        if (c >= 0x8000) {
+            c -= 0x10000;
+        }
+        if (i < 14 && c != presetcoeffs[i]) {
+            return Mix_SetError("Wrong preset coefficients in MS ADPCM format header");
+        }
+        coeffdata->coeff[i] = (Sint16)c;
+    }
+
+    /* Technically, wSamplesPerBlock is required, but we have all the
+     * information in the other fields to calculate it, if it's zero.
+     */
+    if (samplesperblock == 0) {
+        /* Let's be nice to the encoders that didn't know how to fill this.
+         * The Standards Update calculates it this way:
+         *
+         *   x = Block size (in bits) minus header size (in bits)
+         *   y = Bit depth multiplied by channel count
+         *   z = Number of samples per channel in block header
+         *   wSamplesPerBlock = x / y + z
+         */
+        samplesperblock = (Uint32)blockdatasamples + 2;
+    }
+
+    /* nBlockAlign can be in conflict with wSamplesPerBlock. For example, if
+     * the number of samples doesn't fit into the block. The Standards Update
+     * also describes wSamplesPerBlock with a formula that makes it necessary to
+     * always fill the block with the maximum amount of samples, but this is not
+     * enforced here as there are no compatibility issues.
+     * A truncated block header with just one sample is not supported.
+     */
+    if (samplesperblock == 1 || blockdatasamples < (size_t)(samplesperblock - 2)) {
+        return Mix_SetError("Invalid number of samples per MS ADPCM block (wSamplesPerBlock)");
+    }
+
+    state->blocksize = blockalign;
+    state->channels = channels;
+    state->blockheadersize = blockheadersize;
+    state->samplesperblock = samplesperblock;
+
+    state->cstate = SDL_calloc(channels, sizeof(MS_ADPCM_ChannelState));
+    if (!state->cstate) {
+        return SDL_OutOfMemory();
+    }
+
+    state->block.pos = 0;
+    state->block.size = blockalign;
+    state->block.data = (Uint8 *)SDL_malloc(state->block.size);
+    if (!state->block.data) {
+        return SDL_OutOfMemory();
+    }
+
+    state->output.read = 0;
+    state->output.pos = 0;
+    state->output.size = state->samplesperblock;
+    state->output.data = (Sint16 *)SDL_malloc(state->output.size * sizeof(Sint16));
+    if (!state->output.data) {
+        return SDL_OutOfMemory();
+    }
+
+    return 0;
+}
+
+static Sint16 MS_ADPCM_ProcessNibble(MS_ADPCM_ChannelState *cstate, Sint32 sample1, Sint32 sample2, Uint8 nybble)
+{
+    const Sint32 max_audioval = 32767;
+    const Sint32 min_audioval = -32768;
+    const Uint16 max_deltaval = 65535;
+    const Uint16 adaptive[] = {
+        230, 230, 230, 230, 307, 409, 512, 614,
+        768, 614, 512, 409, 307, 230, 230, 230
+    };
+    Sint32 new_sample;
+    Sint32 errordelta;
+    Uint32 delta = cstate->delta;
+
+    new_sample = (sample1 * cstate->coeff1 + sample2 * cstate->coeff2) / 256;
+    /* The nibble is a signed 4-bit error delta. */
+    errordelta = (Sint32)nybble - (nybble >= 0x08 ? 0x10 : 0);
+    new_sample += (Sint32)delta * errordelta;
+    if (new_sample < min_audioval) {
+        new_sample = min_audioval;
+    } else if (new_sample > max_audioval) {
+        new_sample = max_audioval;
+    }
+    delta = (delta * adaptive[nybble]) / 256;
+    if (delta < 16) {
+        delta = 16;
+    } else if (delta > max_deltaval) {
+        /* This issue is not described in the Standards Update and therefore
+         * undefined. It seems sensible to prevent overflows with a limit.
+         */
+        delta = max_deltaval;
+    }
+
+    cstate->delta = (Uint16)delta;
+    return (Sint16)new_sample;
+}
+
+static int MS_ADPCM_DecodeBlockHeader(ADPCM_DecoderState *state)
+{
+    Uint8 coeffindex;
+    const Uint32 channels = state->channels;
+    Sint32 sample;
+    Uint32 c;
+    MS_ADPCM_ChannelState *cstate = (MS_ADPCM_ChannelState *)state->cstate;
+    MS_ADPCM_CoeffData *ddata = (MS_ADPCM_CoeffData *)state->ddata;
+
+    if (state->block.size < state->blockheadersize) {
+        return Mix_SetError("Invalid ADPCM header");
+    }
+
+    for (c = 0; c < channels; c++) {
+        size_t o = c;
+
+        /* Load the coefficient pair into the channel state. */
+        coeffindex = state->block.data[o];
+        if (coeffindex > ddata->coeffcount) {
+            return Mix_SetError("Invalid MS ADPCM coefficient index in block header");
+        }
+        cstate[c].coeff1 = ddata->coeff[coeffindex * 2];
+        cstate[c].coeff2 = ddata->coeff[coeffindex * 2 + 1];
+
+        /* Initial delta value. */
+        o = (size_t)channels + c * 2;
+        cstate[c].delta = state->block.data[o] | ((Uint16)state->block.data[o + 1] << 8);
+
+        /* Load the samples from the header. Interestingly, the sample later in
+         * the output stream comes first.
+         */
+        o = (size_t)channels * 3 + c * 2;
+        sample = state->block.data[o] | ((Sint32)state->block.data[o + 1] << 8);
+        if (sample >= 0x8000) {
+            sample -= 0x10000;
+        }
+        state->output.data[state->output.pos + channels] = (Sint16)sample;
+
+        o = (size_t)channels * 5 + c * 2;
+        sample = state->block.data[o] | ((Sint32)state->block.data[o + 1] << 8);
+        if (sample >= 0x8000) {
+            sample -= 0x10000;
+        }
+        state->output.data[state->output.pos] = (Sint16)sample;
+
+        state->output.pos++;
+    }
+
+    state->block.pos += state->blockheadersize;
+
+    /* Skip second sample frame that came from the header. */
+    state->output.pos += state->channels;
+
+    return 0;
+}
+
+/* Decodes the data of the MS ADPCM block. Decoding will stop if a block is too
+ * short, returning with none or partially decoded data. The partial data
+ * will always contain full sample frames (same sample count for each channel).
+ * Incomplete sample frames are discarded.
+ */
+static int MS_ADPCM_DecodeBlockData(ADPCM_DecoderState *state)
+{
+    Uint16 nybble = 0;
+    Sint16 sample1, sample2;
+    const Uint32 channels = state->channels;
+    Uint32 c;
+    MS_ADPCM_ChannelState *cstate = (MS_ADPCM_ChannelState *)state->cstate;
+
+    size_t blockpos = state->block.pos;
+    size_t blocksize = state->block.size;
+
+    size_t outpos = state->output.pos;
+
+    size_t blockframesleft = state->samplesperblock - 2;
+
+    while (blockframesleft > 0) {
+        for (c = 0; c < channels; c++) {
+            if (nybble & 0x4000) {
+                nybble <<= 4;
+            } else if (blockpos < blocksize) {
+                nybble = state->block.data[blockpos++] | 0x4000;
+            } else {
+                /* Out of input data. Drop the incomplete frame and return. */
+                state->output.pos = outpos - c;
+                return -1;
+            }
+
+            /* Load previous samples which may come from the block header. */
+            sample1 = state->output.data[outpos - channels];
+            sample2 = state->output.data[outpos - channels * 2];
+
+            sample1 = MS_ADPCM_ProcessNibble(cstate + c, sample1, sample2, (nybble >> 4) & 0x0f);
+            state->output.data[outpos++] = sample1;
+        }
+
+        blockframesleft--;
+    }
+
+    state->output.pos = outpos;
+
+    return 0;
+}
+
+static int IMA_ADPCM_Init(ADPCM_DecoderState *state, const Uint8 *chunk_data, Uint32 chunk_length)
+{
+    const WaveFMTEx *fmt = (WaveFMTEx *)chunk_data;
+    const Uint16 formattag = SDL_SwapLE16(fmt->format.encoding);
+    const Uint16 channels = SDL_SwapLE16(fmt->format.channels);
+    const Uint16 blockalign = SDL_SwapLE16(fmt->format.blockalign);
+    const Uint16 bitspersample = SDL_SwapLE16(fmt->format.bitspersample);
+    const size_t blockheadersize = (size_t)channels * 4;
+    const size_t blockdatasize = (size_t)blockalign - blockheadersize;
+    const size_t blockframebitsize = (size_t)bitspersample * channels;
+    const size_t blockdatasamples = (blockdatasize * 8) / blockframebitsize;
+    Uint16 samplesperblock = 0;
+
+    /* Sanity checks. */
+
+    /* IMA ADPCM can also have 3-bit samples, but it's not supported by SDL at this time. */
+    if (bitspersample == 3) {
+        return Mix_SetError("3-bit IMA ADPCM currently not supported");
+    } else if (bitspersample != 4) {
+        return Mix_SetError("Invalid IMA ADPCM bits per sample of %u", (unsigned int)bitspersample);
+    }
+
+    /* The block size is required to be a multiple of 4 and it must be able to
+     * hold a block header.
+     */
+    if (blockalign < blockheadersize || blockalign % 4) {
+        return Mix_SetError("Invalid IMA ADPCM block size (nBlockAlign)");
+    }
+
+    if (formattag == EXTENSIBLE_CODE) {
+        /* There's no specification for this, but it's basically the same
+         * format because the extensible header has wSampePerBlocks too.
+         */
+    } else if (chunk_length >= 20) {
+        Uint16 cbExtSize = SDL_SwapLE16(fmt->cbSize);
+        if (cbExtSize >= 2) {
+            samplesperblock = SDL_SwapLE16(fmt->Samples.samplesperblock);
+        }
+    }
+
+    if (samplesperblock == 0) {
+        /* Field zero? No problem. We just assume the encoder packed the block.
+         * The specification calculates it this way:
+         *
+         *   x = Block size (in bits) minus header size (in bits)
+         *   y = Bit depth multiplied by channel count
+         *   z = Number of samples per channel in header
+         *   wSamplesPerBlock = x / y + z
+         */
+        samplesperblock = (Uint32)blockdatasamples + 1;
+    }
+
+    /* nBlockAlign can be in conflict with wSamplesPerBlock. For example, if
+     * the number of samples doesn't fit into the block. The Standards Update
+     * also describes wSamplesPerBlock with a formula that makes it necessary
+     * to always fill the block with the maximum amount of samples, but this is
+     * not enforced here as there are no compatibility issues.
+     */
+    if (blockdatasamples < (size_t)(samplesperblock - 1)) {
+        return Mix_SetError("Invalid number of samples per IMA ADPCM block (wSamplesPerBlock)");
+    }
+
+    state->blocksize = blockalign;
+    state->channels = channels;
+    state->blockheadersize = blockheadersize;
+    state->samplesperblock = samplesperblock;
+
+    state->cstate = SDL_calloc(channels, sizeof(Sint8));
+    if (!state->cstate) {
+        return SDL_OutOfMemory();
+    }
+
+    state->block.pos = 0;
+    state->block.size = blockalign;
+    state->block.data = (Uint8 *)SDL_malloc(state->block.size);
+    if (!state->block.data) {
+        return SDL_OutOfMemory();
+    }
+
+    state->output.read = 0;
+    state->output.pos = 0;
+    state->output.size = state->samplesperblock;
+    state->output.data = (Sint16 *)SDL_malloc(state->output.size * sizeof(Sint16));
+    if (!state->output.data) {
+        return SDL_OutOfMemory();
+    }
+
+    return 0;
+}
+
+static Sint16 IMA_ADPCM_ProcessNibble(Sint8 *cindex, Sint16 lastsample, Uint8 nybble)
+{
+    const Sint32 max_audioval = 32767;
+    const Sint32 min_audioval = -32768;
+    const Sint8 index_table_4b[16] = {
+        -1, -1, -1, -1,
+        2, 4, 6, 8,
+        -1, -1, -1, -1,
+        2, 4, 6, 8
+    };
+    const Uint16 step_table[89] = {
+        7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 21, 23, 25, 28, 31,
+        34, 37, 41, 45, 50, 55, 60, 66, 73, 80, 88, 97, 107, 118, 130,
+        143, 157, 173, 190, 209, 230, 253, 279, 307, 337, 371, 408,
+        449, 494, 544, 598, 658, 724, 796, 876, 963, 1060, 1166, 1282,
+        1411, 1552, 1707, 1878, 2066, 2272, 2499, 2749, 3024, 3327,
+        3660, 4026, 4428, 4871, 5358, 5894, 6484, 7132, 7845, 8630,
+        9493, 10442, 11487, 12635, 13899, 15289, 16818, 18500, 20350,
+        22385, 24623, 27086, 29794, 32767
+    };
+    Uint32 step;
+    Sint32 sample, delta;
+    Sint8 index = *cindex;
+
+    /* Clamp index into valid range. */
+    if (index > 88) {
+        index = 88;
+    } else if (index < 0) {
+        index = 0;
+    }
+
+    /* explicit cast to avoid gcc warning about using 'char' as array index */
+    step = step_table[(size_t)index];
+
+    /* Update index value */
+    *cindex = index + index_table_4b[nybble];
+
+    /* This calculation uses shifts and additions because multiplications were
+     * much slower back then. Sadly, this can't just be replaced with an actual
+     * multiplication now as the old algorithm drops some bits. The closest
+     * approximation I could find is something like this:
+     * (nybble & 0x8 ? -1 : 1) * ((nybble & 0x7) * step / 4 + step / 8)
+     */
+    delta = step >> 3;
+    if (nybble & 0x04) {
+        delta += step;
+    }
+    if (nybble & 0x02) {
+        delta += step >> 1;
+    }
+    if (nybble & 0x01) {
+        delta += step >> 2;
+    }
+    if (nybble & 0x08) {
+        delta = -delta;
+    }
+
+    sample = lastsample + delta;
+
+    /* Clamp output sample */
+    if (sample > max_audioval) {
+        sample = max_audioval;
+    } else if (sample < min_audioval) {
+        sample = min_audioval;
+    }
+
+    return (Sint16)sample;
+}
+
+static int IMA_ADPCM_DecodeBlockHeader(ADPCM_DecoderState *state)
+{
+    Sint16 step;
+    Uint32 c;
+    Uint8 *cstate = (Uint8 *)state->cstate;
+
+    if (state->block.size < state->blockheadersize) {
+        return Mix_SetError("Invalid ADPCM header");
+    }
+
+    for (c = 0; c < state->channels; c++) {
+        size_t o = state->block.pos + c * 4;
+
+        /* Extract the sample from the header. */
+        Sint32 sample = state->block.data[o] | ((Sint32)state->block.data[o + 1] << 8);
+        if (sample >= 0x8000) {
+            sample -= 0x10000;
+        }
+        state->output.data[state->output.pos++] = (Sint16)sample;
+
+        /* Channel step index. */
+        step = (Sint16)state->block.data[o + 2];
+        cstate[c] = (Sint8)(step > 0x80 ? step - 0x100 : step);
+
+        /* Reserved byte in block header, should be 0. */
+        if (state->block.data[o + 3] != 0) {
+            /* Uh oh, corrupt data?  Buggy code? */;
+        }
+    }
+
+    state->block.pos += state->blockheadersize;
+
+    return 0;
+}
+
+/* Decodes the data of the IMA ADPCM block. Decoding will stop if a block is too
+ * short, returning with none or partially decoded data. The partial data always
+ * contains full sample frames (same sample count for each channel).
+ * Incomplete sample frames are discarded.
+ */
+static int IMA_ADPCM_DecodeBlockData(ADPCM_DecoderState *state)
+{
+    size_t i;
+    int retval = 0;
+    const Uint32 channels = state->channels;
+    const size_t subblockframesize = (size_t)channels * 4;
+    Uint64 bytesrequired;
+    Uint32 c;
+
+    size_t blockpos = state->block.pos;
+    size_t blocksize = state->block.size;
+    size_t blockleft = blocksize - blockpos;
+
+    size_t outpos = state->output.pos;
+
+    Sint64 blockframesleft = state->samplesperblock - 1;
+
+    bytesrequired = (blockframesleft + 7) / 8 * subblockframesize;
+    if (blockleft < bytesrequired) {
+        /* Data truncated. Calculate how many samples we can get out if it. */
+        const size_t guaranteedframes = blockleft / subblockframesize;
+        const size_t remainingbytes = blockleft % subblockframesize;
+        blockframesleft = guaranteedframes;
+        if (remainingbytes > subblockframesize - 4) {
+            blockframesleft += (remainingbytes % 4) * 2;
+        }
+        /* Signal the truncation. */
+        retval = -1;
+    }
+
+    /* Each channel has their nibbles packed into 32-bit blocks. These blocks
+     * are interleaved and make up the data part of the ADPCM block. This loop
+     * decodes the samples as they come from the input data and puts them at
+     * the appropriate places in the output data.
+     */
+    while (blockframesleft > 0) {
+        const size_t subblocksamples = blockframesleft < 8 ? (size_t)blockframesleft : 8;
+
+        for (c = 0; c < channels; c++) {
+            Uint8 nybble = 0;
+            /* Load previous sample which may come from the block header. */
+            Sint16 sample = state->output.data[outpos + c - channels];
+
+            for (i = 0; i < subblocksamples; i++) {
+                if (i & 1) {
+                    nybble >>= 4;
+                } else {
+                    nybble = state->block.data[blockpos++];
+                }
+
+                sample = IMA_ADPCM_ProcessNibble((Sint8 *)state->cstate + c, sample, nybble & 0x0f);
+                state->output.data[outpos + c + i * channels] = sample;
+            }
+        }
+
+        outpos += channels * subblocksamples;
+        blockframesleft -= subblocksamples;
+    }
+
+    state->block.pos = blockpos;
+    state->output.pos = outpos;
+
+    return retval;
+}
+
+static void ADPCM_Cleanup(ADPCM_DecoderState *state)
+{
+    if (state->ddata) {
+        SDL_free(state->ddata);
+        state->ddata = NULL;
+    }
+    if (state->cstate) {
+        SDL_free(state->cstate);
+        state->cstate = NULL;
+    }
+    if (state->block.data) {
+        SDL_free(state->block.data);
+        SDL_zero(state->block);
+    }
+    if (state->output.data) {
+        SDL_free(state->output.data);
+        SDL_zero(state->output);
+    }
+}
+
+static int fetch_adpcm(void *context, int length, int (*DecodeBlockHeader)(ADPCM_DecoderState *state), int (*DecodeBlockData)(ADPCM_DecoderState *state))
+{
+    WAV_Music *music = (WAV_Music *)context;
+    ADPCM_DecoderState *state = &music->adpcm_state;
+    size_t len, left = (size_t)length;
+    Uint8 *dst = music->buffer;
+
+    while (left > 0) {
+        if (state->output.read == state->output.pos) {
+            Sint64 bytesread = SDL_RWread(music->src, state->block.data, state->blocksize);
+            if (bytesread < 0) {
+                return -1;
+            }
+
+            state->block.size = (size_t)bytesread < state->blocksize ? (size_t)bytesread : state->blocksize;
+            state->block.pos = 0;
+            state->output.pos = 0;
+            state->output.read = 0;
+
+            if (DecodeBlockHeader(state) < 0) {
+                return -1;
+            }
+
+            if (DecodeBlockData(state) < 0) {
+                return -1;
+            }
+        }
+        len = SDL_min(left, (state->output.pos - state->output.read) * sizeof(Sint16));
+        SDL_memcpy(dst, &state->output.data[state->output.read], len);
+        state->output.read += (len / sizeof(Sint16));
+        dst += len;
+        left -= len;
+    }
+    music->buffered = (state->output.pos - state->output.read) * sizeof(Sint16);
+
+    return length;
+}
+
+static int fetch_ms_adpcm(void *context, int length)
+{
+    return fetch_adpcm(context, length, MS_ADPCM_DecodeBlockHeader, MS_ADPCM_DecodeBlockData);
+}
+
+static int fetch_ima_adpcm(void *context, int length)
+{
+    return fetch_adpcm(context, length, IMA_ADPCM_DecodeBlockHeader, IMA_ADPCM_DecodeBlockData);
+}
+
 /*
     G711 decode tables taken from SDL (src/audio/SDL_wave.c)
 */
@@ -518,6 +1173,11 @@ static int fetch_alaw(void *context, int length)
     return fetch_xlaw(ALAW_To_PCM16, context, length);
 }
 
+static Sint64 WAV_Position(WAV_Music *music)
+{
+    return SDL_RWtell(music->src) - music->buffered;
+}
+
 /* Play some of a stream previously started with WAV_Play() */
 static int WAV_GetSome(void *context, void *data, int bytes, SDL_bool *done)
 {
@@ -542,7 +1202,7 @@ static int WAV_GetSome(void *context, void *data, int bytes, SDL_bool *done)
         return 0;
     }
 
-    pos = SDL_RWtell(music->src);
+    pos = WAV_Position(music);
     stop = music->stop;
     loop = NULL;
     for (i = 0; i < music->numloops; ++i) {
@@ -575,7 +1235,7 @@ static int WAV_GetSome(void *context, void *data, int bytes, SDL_bool *done)
         at_end = SDL_TRUE;
     }
 
-    if (loop && SDL_RWtell(music->src) >= stop) {
+    if (loop && WAV_Position(music) >= stop) {
         if (loop->current_play_count == 1) {
             loop->active = SDL_FALSE;
         } else {
@@ -588,7 +1248,7 @@ static int WAV_GetSome(void *context, void *data, int bytes, SDL_bool *done)
         }
     }
 
-    if (!looped && (at_end || SDL_RWtell(music->src) >= music->stop)) {
+    if (!looped && (at_end || WAV_Position(music) >= music->stop)) {
         if (music->play_count == 1) {
             music->play_count = 0;
             SDL_FlushAudioStream(music->stream);
@@ -616,30 +1276,63 @@ static int WAV_GetAudio(void *context, void *data, int bytes)
 static int WAV_Seek(void *context, double position)
 {
     WAV_Music *music = (WAV_Music *)context;
-    Sint64 sample_size = music->spec.freq * music->samplesize;
-    Sint64 dest_offset = (Sint64)(position * (double)music->spec.freq * music->samplesize);
-    Sint64 destpos = music->start + dest_offset;
-    destpos -= dest_offset % sample_size;
-    if (destpos > music->stop)
-        return -1;
-    if (SDL_RWseek(music->src, destpos, SDL_RW_SEEK_SET) < 0)
-        return -1;
+    Sint64 destpos;
+    if (music->encoding == MS_ADPCM_CODE || music->encoding == IMA_ADPCM_CODE) {
+        Sint64 dest_offset = (Sint64)(position * music->spec.freq * ((double)music->adpcm_state.blocksize / music->adpcm_state.samplesperblock));
+        int remainder = (int)(dest_offset % music->adpcm_state.blocksize);
+        dest_offset -= remainder;
+        destpos = music->start + dest_offset;
+        if (destpos > music->stop) {
+            return -1;
+        }
+        if (SDL_RWseek(music->src, destpos, SDL_RW_SEEK_SET) < 0) {
+            return -1;
+        }
+
+        music->buffered = 0;
+        music->adpcm_state.output.read = music->adpcm_state.output.pos;
+        if (remainder > 0) {
+            music->decode(music, remainder);
+        }
+    } else {
+        Sint64 sample_size = music->spec.freq * music->samplesize;
+        Sint64 dest_offset = (Sint64)(position * music->spec.freq * music->samplesize);
+        destpos = music->start + dest_offset;
+        destpos -= dest_offset % sample_size;
+        if (destpos > music->stop) {
+            return -1;
+        }
+        if (SDL_RWseek(music->src, destpos, SDL_RW_SEEK_SET) < 0) {
+            return -1;
+        }
+    }
     return 0;
 }
 
 static double WAV_Tell(void *context)
 {
     WAV_Music *music = (WAV_Music *)context;
-    Sint64 phys_pos = SDL_RWtell(music->src);
-    return (double)(phys_pos - music->start) / (double)(music->spec.freq * music->samplesize);
+    Sint64 byte_pos = WAV_Position(music) - music->start;
+    Sint64 sample_pos;
+    if (music->encoding == MS_ADPCM_CODE || music->encoding == IMA_ADPCM_CODE) {
+        sample_pos = ((byte_pos * music->adpcm_state.samplesperblock) / music->adpcm_state.blocksize);
+    } else {
+        sample_pos = byte_pos / music->samplesize;
+    }
+    return (double)sample_pos / music->spec.freq;
 }
 
 /* Return music duration in seconds */
 static double WAV_Duration(void *context)
 {
     WAV_Music *music = (WAV_Music *)context;
-    Sint64 sample_size = music->spec.freq * music->samplesize;
-    return (double)(music->stop - music->start) / sample_size;
+    Sint64 samples;
+    if (music->encoding == MS_ADPCM_CODE || music->encoding == IMA_ADPCM_CODE) {
+        samples = (((music->stop - music->start) * music->adpcm_state.samplesperblock) / music->adpcm_state.blocksize);
+    } else {
+        samples = (music->stop - music->start) / music->samplesize;
+    }
+    return (double)samples / music->spec.freq;
 }
 
 static const char* WAV_GetMetaTag(void *context, Mix_MusicMetaTag tag_type)
@@ -667,6 +1360,7 @@ static void WAV_Delete(void *context)
     if (music->freesrc) {
         SDL_RWclose(music->src);
     }
+    ADPCM_Cleanup(&music->adpcm_state);
     SDL_free(music);
 }
 
@@ -677,29 +1371,35 @@ static SDL_bool ParseFMT(WAV_Music *wave, Uint32 chunk_length)
     size_t size;
     int bits;
     Sint64 nbread;
+    Uint8 *chunk;
 
     if (chunk_length < sizeof(fmt.format)) {
         Mix_SetError("Wave format chunk too small");
         return SDL_FALSE;
     }
 
-    size = (chunk_length >= sizeof(fmt)) ? sizeof(fmt) : sizeof(fmt.format);
-    nbread = SDL_RWread(wave->src, &fmt, size);
-    if (nbread < 0 || (Uint64)nbread != (Uint64)size) {
-        Mix_SetError("Couldn't read %d bytes from WAV file", chunk_length);
+    chunk = (Uint8 *)SDL_malloc(chunk_length);
+    if (!chunk) {
+        Mix_SetError("Out of memory");
         return SDL_FALSE;
     }
-    chunk_length = (Uint32)(chunk_length - size);
-    if (chunk_length != 0 && SDL_RWseek(wave->src, chunk_length, SDL_RW_SEEK_CUR) < 0) {
+
+    nbread = SDL_RWread(wave->src, chunk, chunk_length);
+    if (nbread < 0 || (Uint64)nbread != (Uint64)chunk_length) {
         Mix_SetError("Couldn't read %d bytes from WAV file", chunk_length);
+        SDL_free(chunk);
         return SDL_FALSE;
     }
+    size = (chunk_length >= sizeof(fmt)) ? sizeof(fmt) : sizeof(fmt.format);
+    SDL_zero(fmt);
+    SDL_memcpy(&fmt, chunk, size);
 
     wave->encoding = SDL_SwapLE16(fmt.format.encoding);
 
-    if (wave->encoding == EXT_CODE) {
+    if (wave->encoding == EXTENSIBLE_CODE) {
         if (size < sizeof(fmt)) {
             Mix_SetError("Wave format chunk too small");
+            SDL_free(chunk);
             return SDL_FALSE;
         }
         wave->encoding = (Uint16)SDL_SwapLE32(fmt.subencoding);
@@ -708,31 +1408,52 @@ static SDL_bool ParseFMT(WAV_Music *wave, Uint32 chunk_length)
     /* Decode the audio data format */
     switch 

(Patch may be truncated, please check the link at the top of this post.)