SDL_mixer: stb_vorbis: Use a buffer to cache SDL_RWops reads (24034)

From 2403465b5af6a5f9d21106fa038f3aefe5fc3438 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorbj=C3=B8rn=20Lindeijer?= <[EMAIL REDACTED]>
Date: Fri, 21 Feb 2025 11:20:28 +0100
Subject: [PATCH] stb_vorbis: Use a buffer to cache SDL_RWops reads

Requesting the data one byte at a time can be prohibitively expensive,
especially when the SDL_RWops isn't backed by constant memory but is
routed through PhysicsFS, for example.

This change introduces a 2 KB buffer, which feels small enough not to add
too much overhead and is the same size as the one used by libvorbisfile.
In my measurements, using a larger buffer no longer made a significant
difference when starting to play an OGG file.

Testing with an uncompressed OGG file read from a zip file, timing the
call to Mix_LoadMUS_RW:

* No buffer: ~ 47 ms
* 10 bytes: ~ 5.2 ms
* 100 bytes: ~ 1.1 ms
* 1 KB: ~ 0.62 ms
* 2 KB: ~ 0.6 ms
* 8 KB: ~ 0.58 ms

When using the libvorbisfile backend it takes about 0.2 ms.

(cherry picked from commit da1704a70ca94a866f87320f9e8126dd9f203e7d)
---
 src/codecs/stb_vorbis/stb_vorbis.h | 75 ++++++++++++++++++++++++------
 1 file changed, 61 insertions(+), 14 deletions(-)

diff --git a/src/codecs/stb_vorbis/stb_vorbis.h b/src/codecs/stb_vorbis/stb_vorbis.h
index c159a7c3..4b3c4900 100644
--- a/src/codecs/stb_vorbis/stb_vorbis.h
+++ b/src/codecs/stb_vorbis/stb_vorbis.h
@@ -308,6 +308,7 @@ extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_cl
 #ifdef STB_VORBIS_SDL
 extern stb_vorbis * stb_vorbis_open_rwops_section(SDL_RWops *rwops, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length);
 extern stb_vorbis * stb_vorbis_open_rwops(SDL_RWops *rwops, int close_on_free, int *error, const stb_vorbis_alloc *alloc);
+#define RWOPS_BUFFER_SIZE 2048
 #endif
 
 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
@@ -836,6 +837,10 @@ struct stb_vorbis
 #ifdef STB_VORBIS_SDL
    SDL_RWops *rwops;
    uint32 rwops_start;
+   uint32 rwops_virtual_pos;
+   uint32 rwops_buffer_pos;
+   uint32 rwops_buffer_fill;
+   uint8 rwops_buffer[RWOPS_BUFFER_SIZE];
    int close_on_free;
 #endif
 
@@ -1400,9 +1405,13 @@ static int STBV_CDECL point_compare(const void *p, const void *q)
 static uint8 get8(vorb *z)
 {
    #ifdef STB_VORBIS_SDL
-   uint8 c;
-   if (SDL_RWread(z->rwops, &c, 1, 1) != 1) { z->eof = TRUE; return 0; }
-   return c;
+   if (z->rwops_buffer_pos >= z->rwops_buffer_fill) {
+      z->rwops_buffer_fill = SDL_RWread(z->rwops, z->rwops_buffer, 1, RWOPS_BUFFER_SIZE);
+      z->rwops_buffer_pos = 0;
+      if (z->rwops_buffer_fill == 0) { z->eof = TRUE; return 0; }
+   }
+   z->rwops_virtual_pos++;
+   return z->rwops_buffer[z->rwops_buffer_pos++];
 
    #else
    if (USE_MEMORY(z)) {
@@ -1433,9 +1442,28 @@ static uint32 get32(vorb *f)
 static int getn(vorb *z, uint8 *data, int n)
 {
    #ifdef STB_VORBIS_SDL
-   if (SDL_RWread(z->rwops, data, n, 1) == 1) return 1;
-   z->eof = 1;
-   return 0;
+   while (n > 0) {
+      int chunk;
+
+      if (z->rwops_buffer_pos >= z->rwops_buffer_fill) {
+         z->rwops_buffer_fill = SDL_RWread(z->rwops, z->rwops_buffer, 1, RWOPS_BUFFER_SIZE);
+         z->rwops_buffer_pos = 0;
+         if (z->rwops_buffer_fill == 0) {
+            z->eof = 1;
+            return 0;
+         }
+      }
+
+      chunk = z->rwops_buffer_fill - z->rwops_buffer_pos;
+      if (chunk > n) chunk = n;
+
+      memcpy(data, z->rwops_buffer + z->rwops_buffer_pos, chunk);
+      z->rwops_buffer_pos += chunk;
+      z->rwops_virtual_pos += chunk;
+      data += chunk;
+      n -= chunk;
+   }
+   return 1;
 
    #else
    if (USE_MEMORY(z)) {
@@ -1456,11 +1484,12 @@ static int getn(vorb *z, uint8 *data, int n)
    #endif
 }
 
+static int set_file_offset(stb_vorbis *f, unsigned int loc);
+
 static void skip(vorb *z, int n)
 {
    #ifdef STB_VORBIS_SDL
-   SDL_RWseek(z->rwops, n, RW_SEEK_CUR);
-
+   set_file_offset(z, z->rwops_virtual_pos + n);
    #else
    if (USE_MEMORY(z)) {
       z->stream += n;
@@ -1485,17 +1514,31 @@ static int set_file_offset(stb_vorbis *f, unsigned int loc)
    f->eof = 0;
 
    #ifdef STB_VORBIS_SDL
-   if (loc + f->rwops_start < loc || loc >= 0x80000000) {
-      loc = 0x7fffffff;
+ { unsigned int rwops_pos;
+   uint32 buffer_start = f->rwops_virtual_pos - f->rwops_buffer_pos;
+   uint32 buffer_end = buffer_start + f->rwops_buffer_fill;
+   f->rwops_virtual_pos = loc;
+
+   // Move within buffer if possible
+   if (loc >= buffer_start && loc < buffer_end)
+   {
+      f->rwops_buffer_pos = loc - buffer_start;
+      return 1;
+   }
+
+   rwops_pos = loc + f->rwops_start;
+   if (rwops_pos < loc || loc >= 0x80000000) {
+      rwops_pos = 0x7fffffff;
       f->eof = 1;
-   } else {
-      loc += f->rwops_start;
    }
-   if (SDL_RWseek(f->rwops, loc, RW_SEEK_SET) != -1)
+
+   f->rwops_buffer_pos = f->rwops_buffer_fill = 0;  // Invalidate buffer
+   if (SDL_RWseek(f->rwops, rwops_pos, RW_SEEK_SET) != -1)
       return 1;
    f->eof = 1;
    SDL_RWseek(f->rwops, f->rwops_start, RW_SEEK_END);
    return 0;
+ }
 
    #else
    if (USE_MEMORY(f)) {
@@ -4440,6 +4483,10 @@ static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
    #ifdef STB_VORBIS_SDL
    p->close_on_free = FALSE;
    p->rwops = NULL;
+   p->rwops_start = 0;
+   p->rwops_virtual_pos = 0;
+   p->rwops_buffer_pos = 0;
+   p->rwops_buffer_fill = 0;
    #endif
    #ifndef STB_VORBIS_NO_STDIO
    p->close_on_free = FALSE;
@@ -4711,7 +4758,7 @@ unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
    if (f->push_mode) return 0;
    #endif
    #ifdef STB_VORBIS_SDL
-   return (unsigned int) (SDL_RWtell(f->rwops) - f->rwops_start);
+   return f->rwops_virtual_pos;
    #else
    if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
    #endif