SDL: camera: Fixed surface formats, etc, for Emscripten backend.

From 725af6ad16a91591ce1a950b6575ac3b69b9afc8 Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <[EMAIL REDACTED]>
Date: Fri, 28 Feb 2025 16:12:37 -0500
Subject: [PATCH] camera: Fixed surface formats, etc, for Emscripten backend.

Fixes #12374.
---
 src/camera/SDL_camera.c                       | 186 +++++++++++-------
 src/camera/SDL_syscamera.h                    |   4 +
 src/camera/emscripten/SDL_camera_emscripten.c |  85 ++++----
 3 files changed, 163 insertions(+), 112 deletions(-)

diff --git a/src/camera/SDL_camera.c b/src/camera/SDL_camera.c
index bc723c70edaae..b73074fb6974d 100644
--- a/src/camera/SDL_camera.c
+++ b/src/camera/SDL_camera.c
@@ -956,6 +956,112 @@ static int SDLCALL CameraThread(void *devicep)
     return 0;
 }
 
+bool SDL_PrepareCameraSurfaces(SDL_Camera *device)
+{
+    SDL_CameraSpec *appspec = &device->spec;           // the app wants this format.
+    const SDL_CameraSpec *devspec = &device->actual_spec;  // the hardware is set to this format.
+
+    SDL_assert(device->acquire_surface == NULL);   // shouldn't call this function twice on an opened camera!
+    SDL_assert(devspec->format != SDL_PIXELFORMAT_UNKNOWN);  // fix the backend, it should have an actual format by now.
+    SDL_assert(devspec->width >= 0);  // fix the backend, it should have an actual format by now.
+    SDL_assert(devspec->height >= 0);  // fix the backend, it should have an actual format by now.
+
+    if (appspec->width <= 0 || appspec->height <= 0) {
+        appspec->width = devspec->width;
+        appspec->height = devspec->height;
+    }
+
+    if (appspec->format == SDL_PIXELFORMAT_UNKNOWN) {
+        appspec->format = devspec->format;
+    }
+
+    if (appspec->framerate_denominator == 0) {
+        appspec->framerate_numerator = devspec->framerate_numerator;
+        appspec->framerate_denominator = devspec->framerate_denominator;
+    }
+
+    if ((devspec->width == appspec->width) && (devspec->height == appspec->height)) {
+        device->needs_scaling = 0;
+    } else {
+        const Uint64 srcarea = ((Uint64) devspec->width) * ((Uint64) devspec->height);
+        const Uint64 dstarea = ((Uint64) appspec->width) * ((Uint64) appspec->height);
+        if (dstarea <= srcarea) {
+            device->needs_scaling = -1;  // downscaling (or changing to new aspect ratio with same area)
+        } else {
+            device->needs_scaling = 1;  // upscaling
+        }
+    }
+
+    device->needs_conversion = (devspec->format != appspec->format);
+
+    device->acquire_surface = SDL_CreateSurfaceFrom(devspec->width, devspec->height, devspec->format, NULL, 0);
+    if (!device->acquire_surface) {
+        goto failed;
+    }
+    SDL_SetSurfaceColorspace(device->acquire_surface, devspec->colorspace);
+
+    // if we have to scale _and_ convert, we need a middleman surface, since we can't do both changes at once.
+    if (device->needs_scaling && device->needs_conversion) {
+        const bool downscaling_first = (device->needs_scaling < 0);
+        const SDL_CameraSpec *s = downscaling_first ? appspec : devspec;
+        const SDL_PixelFormat fmt = downscaling_first ? devspec->format : appspec->format;
+        device->conversion_surface = SDL_CreateSurface(s->width, s->height, fmt);
+        if (!device->conversion_surface) {
+            goto failed;
+        }
+        SDL_SetSurfaceColorspace(device->conversion_surface, devspec->colorspace);
+    }
+
+    // output surfaces are in the app-requested format. If no conversion is necessary, we'll just use the pointers
+    // the backend fills into acquired_surface, and you can get all the way from DMA access in the camera hardware
+    // to the app without a single copy. Otherwise, these will be full surfaces that hold converted/scaled copies.
+
+    for (int i = 0; i < (SDL_arraysize(device->output_surfaces) - 1); i++) {
+        device->output_surfaces[i].next = &device->output_surfaces[i + 1];
+    }
+    device->empty_output_surfaces.next = device->output_surfaces;
+
+    for (int i = 0; i < SDL_arraysize(device->output_surfaces); i++) {
+        SDL_Surface *surf;
+        if (device->needs_scaling || device->needs_conversion) {
+            surf = SDL_CreateSurface(appspec->width, appspec->height, appspec->format);
+        } else {
+            surf = SDL_CreateSurfaceFrom(appspec->width, appspec->height, appspec->format, NULL, 0);
+        }
+        if (!surf) {
+            ClosePhysicalCamera(device);
+            ReleaseCamera(device);
+            return NULL;
+        }
+        SDL_SetSurfaceColorspace(surf, devspec->colorspace);
+
+        device->output_surfaces[i].surface = surf;
+    }
+
+    return true;
+
+failed:
+    if (device->acquire_surface) {
+        SDL_DestroySurface(device->acquire_surface);
+        device->acquire_surface = NULL;
+    }
+
+    if (device->conversion_surface) {
+        SDL_DestroySurface(device->conversion_surface);
+        device->conversion_surface = NULL;
+    }
+
+    for (int i = 0; i < SDL_arraysize(device->output_surfaces); i++) {
+        SDL_Surface *surf = device->output_surfaces[i].surface;
+        if (surf) {
+            SDL_DestroySurface(surf);
+        }
+    }
+    SDL_zeroa(device->output_surfaces);
+
+    return false;
+}
+
 static void ChooseBestCameraSpec(SDL_Camera *device, const SDL_CameraSpec *spec, SDL_CameraSpec *closest)
 {
     // Find the closest available native format/size...
@@ -1108,85 +1214,19 @@ SDL_Camera *SDL_OpenCamera(SDL_CameraID instance_id, const SDL_CameraSpec *spec)
         return NULL;
     }
 
-    if (spec) {
-        SDL_copyp(&device->spec, spec);
-        if (spec->width <= 0 || spec->height <= 0) {
-            device->spec.width = closest.width;
-            device->spec.height = closest.height;
-        }
-        if (spec->format == SDL_PIXELFORMAT_UNKNOWN) {
-            device->spec.format = closest.format;
-        }
-        if (spec->framerate_denominator == 0) {
-            device->spec.framerate_numerator = closest.framerate_numerator;
-            device->spec.framerate_denominator = closest.framerate_denominator;
-        }
-    } else {
-        SDL_copyp(&device->spec, &closest);
-    }
-
+    SDL_copyp(&device->spec, spec ? spec : &closest);
     SDL_copyp(&device->actual_spec, &closest);
 
-    if ((closest.width == device->spec.width) && (closest.height == device->spec.height)) {
-        device->needs_scaling = 0;
-    } else {
-        const Uint64 srcarea = ((Uint64) closest.width) * ((Uint64) closest.height);
-        const Uint64 dstarea = ((Uint64) device->spec.width) * ((Uint64) device->spec.height);
-        if (dstarea <= srcarea) {
-            device->needs_scaling = -1;  // downscaling (or changing to new aspect ratio with same area)
-        } else {
-            device->needs_scaling = 1;  // upscaling
-        }
-    }
-
-    device->needs_conversion = (closest.format != device->spec.format);
-
-    device->acquire_surface = SDL_CreateSurfaceFrom(closest.width, closest.height, closest.format, NULL, 0);
-    if (!device->acquire_surface) {
-        ClosePhysicalCamera(device);
-        ReleaseCamera(device);
-        return NULL;
-    }
-    SDL_SetSurfaceColorspace(device->acquire_surface, closest.colorspace);
-
-    // if we have to scale _and_ convert, we need a middleman surface, since we can't do both changes at once.
-    if (device->needs_scaling && device->needs_conversion) {
-        const bool downsampling_first = (device->needs_scaling < 0);
-        const SDL_CameraSpec *s = downsampling_first ? &device->spec : &closest;
-        const SDL_PixelFormat fmt = downsampling_first ? closest.format : device->spec.format;
-        device->conversion_surface = SDL_CreateSurface(s->width, s->height, fmt);
-        if (!device->conversion_surface) {
-            ClosePhysicalCamera(device);
-            ReleaseCamera(device);
-            return NULL;
-        }
-        SDL_SetSurfaceColorspace(device->conversion_surface, closest.colorspace);
-    }
-
-    // output surfaces are in the app-requested format. If no conversion is necessary, we'll just use the pointers
-    // the backend fills into acquired_surface, and you can get all the way from DMA access in the camera hardware
-    // to the app without a single copy. Otherwise, these will be full surfaces that hold converted/scaled copies.
-
-    for (int i = 0; i < (SDL_arraysize(device->output_surfaces) - 1); i++) {
-        device->output_surfaces[i].next = &device->output_surfaces[i + 1];
-    }
-    device->empty_output_surfaces.next = device->output_surfaces;
-
-    for (int i = 0; i < SDL_arraysize(device->output_surfaces); i++) {
-        SDL_Surface *surf;
-        if (device->needs_scaling || device->needs_conversion) {
-            surf = SDL_CreateSurface(device->spec.width, device->spec.height, device->spec.format);
-        } else {
-            surf = SDL_CreateSurfaceFrom(device->spec.width, device->spec.height, device->spec.format, NULL, 0);
-        }
-        if (!surf) {
+    // SDL_PIXELFORMAT_UNKNOWN here is taken as a signal that the backend
+    //  doesn't know its format yet (Emscripten waiting for user permission,
+    //  in this case), and the backend will call SDL_PrepareCameraSurfaces()
+    //  itself, later but before the app is allowed to acquire images.
+    if (closest.format != SDL_PIXELFORMAT_UNKNOWN) {
+        if (!SDL_PrepareCameraSurfaces(device)) {
             ClosePhysicalCamera(device);
             ReleaseCamera(device);
             return NULL;
         }
-        SDL_SetSurfaceColorspace(surf, closest.colorspace);
-
-        device->output_surfaces[i].surface = surf;
     }
 
     device->drop_frames = 1;
diff --git a/src/camera/SDL_syscamera.h b/src/camera/SDL_syscamera.h
index 7c485ddeca5ed..30a02f391ac7b 100644
--- a/src/camera/SDL_syscamera.h
+++ b/src/camera/SDL_syscamera.h
@@ -54,6 +54,10 @@ extern void SDL_CameraThreadSetup(SDL_Camera *device);
 extern bool SDL_CameraThreadIterate(SDL_Camera *device);
 extern void SDL_CameraThreadShutdown(SDL_Camera *device);
 
+// Backends can call this if they have to finish initializing later, like Emscripten. Most backends should _not_ call this directly!
+extern bool SDL_PrepareCameraSurfaces(SDL_Camera *device);
+
+
 // common utility functionality to gather up camera specs. Not required!
 typedef struct CameraFormatAddData
 {
diff --git a/src/camera/emscripten/SDL_camera_emscripten.c b/src/camera/emscripten/SDL_camera_emscripten.c
index 986c899c8b6ee..fa2a51144e4d3 100644
--- a/src/camera/emscripten/SDL_camera_emscripten.c
+++ b/src/camera/emscripten/SDL_camera_emscripten.c
@@ -98,17 +98,24 @@ static void EMSCRIPTENCAMERA_CloseDevice(SDL_Camera *device)
     }
 }
 
-static void SDLEmscriptenCameraPermissionOutcome(SDL_Camera *device, int approved, int w, int h, int fps)
+static int SDLEmscriptenCameraPermissionOutcome(SDL_Camera *device, int approved, int w, int h, int fps)
 {
-    device->spec.width = device->actual_spec.width = w;
-    device->spec.height = device->actual_spec.height = h;
-    device->spec.framerate_numerator = device->actual_spec.framerate_numerator = fps;
-    device->spec.framerate_denominator = device->actual_spec.framerate_denominator = 1;
-    if (device->acquire_surface) {
-        device->acquire_surface->w = w;
-        device->acquire_surface->h = h;
+    if (approved) {
+        device->actual_spec.format = SDL_PIXELFORMAT_RGBA32;
+        device->actual_spec.width = w;
+        device->actual_spec.height = h;
+        device->actual_spec.framerate_numerator = fps;
+        device->actual_spec.framerate_denominator = 1;
+
+        if (!SDL_PrepareCameraSurfaces(device)) {
+            // uhoh, we're in trouble. Probably ran out of memory.
+            SDL_LogError(SDL_LOG_CATEGORY_ERROR, "Camera could not prepare surfaces: %s ... revoking approval!", SDL_GetError());
+            approved = 0;  // disconnecting the SDL camera might not be safe here, just mark it as denied by user.
+        }
     }
+
     SDL_CameraPermissionOutcome(device, approved ? true : false);
+    return approved;
 }
 
 static bool EMSCRIPTENCAMERA_OpenDevice(SDL_Camera *device, const SDL_CameraSpec *spec)
@@ -167,40 +174,40 @@ static bool EMSCRIPTENCAMERA_OpenDevice(SDL_Camera *device, const SDL_CameraSpec
                 const actualfps = settings.frameRate;
                 console.log("Camera is opened! Actual spec: (" + actualw + "x" + actualh + "), fps=" + actualfps);
 
-                dynCall('viiiii', outcome, [device, 1, actualw, actualh, actualfps]);
-
-                const video = document.createElement("video");
-                video.width = actualw;
-                video.height = actualh;
-                video.style.display = 'none';    // we need to attach this to a hidden video node so we can read it as pixels.
-                video.srcObject = stream;
-
-                const canvas = document.createElement("canvas");
-                canvas.width = actualw;
-                canvas.height = actualh;
-                canvas.style.display = 'none';    // we need to attach this to a hidden video node so we can read it as pixels.
-
-                const ctx2d = canvas.getContext('2d');
-
-                const SDL3 = Module['SDL3'];
-                SDL3.camera.width = actualw;
-                SDL3.camera.height = actualh;
-                SDL3.camera.fps = actualfps;
-                SDL3.camera.fpsincrms = 1000.0 / actualfps;
-                SDL3.camera.stream = stream;
-                SDL3.camera.video = video;
-                SDL3.camera.canvas = canvas;
-                SDL3.camera.ctx2d = ctx2d;
-                SDL3.camera.next_frame_time = performance.now();
-
-                video.play();
-                video.addEventListener('loadedmetadata', () => {
-                    grabNextCameraFrame();  // start this loop going.
-                });
+                if (dynCall('iiiiii', outcome, [device, 1, actualw, actualh, actualfps])) {
+                    const video = document.createElement("video");
+                    video.width = actualw;
+                    video.height = actualh;
+                    video.style.display = 'none';    // we need to attach this to a hidden video node so we can read it as pixels.
+                    video.srcObject = stream;
+
+                    const canvas = document.createElement("canvas");
+                    canvas.width = actualw;
+                    canvas.height = actualh;
+                    canvas.style.display = 'none';    // we need to attach this to a hidden video node so we can read it as pixels.
+
+                    const ctx2d = canvas.getContext('2d');
+
+                    const SDL3 = Module['SDL3'];
+                    SDL3.camera.width = actualw;
+                    SDL3.camera.height = actualh;
+                    SDL3.camera.fps = actualfps;
+                    SDL3.camera.fpsincrms = 1000.0 / actualfps;
+                    SDL3.camera.stream = stream;
+                    SDL3.camera.video = video;
+                    SDL3.camera.canvas = canvas;
+                    SDL3.camera.ctx2d = ctx2d;
+                    SDL3.camera.next_frame_time = performance.now();
+
+                    video.play();
+                    video.addEventListener('loadedmetadata', () => {
+                        grabNextCameraFrame();  // start this loop going.
+                    });
+                }
             })
             .catch((err) => {
                 console.error("Tried to open camera but it threw an error! " + err.name + ": " +  err.message);
-                dynCall('viiiii', outcome, [device, 0, 0, 0, 0]);   // we call this a permission error, because it probably is.
+                dynCall('iiiiii', outcome, [device, 0, 0, 0, 0]);   // we call this a permission error, because it probably is.
             });
     }, device, spec->width, spec->height, spec->framerate_numerator, spec->framerate_denominator, SDLEmscriptenCameraPermissionOutcome, SDL_CameraThreadIterate);