Unable to render sprites in batches / instances using SDL3 GPU API and Odin. SSBO is not available to shader.

I am new to graphics in general, so I am trying to learn from others.

After drawing a triangle, a quad and then a sprite on top, I have been trying to get a batch of them on screen using instancing.


I learnt batching / instancing from 2 articles and a GitHub ticket:


However, I have been unsuccessful in rendering out all the sprites in the list.
Only 1 sprite shows up, as if the SSBO is not getting sent to the GPU.

bunnymark.odin

package main

import "base:runtime"

import "core:log"
import "core:mem"

import "core:fmt"

import "core:math"
import "core:math/linalg"

import sdl "vendor:sdl3"
import img "vendor:sdl3/image"

default_context: runtime.Context

BUNNIES :: 5
MAX_FPS :: f64(60)
FIXED_DELTA_TIME :: 1 / MAX_FPS
MAX_FRAME_SKIP :: 5

gpu: ^sdl.GPUDevice
window: ^sdl.Window
pipeline: ^sdl.GPUGraphicsPipeline
sampler: ^sdl.GPUSampler

win_size: [2]i32

// glslc shader.glsl.frag -o shader.spv.frag
// glslc shader.glsl.vert -o shader.spv.vert
// spirv-cross --msl shader.spv.frag --output shader_frag.metal
// spirv-cross --msl shader.spv.vert --output shader_vert.metal
frag_shader_code := #load("shader_frag.metal")
vert_shader_code := #load("shader_vert.metal")

Vec3 :: [3]f32
Vec2 :: [2]f32
Vec4 :: [4]f32

SpriteData :: struct #max_field_align(16) {
	position: Vec2,
}

init :: proc() {
	sdl.SetLogPriorities(.VERBOSE)

	ok := sdl.Init({.VIDEO}); assert(ok)

	sdl.SetHint(sdl.HINT_RENDER_GPU_DEBUG, "1")
	sdl.SetHint(sdl.HINT_RENDER_VULKAN_DEBUG, "1")

	window = sdl.CreateWindow("Bunnymark SDL3", 1280, 780, {}); assert(window != nil)
	ok = sdl.RaiseWindow(window); assert(ok)

	gpu = sdl.CreateGPUDevice({.MSL, .DXIL, .SPIRV}, true, nil); assert(gpu != nil)
	ok = sdl.ClaimWindowForGPUDevice(gpu, window); assert(ok)

	// Toggle VSync in SDL GPU API - to test maximum FPS
	ok = sdl.SetGPUSwapchainParameters(gpu, window, .SDR, .IMMEDIATE); assert(ok)

	ok = sdl.GetWindowSize(window, &win_size.x, &win_size.y); assert(ok)
}

setup_pipeline :: proc() {
	vert_shader := sdl.CreateGPUShader(
		gpu,
		{
			code_size = len(vert_shader_code),
			code = raw_data(vert_shader_code),
			entrypoint = "main0",
			format = {.MSL},
			stage = .VERTEX,
			num_uniform_buffers = 1,    // for UBO
			num_samplers = 0,
			num_storage_buffers = 1,    // for SSBO
			num_storage_textures = 0,
			props = 0
		},
	)
	frag_shader := sdl.CreateGPUShader(
		gpu,
		{
			code_size = len(frag_shader_code),
			code = raw_data(frag_shader_code),
			entrypoint = "main0",
			format = {.MSL},
			stage = .FRAGMENT,
			num_uniform_buffers = 0,
			num_samplers = 1,
			num_storage_buffers = 0,
			num_storage_textures = 0,
			props = 0
		},
	)

	pipeline = sdl.CreateGPUGraphicsPipeline(
		gpu,
		{
			fragment_shader = frag_shader,
			vertex_shader = vert_shader,
			primitive_type = .TRIANGLELIST,
			rasterizer_state = {
				fill_mode = .FILL,
	            cull_mode = .NONE,          	 // disable face culling, in case sprite is rendered
	            front_face = .COUNTER_CLOCKWISE, // not critical when culling=NONE
	            depth_bias_constant_factor = 0.0,
	            depth_bias_slope_factor = 0.0,
			},
			// --- Depth-stencil state ---
	        depth_stencil_state = {
	            enable_depth_test = false,  // disable depth testing
	            enable_depth_write = false, // don't write to depth buffer
	            compare_op = .ALWAYS,
	        },
			target_info = {
				num_color_targets = 1,
				color_target_descriptions = &(sdl.GPUColorTargetDescription {
					format = sdl.GetGPUSwapchainTextureFormat(gpu, window),
					blend_state = (sdl.GPUColorTargetBlendState){
						enable_blend = true,
						alpha_blend_op = .ADD,
						color_blend_op = .ADD,
						color_write_mask = {.R, .G, .B, .A},
						enable_color_write_mask = true,
						src_alpha_blendfactor = .SRC_ALPHA,
						dst_alpha_blendfactor = .ONE_MINUS_SRC_ALPHA,
						src_color_blendfactor = .SRC_ALPHA,
						dst_color_blendfactor = .ONE_MINUS_SRC_ALPHA,
					}
				}),
				depth_stencil_format = .INVALID // no depth buffer
			},
		},
	)

	sdl.ReleaseGPUShader(gpu, vert_shader)
	sdl.ReleaseGPUShader(gpu, frag_shader)

	sampler = sdl.CreateGPUSampler(gpu, {
		min_filter = .NEAREST,
		mag_filter = .NEAREST,
		mipmap_mode = .NEAREST,
		address_mode_u = .CLAMP_TO_EDGE,
		address_mode_v = .CLAMP_TO_EDGE,
		address_mode_w = .CLAMP_TO_EDGE,
	})
}

load_bunnies_and_texture :: proc() -> (^sdl.GPUBuffer, ^sdl.GPUTexture) {
	surface := img.Load("./assets/wabbit_alpha.png"); assert(surface != nil)
	pixels_byte_size := surface.w * surface.h * 4
	defer sdl.DestroySurface(surface)

	texture := sdl.CreateGPUTexture(gpu, {
		type = .D2,
		format = .R8G8B8A8_UNORM, // Each color 8 bits, and Unorm means value between 0 - 1 inside shader
		usage = {.SAMPLER},
		width = u32(surface.w),
		height = u32(surface.h),
		layer_count_or_depth = 1,
		num_levels = 1,
	})

	sprites_instances: [dynamic]SpriteData = make([dynamic]SpriteData, 0, BUNNIES)
	defer delete(sprites_instances)

	for i := 0; i < BUNNIES; i += 1 {
		append(&sprites_instances, SpriteData {
			position = Vec2{
				2 * f32(i32(sdl.rand(1280)) % 1280) - 1,
				2 * f32(i32(sdl.rand(720)) % 720) - 1,
			},
		})
	}

	num_sprites_instances := len(sprites_instances)
	sprites_instances_byte_size := num_sprites_instances * size_of(SpriteData)

	bunnies := sdl.CreateGPUBuffer(gpu, {
		usage = {.GRAPHICS_STORAGE_READ},
		size = u32(sprites_instances_byte_size)
	})

	transfer_buf := sdl.CreateGPUTransferBuffer(gpu, {
		usage = .UPLOAD,
		size = u32(sprites_instances_byte_size)
	})
	tex_transfer_buf := sdl.CreateGPUTransferBuffer(gpu, {
		usage = .UPLOAD,
		size = u32(pixels_byte_size)
	})

	transfer_mem := sdl.MapGPUTransferBuffer(gpu, transfer_buf, false)
	mem.copy(transfer_mem, raw_data(sprites_instances), sprites_instances_byte_size)
	sdl.UnmapGPUTransferBuffer(gpu, transfer_buf)

	tex_transfer_mem := sdl.MapGPUTransferBuffer(gpu, tex_transfer_buf, false)
	mem.copy(tex_transfer_mem, surface.pixels, int(pixels_byte_size))
	sdl.UnmapGPUTransferBuffer(gpu, tex_transfer_buf)

	copy_cmd_buf := sdl.AcquireGPUCommandBuffer(gpu)
	copy_pass := sdl.BeginGPUCopyPass(copy_cmd_buf)
	sdl.UploadToGPUBuffer(copy_pass,
		{ transfer_buffer = transfer_buf },
		{ buffer = bunnies, size = u32(sprites_instances_byte_size) },
		true
	)
	sdl.UploadToGPUTexture(copy_pass,
		{ transfer_buffer = tex_transfer_buf },
		{ texture = texture, w = u32(surface.w), h = u32(surface.h), d = 1 },
		false
	)
	sdl.EndGPUCopyPass(copy_pass)
	ok := sdl.SubmitGPUCommandBuffer(copy_cmd_buf); assert(ok)

	sdl.ReleaseGPUTransferBuffer(gpu, transfer_buf)
	sdl.ReleaseGPUTransferBuffer(gpu, tex_transfer_buf)

	return bunnies, texture
}

main :: proc() {
	context.logger = log.create_console_logger()
	default_context := context

	init()
	setup_pipeline()
	bunnies, texture := load_bunnies_and_texture()

	time_accumulator: f64 = 0
	frame_count: u16 = 0

	updates: int
	accumulator, new_time, last_time, dt, alpha: f64 = 0, 0, 0, 0, 0
	current_time := f64(sdl.GetPerformanceCounter()) / f64(sdl.GetPerformanceFrequency())

	main_loop: for {
		free_all(context.temp_allocator)

		new_time = f64(sdl.GetPerformanceCounter()) / f64(sdl.GetPerformanceFrequency())
		dt = new_time - current_time
		current_time = new_time

		// Process Events
		ev: sdl.Event
		for sdl.PollEvent(&ev) {
			#partial switch ev.type {
			case .QUIT:
				break main_loop
			case .KEY_DOWN:
				if ev.key.scancode == .ESCAPE do break main_loop
			}
		}

		// Render
		// Acquire command buffer
		cmd_buf := sdl.AcquireGPUCommandBuffer(gpu)

		// Acquire swapchain texture
		swapchain_tex: ^sdl.GPUTexture
		ok := sdl.WaitAndAcquireGPUSwapchainTexture(
			cmd_buf,
			window,
			&swapchain_tex,
			nil,
			nil,
		); assert(ok)

		if swapchain_tex != nil {
			// Begin render passes
			color_target := sdl.GPUColorTargetInfo {
				texture     = swapchain_tex,
				load_op     = .CLEAR,
				clear_color = {0.5, 1, 0.8, 1},
				store_op    = .STORE,
				cycle 		= false,
			}
			render_pass := sdl.BeginGPURenderPass(cmd_buf, &color_target, 1, nil)

			// Draw Stuff
			// - bind pipeline
			sdl.BindGPUGraphicsPipeline(render_pass, pipeline)

			// - bind SSBO
			sdl.BindGPUVertexStorageBuffers(render_pass, 0, &bunnies, 1)

			// - bind sampler and texture to draw call
			sdl.BindGPUFragmentSamplers(render_pass, 0, &(sdl.GPUTextureSamplerBinding {
				texture = texture,
				sampler = sampler,
			}), 1)

			// - draw calls
			sdl.DrawGPUPrimitives(render_pass, 6, BUNNIES, 0, 0)

			// End render pass
			sdl.EndGPURenderPass(render_pass)
		}
		
		// Submit command buffer
		ok = sdl.SubmitGPUCommandBuffer(cmd_buf); assert(ok)
		
		frame_count += 1
		time_accumulator += dt
		if (time_accumulator >= 1) {
			fmt.println(math.ceil(f64(frame_count) / time_accumulator))
			frame_count = 0
			time_accumulator = 0
		}
	}
}

shader.glsl.vert

#version 460 // Compiling this shader under Vulkan GLSL (i.e. #version 460 + set/binding qualifiers), which follows the GL_KHR_vulkan_glsl specification.

struct SpriteData {
  vec2  position;
};

layout (std140, set = 0, binding = 1) readonly buffer DataBuffer {
  SpriteData sprites[];   
};

layout (location = 0) out vec2 out_uv;

int triangle_indices[6] = int[](
  0, 1, 2,
  2, 1, 3
);
vec2 vertex_pos[4] = vec2[](
  vec2(0.0, 0.0), // tl
  vec2(0.0, 1.0), // tr
  vec2(1.0, 0.0), // bl
  vec2(1.0, 1.0)  // br
);

// In Vulkan GLSL, the built-in variable name for the vertex index is different: gl_VertexIndex
void main() {
  uint sprite_index = uint(gl_InstanceIndex);
  uint vertex_index = uint(triangle_indices[gl_VertexIndex % 6]); // gl_VertexIndex
  SpriteData sprite = sprites[sprite_index];

  vec2 sprite_coord = vertex_pos[vertex_index];
  sprite_coord.xy += sprite.position.xy;

  vec2 scaled_pos = vec2(
    sprite_coord.x * (52.0 / 1280.0) - (52.0 / 1280.0),
    sprite_coord.y * (74.0 / 720.0) - (74.0 / 720.0)
  );
  vec2 world_pos = scaled_pos + sprite.position.xy;

  gl_Position = vec4(world_pos.x, -world_pos.y, 0.0, 1.0);

  out_uv      = vertex_pos[vertex_index];
}

shader.glsl.frag

#version 460

layout(location = 0)  in vec2 in_uv;

layout(set = 2, binding = 0) uniform sampler2D tex_sampler;

layout(location = 0) out vec4 frag_color;

void main() {
  frag_color = texture(tex_sampler, in_uv);
}

I am using a Mac M1 device, so to transpile GLSL, here are the commands:

glslc shader.glsl.frag -o shader.spv.frag
glslc shader.glsl.vert -o shader.spv.vert
spirv-cross --msl shader.spv.frag --output shader_frag.metal
spirv-cross --msl shader.spv.vert --output shader_vert.metal

Bunny: wabbit_alpha.png

wabbit_alpha


Only 1 Bunny shows up!


StackOverflow ticket: https://stackoverflow.com/questions/79798372/unable-to-render-sprites-in-batches-instances-using-sdl3-gpu-api-and-odin-ssb
GitHub comment: [SDL3] GPU: frametimes appear to be higher than expected in specific scenarios ยท Issue #11537 ยท libsdl-org/SDL ยท GitHub


I always wanted to work with SDL, but never attempted it until Odin lang came along. I want to get good at it.

Use shadercross instead of spirv-cross to transpile your shaders. It knows how to configure spirv-cross to generate shaders the way SDL_GPU expects

1 Like

I will use Shadercross and update here.

โ€”โ€”โ€”โ€”โ€”โ€” UPDATE โ€”โ€”โ€”โ€”โ€”โ€”โ€”
Hello @sjr,

Thank you for your suggestion.

I used SDL_Shadercross to translate from SPIRV to MSL, and it had slightly different output, with a buffer number that was missing in translation done by spirv-cross.

However, only a single sprite showed up.

So, just to be certain, I will rewrite the shader to HLSL and then directly use SDL_Shadercross to convert it to MSL.

@sjr,

I got it to work.

Thank you so much.

I got 600,000 sprites on screen at 60 fps in debug mode, on my Mac M1.

I will figure out how to move them around like in Bunnymarks.

โ€”โ€”โ€”โ€”โ€”-

I will add the code after I introduce movement.

โ€”โ€”โ€”โ€”-

P.S. I am not trying to do benchmarks, Iโ€™m just trying learn by trying to do something that looked doable as a beginner.

Also, I will attempt this with a compute shader as well, to learn the difference.