SDL: Added support for linear and pixel art scaling for palettized textures

From 134b47730bcc0933d1cf3ce823c1efdf03037a1a Mon Sep 17 00:00:00 2001
From: Sam Lantinga <[EMAIL REDACTED]>
Date: Sat, 4 Oct 2025 14:16:02 -0700
Subject: [PATCH] Added support for linear and pixel art scaling for palettized
 textures

---
 include/SDL3/SDL_render.h                     |    1 -
 src/render/SDL_render.c                       |   10 +-
 .../direct3d/D3D9_PixelShader_Palette.h       |  276 +-
 .../direct3d/D3D9_PixelShader_Palette.hlsl    |   19 -
 .../direct3d/D3D9_PixelShader_Palette.hlsli   |   49 +
 .../D3D9_PixelShader_Palette_Linear.h         |  209 +
 .../D3D9_PixelShader_Palette_Linear.hlsl      |    7 +
 .../D3D9_PixelShader_Palette_Nearest.h        |   95 +
 .../D3D9_PixelShader_Palette_Nearest.hlsl     |    7 +
 src/render/direct3d/SDL_render_d3d.c          |   34 +-
 src/render/direct3d/SDL_shaders_d3d.c         |   11 +-
 src/render/direct3d/SDL_shaders_d3d.h         |    3 +-
 src/render/direct3d/compile_shaders.bat       |    3 +-
 .../direct3d11/D3D11_PixelShader_Advanced.h   |  804 ++-
 .../direct3d11/D3D11_PixelShader_Common.hlsli |   63 +-
 src/render/direct3d11/SDL_render_d3d11.c      |   52 +-
 .../direct3d12/D3D12_PixelShader_Advanced.h   | 2014 ++++----
 .../direct3d12/D3D12_PixelShader_Common.hlsli |   63 +-
 src/render/direct3d12/SDL_render_d3d12.c      |   52 +-
 src/render/gpu/SDL_render_gpu.c               |   70 +-
 .../gpu/shaders/texture_advanced.frag.dxil.h  |  886 ++--
 .../gpu/shaders/texture_advanced.frag.hlsl    |   65 +-
 .../gpu/shaders/texture_advanced.frag.msl.h   |  977 ++--
 .../gpu/shaders/texture_advanced.frag.spv.h   | 2154 ++++----
 src/render/metal/SDL_render_metal.m           |   54 +-
 src/render/metal/SDL_shaders_metal.metal      |   61 +-
 src/render/metal/SDL_shaders_metal_ios.h      | 1182 +++--
 .../metal/SDL_shaders_metal_iphonesimulator.h | 1398 +++---
 src/render/metal/SDL_shaders_metal_macos.h    | 4357 +++++++++--------
 src/render/metal/SDL_shaders_metal_tvos.h     | 1182 +++--
 .../metal/SDL_shaders_metal_tvsimulator.h     | 1188 +++--
 src/render/opengl/SDL_render_gl.c             |   67 +-
 src/render/opengl/SDL_shaders_gl.c            |  169 +-
 src/render/opengl/SDL_shaders_gl.h            |    6 +-
 src/render/opengles2/SDL_render_gles2.c       |   75 +-
 src/render/opengles2/SDL_shaders_gles2.c      |  179 +-
 src/render/opengles2/SDL_shaders_gles2.h      |   16 +-
 src/render/vulkan/SDL_render_vulkan.c         |   48 +-
 .../vulkan/VULKAN_PixelShader_Advanced.h      |  741 +--
 src/render/vulkan/VULKAN_PixelShader_Colors.h |   18 +-
 .../vulkan/VULKAN_PixelShader_Common.hlsli    |   57 +-
 .../vulkan/VULKAN_PixelShader_Textures.h      |   18 +-
 test/testscale.c                              |   34 +-
 43 files changed, 11262 insertions(+), 7512 deletions(-)
 delete mode 100644 src/render/direct3d/D3D9_PixelShader_Palette.hlsl
 create mode 100644 src/render/direct3d/D3D9_PixelShader_Palette.hlsli
 create mode 100755 src/render/direct3d/D3D9_PixelShader_Palette_Linear.h
 create mode 100644 src/render/direct3d/D3D9_PixelShader_Palette_Linear.hlsl
 create mode 100755 src/render/direct3d/D3D9_PixelShader_Palette_Nearest.h
 create mode 100644 src/render/direct3d/D3D9_PixelShader_Palette_Nearest.hlsl

diff --git a/include/SDL3/SDL_render.h b/include/SDL3/SDL_render.h
index 60c3962b35edf..195e580290b72 100644
--- a/include/SDL3/SDL_render.h
+++ b/include/SDL3/SDL_render.h
@@ -1234,7 +1234,6 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetTextureBlendMode(SDL_Texture *texture, S
  * The default texture scale mode is SDL_SCALEMODE_LINEAR.
  *
  * If the scale mode is not supported, the closest supported mode is chosen.
- * Palettized textures will always use SDL_SCALEMODE_NEAREST.
  *
  * \param texture the texture to update.
  * \param scaleMode the SDL_ScaleMode to use for texture scaling.
diff --git a/src/render/SDL_render.c b/src/render/SDL_render.c
index 0753e5c719ef8..44f293c05dc50 100644
--- a/src/render/SDL_render.c
+++ b/src/render/SDL_render.c
@@ -1525,11 +1525,7 @@ SDL_Texture *SDL_CreateTextureWithProperties(SDL_Renderer *renderer, SDL_Propert
     texture->color.b = 1.0f;
     texture->color.a = 1.0f;
     texture->blendMode = SDL_ISPIXELFORMAT_ALPHA(format) ? SDL_BLENDMODE_BLEND : SDL_BLENDMODE_NONE;
-    if (SDL_ISPIXELFORMAT_INDEXED(format)) {
-        texture->scaleMode = SDL_SCALEMODE_NEAREST;
-    } else {
-        texture->scaleMode = renderer->scale_mode;
-    }
+    texture->scaleMode = renderer->scale_mode;
     texture->view.pixel_w = w;
     texture->view.pixel_h = h;
     texture->view.viewport.w = -1;
@@ -2162,12 +2158,8 @@ bool SDL_SetTextureScaleMode(SDL_Texture *texture, SDL_ScaleMode scaleMode)
 
     switch (scaleMode) {
     case SDL_SCALEMODE_NEAREST:
-        break;
     case SDL_SCALEMODE_PIXELART:
     case SDL_SCALEMODE_LINEAR:
-        if (SDL_ISPIXELFORMAT_INDEXED(texture->format)) {
-            scaleMode = SDL_SCALEMODE_NEAREST;
-        }
         break;
     default:
         return SDL_InvalidParamError("scaleMode");
diff --git a/src/render/direct3d/D3D9_PixelShader_Palette.h b/src/render/direct3d/D3D9_PixelShader_Palette.h
index 9bc727ee503c9..201c337c0a1da 100644
--- a/src/render/direct3d/D3D9_PixelShader_Palette.h
+++ b/src/render/direct3d/D3D9_PixelShader_Palette.h
@@ -6,83 +6,263 @@
 //
 //   sampler2D image;
 //   sampler1D palette;
+//   float4 texel_size;
+//   float texture_type;
 //
 //
 // Registers:
 //
 //   Name         Reg   Size
 //   ------------ ----- ----
+//   texture_type c0       1
+//   texel_size   c1       1
 //   image        s0       1
 //   palette      s1       1
 //
 
     ps_2_0
-    def c0, 0.99609375, 0.001953125, 0, 0
-    dcl t0.xy
+    def c2, -1, 255, 0.5, 0.00390625
+    def c3, -2, 0, 0, 0
+    def c4, 1, 0, 0, 1
     dcl v0
+    dcl t0.xy
     dcl_2d s0
     dcl_2d s1
-    texld r0, t0, s0
-    mad r0.xy, r0.x, c0.x, c0.y
-    texld r0, r0, s1
+    mov r0.xz, c2
+    mad r1.x, t0.x, c1.z, r0.z
+    mad r1.y, t0.y, c1.w, r0.z
+    frc r0.yz, r1.zxyw
+    add r1.xy, -r0.yzxw, r1
+    add r1.zw, r1.wzyx, -c2.z
+    add r1.xy, r1, c2.z
+    mul r1.xy, r1, c1
+    mul r2.xy, r1.wzyx, c1
+    mov r3.x, r2.x
+    mov r3.y, r1.y
+    mov r4.y, r2.y
+    mov r4.x, r1.x
+    texld r3, r3, s0
+    texld r2, r2, s0
+    texld r1, r1, s0
+    texld r4, r4, s0
+    texld r5, t0, s0
+    mad r0.w, r3.x, c2.y, c2.z
+    mul r3.xy, r0.w, c2.w
+    mad r0.w, r2.x, c2.y, c2.z
+    mul r2.xy, r0.w, c2.w
+    mad r0.w, r1.x, c2.y, c2.z
+    mul r1.xy, r0.w, c2.w
+    mad r0.w, r4.x, c2.y, c2.z
+    mul r4.xy, r0.w, c2.w
+    mad r0.w, r5.x, c2.y, c2.z
+    mul r5.xy, r0.w, c2.w
+    texld r3, r3, s1
+    texld r2, r2, s1
+    texld r1, r1, s1
+    texld r4, r4, s1
+    texld r5, r5, s1
+    lrp r6, r0.z, r3, r2
+    lrp r2, r0.z, r1, r4
+    lrp r1, r0.y, r2, r6
+    mov r2.x, c0.x
+    add r0.y, r2.x, c3.x
+    mul r0.y, r0.y, r0.y
+    cmp r1, -r0.y, r1, c4
+    add r0.x, r0.x, c0.x
+    mul r0.x, r0.x, r0.x
+    cmp r0, -r0.x, r5, r1
     mul r0, r0, v0
     mov oC0, r0
 
-// approximately 5 instruction slots used (2 texture, 3 arithmetic)
+// approximately 45 instruction slots used (10 texture, 35 arithmetic)
 #endif
 
 const BYTE g_ps20_main[] =
 {
       0,   2, 255, 255, 254, 255,
-     42,   0,  67,  84,  65,  66,
-     28,   0,   0,   0, 123,   0,
+     67,   0,  67,  84,  65,  66,
+     28,   0,   0,   0, 223,   0,
       0,   0,   0,   2, 255, 255,
-      2,   0,   0,   0,  28,   0,
+      4,   0,   0,   0,  28,   0,
       0,   0,   0,   1,   0,   0,
-    116,   0,   0,   0,  68,   0,
+    216,   0,   0,   0, 108,   0,
       0,   0,   3,   0,   0,   0,
-      1,   0,   0,   0,  76,   0,
+      1,   0,   0,   0, 116,   0,
       0,   0,   0,   0,   0,   0,
-     92,   0,   0,   0,   3,   0,
+    132,   0,   0,   0,   3,   0,
       1,   0,   1,   0,   0,   0,
-    100,   0,   0,   0,   0,   0,
-      0,   0, 105, 109,  97, 103,
-    101,   0, 171, 171,   4,   0,
-     12,   0,   1,   0,   1,   0,
-      1,   0,   0,   0,   0,   0,
-      0,   0, 112,  97, 108, 101,
-    116, 116, 101,   0,   4,   0,
-     11,   0,   1,   0,   1,   0,
-      1,   0,   0,   0,   0,   0,
-      0,   0, 112, 115,  95,  50,
-     95,  48,   0,  77, 105,  99,
-    114, 111, 115, 111, 102, 116,
-     32,  40,  82,  41,  32,  72,
-     76,  83,  76,  32,  83, 104,
-     97, 100, 101, 114,  32,  67,
-    111, 109, 112, 105, 108, 101,
-    114,  32,  49,  48,  46,  49,
-      0, 171,  81,   0,   0,   5,
-      0,   0,  15, 160,   0,   0,
-    127,  63,   0,   0,   0,  59,
+    140,   0,   0,   0,   0,   0,
+      0,   0, 156,   0,   0,   0,
+      2,   0,   1,   0,   1,   0,
+      0,   0, 168,   0,   0,   0,
+      0,   0,   0,   0, 184,   0,
+      0,   0,   2,   0,   0,   0,
+      1,   0,   0,   0, 200,   0,
+      0,   0,   0,   0,   0,   0,
+    105, 109,  97, 103, 101,   0,
+    171, 171,   4,   0,  12,   0,
+      1,   0,   1,   0,   1,   0,
+      0,   0,   0,   0,   0,   0,
+    112,  97, 108, 101, 116, 116,
+    101,   0,   4,   0,  11,   0,
+      1,   0,   1,   0,   1,   0,
+      0,   0,   0,   0,   0,   0,
+    116, 101, 120, 101, 108,  95,
+    115, 105, 122, 101,   0, 171,
+      1,   0,   3,   0,   1,   0,
+      4,   0,   1,   0,   0,   0,
+      0,   0,   0,   0, 116, 101,
+    120, 116, 117, 114, 101,  95,
+    116, 121, 112, 101,   0, 171,
+    171, 171,   0,   0,   3,   0,
+      1,   0,   1,   0,   1,   0,
       0,   0,   0,   0,   0,   0,
-      0,   0,  31,   0,   0,   2,
-      0,   0,   0, 128,   0,   0,
-      3, 176,  31,   0,   0,   2,
-      0,   0,   0, 128,   0,   0,
-     15, 144,  31,   0,   0,   2,
-      0,   0,   0, 144,   0,   8,
-     15, 160,  31,   0,   0,   2,
-      0,   0,   0, 144,   1,   8,
-     15, 160,  66,   0,   0,   3,
-      0,   0,  15, 128,   0,   0,
-    228, 176,   0,   8, 228, 160,
-      4,   0,   0,   4,   0,   0,
-      3, 128,   0,   0,   0, 128,
-      0,   0,   0, 160,   0,   0,
-     85, 160,  66,   0,   0,   3,
-      0,   0,  15, 128,   0,   0,
+    112, 115,  95,  50,  95,  48,
+      0,  77, 105,  99, 114, 111,
+    115, 111, 102, 116,  32,  40,
+     82,  41,  32,  72,  76,  83,
+     76,  32,  83, 104,  97, 100,
+    101, 114,  32,  67, 111, 109,
+    112, 105, 108, 101, 114,  32,
+     49,  48,  46,  49,   0, 171,
+     81,   0,   0,   5,   2,   0,
+     15, 160,   0,   0, 128, 191,
+      0,   0, 127,  67,   0,   0,
+      0,  63,   0,   0, 128,  59,
+     81,   0,   0,   5,   3,   0,
+     15, 160,   0,   0,   0, 192,
+      0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0,   0,   0,
+     81,   0,   0,   5,   4,   0,
+     15, 160,   0,   0, 128,  63,
+      0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0, 128,  63,
+     31,   0,   0,   2,   0,   0,
+      0, 128,   0,   0,  15, 144,
+     31,   0,   0,   2,   0,   0,
+      0, 128,   0,   0,   3, 176,
+     31,   0,   0,   2,   0,   0,
+      0, 144,   0,   8,  15, 160,
+     31,   0,   0,   2,   0,   0,
+      0, 144,   1,   8,  15, 160,
+      1,   0,   0,   2,   0,   0,
+      5, 128,   2,   0, 228, 160,
+      4,   0,   0,   4,   1,   0,
+      1, 128,   0,   0,   0, 176,
+      1,   0, 170, 160,   0,   0,
+    170, 128,   4,   0,   0,   4,
+      1,   0,   2, 128,   0,   0,
+     85, 176,   1,   0, 255, 160,
+      0,   0, 170, 128,  19,   0,
+      0,   2,   0,   0,   6, 128,
+      1,   0, 210, 128,   2,   0,
+      0,   3,   1,   0,   3, 128,
+      0,   0, 201, 129,   1,   0,
+    228, 128,   2,   0,   0,   3,
+      1,   0,  12, 128,   1,   0,
+     27, 128,   2,   0, 170, 161,
+      2,   0,   0,   3,   1,   0,
+      3, 128,   1,   0, 228, 128,
+      2,   0, 170, 160,   5,   0,
+      0,   3,   1,   0,   3, 128,
+      1,   0, 228, 128,   1,   0,
+    228, 160,   5,   0,   0,   3,
+      2,   0,   3, 128,   1,   0,
+     27, 128,   1,   0, 228, 160,
+      1,   0,   0,   2,   3,   0,
+      1, 128,   2,   0,   0, 128,
+      1,   0,   0,   2,   3,   0,
+      2, 128,   1,   0,  85, 128,
+      1,   0,   0,   2,   4,   0,
+      2, 128,   2,   0,  85, 128,
+      1,   0,   0,   2,   4,   0,
+      1, 128,   1,   0,   0, 128,
+     66,   0,   0,   3,   3,   0,
+     15, 128,   3,   0, 228, 128,
+      0,   8, 228, 160,  66,   0,
+      0,   3,   2,   0,  15, 128,
+      2,   0, 228, 128,   0,   8,
+    228, 160,  66,   0,   0,   3,
+      1,   0,  15, 128,   1,   0,
+    228, 128,   0,   8, 228, 160,
+     66,   0,   0,   3,   4,   0,
+     15, 128,   4,   0, 228, 128,
+      0,   8, 228, 160,  66,   0,
+      0,   3,   5,   0,  15, 128,
+      0,   0, 228, 176,   0,   8,
+    228, 160,   4,   0,   0,   4,
+      0,   0,   8, 128,   3,   0,
+      0, 128,   2,   0,  85, 160,
+      2,   0, 170, 160,   5,   0,
+      0,   3,   3,   0,   3, 128,
+      0,   0, 255, 128,   2,   0,
+    255, 160,   4,   0,   0,   4,
+      0,   0,   8, 128,   2,   0,
+      0, 128,   2,   0,  85, 160,
+      2,   0, 170, 160,   5,   0,
+      0,   3,   2,   0,   3, 128,
+      0,   0, 255, 128,   2,   0,
+    255, 160,   4,   0,   0,   4,
+      0,   0,   8, 128,   1,   0,
+      0, 128,   2,   0,  85, 160,
+      2,   0, 170, 160,   5,   0,
+      0,   3,   1,   0,   3, 128,
+      0,   0, 255, 128,   2,   0,
+    255, 160,   4,   0,   0,   4,
+      0,   0,   8, 128,   4,   0,
+      0, 128,   2,   0,  85, 160,
+      2,   0, 170, 160,   5,   0,
+      0,   3,   4,   0,   3, 128,
+      0,   0, 255, 128,   2,   0,
+    255, 160,   4,   0,   0,   4,
+      0,   0,   8, 128,   5,   0,
+      0, 128,   2,   0,  85, 160,
+      2,   0, 170, 160,   5,   0,
+      0,   3,   5,   0,   3, 128,
+      0,   0, 255, 128,   2,   0,
+    255, 160,  66,   0,   0,   3,
+      3,   0,  15, 128,   3,   0,
     228, 128,   1,   8, 228, 160,
+     66,   0,   0,   3,   2,   0,
+     15, 128,   2,   0, 228, 128,
+      1,   8, 228, 160,  66,   0,
+      0,   3,   1,   0,  15, 128,
+      1,   0, 228, 128,   1,   8,
+    228, 160,  66,   0,   0,   3,
+      4,   0,  15, 128,   4,   0,
+    228, 128,   1,   8, 228, 160,
+     66,   0,   0,   3,   5,   0,
+     15, 128,   5,   0, 228, 128,
+      1,   8, 228, 160,  18,   0,
+      0,   4,   6,   0,  15, 128,
+      0,   0, 170, 128,   3,   0,
+    228, 128,   2,   0, 228, 128,
+     18,   0,   0,   4,   2,   0,
+     15, 128,   0,   0, 170, 128,
+      1,   0, 228, 128,   4,   0,
+    228, 128,  18,   0,   0,   4,
+      1,   0,  15, 128,   0,   0,
+     85, 128,   2,   0, 228, 128,
+      6,   0, 228, 128,   1,   0,
+      0,   2,   2,   0,   1, 128,
+      0,   0,   0, 160,   2,   0,
+      0,   3,   0,   0,   2, 128,
+      2,   0,   0, 128,   3,   0,
+      0, 160,   5,   0,   0,   3,
+      0,   0,   2, 128,   0,   0,
+     85, 128,   0,   0,  85, 128,
+     88,   0,   0,   4,   1,   0,
+     15, 128,   0,   0,  85, 129,
+      1,   0, 228, 128,   4,   0,
+    228, 160,   2,   0,   0,   3,
+      0,   0,   1, 128,   0,   0,
+      0, 128,   0,   0,   0, 160,
+      5,   0,   0,   3,   0,   0,
+      1, 128,   0,   0,   0, 128,
+      0,   0,   0, 128,  88,   0,
+      0,   4,   0,   0,  15, 128,
+      0,   0,   0, 129,   5,   0,
+    228, 128,   1,   0, 228, 128,
       5,   0,   0,   3,   0,   0,
      15, 128,   0,   0, 228, 128,
       0,   0, 228, 144,   1,   0,
diff --git a/src/render/direct3d/D3D9_PixelShader_Palette.hlsl b/src/render/direct3d/D3D9_PixelShader_Palette.hlsl
deleted file mode 100644
index 99dd4798d06df..0000000000000
--- a/src/render/direct3d/D3D9_PixelShader_Palette.hlsl
+++ /dev/null
@@ -1,19 +0,0 @@
-
-uniform sampler2D image;
-uniform sampler1D palette;
-
-struct PixelShaderInput
-{
-    float4 pos : SV_POSITION;
-    float2 tex : TEXCOORD0;
-    float4 color : COLOR0;
-};
-
-float4 main(PixelShaderInput input) : SV_TARGET
-{
-    float4 Output;
-    float index;
-    index = tex2D(image, input.tex).r;
-    Output = tex1D(palette, index * (255. / 256) + (0.5 / 256));
-    return Output * input.color;
-}
diff --git a/src/render/direct3d/D3D9_PixelShader_Palette.hlsli b/src/render/direct3d/D3D9_PixelShader_Palette.hlsli
new file mode 100644
index 0000000000000..5bd5a58e66b76
--- /dev/null
+++ b/src/render/direct3d/D3D9_PixelShader_Palette.hlsli
@@ -0,0 +1,49 @@
+
+cbuffer Constants
+{
+    float4 texel_size;
+};
+
+uniform sampler2D image;
+uniform sampler1D palette;
+
+struct PixelShaderInput
+{
+    float4 pos : SV_POSITION;
+    float2 tex : TEXCOORD0;
+    float4 color : COLOR0;
+};
+
+static const float TEXTURETYPE_NONE = 0;
+static const float TEXTURETYPE_PALETTE_NEAREST = 1;
+static const float TEXTURETYPE_PALETTE_LINEAR = 2;
+
+float4 SamplePaletteNearest(float2 uv)
+{
+    float index = tex2D(image, uv).r * 255;
+    return tex1D(palette, (index + 0.5) / 256);
+}
+
+// Implementation with thanks to bgolus:
+// https://discussions.unity.com/t/how-to-make-data-shader-support-bilinear-trilinear/598639/8
+float4 SamplePaletteLinear(float2 uv)
+{
+    // scale & offset uvs to integer values at texel centers
+    float2 uv_texels = uv * texel_size.zw + 0.5;
+
+    // get uvs for the center of the 4 surrounding texels by flooring
+    float4 uv_min_max = float4((floor(uv_texels) - 0.5) * texel_size.xy, (floor(uv_texels) + 0.5) * texel_size.xy);
+
+    // blend factor
+    float2 uv_frac = frac(uv_texels);
+
+    // sample all 4 texels
+    float4 texelA = SamplePaletteNearest(uv_min_max.xy);
+    float4 texelB = SamplePaletteNearest(uv_min_max.xw);
+    float4 texelC = SamplePaletteNearest(uv_min_max.zy);
+    float4 texelD = SamplePaletteNearest(uv_min_max.zw);
+
+    // bilinear interpolation
+    return lerp(lerp(texelA, texelB, uv_frac.y), lerp(texelC, texelD, uv_frac.y), uv_frac.x);
+}
+
diff --git a/src/render/direct3d/D3D9_PixelShader_Palette_Linear.h b/src/render/direct3d/D3D9_PixelShader_Palette_Linear.h
new file mode 100755
index 0000000000000..da73f860e9d72
--- /dev/null
+++ b/src/render/direct3d/D3D9_PixelShader_Palette_Linear.h
@@ -0,0 +1,209 @@
+#if 0
+//
+// Generated by Microsoft (R) HLSL Shader Compiler 10.1
+//
+// Parameters:
+//
+//   sampler2D image;
+//   sampler1D palette;
+//   float4 texel_size;
+//
+//
+// Registers:
+//
+//   Name         Reg   Size
+//   ------------ ----- ----
+//   texel_size   c0       1
+//   image        s0       1
+//   palette      s1       1
+//
+
+    ps_2_0
+    def c1, 0.5, -0.5, 255, 0.00390625
+    dcl t0.xy
+    dcl v0
+    dcl_2d s0
+    dcl_2d s1
+    mov r0.w, c1.x
+    mad r0.x, t0.x, c0.z, r0.w
+    mad r0.y, t0.y, c0.w, r0.w
+    frc r0.zw, r0.wzyx
+    add r0.xy, -r0.wzyx, r0
+    add r1.xy, r0, c1.y
+    add r0.xy, r0, c1.x
+    mul r0.xy, r0, c0
+    mul r1.xy, r1, c0
+    mov r2.x, r1.x
+    mov r2.y, r0.y
+    mov r3.y, r1.y
+    mov r3.x, r0.x
+    texld r2, r2, s0
+    texld r1, r1, s0
+    texld r4, r0, s0
+    texld r3, r3, s0
+    mad r0.x, r2.x, c1.z, c1.x
+    mul r0.xy, r0.x, c1.w
+    mad r1.x, r1.x, c1.z, c1.x
+    mul r1.xy, r1.x, c1.w
+    mad r1.z, r4.x, c1.z, c1.x
+    mul r2.xy, r1.z, c1.w
+    mad r1.z, r3.x, c1.z, c1.x
+    mul r3.xy, r1.z, c1.w
+    texld r4, r0, s1
+    texld r1, r1, s1
+    texld r2, r2, s1
+    texld r3, r3, s1
+    lrp r5, r0.z, r4, r1
+    lrp r1, r0.z, r2, r3
+    lrp r2, r0.w, r1, r5
+    mul r0, r2, v0
+    mov oC0, r0
+
+// approximately 34 instruction slots used (8 texture, 26 arithmetic)
+#endif
+
+const BYTE g_ps20_main[] =
+{
+      0,   2, 255, 255, 254, 255,
+     54,   0,  67,  84,  65,  66,
+     28,   0,   0,   0, 171,   0,
+      0,   0,   0,   2, 255, 255,
+      3,   0,   0,   0,  28,   0,
+      0,   0,   0,   1,   0,   0,
+    164,   0,   0,   0,  88,   0,
+      0,   0,   3,   0,   0,   0,
+      1,   0,   0,   0,  96,   0,
+      0,   0,   0,   0,   0,   0,
+    112,   0,   0,   0,   3,   0,
+      1,   0,   1,   0,   0,   0,
+    120,   0,   0,   0,   0,   0,
+      0,   0, 136,   0,   0,   0,
+      2,   0,   0,   0,   1,   0,
+      0,   0, 148,   0,   0,   0,
+      0,   0,   0,   0, 105, 109,
+     97, 103, 101,   0, 171, 171,
+      4,   0,  12,   0,   1,   0,
+      1,   0,   1,   0,   0,   0,
+      0,   0,   0,   0, 112,  97,
+    108, 101, 116, 116, 101,   0,
+      4,   0,  11,   0,   1,   0,
+      1,   0,   1,   0,   0,   0,
+      0,   0,   0,   0, 116, 101,
+    120, 101, 108,  95, 115, 105,
+    122, 101,   0, 171,   1,   0,
+      3,   0,   1,   0,   4,   0,
+      1,   0,   0,   0,   0,   0,
+      0,   0, 112, 115,  95,  50,
+     95,  48,   0,  77, 105,  99,
+    114, 111, 115, 111, 102, 116,
+     32,  40,  82,  41,  32,  72,
+     76,  83,  76,  32,  83, 104,
+     97, 100, 101, 114,  32,  67,
+    111, 109, 112, 105, 108, 101,
+    114,  32,  49,  48,  46,  49,
+      0, 171,  81,   0,   0,   5,
+      1,   0,  15, 160,   0,   0,
+      0,  63,   0,   0,   0, 191,
+      0,   0, 127,  67,   0,   0,
+    128,  59,  31,   0,   0,   2,
+      0,   0,   0, 128,   0,   0,
+      3, 176,  31,   0,   0,   2,
+      0,   0,   0, 128,   0,   0,
+     15, 144,  31,   0,   0,   2,
+      0,   0,   0, 144,   0,   8,
+     15, 160,  31,   0,   0,   2,
+      0,   0,   0, 144,   1,   8,
+     15, 160,   1,   0,   0,   2,
+      0,   0,   8, 128,   1,   0,
+      0, 160,   4,   0,   0,   4,
+      0,   0,   1, 128,   0,   0,
+      0, 176,   0,   0, 170, 160,
+      0,   0, 255, 128,   4,   0,
+      0,   4,   0,   0,   2, 128,
+      0,   0,  85, 176,   0,   0,
+    255, 160,   0,   0, 255, 128,
+     19,   0,   0,   2,   0,   0,
+     12, 128,   0,   0,  27, 128,
+      2,   0,   0,   3,   0,   0,
+      3, 128,   0,   0,  27, 129,
+      0,   0, 228, 128,   2,   0,
+      0,   3,   1,   0,   3, 128,
+      0,   0, 228, 128,   1,   0,
+     85, 160,   2,   0,   0,   3,
+      0,   0,   3, 128,   0,   0,
+    228, 128,   1,   0,   0, 160,
+      5,   0,   0,   3,   0,   0,
+      3, 128,   0,   0, 228, 128,
+      0,   0, 228, 160,   5,   0,
+      0,   3,   1,   0,   3, 128,
+      1,   0, 228, 128,   0,   0,
+    228, 160,   1,   0,   0,   2,
+      2,   0,   1, 128,   1,   0,
+      0, 128,   1,   0,   0,   2,
+      2,   0,   2, 128,   0,   0,
+     85, 128,   1,   0,   0,   2,
+      3,   0,   2, 128,   1,   0,
+     85, 128,   1,   0,   0,   2,
+      3,   0,   1, 128,   0,   0,
+      0, 128,  66,   0,   0,   3,
+      2,   0,  15, 128,   2,   0,
+    228, 128,   0,   8, 228, 160,
+     66,   0,   0,   3,   1,   0,
+     15, 128,   1,   0, 228, 128,
+      0,   8, 228, 160,  66,   0,
+      0,   3,   4,   0,  15, 128,
+      0,   0, 228, 128,   0,   8,
+    228, 160,  66,   0,   0,   3,
+      3,   0,  15, 128,   3,   0,
+    228, 128,   0,   8, 228, 160,
+      4,   0,   0,   4,   0,   0,
+      1, 128,   2,   0,   0, 128,
+      1,   0, 170, 160,   1,   0,
+      0, 160,   5,   0,   0,   3,
+      0,   0,   3, 128,   0,   0,
+      0, 128,   1,   0, 255, 160,
+      4,   0,   0,   4,   1,   0,
+      1, 128,   1,   0,   0, 128,
+      1,   0, 170, 160,   1,   0,
+      0, 160,   5,   0,   0,   3,
+      1,   0,   3, 128,   1,   0,
+      0, 128,   1,   0, 255, 160,
+      4,   0,   0,   4,   1,   0,
+      4, 128,   4,   0,   0, 128,
+      1,   0, 170, 160,   1,   0,
+      0, 160,   5,   0,   0,   3,
+      2,   0,   3, 128,   1,   0,
+    170, 128,   1,   0, 255, 160,
+      4,   0,   0,   4,   1,   0,
+      4, 128,   3,   0,   0, 128,
+      1,   0, 170, 160,   1,   0,
+      0, 160,   5,   0,   0,   3,
+      3,   0,   3, 128,   1,   0,
+    170, 128,   1,   0, 255, 160,
+     66,   0,   0,   3,   4,   0,
+     15, 128,   0,   0, 228, 128,
+      1,   8, 228, 160,  66,   0,
+      0,   3,   1,   0,  15, 128,
+      1,   0, 228, 128,   1,   8,
+    228, 160,  66,   0,   0,   3,
+      2,   0,  15, 128,   2,   0,
+    228, 128,   1,   8, 228, 160,
+     66,   0,   0,   3,   3,   0,
+     15, 128,   3,   0, 228, 128,
+      1,   8, 228, 160,  18,   0,
+      0,   4,   5,   0,  15, 128,
+      0,   0, 170, 128,   4,   0,
+    228, 128,   1,   0, 228, 128,
+     18,   0,   0,   4,   1,   0,
+     15, 128,   0,   0, 170, 128,
+      2,   0, 228, 128,   3,   0,
+    228, 128,  18,   0,   0,   4,
+      2,   0,  15, 128,   0,   0,
+    255, 128,   1,   0, 228, 128,
+      5,   0, 228, 128,   5,   0,
+      0,   3,   0,   0,  15, 128,
+      2,   0, 228, 128,   0,   0,
+    228, 144,   1,   0,   0,   2,
+      0,   8,  15, 128,   0,   0,
+    228, 128, 255, 255,   0,   0
+};
diff --git a/src/render/direct3d/D3D9_PixelShader_Palette_Linear.hlsl b/src/render/direct3d/D3D9_PixelShader_Palette_Linear.hlsl
new file mode 100644
index 0000000000000..63aeaa002d6d2
--- /dev/null
+++ b/src/render/direct3d/D3D9_PixelShader_Palette_Linear.hlsl
@@ -0,0 +1,7 @@
+
+#include "D3D9_PixelShader_Palette.hlsli"
+
+float4 main(PixelShaderInput input) : SV_TARGET
+{
+    return SamplePaletteLinear(input.tex) * input.color;
+}
diff --git a/src/render/direct3d/D3D9_PixelShader_Palette_Nearest.h b/src/render/direct3d/D3D9_PixelShader_Palette_Nearest.h
new file mode 100755
index 0000000000000..6a36bb75e65c8
--- /dev/null
+++ b/src/render/direct3d/D3D9_PixelShader_Palette_Nearest.h
@@ -0,0 +1,95 @@
+#if 0
+//
+// Generated by Microsoft (R) HLSL Shader Compiler 10.1
+//
+// Parameters:
+//
+//   sampler2D image;
+//   sampler1D palette;
+//
+//
+// Registers:
+//
+//   Name         Reg   Size
+//   ------------ ----- ----
+//   image        s0       1
+//   palette      s1       1
+//
+
+    ps_2_0
+    def c0, 255, 0.5, 0.00390625, 0
+    dcl t0.xy
+    dcl v0
+    dcl_2d s0
+    dcl_2d s1
+    texld r0, t0, s0
+    mad r0.x, r0.x, c0.x, c0.y
+    mul r0.xy, r0.x, c0.z
+    texld r0, r0, s1
+    mul r0, r0, v0
+    mov oC0, r0
+
+// approximately 6 instruction slots used (2 texture, 4 arithmetic)
+#endif
+
+const BYTE g_ps20_main[] =
+{
+      0,   2, 255, 255, 254, 255,
+     42,   0,  67,  84,  65,  66,
+     28,   0,   0,   0, 123,   0,
+      0,   0,   0,   2, 255, 255,
+      2,   0,   0,   0,  28,   0,
+      0,   0,   0,   1,   0,   0,
+    116,   0,   0,   0,  68,   0,
+      0,   0,   3,   0,   0,   0,
+      1,   0,   0,   0,  76,   0,
+      0,   0,   0,   0,   0,   0,
+     92,   0,   0,   0,   3,   0,
+      1,   0,   1,   0,   0,   0,
+    100,   0,   0,   0,   0,   0,
+      0,   0, 105, 109,  97, 103,
+    101,   0, 171, 171,   4,   0,
+     12,   0,   1,   0,   1,   0,
+      1,   0,   0,   0,   0,   0,
+      0,   0, 112,  97, 108, 101,
+    116, 116, 101,   0,   4,   0,
+     11,   0,   1,   0,   1,   0,
+      1,   0,   0,   0,   0,   0,
+      0,   0, 112, 115,  95,  50,
+     95,  48,   0,  77, 105,  99,
+    114, 111, 115, 111, 102, 116,
+     32,  40,  82,  41,  32,  72,
+     76,  83,  76,  32,  83, 104,
+     97, 100, 101, 114,  32,  67,
+    111, 109, 112, 105, 108, 101,
+    114,  32,  49,  48,  46,  49,
+      0, 171,  81,   0,   0,   5,
+      0,   0,  15, 160,   0,   0,
+    127,  67,   0,   0,   0,  63,
+      0,   0, 128,  59,   0,   0,
+      0,   0,  31,   0,   0,   2,
+      0,   0,   0, 128,   0,   0,
+      3, 176,  31,   0,   0,   2,
+      0,   0,   0, 128,   0,   0,
+     15, 144,  31,   0,   0,   2,
+      0,   0,   0, 144,   0,   8,
+     15, 160,  31,   0,   0,   2,
+      0,   0,   0, 144,   1,   8,
+     15, 160,  66,   0,   0,   3,
+      0,   0,  15, 128,   0,   0,
+    228, 176,   0,   8, 228, 160,
+      4,   0,   0,   4,   0,   0,
+      1, 128,   0,   0,   0, 128,
+      0,   0,   0, 160,   0,   0,
+     85, 160,   5,   0,   0,   3,
+      0,   0,   3, 128,   0,   0,
+      0, 128,   0,   0, 170, 160,
+     66,   0,   0,   3,   0,   0,
+     15, 128,   0,   0, 228, 128,
+      1,   8, 228, 160,   5,   0,
+      0,   3,   0,   0,  15, 128,
+      0,   0, 228, 128,   0,   0,
+    228, 144,   1,   0,   0,   2,
+      0,   8,  15, 128,   0,   0,
+    228, 128, 255, 255,   0,   0
+};
diff --git a/src/render/direct3d/D3D9_PixelShader_Palette_Nearest.hlsl b/src/render/direct3d/D3D9_PixelShader_Palette_Nearest.hlsl
new file mode 100644
index 0000000000000..6131f7bf61471
--- /dev/null
+++ b/src/render/direct3d/D3D9_PixelShader_Palette_Nearest.hlsl
@@ -0,0 +1,7 @@
+
+#include "D3D9_PixelShader_Palette.hlsli"
+
+float4 main(PixelShaderInput input) : SV_TARGET
+{
+    return SamplePaletteNearest(input.tex) * input.color;
+}
diff --git a/src/render/direct3d/SDL_render_d3d.c b/src/render/direct3d/SDL_render_d3d.c
index e72c49e34e4f4..a368a61e4d4ea 100644
--- a/src/render/direct3d/SDL_render_d3d.c
+++ b/src/render/direct3d/SDL_render_d3d.c
@@ -86,9 +86,7 @@ typedef struct
     IDirect3DSurface9 *defaultRenderTarget;
     IDirect3DSurface9 *currentRenderTarget;
     void *d3dxDLL;
-#ifdef SDL_HAVE_YUV
     LPDIRECT3DPIXELSHADER9 shaders[NUM_SHADERS];
-#endif
     LPDIRECT3DVERTEXBUFFER9 vertexBuffers[8];
     size_t vertexBufferSize[8];
     int currentVertexBuffer;
@@ -100,8 +98,9 @@ typedef struct
 typedef struct
 {
     D3D_TextureRep texture;
-    D3D9_Shader shader;
+    UINT shader_params_length;
     const float *shader_params;
+    float palette_shader_params[4];
 
 #ifdef SDL_HAVE_YUV
     // YV12 texture support
@@ -657,7 +656,12 @@ static bool D3D_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SDL_
         return false;
     }
     if (texture->format == SDL_PIXELFORMAT_INDEX8) {
-        texturedata->shader = SHADER_PALETTE;
+        texturedata->shader_params_length = 1; // The palette shader takes 1 float4 parameter
+        texturedata->shader_params = texturedata->palette_shader_params;
+        texturedata->palette_shader_params[0] = 1.0f / texture->w;
+        texturedata->palette_shader_params[1] = 1.0f / texture->h;
+        texturedata->palette_shader_params[2] = texture->w;
+        texturedata->palette_shader_params[3] = texture->h;
     }
 #ifdef SDL_HAVE_YUV
     if (texture->format == SDL_PIXELFORMAT_YV12 ||
@@ -672,7 +676,7 @@ static bool D3D_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SDL_
             return false;
         }
 
-        texturedata->shader = SHADER_YUV;
+        texturedata->shader_params_length = 4; // The YUV shader takes 4 float4 parameters
         texturedata->shader_params = SDL_GetYCbCRtoRGBConversionMatrix(texture->colorspace, texture->w, texture->h, 8);
         if (texturedata->shader_params == NULL) {
             return SDL_SetError("Unsupported YUV colorspace");
@@ -1041,7 +1045,7 @@ static void UpdateTextureAddressMode(D3D_RenderData *data, SDL_TextureAddressMod
     }
 }
 
-static bool SetupTextureState(D3D_RenderData *data, SDL_Texture *texture, D3D9_Shader *shader, const float **shader_params)
+static bool SetupTextureState(D3D_RenderData *data, SDL_Texture *texture, SDL_ScaleMode scale_mode, D3D9_Shader *shader, const float **shader_params)
 {
     D3D_TextureData *texturedata = (D3D_TextureData *)texture->internal;
 
@@ -1049,7 +1053,15 @@ static bool SetupTextureState(D3D_RenderData *data, SDL_Texture *texture, D3D9_S
         return SDL_SetError("Texture is not currently available");
     }
 
-    *shader = texturedata->shader;
+    if (texture->format == SDL_PIXELFORMAT_INDEX8) {
+        if (scale_mode == SDL_SCALEMODE_LINEAR) {
+            *shader = SHADER_PALETTE_LINEAR;
+        } else {
+            *shader = SHADER_PALETTE_NEAREST;
+        }
+    } else if (texturedata->yuv) {
+        *shader = SHADER_YUV;
+    }
     *shader_params = texturedata->shader_params;
 
     if (!BindTextureRep(data->device, &texturedata->texture, 0)) {
@@ -1099,11 +1111,10 @@ static bool SetDrawState(D3D_RenderData *data, const SDL_RenderCommand *cmd)
             IDirect3DDevice9_SetTexture(data->device, 2, NULL);
         }
 #endif
-        if (texture && !SetupTextureState(data, texture, &shader, &shader_params)) {
+        if (texture && !SetupTextureState(data, texture, cmd->data.draw.texture_scale_mode, &shader, &shader_params)) {
             return false;
         }
 
-#ifdef SDL_H

(Patch may be truncated, please check the link at the top of this post.)