Patch for SDL sound mixing

Hi,

I’ve made a patch with my SDL modification. I think it’s ready to go
into CVS (I’ve been using it for some months without problems).
The speedup is noticeable, especially when you mix many sound channels
together. For example, in a game I’m writing, CPU usage for the sound
thread goes from 6% to 2-3%.

Stephane

PS : I also have some mmx code for the sound conversion functions, but I
was wondering if it could provide some speed-up at run time (or just at
start up). Any opinions ?
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed…
Name: SDL-mmxmixer.patch
URL: http://lists.libsdl.org/pipermail/sdl-libsdl.org/attachments/20021101/cc96d47b/attachment.asc

Stephane Marchesin wrote:

Hi,

I’ve made a patch with my SDL modification. I think it’s ready to go
into CVS (I’ve been using it for some months without problems).
The speedup is noticeable, especially when you mix many sound channels
together. For example, in a game I’m writing, CPU usage for the sound
thread goes from 6% to 2-3%.

Stephane

PS : I also have some mmx code for the sound conversion functions, but I
was wondering if it could provide some speed-up at run time (or just at
start up). Any opinions ?


diff -Naur SDL12/src/audio/Makefile.am SDL12.new/src/audio/Makefile.am
— SDL12/src/audio/Makefile.am 2002-10-05 18:50:56.000000000 +0200
+++ SDL12.new/src/audio/Makefile.am 2002-10-30 22:04:58.000000000 +0100
@@ -22,7 +22,9 @@
SDL_mixer.c
SDL_sysaudio.h
SDL_wave.c \

  • SDL_wave.h
  • SDL_wave.h \
  • SDL_mixer_MMX.c \
  • SDL_mixer_MMX.h

libaudio_la_SOURCES = $(COMMON_SRCS)
libaudio_la_LIBADD = $(DRIVERS)
diff -Naur SDL12/src/audio/SDL_mixer.c SDL12.new/src/audio/SDL_mixer.c
— SDL12/src/audio/SDL_mixer.c 2002-03-06 12:23:02.000000000 +0100
+++ SDL12.new/src/audio/SDL_mixer.c 2002-10-30 22:07:11.000000000 +0100
@@ -36,6 +36,22 @@
#include “SDL_timer.h”
#include “SDL_sysaudio.h”

+/* Function to check the CPU flags */
+#define MMX_CPU 0x800000
+#ifdef USE_ASMBLIT
+#define CPU_Flags() Hermes_X86_CPU()
+#else
+#define CPU_Flags() 0L
+#endif
+
+#ifdef USE_ASMBLIT
+#define X86_ASSEMBLER
+#define HermesConverterInterface void
+#define HermesClearInterface void
+#define STACKCALL
+
+#include “HeadX86.h”
+#endif

/* This table is used to add two sound values together and pin

  • the value to avoid overflow. (used with permission from ARDI)
    @@ -130,6 +146,15 @@
    break;

    case AUDIO_S8: {

+#if defined(i386) && defined(GNUC) && defined(USE_ASMBLIT)

  • 	if (CPU_Flags() & MMX_CPU)
    
  • 	{
    
  • 		SDL_MixAudio_MMX_S8((char*)dst,(char*)src,(unsigned int)len,(int)volume);
    
  • 	}
    
  • 	else
    

+#endif

  • 	{
    	Sint8 *dst8, *src8;
    	Sint8 src_sample;
    	int dst_sample;
    

@@ -153,10 +178,19 @@
++dst8;
++src8;
}

  • 	}
    }
    break;
    
    case AUDIO_S16LSB: {
    

+#if defined(i386) && defined(GNUC) && defined(USE_ASMBLIT)

  • 	if (CPU_Flags() & MMX_CPU)
    
  • 	{
    
  • 		SDL_MixAudio_MMX_S16((char*)dst,(char*)src,(unsigned int)len,(int)volume);
    
  • 	}
    
  • 	else
    

+#endif

  • 	{
    	Sint16 src1, src2;
    	int dst_sample;
    	const int max_audioval = ((1<<(16-1))-1);
    

@@ -180,6 +214,7 @@
dst[1] = dst_sample&0xFF;
dst += 2;
}

  • 	}
    }
    break;
    

diff -Naur SDL12/src/audio/SDL_mixer_MMX.c SDL12.new/src/audio/SDL_mixer_MMX.c
— SDL12/src/audio/SDL_mixer_MMX.c 1970-01-01 01:00:00.000000000 +0100
+++ SDL12.new/src/audio/SDL_mixer_MMX.c 2002-10-30 22:05:02.000000000 +0100
@@ -0,0 +1,185 @@
+// MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples
+// Copyright 2002 Stephane Marchesin (stephane.marchesin at wanadoo.fr)
+// This code is licensed under the LGPL (see COPYING for details)
+//
+// Assumes buffer size in bytes is a multiple of 16
+// Assumes SDL_MIX_MAXVOLUME = 128
+
+
+////////////////////////////////////////////////
+// Mixing for 16 bit signed buffers
+////////////////////////////////////////////////
+
+#if defined(i386) && defined(GNUC) && defined(USE_ASMBLIT)
+void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume) // mixes src into dst in place: dst = saturate(dst + ((src*volume) >> 7)) on signed 16-bit samples; only whole 16-byte groups of `size` are processed
+{

  • asm volatile (

+" movl %0,%%edi\n" // edi = dst
+" movl %1,%%esi\n" // esi = src
+" movl %3,%%eax\n" // eax = volume
+
+" movl %2,%%ebx\n" // ebx = size
+
+" shrl $4,%%ebx\n" // process 16 bytes per iteration = 8 samples
+
+" jz .endS16\n" // fewer than 16 bytes: nothing to mix (no MMX instruction has run yet, so skipping emms is fine)
+
+" pxor %%mm0,%%mm0\n"
+
+" movd %%eax,%%mm0\n"
+" movq %%mm0,%%mm1\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n" // mm0 = vol|vol|vol|vol
+
+".align 16\n"
+" .mixloopS16:\n"
+
+" movq (%%esi),%%mm1\n" // mm1 = a|b|c|d
+
+" movq %%mm1,%%mm2\n" // mm2 = a|b|c|d
+
+" movq 8(%%esi),%%mm4\n" // mm4 = e|f|g|h
+

  • // preload the dst buffer into mm7
    +" movq (%%edi),%%mm7\n" // mm7 = dst[0]"
  • // multiply by the volume
    +" pmullw %%mm0,%%mm1\n" // mm1 = l(av)|l(bv)|l(cv)|l(dv)

+" pmulhw %%mm0,%%mm2\n" // mm2 = h(av)|h(bv)|h(cv)|h(dv)
+" movq %%mm4,%%mm5\n" // mm5 = e|f|g|h
+
+" pmullw %%mm0,%%mm4\n" // mm4 = l(ev)|l(fv)|l(gv)|l(hv)
+
+" pmulhw %%mm0,%%mm5\n" // mm5 = h(ev)|h(fv)|h(gv)|h(hv)
+" movq %%mm1,%%mm3\n" // mm3 = l(av)|l(bv)|l(cv)|l(dv)
+
+" punpckhwd %%mm2,%%mm1\n" // mm1 = av|bv (full 32-bit products)
+
+" movq %%mm4,%%mm6\n" // mm6 = l(ev)|l(fv)|l(gv)|l(hv)
+" punpcklwd %%mm2,%%mm3\n" // mm3 = cv|dv
+
+" punpckhwd %%mm5,%%mm4\n" // mm4 = ev|fv
+
+" punpcklwd %%mm5,%%mm6\n" // mm6 = gv|hv
+

  • // preload the dst buffer into mm5
    +" movq 8(%%edi),%%mm5\n" // mm5 = dst[1]
  • // divide by 128 (arithmetic shift right by 7)
    +" psrad $7,%%mm1\n" // mm1 = av/128|bv/128 , 128 = SDL_MIX_MAXVOLUME
    +" addl $16,%%esi\n"

+" psrad $7,%%mm3\n" // mm3 = cv/128|dv/128
+
+" psrad $7,%%mm4\n" // mm4 = ev/128|fv/128
+

  • // mm3 = the samples with the volume applied, packed back to 16 bit with signed saturation
    +" packssdw %%mm1,%%mm3\n" // mm3 = s(av|bv|cv|dv)

+" psrad $7,%%mm6\n" // mm6= gv/128|hv/128
+" paddsw %%mm7,%%mm3\n" // mm3 = adjust_volume(src)+dst
+

  • // mm6 = the samples with the volume applied, packed back to 16 bit with signed saturation
    +" packssdw %%mm4,%%mm6\n" // mm6 = s(ev|fv|gv|hv)
    +" movq %%mm3,(%%edi)\n"

+" paddsw %%mm5,%%mm6\n" // mm6 = adjust_volume(src)+dst
+
+" movq %%mm6,8(%%edi)\n"
+
+" addl $16,%%edi\n"
+
+" dec %%ebx\n"
+
+" jnz .mixloopS16\n"
+
+" emms\n"
+
+".endS16:\n"

  • :
  • : “m” (dst), “m”(src),“m”(size),
  • “m”(volume)
  • : “eax”,“ebx”, “esi”, “edi”,“memory”
  • );
    +}

+////////////////////////////////////////////////
+// Mixing for 8 bit signed buffers
+////////////////////////////////////////////////
+
+void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume) // mixes src into dst in place: dst = saturate(dst + ((src*volume) >> 7)) on signed 8-bit samples; only whole 8-byte groups of `size` are processed
+{

  • asm volatile (

+" movl %0,%%edi\n" // edi = dst
+" movl %1,%%esi\n" // esi = src
+" movl %3,%%eax\n" // eax = volume
+
+" movd %%eax,%%mm0\n" // mm0 = volume; must read eax here, since ebx is not loaded until after the broadcast
+" movq %%mm0,%%mm1\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n" // mm0 = vol|vol|vol|vol
+
+" movl %2,%%ebx\n" // ebx = size
+" shr $3,%%ebx\n" // process 8 bytes per iteration = 8 samples
+
+" cmp $0,%%ebx\n"
+" je .endS8\n" // fewer than 8 bytes: nothing to mix
+
+".align 16\n"
+" .mixloopS8:\n"
+
+" pxor %%mm2,%%mm2\n" // mm2 = 0
+" movq (%%esi),%%mm1\n" // mm1 = a|b|c|d|e|f|g|h
+
+" movq %%mm1,%%mm3\n" // mm3 = a|b|c|d|e|f|g|h
+

  • // sign-extend by comparing 0 against each sample: the mask byte is all ones if the sample is < 0, all zeros otherwise
    +" pcmpgtb %%mm1,%%mm2\n" // mm2 = 11111111|00000000|00000000…

+" punpckhbw %%mm2,%%mm1\n" // mm1 = a|b|c|d, each sign-extended to 16 bit (mask byte on top)
+
+" punpcklbw %%mm2,%%mm3\n" // mm3 = e|f|g|h, each sign-extended to 16 bit
+" movq (%%edi),%%mm2\n" // mm2 = destination
+
+" pmullw %%mm0,%%mm1\n" // mm1 = va|vb|vc|vd
+" addl $8,%%esi\n"
+
+" pmullw %%mm0,%%mm3\n" // mm3 = ve|vf|vg|vh
+" psraw $7,%%mm1\n" // mm1 = va/128|vb/128|vc/128|vd/128
+
+" psraw $7,%%mm3\n" // mm3 = ve/128|vf/128|vg/128|vh/128
+
+" packsswb %%mm1,%%mm3\n" // mm3 = va/128|vb/128|vc/128|vd/128|ve/128|vf/128|vg/128|vh/128
+
+" paddsb %%mm2,%%mm3\n" // add to destination buffer (signed saturation)
+
+" movq %%mm3,(%%edi)\n" // store back to ram
+" addl $8,%%edi\n"
+
+" dec %%ebx\n"
+
+" jnz .mixloopS8\n"
+
+".endS8:\n"
+" emms\n" // always reached: the volume broadcast above already used MMX registers

  • :
  • : “m” (dst), “m”(src),“m”(size),
  • “m”(volume)
  • : “eax”,“ebx”, “esi”, “edi”,“memory”
  • );
    +}
    +#endif

diff -Naur SDL12/src/audio/SDL_mixer_MMX.h SDL12.new/src/audio/SDL_mixer_MMX.h
— SDL12/src/audio/SDL_mixer_MMX.h 1970-01-01 01:00:00.000000000 +0100
+++ SDL12.new/src/audio/SDL_mixer_MMX.h 2002-10-30 22:05:02.000000000 +0100
@@ -0,0 +1,13 @@
+// headers for MMX assembler version of SDL_MixAudio
+// Copyright 2002 Stephane Marchesin (stephane.marchesin at wanadoo.fr)
+// This code is licensed under the LGPL (see COPYING for details)
+//
+// Assumes buffer size in bytes is a multiple of 16
+// Assumes SDL_MIX_MAXVOLUME = 128
+
+
+#if defined(i386) && defined(GNUC) && defined(USE_ASMBLIT) // NOTE(review): presumably __i386__ / __GNUC__ in the original patch; the underscores look lost in extraction — confirm
+void SDL_MixAudio_MMX_S16(char* ,char* ,unsigned int ,int ); // (dst, src, size in bytes, volume): mix signed 16-bit samples from src into dst
+void SDL_MixAudio_MMX_S8(char* ,char* ,unsigned int ,int ); // (dst, src, size in bytes, volume): mix signed 8-bit samples from src into dst
+#endif
+
How about other compilers???
VC++ and BC++ ???
-------------- next part --------------
A non-text attachment was scrubbed…
Name: not available
Type: application/pgp-signature
Size: 187 bytes
Desc: not available
URL: http://lists.libsdl.org/pipermail/sdl-libsdl.org/attachments/20021102/b8ecf3fb/attachment.pgp

[snip]

How about other compilers???
VC++ and BC++ ???

I don’t have other compilers. But it will work under any gcc version
that supports inline asm. Besides, if you have those compilers you can
port the code; it’s just a matter of changing the AT&T asm syntax to
Windows asm syntax.

And if you can’t compile it (because you don’t have the gnu compiler, or
don’t have an x86 system), there is still the C version (the same is
true for the mmx code you’ll find in src/video/SDL_yuv_mmx.c).

Maybe if enough people are interested I’ll install VC++ and do it in
another patch…

Stephane

Hi,

I’ve made a patch with my SDL modification. I think it’s ready to go
into CVS (I’ve been using it for some months without problems).
The speedup is noticeable, especially when you mix many sound channels
together. For example, in a game I’m writing, CPU usage for the sound
thread goes from 6% to 2-3%.

Looks great, I’ve added your code to CVS. Thanks!

PS : I also have some mmx code for the sound conversion functions, but I
was wondering if it could provide some speed-up at run time (or just at
start up). Any opinions ?

Feel free to post it, I’m sure someone can use it.

See ya!
-Sam Lantinga, Software Engineer, Blizzard Entertainment