Hi,
I’ve made a patch with my SDL modification. I think it’s ready to go
into CVS (I’ve been using it for some months without problems).
The speedup is noticeable, especially when you mix many sound channels
together. For example, in a game I’m writing, CPU usage for the sound
thread goes from 6% to 2-3%.
Stephane
PS: I also have some MMX code for the sound conversion functions, but I
am wondering whether it would provide any speed-up at run time (or only at
start-up). Any opinions?
diff -Naur SDL12/src/audio/Makefile.am SDL12.new/src/audio/Makefile.am
--- SDL12/src/audio/Makefile.am 2002-10-05 18:50:56.000000000 +0200
+++ SDL12.new/src/audio/Makefile.am 2002-10-30 22:04:58.000000000 +0100
@@ -22,7 +22,9 @@
SDL_mixer.c
SDL_sysaudio.h
SDL_wave.c \
- SDL_wave.h \
- SDL_mixer_MMX.c \
- SDL_mixer_MMX.h
libaudio_la_SOURCES = $(COMMON_SRCS)
libaudio_la_LIBADD = $(DRIVERS)
diff -Naur SDL12/src/audio/SDL_mixer.c SDL12.new/src/audio/SDL_mixer.c
--- SDL12/src/audio/SDL_mixer.c 2002-03-06 12:23:02.000000000 +0100
+++ SDL12.new/src/audio/SDL_mixer.c 2002-10-30 22:07:11.000000000 +0100
@@ -36,6 +36,22 @@
#include “SDL_timer.h”
#include “SDL_sysaudio.h”
+/* Function to check the CPU flags: CPU_Flags() is tested against MMX_CPU before the MMX mixers are called */
+#define MMX_CPU 0x800000 /* mask for bit 23 of CPU_Flags(); presumably the CPUID MMX feature bit - confirm against Hermes_X86_CPU() */
+#ifdef USE_ASMBLIT
+#define CPU_Flags() Hermes_X86_CPU()
+#else
+#define CPU_Flags() 0L /* no flags reported without USE_ASMBLIT, so a test against MMX_CPU is never true */
+#endif
+
+#ifdef USE_ASMBLIT
+#define X86_ASSEMBLER /* defined before including HeadX86.h; presumably expected by that header - TODO confirm */
+#define HermesConverterInterface void /* stands in for a Hermes type as plain void; presumably only needed so HeadX86.h parses - TODO confirm */
+#define HermesClearInterface void /* same as above */
+#define STACKCALL /* defined empty */
+
+#include “HeadX86.h”
+#endif
/* This table is used to add two sound values together and pin
+#if defined(__i386__) && defined(__GNUC__) && defined(USE_ASMBLIT)
-
if (CPU_Flags() & MMX_CPU)
-
{
-
SDL_MixAudio_MMX_S8((char*)dst,(char*)src,(unsigned int)len,(int)volume);
-
}
-
else
+#endif
@@ -153,10 +178,19 @@
++dst8;
++src8;
}
+#if defined(__i386__) && defined(__GNUC__) && defined(USE_ASMBLIT)
-
if (CPU_Flags() & MMX_CPU)
-
{
-
SDL_MixAudio_MMX_S16((char*)dst,(char*)src,(unsigned int)len,(int)volume);
-
}
-
else
+#endif
@@ -180,6 +214,7 @@
dst[1] = dst_sample&0xFF;
dst += 2;
}
diff -Naur SDL12/src/audio/SDL_mixer_MMX.c SDL12.new/src/audio/SDL_mixer_MMX.c
--- SDL12/src/audio/SDL_mixer_MMX.c 1970-01-01 01:00:00.000000000 +0100
+++ SDL12.new/src/audio/SDL_mixer_MMX.c 2002-10-30 22:05:02.000000000 +0100
@@ -0,0 +1,185 @@
+// MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples
+// Copyright 2002 Stephane Marchesin (stephane.marchesin at wanadoo.fr)
+// This code is licensed under the LGPL (see COPYING for details)
+//
+// Assumes buffer size in bytes is a multiple of 16
+// Assumes SDL_MIX_MAXVOLUME = 128
+
+
+////////////////////////////////////////////////
+// Mixing for 16 bit signed buffers
+////////////////////////////////////////////////
+
+#if defined(__i386__) && defined(__GNUC__) && defined(USE_ASMBLIT)
+void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume) // per 16 bit sample: dst = sat16(dst + sat16((src * volume) >> 7)); dst is read and written, src is only read, size is in bytes
+{ // handles size/16 whole groups of 16 bytes; a trailing size%16 bytes is left unmixed
+" movl %0,%%edi\n" // edi = dst
+" movl %1,%%esi\n" // esi = src
+" movl %3,%%eax\n" // eax = volume
+
+" movl %2,%%ebx\n" // ebx = size
+
+" shrl $4,%%ebx\n" // process 16 bytes per iteration = 8 samples; ebx = iteration count
+
+" jz .endS16\n" // fewer than 16 bytes: nothing to do (no MMX instruction has run yet, so skipping emms is fine)
+
+" pxor %%mm0,%%mm0\n" // mm0 = 0
+
+" movd %%eax,%%mm0\n" // mm0 = volume in the low 32 bits
+" movq %%mm0,%%mm1\n" // mm1 = copy of the volume, OR-ed back in after each shift below
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n" // mm0 = vol|vol|vol|vol (assumes volume fits in 16 bits)
+
+".align 16\n"
+" .mixloopS16:\n"
+
+" movq (%%esi),%%mm1\n" // mm1 = a|b|c|d
+
+" movq %%mm1,%%mm2\n" // mm2 = a|b|c|d
+
+" movq 8(%%esi),%%mm4\n" // mm4 = e|f|g|h
+
- // preload the first 8 bytes of the dst buffer into mm7
+" movq (%%edi),%%mm7\n" // mm7 = first 8 bytes of dst (4 samples)
-
- // multiply by the volume; low and high 16 bit halves of each product are computed separately
+" pmullw %%mm0,%%mm1\n" // mm1 = l(av)|l(bv)|l(cv)|l(dv)
-
+" pmulhw %%mm0,%%mm2\n" // mm2 = h(av)|h(bv)|h(cv)|h(dv)
+" movq %%mm4,%%mm5\n" // mm5 = e|f|g|h
+
+" pmullw %%mm0,%%mm4\n" // mm4 = l(ev)|l(fv)|l(gv)|l(hv)
+
+" pmulhw %%mm0,%%mm5\n" // mm5 = h(ev)|h(fv)|h(gv)|h(hv)
+" movq %%mm1,%%mm3\n" // mm3 = l(av)|l(bv)|l(cv)|l(dv)
+
+" punpckhwd %%mm2,%%mm1\n" // mm1 = av|bv (full 32 bit products)
+
+" movq %%mm4,%%mm6\n" // mm6 = l(ev)|l(fv)|l(gv)|l(hv)
+" punpcklwd %%mm2,%%mm3\n" // mm3 = cv|dv
+
+" punpckhwd %%mm5,%%mm4\n" // mm4 = ev|fv
+
+" punpcklwd %%mm5,%%mm6\n" // mm6 = gv|hv
+
- // preload the second 8 bytes of the dst buffer into mm5
+" movq 8(%%edi),%%mm5\n" // mm5 = second 8 bytes of dst (4 samples)
-
- // divide by 128 (arithmetic shift right by 7)
+" psrad $7,%%mm1\n" // mm1 = av/128|bv/128 , 128 = SDL_MIX_MAXVOLUME
+" addl $16,%%esi\n" // advance src by the 16 bytes just read
-
+" psrad $7,%%mm3\n" // mm3 = cv/128|dv/128
+
+" psrad $7,%%mm4\n" // mm4 = ev/128|fv/128
+
- // mm3 = the first four samples with the volume applied, packed back to 16 bits with signed saturation
+" packssdw %%mm1,%%mm3\n" // mm3 = s(av|bv|cv|dv)
-
+" psrad $7,%%mm6\n" // mm6= gv/128|hv/128
+" paddsw %%mm7,%%mm3\n" // mm3 = adjust_volume(src)+dst, saturating add
+
- // mm6 = the last four samples with the volume applied, packed back to 16 bits with signed saturation
+" packssdw %%mm4,%%mm6\n" // mm6 = s(ev|fv|gv|hv)
+" movq %%mm3,(%%edi)\n" // store the first 4 mixed samples back to dst
-
+" paddsw %%mm5,%%mm6\n" // mm6 = adjust_volume(src)+dst, saturating add
+
+" movq %%mm6,8(%%edi)\n" // store the last 4 mixed samples back to dst
+
+" addl $16,%%edi\n" // advance dst by 16 bytes
+
+" dec %%ebx\n" // one 16 byte group done
+
+" jnz .mixloopS16\n"
+
+" emms\n" // leave MMX state; only reached when the loop ran
+
+".endS16:\n"
- :
- : “m” (dst), “m”(src),“m”(size),
- “m”(volume)
- : “eax”,“ebx”, “esi”, “edi”,“memory” // NOTE(review): appears to be the clobber list; mm0-mm7 are written above but are not named here
- );
+}
-
-
-
+////////////////////////////////////////////////
+// Mixing for 8 bit signed buffers
+////////////////////////////////////////////////
+
+// Mixes signed 8 bit samples: dst[i] = sat8(dst[i] + sat8((src[i] * volume) >> 7)).
+// dst is read and written, src is only read, size is the byte count and
+// volume is the gain applied to src (128 leaves src unchanged, because the
+// product is shifted right by 7).
+// Handles size/8 whole groups of 8 bytes; a trailing size%8 bytes is left unmixed.
+// The 16 bit multiply keeps only the low half of each product, so this assumes
+// volume stays within 0..128 (see the SDL_MIX_MAXVOLUME note in the file header).
+void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume)
+{
+" movl %0,%%edi\n" // edi = dst
+" movl %1,%%esi\n" // esi = src
+" movl %3,%%eax\n" // eax = volume
+
+" movd %%eax,%%mm0\n" // mm0 = volume (was read from ebx, which is not loaded until further down)
+" movq %%mm0,%%mm1\n" // mm1 = copy of the volume, OR-ed back in after each shift below
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n"
+" psllq $16,%%mm0\n"
+" por %%mm1,%%mm0\n" // mm0 = vol|vol|vol|vol
+
+" movl %2,%%ebx\n" // ebx = size
+" shr $3,%%ebx\n" // process 8 bytes per iteration = 8 samples
+
+" cmp $0,%%ebx\n"
+" je .endS8\n" // fewer than 8 bytes: nothing to mix
+
+".align 16\n"
+" .mixloopS8:\n"
+
+" pxor %%mm2,%%mm2\n" // mm2 = 0
+" movq (%%esi),%%mm1\n" // mm1 = a|b|c|d|e|f|g|h
+
+" movq %%mm1,%%mm3\n" // mm3 = a|b|c|d|e|f|g|h
+
- // sign extension: compare 0 against each source byte, giving all ones where the byte is negative and 0 otherwise
+" pcmpgtb %%mm1,%%mm2\n" // mm2 = 0xFF in each byte whose source byte is < 0, 0x00 elsewhere
-
+" punpckhbw %%mm2,%%mm1\n" // mm1 = a|b|c|d sign-extended to 16 bit words
+
+" punpcklbw %%mm2,%%mm3\n" // mm3 = e|f|g|h sign-extended to 16 bit words
+" movq (%%edi),%%mm2\n" // mm2 = destination
+
+" pmullw %%mm0,%%mm1\n" // mm1 = va|vb|vc|vd
+" addl $8,%%esi\n" // advance src by the 8 bytes just read
+
+" pmullw %%mm0,%%mm3\n" // mm3 = ve|vf|vg|vh
+" psraw $7,%%mm1\n" // mm1 = va/128|vb/128|vc/128|vd/128
+
+" psraw $7,%%mm3\n" // mm3 = ve/128|vf/128|vg/128|vh/128
+
+" packsswb %%mm1,%%mm3\n" // mm3 = va/128|vb/128|vc/128|vd/128|ve/128|vf/128|vg/128|vh/128, saturated to signed bytes
+
+" paddsb %%mm2,%%mm3\n" // add to destination buffer with signed saturation
+
+" movq %%mm3,(%%edi)\n" // store back to ram
+" addl $8,%%edi\n" // advance dst by 8 bytes
+
+" dec %%ebx\n" // one 8 byte group done
+
+" jnz .mixloopS8\n"
+
+".endS8:\n"
+" emms\n" // leave MMX state; runs on both the loop and the early-exit path
- :
- : “m” (dst), “m”(src),“m”(size),
- “m”(volume)
- : “eax”,“ebx”, “esi”, “edi”,“memory”
- );
+}
+#endif
-
diff -Naur SDL12/src/audio/SDL_mixer_MMX.h SDL12.new/src/audio/SDL_mixer_MMX.h
--- SDL12/src/audio/SDL_mixer_MMX.h 1970-01-01 01:00:00.000000000 +0100
+++ SDL12.new/src/audio/SDL_mixer_MMX.h 2002-10-30 22:05:02.000000000 +0100
@@ -0,0 +1,13 @@
+// headers for MMX assembler version of SDL_MixAudio
+// Copyright 2002 Stephane Marchesin (stephane.marchesin at wanadoo.fr)
+// This code is licensed under the LGPL (see COPYING for details)
+//
+// Assumes buffer size in bytes is a multiple of 16
+// Assumes SDL_MIX_MAXVOLUME = 128
+
+
+#if defined(__i386__) && defined(__GNUC__) && defined(USE_ASMBLIT)
+void SDL_MixAudio_MMX_S16(char* ,char* ,unsigned int ,int );
+void SDL_MixAudio_MMX_S8(char* ,char* ,unsigned int ,int );
+#endif
+
How about other compilers?
VC++ and BC++?
-------------- next part --------------
A non-text attachment was scrubbed…
Name: not available
Type: application/pgp-signature
Size: 187 bytes
Desc: not available
URL: http://lists.libsdl.org/pipermail/sdl-libsdl.org/attachments/20021102/b8ecf3fb/attachment.pgp