Here's an improvement to SDL_FillRect(), mainly for PowerPC, but other
platforms should see some improvement for 16bpp as well. CC'ed to the list
so that PPC owners can flame me. Many thanks to Fingolfin for his heroic
testing efforts despite my silly typos.
Index: SDL_surface.c
===================================================================
RCS file: /cvs/SDL/src/video/SDL_surface.c,v
retrieving revision 1.8.2.25
diff -u -r1.8.2.25 SDL_surface.c
--- SDL_surface.c 2000/12/04 21:53:58 1.8.2.25
+++ SDL_surface.c 2001/01/27 00:20:50
@@ -474,92 +474,123 @@
dstrect->xdst->format->BytesPerPixel;
if ( dst->format->palette || (color == 0) ) {
x = dstrect->wdst->format->BytesPerPixel;
-#ifdef SDL_memset4
if ( !color && !((long)row&3) && !(x&3) && !(dst->pitch&3) ) {
-
int n = x >> 2; for ( y=dstrect->h; y; --y ) {
-
SDL_memset4(row, 0, x);
-
SDL_memset4(row, 0, n); row += dst->pitch; }
-
} else
-#endif
-
for ( y=dstrect->h; y; --y ) {
-#ifdef __powerpc__ /* SIGBUS when using memset() ?? */
-
if ( color ) {
-
Uint8 *rowp = row;
-
int left = x;
-
while ( left-- ) { *rowp++ = color; }
-
} else {
-
Uint8 *rowp = row;
-
int left = x;
-
while ( left-- ) { *rowp++ = 0; }
-
} else {
+#ifdef __powerpc__
-
/*
-
* memset() on PPC (both glibc and codewarrior) uses
-
* the dcbz (Data Cache Block Zero) instruction, which
-
* causes an alignment exception if the destination is
-
* uncachable, so only use it on software surfaces
-
*/
-
if((dst->flags & SDL_HWSURFACE) == SDL_HWSURFACE) {
-
if(dstrect->w >= 8) {
-
/*
-
* 64-bit stores are probably most
-
* efficient to uncached video memory
-
*/
-
double fill;
-
memset(&fill, color, 8);
-
for(y = dstrect->h; y; y--) {
-
Uint8 *d = row;
-
unsigned n = x;
-
unsigned nn;
-
Uint8 c = color;
-
double f = fill;
-
while((unsigned long)d
-
& (sizeof(double) - 1)) {
-
*d++ = c;
-
n--;
-
}
-
nn = n / (sizeof(double) * 4);
-
while(nn) {
-
((double *)d)[0] = f;
-
((double *)d)[1] = f;
-
((double *)d)[2] = f;
-
((double *)d)[3] = f;
-
d += 4*sizeof(double);
-
nn--;
-
}
-
n &= ~(sizeof(double) * 4 - 1);
-
nn = n / sizeof(double);
-
while(nn) {
-
*(double *)d = f;
-
d += sizeof(double);
-
nn--;
-
}
-
n &= ~(sizeof(double) - 1);
-
while(n) {
-
*d++ = c;
-
n--;
-
}
-
row += dst->pitch;
-
}
-
} else {
-
/* narrow boxes */
-
for(y = dstrect->h; y; y--) {
-
Uint8 *d = row;
-
Uint8 c = color;
-
int n = x;
-
while(n) {
-
*d++ = c;
-
n--;
-
}
-
row += dst->pitch;
-
}
-
}
-
} else
+#endif /* __powerpc__ */
-
{
-
for(y = dstrect->h; y; y--) {
-
memset(row, color, x);
-
row += dst->pitch;
-
} }
-#else
-
memset(row, color, x);
-#endif
-
} else {row += dst->pitch; }
switch (dst->format->BytesPerPixel) { -
case 2: {
-
Uint16 *pixels;
-
case 2: for ( y=dstrect->h; y; --y ) {
-
pixels = (Uint16 *)row;
-
for ( x=dstrect->w/4; x; --x ) {
-
*pixels++ = color;
-
*pixels++ = color;
-
*pixels++ = color;
-
*pixels++ = color;
-
}
-
switch (dstrect->w%4) {
-
case 3:
-
*pixels++ = color;
-
case 2:
-
*pixels++ = color;
-
case 1:
-
*pixels++ = color;
-
Uint16 *pixels = (Uint16 *)row;
-
Uint16 c = color;
-
Uint32 cc = (Uint32)c << 16 | c;
-
int n = dstrect->w;
-
if((unsigned long)pixels & 3) {
-
*pixels++ = c;
-
n--; }
-
if(n >> 1)
-
SDL_memset4(pixels, cc, n >> 1);
-
if(n & 1)
-
pixels[n - 1] = c; row += dst->pitch; }
-
}
-
break;
-
break;
-
case 3: {
-
Uint8 *pixels;
-
case 3: if(SDL_BYTEORDER == SDL_BIG_ENDIAN)
-
color <<= 8;
-
color <<= 8; for ( y=dstrect->h; y; --y ) {
-
pixels = row;
-
Uint8 *pixels = row; for ( x=dstrect->w; x; --x ) { memcpy(pixels, &color, 3); pixels += 3; } row += dst->pitch; }
-
}
-
break;
-
break;
-
case 4: {
-
Uint32 *pixels;
-
for ( y=dstrect->h; y; --y ) {
-
pixels = (Uint32 *)row;
-
for ( x=dstrect->w/4; x; --x ) {
-
*pixels++ = color;
-
*pixels++ = color;
-
*pixels++ = color;
-
*pixels++ = color;
-
}
-
switch (dstrect->w%4) {
-
case 3:
-
*pixels++ = color;
-
case 2:
-
*pixels++ = color;
-
case 1:
-
*pixels++ = color;
-
}
-
case 4:
-
for(y = dstrect->h; y; --y) {
-
SDL_memset4(row, color, dstrect->w); row += dst->pitch; }
-
}
-
break;
-
}break; }
SDL_UnlockSurface(dst);
Index: SDL_memops.h
===================================================================
RCS file: /cvs/SDL/src/video/Attic/SDL_memops.h,v
retrieving revision 1.1.2.12
diff -u -r1.1.2.12 SDL_memops.h
--- SDL_memops.h 2000/04/03 22:35:38 1.1.2.12
+++ SDL_memops.h 2001/01/27 00:20:50
@@ -35,65 +35,68 @@
#if defined(__GNUC__) && defined(i386)
/* Thanks to Brennan "Bas" Underwood, for the inspiration.
*/
-#define SDL_memcpy(dst, src, len) \
-{ int u0, u1, u2; \
- asm volatile ( \
-
"cld\n\t" \
-
"rep ; movsl\n\t" \
-
"testb $2,%b4\n\t" \
-
"je 1f\n\t" \
-
"movsw\n" \
-
"1:\ttestb $1,%b4\n\t" \
-
"je 2f\n\t" \
-
"movsb\n" \
-
"2:" \
-
: "=&c" (u0), "=&D" (u1), "=&S" (u2) \
-
: "0" ((len)/4), "q" (len), "1" (dst),"2" (src) \
-
: "memory" ); \
-}
+#define SDL_memcpy(dst, src, len) \
+do{ \
- Uint32 u0, u1, u2; \
- asm volatile ( \
-
"cld\n\t" \
-
"rep ; movsl\n\t" \
-
"testb $2,%b4\n\t" \
-
"je 1f\n\t" \
-
"movsw\n" \
-
"1:\ttestb $1,%b4\n\t" \
-
"je 2f\n\t" \
-
"movsb\n" \
-
"2:" \
-
: "=&c" (u0), "=&D" (u1), "=&S" (u2) \
-
: "0" ((unsigned)(len)/4), "q" (len), "1" (dst),"2" (src) \
-
: "memory" ); \
+}while(0)
-#define SDL_revcpy(dst, src, len) \
-{ int u0, u1, u2; \
- char *dstp = (char *)(dst); \
- char *srcp = (char *)(src); \
- int n = (len); \
- if ( n >= 4 ) { \
- asm volatile ( \
-
"std\n\t" \
-
"rep ; movsl\n\t" \
-
: "=&c" (u0), "=&D" (u1), "=&S" (u2) \
-
: "0" (n/4), \
-
"1" (dstp+(n-4)), "2" (srcp+(n-4)) \
-
: "memory" ); \
- } \
- switch (n%4) { \
-
case 3: dstp[2] = srcp[2]; \
-
case 2: dstp[1] = srcp[1]; \
-
case 1: dstp[0] = srcp[0]; \
-
break; \
-
default: \
-
break; \
- }
-}
+#define SDL_revcpy(dst, src, len) \
+do { \
- int u0, u1, u2; \
- char *dstp = (char *)(dst); \
- char *srcp = (char *)(src); \
- int n = (len); \
- if ( n >= 4 ) { \
- asm volatile ( \
-
"std\n\t" \
-
"rep ; movsl\n\t" \
-
: "=&c" (u0), "=&D" (u1), "=&S" (u2) \
-
: "0" (n >> 2), \
-
"1" (dstp+(n-4)), "2" (srcp+(n-4)) \
-
: "memory" ); \
- } \
- switch (n & 3) { \
-
case 3: dstp[2] = srcp[2]; \
-
case 2: dstp[1] = srcp[1]; \
-
case 1: dstp[0] = srcp[0]; \
-
break; \
-
default: \
-
break; \
- }
+}while(0)
-#define SDL_memmove(dst, src, len) \
-{ \
- if ( dst < src ) { \
-
SDL_memcpy(dst, src, len); \
- } else { \
-
SDL_revcpy(dst, src, len); \
- }
-}
+#define SDL_memmove(dst, src, len) \
+do { \
- if ( (dst) < (src) ) { \
-
SDL_memcpy((dst), (src), (len)); \
- } else { \
-
SDL_revcpy((dst), (src), (len)); \
- }
+}while(0)
-#define SDL_memset4(dst, val, len) \
-{ int u0, u1, u2; \
- asm volatile ( \
-
"cld\n\t" \
-
"rep ; stosl\n\t" \
-
: "=&D" (u0), "=&a" (u1), "=&c" (u2) \
-
: "0" (dst), "1" (val), "2" (len/4) \
-
: "memory" ); \
-}
+#define SDL_memset4(dst, val, len) \
+do { \
- Uint32 u0, u1, u2; \
- asm volatile ( \
-
"cld\n\t" \
-
"rep ; stosl\n\t" \
-
: "=&D" (u0), "=&a" (u1), "=&c" (u2) \
-
: "0" (dst), "1" (val), "2" (len) \
-
: "memory" ); \
+}while(0)
#endif /* GNU C and x86 */
@@ -105,18 +108,20 @@
#define SDL_revcpy(dst, src, len) memmove(dst, src, len)
#endif
#ifndef SDL_memset4
-#define SDL_memset4(dst, val, len) \
-{ int count = (len)/4; \
- int n = (count+3)/4; \
- Uint32 *p = (Uint32 *)(dst); \
-
switch (count % 4) { \
-
case 0: do { *p++ = val; \
-
case 3: *p++ = val; \
-
case 2: *p++ = val; \
-
case 1: *p++ = val; \
-
} while ( --n > 0 ); \
- }
-}
+#define SDL_memset4(dst, val, len) \
+do{ \
- unsigned _count = (len); \
- unsigned _n = (_count + 3) / 4; \
- Uint32 *_p = (Uint32 *)(dst); \
- Uint32 _val = (val); \
-
switch (_count % 4) { \
-
case 0: do { *_p++ = _val; \
-
case 3: *_p++ = _val; \
-
case 2: *_p++ = _val; \
-
case 1: *_p++ = _val; \
-
} while ( --_n ); \
- }
+}while(0)
#endif
#endif /* _SDL_memops_h */