In my patch I forgot to backslashify a macro (i.e. to end its lines with
continuation backslashes), in code that I couldn’t test myself.
Thanks to hofi for spotting that, and for verifying that the RLE change indeed
works. Sam, please apply.
Cheers,
Mattias.
Index: src/video/SDL_RLEaccel.c
===================================================================
RCS file: /cvs/SDL/src/video/SDL_RLEaccel.c,v
retrieving revision 1.3.2.22
diff -u -r1.3.2.22 SDL_RLEaccel.c
--- src/video/SDL_RLEaccel.c 2001/02/17 17:52:09 1.3.2.22
+++ src/video/SDL_RLEaccel.c 2001/04/06 16:25:54
@@ -109,12 +109,21 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif
 
+#define PIXEL_COPY(to, from, len, bpp) \
+do { \
+    if(bpp == 4) { \
+        SDL_memcpy4(to, from, (unsigned)(len)); \
+    } else { \
+        SDL_memcpy(to, from, (unsigned)(len) * (bpp)); \
+    } \
+} while(0)
+
 /*
  * Various colorkey blit methods, for opaque and per-surface alpha
  */
 
 #define OPAQUE_BLIT(to, from, length, bpp, alpha) \
-    SDL_memcpy(to, from, (unsigned)(length * bpp))
+    PIXEL_COPY(to, from, length, bpp)
 
 /*
  * For 32bpp pixels on the form 0x00rrggbb:
@@ -657,9 +666,9 @@
             if(crun > right - cofs) \
                 crun = right - cofs; \
             if(crun > 0) \
-                SDL_memcpy(dstbuf + cofs * sizeof(Ptype), \
+                PIXEL_COPY(dstbuf + cofs * sizeof(Ptype), \
                            srcbuf + (cofs - ofs) * sizeof(Ptype), \
-                           (unsigned)crun * sizeof(Ptype)); \
+                           (unsigned)crun, sizeof(Ptype)); \
             srcbuf += run * sizeof(Ptype); \
             ofs += run; \
         } else if(!ofs) \
@@ -816,8 +825,8 @@
         run = ((Ctype *)srcbuf)[1]; \
         srcbuf += 2 * sizeof(Ctype); \
         if(run) { \
-            SDL_memcpy(dstbuf + ofs * sizeof(Ptype), srcbuf, \
-                       run * sizeof(Ptype)); \
+            PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
+                       run, sizeof(Ptype)); \
             srcbuf += run * sizeof(Ptype); \
             ofs += run; \
         } else if(!ofs) \
Index: src/video/SDL_blit_A.c
RCS file: /cvs/SDL/src/video/SDL_blit_A.c,v
retrieving revision 1.3.2.9
diff -u -r1.3.2.9 SDL_blit_A.c
--- src/video/SDL_blit_A.c 2001/03/04 17:36:19 1.3.2.9
+++ src/video/SDL_blit_A.c 2001/04/06 16:25:54
@@ -195,8 +195,8 @@
}
}
-/* fast RGB888->(A)RGB888 blending with surface alpha */
-static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
+/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
+static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
{
int width = info->d_width;
int height = info->d_height;
@@ -204,32 +204,58 @@
int srcskip = info->s_skip >> 2;
Uint32 *dstp = (Uint32 *)info->d_pixels;
int dstskip = info->d_skip >> 2;
-
SDL_PixelFormat *srcfmt = info->src;
-
unsigned alpha = srcfmt->alpha;
while(height--) {
DUFFS_LOOP4({ -
Uint32 s;
-
Uint32 d;
-
Uint32 s1;
-
Uint32 d1;
-
s = *srcp;
-
d = *dstp;
-
s1 = s & 0xff00ff;
-
d1 = d & 0xff00ff;
-
d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;
-
s &= 0xff00;
-
d &= 0xff00;
-
d = (d + ((s - d) * alpha >> 8)) & 0xff00;
-
*dstp = d1 | d | 0xff000000;
-
++srcp;
-
++dstp;
+            Uint32 s = *srcp++;
+            Uint32 d = *dstp;
+            *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
+                      + (s & d & 0x00010101);
         }, width);
         srcp += srcskip;
         dstp += dstskip;
     }
 }
+/* fast RGB888->(A)RGB888 blending with surface alpha */
+static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
+{
- unsigned alpha = info->src->alpha;
- if(alpha == 128) {
-
BlitRGBtoRGBSurfaceAlpha128(info);
- } else {
-
int width = info->d_width;
-
int height = info->d_height;
-
Uint32 *srcp = (Uint32 *)info->s_pixels;
-
int srcskip = info->s_skip >> 2;
-
Uint32 *dstp = (Uint32 *)info->d_pixels;
-
int dstskip = info->d_skip >> 2;
-
while(height--) {
-
DUFFS_LOOP4({
-
Uint32 s;
-
Uint32 d;
-
Uint32 s1;
-
Uint32 d1;
-
s = *srcp;
-
d = *dstp;
-
s1 = s & 0xff00ff;
-
d1 = d & 0xff00ff;
-
d1 = (d1 + ((s1 - d1) * alpha >> 8))
-
& 0xff00ff;
-
s &= 0xff00;
-
d &= 0xff00;
-
d = (d + ((s - d) * alpha >> 8)) & 0xff00;
-
*dstp = d1 | d | 0xff000000;
-
++srcp;
-
++dstp;
-
}, width);
-
srcp += srcskip;
-
dstp += dstskip;
-
}
- }
+}
/* fast ARGB888->(A)RGB888 blending with pixel alpha */
static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info)
{
@@ -277,8 +303,18 @@
}
}
-/* fast RGB565->RGB565 blending with surface alpha */
-static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
+/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
+
+/* blend a single 16 bit pixel at 50% */
+#define BLEND16_50(d, s, mask) \
+    ((((s & mask) + (d & mask)) >> 1) + (s & d & (~mask & 0xffff)))
+/* blend two 16 bit pixels at 50% */
+#define BLEND2x16_50(d, s, mask) \
+    (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \
+     + (s & d & (~(mask | mask << 16))))
+static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
{
int width = info->d_width;
int height = info->d_height;
@@ -286,56 +322,163 @@
int srcskip = info->s_skip >> 1;
Uint16 *dstp = (Uint16 *)info->d_pixels;
int dstskip = info->d_skip >> 1;
-
unsigned alpha = info->src->alpha >> 3; /* downscale alpha to 5 bits */
while(height--) {
-
DUFFS_LOOP4({
-
Uint32 s = *srcp++;
-
Uint32 d = *dstp;
-
/*
-
* shift out the middle component (green) to the high 16
-
* bits, and process all three RGB components at the same
-
* time.
-
*/
-
s = (s | s << 16) & 0x07e0f81f;
-
d = (d | d << 16) & 0x07e0f81f;
-
d += (s - d) * alpha >> 5;
-
d &= 0x07e0f81f;
-
*dstp++ = d | d >> 16;
-
}, width);
-
srcp += srcskip;
-
dstp += dstskip;
-
if(((unsigned long)srcp ^ (unsigned long)dstp) & 2) {
-
/*
-
* Source and destination not aligned, pipeline it.
-
* This is mostly a win for big blits but no loss for
-
* small ones
-
*/
-
Uint32 prev_sw;
-
int w = width;
-
/* handle odd destination */
-
if((unsigned long)dstp & 2) {
-
Uint16 d = *dstp, s = *srcp;
-
*dstp = BLEND16_50(d, s, mask);
-
dstp++;
-
srcp++;
-
w--;
-
}
-
srcp++; /* srcp is now 32-bit aligned */
-
/* bootstrap pipeline with first halfword */
-
prev_sw = ((Uint32 *)srcp)[-1];
-
while(w > 1) {
-
Uint32 sw, dw, s;
-
sw = *(Uint32 *)srcp;
-
dw = *(Uint32 *)dstp;
-
if(SDL_BYTEORDER == SDL_BIG_ENDIAN)
-
s = (prev_sw << 16) + (sw >> 16);
-
else
-
s = (prev_sw >> 16) + (sw << 16);
-
prev_sw = sw;
-
*(Uint32 *)dstp = BLEND2x16_50(dw, s, mask);
-
dstp += 2;
-
srcp += 2;
-
w -= 2;
-
}
-
/* final pixel if any */
-
if(w) {
-
Uint16 d = *dstp, s;
-
if(SDL_BYTEORDER == SDL_BIG_ENDIAN)
-
s = prev_sw;
-
else
-
s = prev_sw >> 16;
-
*dstp = BLEND16_50(d, s, mask);
-
srcp++;
-
dstp++;
-
}
-
srcp += srcskip - 1;
-
dstp += dstskip;
-
} else {
-
/* source and destination are aligned */
-
int w = width;
-
/* first odd pixel? */
-
if((unsigned long)srcp & 2) {
-
Uint16 d = *dstp, s = *srcp;
-
*dstp = BLEND16_50(d, s, mask);
-
srcp++;
-
dstp++;
-
w--;
-
}
-
/* srcp and dstp are now 32-bit aligned */
-
while(w > 1) {
-
Uint32 sw = *(Uint32 *)srcp;
-
Uint32 dw = *(Uint32 *)dstp;
-
*(Uint32 *)dstp = BLEND2x16_50(dw, sw, mask);
-
srcp += 2;
-
dstp += 2;
-
w -= 2;
-
}
-
/* last odd pixel? */
-
if(w) {
-
Uint16 d = *dstp, s = *srcp;
-
*dstp = BLEND16_50(d, s, mask);
-
srcp++;
-
dstp++;
-
}
-
srcp += srcskip;
-
dstp += dstskip;
-
}
- }
+}
+/* fast RGB565->RGB565 blending with surface alpha */
+static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
+{
- unsigned alpha = info->src->alpha;
- if(alpha == 128) {
-
Blit16to16SurfaceAlpha128(info, 0xf7de);
- } else {
-
int width = info->d_width;
-
int height = info->d_height;
-
Uint16 *srcp = (Uint16 *)info->s_pixels;
-
int srcskip = info->s_skip >> 1;
-
Uint16 *dstp = (Uint16 *)info->d_pixels;
-
int dstskip = info->d_skip >> 1;
-
alpha >>= 3; /* downscale alpha to 5 bits */
-
while(height--) {
-
DUFFS_LOOP4({
-
Uint32 s = *srcp++;
-
Uint32 d = *dstp;
-
/*
-
* shift out the middle component (green) to
-
* the high 16 bits, and process all three RGB
-
* components at the same time.
-
*/
-
s = (s | s << 16) & 0x07e0f81f;
-
d = (d | d << 16) & 0x07e0f81f;
-
d += (s - d) * alpha >> 5;
-
d &= 0x07e0f81f;
-
*dstp++ = d | d >> 16;
-
}, width);
-
srcp += srcskip;
-
dstp += dstskip;
-
}}
}
/* fast RGB555->RGB555 blending with surface alpha */
static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
{
- int width = info->d_width;
- int height = info->d_height;
- Uint16 *srcp = (Uint16 *)info->s_pixels;
- int srcskip = info->s_skip >> 1;
- Uint16 *dstp = (Uint16 *)info->d_pixels;
- int dstskip = info->d_skip >> 1;
- unsigned alpha = info->src->alpha >> 3; /* downscale alpha to 5 bits */
- unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */
- if(alpha == 128) {
-
Blit16to16SurfaceAlpha128(info, 0xfbde);
- } else {
-
int width = info->d_width;
-
int height = info->d_height;
-
Uint16 *srcp = (Uint16 *)info->s_pixels;
-
int srcskip = info->s_skip >> 1;
-
Uint16 *dstp = (Uint16 *)info->d_pixels;
-
int dstskip = info->d_skip >> 1;
-
alpha >>= 3; /* downscale alpha to 5 bits */
- while(height--) {
-
DUFFS_LOOP4({
-
Uint32 s = *srcp++;
-
Uint32 d = *dstp;
-
/*
-
* shift out the middle component (green) to the high 16
-
* bits, and process all three RGB components at the same
-
* time.
-
*/
-
s = (s | s << 16) & 0x03e07c1f;
-
d = (d | d << 16) & 0x03e07c1f;
-
d += (s - d) * alpha >> 5;
-
d &= 0x03e07c1f;
-
*dstp++ = d | d >> 16;
-
}, width);
-
srcp += srcskip;
-
dstp += dstskip;
-
while(height--) {
-
DUFFS_LOOP4({
-
Uint32 s = *srcp++;
-
Uint32 d = *dstp;
-
/*
-
* shift out the middle component (green) to
-
* the high 16 bits, and process all three RGB
-
* components at the same time.
-
*/
-
s = (s | s << 16) & 0x03e07c1f;
-
d = (d | d << 16) & 0x03e07c1f;
-
d += (s - d) * alpha >> 5;
-
d &= 0x03e07c1f;
-
*dstp++ = d | d >> 16;
-
}, width);
-
srcp += srcskip;
-
dstp += dstskip;
-
}}
}
Index: src/video/SDL_memops.h
RCS file: /cvs/SDL/src/video/Attic/SDL_memops.h,v
retrieving revision 1.1.2.16
diff -u -r1.1.2.16 SDL_memops.h
--- src/video/SDL_memops.h 2001/02/22 01:14:05 1.1.2.16
+++ src/video/SDL_memops.h 2001/04/06 16:25:54
@@ -53,6 +53,17 @@
: "memory" );
} while(0)
+#define SDL_memcpy4(dst, src, len) \
+do { \
+    int ecx, edi, esi; \
+    asm volatile ( \
+        "cld\n\t" \
+        "rep ; movsl" \
+        : "=&c" (ecx), "=&D" (edi), "=&S" (esi) \
+        : "0" ((unsigned)(len)), "1" (dst), "2" (src) \
+        : "memory" ); \
+} while(0)
+
#define SDL_revcpy(dst, src, len)
do {
int u0, u1, u2;
@@ -104,9 +115,15 @@
#ifndef SDL_memcpy
#define SDL_memcpy(dst, src, len) memcpy(dst, src, len)
#endif
+
+#ifndef SDL_memcpy4
+#define SDL_memcpy4(dst, src, len) memcpy(dst, src, (len) << 2)
+#endif
+
#ifndef SDL_revcpy
#define SDL_revcpy(dst, src, len) memmove(dst, src, len)
#endif
+
#ifndef SDL_memset4
#define SDL_memset4(dst, val, len)
do { \