New Alpha Blit Code

Rafal_Bursig · March 10, 2003, 7:49am

Hi All

The current alpha blit code uses 2 x "*" (multiplications) per pixel (888/8888),
i.e. 4 x "*" per 2 pixels:
/* Blend one 32-bit source pixel into the destination with a constant alpha. */
Uint32 alpha = xxx;	/* "xxx" is a placeholder for the alpha value */
Uint32 s = *srcp++;
Uint32 d = *dstp;
/* First multiply: the two channels under mask 0x00ff00ff, blended together. */
Uint32 s1 = s & 0x00ff00ff;
Uint32 d1 = d & 0x00ff00ff;
d1 += (s1 - d1) * alpha >> 8;
d1 &= 0xff00ff;
/* Second multiply: the single channel under mask 0xff00. */
s &= 0xff00;
d &= 0xff00;
d += (s - d) * alpha >> 8;
d &= 0xff00;	/* keep only this channel so the OR below cannot disturb d1 */
*dstp++ = d1 | d | 0xFF000000;

I propose always processing 2 pixels at a time and using only 3 x "*"; this saves
1 "*" per pixel pair and speeds up the blit by ~30%
(see attached file).

Rafal

----------------------------------------------------------------------
Masz 35 mln zl. Kogo kupisz by wygrac - Zurawskiego czy Kucharskiego?

http://link.interia.pl/f16e9
-------------- next part --------------
/**********************************************************************
SDL_FillRectAlpha.c - description
-------------------
begin : Jan 10 2003
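copyright : © 2002 by Rafał Bursig
email : Rafał Bursig <@Rafal_Bursig>
***********************************************************************/

/**********************************************************************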
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
***********************************************************************/

/*
 * Forward declaration of the public entry point; the definition follows the
 * static blend routine further down in this file.
 *
 * pSurface - surface to draw on.
 * pRect    - area to fill; the blend routine treats NULL as "whole surface".
 * pColor   - fill colour; r/g/b give the colour and `unused` is read as the
 *            alpha value.
 *
 * Returns 0 after a blended fill, the result of SDL_FillRect() when the alpha
 * is 255, or a negative value when the arguments are rejected (NULL
 * surface/colour, rectangle entirely outside the surface, alpha of 0).
 */
int SDL_FillRectAlpha(SDL_Surface *pSurface, SDL_Rect *pRect,
SDL_Color *pColor);

static int __FillRectAlpha888_32bit(SDL_Surface *pSurface, SDL_Rect *pRect,
SDL_Color *pColor)
{
register Uint32 A = pColor->unused;
register Uint32 dSIMD1, dSIMD2;
register Uint32 sSIMD1, sSIMD2 = SDL_MapRGB(pSurface->format,
pColor->r, pColor->g,
pColor->b);
Uint32 y, end;
Uint32 *start, *pixel;

sSIMD1 = sSIMD2 & 0x00FF00FF;

if (SDL_MUSTLOCK(pSurface)) {
SDL_LockSurface(pSurface);
}

if (pRect == NULL) {
end = pSurface->w * pSurface->h;
pixel = (Uint32 ) pSurface->pixels;
if (A == 128) { / 50% A */
for (y = 0; y < end; y++) {
dSIMD2 = *pixel;
*pixel++ = ((((sSIMD2 & 0x00fefefe) + (dSIMD2 & 0x00fefefe)) >> 1)
+ (sSIMD2 & dSIMD2 & 0x00010101)) | 0xFF000000;
}
} else {
sSIMD2 &= 0xFF00;
y = 0;
if(end & 0x01) {

dSIMD2 = pixel[0];

dSIMD1 = dSIMD2 & 0x00FF00FF;
dSIMD1 += (sSIMD1 - dSIMD1) * A >> 8;
dSIMD1 &= 0x00FF00FF;

dSIMD2 = (dSIMD2 & 0xFF00);
dSIMD2 += (sSIMD2 - dSIMD2) * A >> 8;
dSIMD2 &= 0xFF00;

*pixel++ = dSIMD1 | dSIMD2 | 0xFF000000;
y = 1;
  }
  sSIMD2 = sSIMD2 >> 8 | sSIMD2 << 8;
  for (; y < end; y += 2) {
dSIMD1 = pixel[0] & 0x00FF00FF;
dSIMD1 += (sSIMD1 - dSIMD1) * A >> 8;
dSIMD1 &= 0x00FF00FF;

dSIMD2 = ((pixel[0] & 0xFF00) >> 8)| ((pixel[1] & 0xFF00) << 8);
dSIMD2 += (sSIMD2 - dSIMD2) * A >> 8;
dSIMD2 &= 0x00FF00FF;

*pixel++ = dSIMD1 | ((dSIMD2 << 8) & 0xFF00) | 0xFF000000;

dSIMD1 = pixel[0] & 0x00FF00FF;
dSIMD1 += (sSIMD1 - dSIMD1) * A >> 8;
dSIMD1 &= 0x00FF00FF;

*pixel++ = dSIMD1 | ((dSIMD2 >> 8) & 0xFF00) | 0xFF000000;
  }
}

} else {
/* correct pRect size */
if (pRect->x < 0) {
pRect->w += pRect->x;
pRect->x = 0;
} else {
if (pRect->x >= pSurface->w - pRect->w) {
pRect->w = pSurface->w - pRect->x;
}
}

if (pRect->y < 0) {
  pRect->h += pRect->y;
  pRect->y = 0;
} else {
  if (pRect->y >= pSurface->h - pRect->h) {
pRect->h = pSurface->h - pRect->y;
  }
}

start = pixel = (Uint32 *) pSurface->pixels +
(pRect->y * (pSurface->pitch >> 2)) + pRect->x;

if (A == 128) {		/* 50% A */

  for (y = 0; y < pRect->h; y++) {

for (end = 0; end < pRect->w; end++) {
  dSIMD2 = *pixel;
  *pixel++ = ((((sSIMD2 & 0x00fefefe) + (dSIMD2 & 0x00fefefe)) >> 1)
	      + (sSIMD2 & dSIMD2 & 0x00010101)) | 0xFF000000;
}

pixel = start + (pSurface->pitch >> 2);
start = pixel;
  }
} else {
  y = 0;
  sSIMD2 &= 0xFF00;
  sSIMD2 = sSIMD2 >> 8 | sSIMD2 << 8;      
  while (y != pRect->h) {

    if(pRect->w & 0x01) {
  dSIMD2 = pixel[0];
  dSIMD1 = dSIMD2 & 0x00FF00FF;
  dSIMD1 += (sSIMD1 - dSIMD1) * A >> 8;
  dSIMD1 &= 0x00FF00FF;
  dSIMD2 &= 0xFF00;
  dSIMD2 += (((sSIMD2 << 8) & 0xFF00) - dSIMD2) * A >> 8;
  dSIMD2 &= 0xFF00;
  *pixel++ = dSIMD1 | dSIMD2 | 0xFF000000;
    }

    for (; (pixel - start) < pRect->w; pixel += 2) {
  
  dSIMD1 = pixel[0] & 0x00FF00FF;
  dSIMD1 += (sSIMD1 - dSIMD1) * A >> 8;
  dSIMD1 &= 0x00FF00FF;

  dSIMD2 = ((pixel[0] & 0xFF00) >> 8)| ((pixel[1] & 0xFF00) << 8);
  dSIMD2 += (sSIMD2 - dSIMD2) * A >> 8;
  dSIMD2 &= 0x00FF00FF;

  pixel[0] = dSIMD1 | ((dSIMD2 << 8) & 0xFF00) | 0xFF000000;

  dSIMD1 = pixel[1] & 0x00FF00FF;
  dSIMD1 += (sSIMD1 - dSIMD1) * A >> 8;
  dSIMD1 &= 0x00FF00FF;

  pixel[1] = dSIMD1 | ((dSIMD2 >> 8) & 0xFF00) | 0xFF000000;
    }

pixel = start + (pSurface->pitch >> 2);
start = pixel;
y++;

  } /* while */
}

}

if (SDL_MUSTLOCK(pSurface)) {
SDL_UnlockSurface(pSurface);
}
return 0;
}

/*
 * Fill a rectangle of a surface with a colour, blended with the alpha stored
 * in pColor->unused.
 *
 * pSurface - destination surface.
 * pRect    - area to fill, or NULL; passed on unchanged to the fill routines.
 * pColor   - r/g/b colour plus alpha in `unused`.
 *
 * Returns:
 *   -2  pSurface or pColor is NULL
 *   -3  pRect lies completely outside the surface
 *   -4  alpha is 0 (nothing would be drawn)
 *   -5  alpha is 1..254 but the surface is not a 32-bit surface with its
 *       colour channels under masks 0x00FF00FF / 0x0000FF00, which is the
 *       only layout the blend routine handles
 *   otherwise the result of SDL_FillRect() (alpha 255) or of the blend
 *   routine.
 */
int SDL_FillRectAlpha(SDL_Surface *pSurface, SDL_Rect *pRect,
		      SDL_Color *pColor)
{
  if (!pSurface || !pColor) {
    return -2;
  }

  if (pRect && (pRect->x < - pRect->w || pRect->x >= pSurface->w ||
		pRect->y < - pRect->h || pRect->y >= pSurface->h)) {
    return -3;
  }

  if (!pColor->unused) {	/* Alpha == 0 */
    return -4;
  }

  if (pColor->unused == 255) {	/* Alpha == 255: plain opaque fill */
    return SDL_FillRect(pSurface, pRect,
			SDL_MapRGB(pSurface->format,
				   pColor->r, pColor->g, pColor->b));
  }

  /*
   * The blend routine works on Uint32 pixels and hard-codes the channel
   * masks; running it on any other format would corrupt the surface.
   */
  if (pSurface->format->BytesPerPixel != 4 ||
      (pSurface->format->Rmask | pSurface->format->Bmask) != 0x00FF00FF ||
      pSurface->format->Gmask != 0x0000FF00) {
    return -5;
  }

  return __FillRectAlpha888_32bit(pSurface, pRect, pColor);
}

atrix2 · March 10, 2003, 8:49am

maybe this could be a compile time setting?

To some people, the increased alpha is worth more than speed, after all.

----- Original Message -----

From: bursig@poczta.fm (Rafal Bursig)
To: “sdl”
Sent: Monday, March 10, 2003 7:46 AM
Subject: [SDL] New Alpha Blit Code

Hi All

The current alpha blit code uses 2 x "*" (multiplications) per pixel (888/8888),
i.e. 4 x "*" per 2 pixels:
/* Blend one 32-bit source pixel into the destination with a constant alpha. */
Uint32 alpha = xxx;	/* "xxx" is a placeholder for the alpha value */
Uint32 s = *srcp++;
Uint32 d = *dstp;
/* First multiply: the two channels under mask 0x00ff00ff, blended together. */
Uint32 s1 = s & 0x00ff00ff;
Uint32 d1 = d & 0x00ff00ff;
d1 += (s1 - d1) * alpha >> 8;
d1 &= 0xff00ff;
/* Second multiply: the single channel under mask 0xff00. */
s &= 0xff00;
d &= 0xff00;
d += (s - d) * alpha >> 8;
d &= 0xff00;	/* keep only this channel so the OR below cannot disturb d1 */
*dstp++ = d1 | d | 0xFF000000;

I propose always processing 2 pixels at a time and using only 3 x "*"; this saves
1 "*" per pixel pair and speeds up the blit by ~30%
(see attached file).

Rafal

Masz 35 mln zl. Kogo kupisz by wygrac - Zurawskiego czy Kucharskiego?

http://link.interia.pl/f16e9

David_Olofson · March 10, 2003, 10:15am

I don’t get your point. Isn’t this about FillRect…? If so, it
doesn’t matter, because there’s no alpha channel anyway; only a full
rect alpha value.

Or does this change lose alpha accuracy? (I didn’t read the code that
carefully.)

//David Olofson - Programmer, Composer, Open Source Advocate

.- The Return of Audiality! --------------------------------.
| Free/Open Source Audio Engine for use in Games or Studio. |
| RT and off-line synth. Scripting. Sample accurate timing. |
`-----------------------------------> http://audiality.org -’
— http://olofson.net — http://www.reologica.se —

On Monday 10 March 2003 17.54, Atrix Wolfe wrote:

maybe this could be a compile time setting?

To some people, the increased alpha is worth more than speed, after
all.

Rafal_Bursig · March 10, 2003, 2:35pm

Dnia 2003.03.10 17:54 Atrix Wolfe napisał(a):

maybe this could be a compile time setting?

To some people, the increased alpha is worth more than speed, after
all.

What do you mean by "the increased alpha"… a 16-bit alpha channel?
Currently I use 8-bit alpha.

Rafal

----------------------------------------------------------------------
KLIKAJ!!! Nie pytaj dlaczego… >>> http://link.interia.pl/f16e2