From 0237f339e6bec39c56e1364787fabdb0245d478f Mon Sep 17 00:00:00 2001
From: Miro Kropacek <[EMAIL REDACTED]>
Date: Sun, 27 Oct 2024 13:18:21 +0100
Subject: [PATCH] atari:video: use optimized c2p routines
The new routines are well proven in ScummVM and much more efficient than
the previous ones.
The TT version skips every second line, which leads to a different look but
is also much faster. Perhaps in the future I'll add an environment variable
to switch between doubled lines and skipped lines.
The ST version has also been fixed and heavily optimized in the process.
---
src/video/ataricommon/SDL_ataric2p.S | 1468 ++++++++++++++++++------
src/video/ataricommon/SDL_ataric2p_s.h | 108 +-
src/video/gem/SDL_gemvideo.c | 48 +-
src/video/xbios/SDL_xbios.c | 53 +-
src/video/xbios/SDL_xbios_st.c | 8 +-
5 files changed, 1216 insertions(+), 469 deletions(-)
diff --git a/src/video/ataricommon/SDL_ataric2p.S b/src/video/ataricommon/SDL_ataric2p.S
index 97f374115..d50f29706 100644
--- a/src/video/ataricommon/SDL_ataric2p.S
+++ b/src/video/ataricommon/SDL_ataric2p.S
@@ -26,442 +26,1172 @@
Chunky to planar conversion routine
1 byte/pixel -> 4 or 8 bit planes
- Patrice Mandin
- Xavier Joubert
Mikael Kalms
+ Miro Kropacek
*/
- .globl SYM(SDL_Atari_C2pConvert)
.globl SYM(SDL_Atari_C2pConvert8)
+ .globl SYM(SDL_Atari_C2pConvert8_tt)
+ .globl SYM(SDL_Atari_C2pConvert8_rect)
.globl SYM(SDL_Atari_C2pConvert4)
- .globl SYM(SDL_Atari_C2pConvert4_pal)
+ .globl SYM(SDL_Atari_C2pConvert4_rect)
+
+ .globl SYM(SDL_Atari_C2pPalette4)
/* ------------ Conversion C2P, 8 bits ------------ */
.text
+
+| void SDL_Atari_C2pConvert8(const byte *pChunky, const byte *pChunkyEnd, byte *pScreen);
SYM(SDL_Atari_C2pConvert8):
#if !defined(__mcoldfire__)
- movel sp@(4),c2p_source
- movel sp@(8),c2p_dest
- movel sp@(12),c2p_width
- movel sp@(16),c2p_height
- movel sp@(20),c2p_dblligne
- movel sp@(24),c2p_srcpitch
- movel sp@(28),c2p_dstpitch
-
- moveml d2-d7/a2-a6,sp@-
-
- movel c2p_source,c2p_cursrc
- movel c2p_dest,c2p_curdst
- movel #0x0f0f0f0f,d4
- movel #0x00ff00ff,d5
- movel #0x55555555,d6
- movew c2p_height+2,c2p_row
- movew c2p_width+2,d0
- andw #-8,d0
- movew d0,c2p_rowlen
-
-SDL_Atari_C2p8_rowloop:
-
- movel c2p_cursrc,a0
- movel c2p_curdst,a1
-
- movel a0,a2
- addw c2p_rowlen,a2
-
- movel a0@+,d0
- movel a0@+,d1
- movel a0@+,d2
- movel a0@+,d3
-/*
- d0 = a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
- d1 = e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
- d2 = i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
- d3 = m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
-*/
- movel d1,d7
- lsrl #4,d7
- eorl d0,d7
- andl d4,d7
- eorl d7,d0
- lsll #4,d7
- eorl d7,d1
-
- movel d3,d7
- lsrl #4,d7
- eorl d2,d7
- andl d4,d7
- eorl d7,d2
- lsll #4,d7
- eorl d7,d3
-
- movel d2,d7
- lsrl #8,d7
- eorl d0,d7
- andl d5,d7
- eorl d7,d0
- lsll #8,d7
- eorl d7,d2
-
- movel d3,d7
- lsrl #8,d7
- eorl d1,d7
- andl d5,d7
- eorl d7,d1
- lsll #8,d7
- eorl d7,d3
-/*
- d0 = a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
- d1 = a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
- d2 = b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
- d3 = b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
-*/
- bras SDL_Atari_C2p8_start
+#ifdef __FASTCALL__
+ | a0: chunky
+ move.l a1,d0 | chunky end
+ move.l 4(sp),a1 | screen
+#else
+ move.l (4,sp),a0 | chunky
+ move.l (8,sp),d0 | chunky end
+ move.l (12,sp),a1 | screen
+#endif
+ movem.l d2-d7/a2-a6,-(sp)
+ move.l d0,a2
+ move.l #0x0f0f0f0f,d4
+ move.l #0x00ff00ff,d5
+ move.l #0x55555555,d6
+
+ move.l (a0)+,d0
+ move.l (a0)+,d1
+ move.l (a0)+,d2
+ move.l (a0)+,d3
+
+ | a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
+ | e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
+ | i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
+ | m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
+
+ move.l d1,d7
+ lsr.l #4,d7
+ eor.l d0,d7
+ and.l d4,d7
+ eor.l d7,d0
+ lsl.l #4,d7
+ eor.l d7,d1
+ move.l d3,d7
+ lsr.l #4,d7
+ eor.l d2,d7
+ and.l d4,d7
+ eor.l d7,d2
+ lsl.l #4,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
+ | a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
+ | i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
+ | i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
+
+ move.l d2,d7
+ lsr.l #8,d7
+ eor.l d0,d7
+ and.l d5,d7
+ eor.l d7,d0
+ lsl.l #8,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #8,d7
+ eor.l d1,d7
+ and.l d5,d7
+ eor.l d7,d1
+ lsl.l #8,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
+ | a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
+ | b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
+ | b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
+
+ bra.s c2p1x1_8_start
+
+c2p1x1_8_pix16:
+ move.l (a0)+,d0
+ move.l (a0)+,d1
+ move.l (a0)+,d2
+ move.l (a0)+,d3
+
+ | a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
+ | e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
+ | i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
+ | m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
+
+ move.l d1,d7
+ lsr.l #4,d7
+ move.l a3,(a1)+
+ eor.l d0,d7
+ and.l d4,d7
+ eor.l d7,d0
+ lsl.l #4,d7
+ eor.l d7,d1
+ move.l d3,d7
+ lsr.l #4,d7
+ eor.l d2,d7
+ and.l d4,d7
+ eor.l d7,d2
+ move.l a4,(a1)+
+ lsl.l #4,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
+ | a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
+ | i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
+ | i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
+
+ move.l d2,d7
+ lsr.l #8,d7
+ eor.l d0,d7
+ and.l d5,d7
+ eor.l d7,d0
+ move.l a5,(a1)+
+ lsl.l #8,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #8,d7
+ eor.l d1,d7
+ and.l d5,d7
+ eor.l d7,d1
+ move.l a6,(a1)+
+ lsl.l #8,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
+ | a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
+ | b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
+ | b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
+
+c2p1x1_8_start:
+ move.l d2,d7
+ lsr.l #1,d7
+ eor.l d0,d7
+ and.l d6,d7
+ eor.l d7,d0
+ add.l d7,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #1,d7
+ eor.l d1,d7
+ and.l d6,d7
+ eor.l d7,d1
+ add.l d7,d7
+ eor.l d7,d3
+
+ | a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5
+ | a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
+ | a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
+ | a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
+
+ move.w d2,d7
+ move.w d0,d2
+ swap d2
+ move.w d2,d0
+ move.w d7,d2
+ move.w d3,d7
+ move.w d1,d3
+ swap d3
+ move.w d3,d1
+ move.w d7,d3
+
+ | a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4
+ | a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
+ | c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
+ | c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
+
+ move.l d2,d7
+ lsr.l #2,d7
+ eor.l d0,d7
+ and.l #0x33333333,d7
+ eor.l d7,d0
+ lsl.l #2,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #2,d7
+ eor.l d1,d7
+ and.l #0x33333333,d7
+ eor.l d7,d1
+ lsl.l #2,d7
+ eor.l d7,d3
+
+ | a7b7c7d7e7f7g7h7 i7j7k7l7m7n7o7p7 a6b6c6d6e6f6g6h6 i6j6k6l6m6n6o6p6
+ | a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
+ | a5b5c5d5e5f5g5h5 i5j5k5l5m5n5o5p5 a4b4c4d4e4f4g4h4 i4j4k4l4m4n4o4p4
+ | a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
-SDL_Atari_C2p8_pix16:
+ swap d0
+ swap d1
+ swap d2
+ swap d3
- movel a0@+,d0
- movel a0@+,d1
- movel a0@+,d2
- movel a0@+,d3
-/*
- d0 = a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
- d1 = e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
- d2 = i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
- d3 = m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
-*/
- movel d1,d7
- lsrl #4,d7
- movel a3,a1@+
- eorl d0,d7
- andl d4,d7
- eorl d7,d0
- lsll #4,d7
- eorl d7,d1
-
- movel d3,d7
- lsrl #4,d7
- eorl d2,d7
- andl d4,d7
- eorl d7,d2
- movel a4,a1@+
- lsll #4,d7
- eorl d7,d3
-
- movel d2,d7
- lsrl #8,d7
- eorl d0,d7
- andl d5,d7
- eorl d7,d0
- movel a5,a1@+
- lsll #8,d7
- eorl d7,d2
-
- movel d3,d7
- lsrl #8,d7
- eorl d1,d7
- andl d5,d7
- eorl d7,d1
- movel a6,a1@+
- lsll #8,d7
- eorl d7,d3
-/*
- d0 = a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
- d1 = a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
- d2 = b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
- d3 = b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
-*/
+ move.l d0,a6
+ move.l d2,a5
+ move.l d1,a4
+ move.l d3,a3
+
+ cmp.l a0,a2
+ bne c2p1x1_8_pix16
+
+ move.l a3,(a1)+
+ move.l a4,(a1)+
+ move.l a5,(a1)+
+ move.l a6,(a1)+
+
+ movem.l (sp)+,d2-d7/a2-a6
+#endif
+ rts
-SDL_Atari_C2p8_start:
-
- movel d2,d7
- lsrl #1,d7
- eorl d0,d7
- andl d6,d7
- eorl d7,d0
- addl d7,d7
- eorl d7,d2
-
- movel d3,d7
- lsrl #1,d7
- eorl d1,d7
- andl d6,d7
- eorl d7,d1
- addl d7,d7
- eorl d7,d3
-/*
- d0 = a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5
- d1 = a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
- d2 = a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
- d3 = a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
-*/
- movew d2,d7
- movew d0,d2
- swap d2
- movew d2,d0
- movew d7,d2
- movew d3,d7
- movew d1,d3
+| void SDL_Atari_C2pConvert8_tt(const byte *pChunky, const byte *pChunkyEnd, byte *pScreen, uint32 screenPitch);
+SYM(SDL_Atari_C2pConvert8_tt):
+#if !defined(__mcoldfire__)
+ movem.l d2-d7/a2-a6,-(sp) | 6 + 5 = 11 longs
+
+#ifdef __FASTCALL__
+ | a0: chunky
+ move.l a1,a2 | a2: chunky end
+ move.l (11*4+4,sp),a1 | a1: screen
+ | d0.l: screen pitch (double width)
+#else
+ move.l (11*4+4,sp),a0 | a0: chunky
+ move.l (11*4+8,sp),a2 | a2: chunky end
+ move.l (11*4+12,sp),a1 | a1: screen
+ move.l (11*4+16,sp),d0 | d0.l: screen pitch (double width)
+#endif
+
+ move.l sp,old_sp
+
+ move.l d0,screen_pitch
+
+ lsr.l #1,d0
+ lea (a1,d0.l),a7 | a7: end of first dst line
+
+ move.l d0,screen_offset
+
+ move.l #0x0f0f0f0f,d4
+ move.l #0x00ff00ff,d5
+ move.l #0x55555555,d6
+
+ move.l (a0)+,d0
+ move.l (a0)+,d1
+ move.l (a0)+,d2
+ move.l (a0)+,d3
+
+ | a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
+ | e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
+ | i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
+ | m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
+
+ move.l d1,d7
+ lsr.l #4,d7
+ eor.l d0,d7
+ and.l d4,d7
+ eor.l d7,d0
+ lsl.l #4,d7
+ eor.l d7,d1
+ move.l d3,d7
+ lsr.l #4,d7
+ eor.l d2,d7
+ and.l d4,d7
+ eor.l d7,d2
+ lsl.l #4,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
+ | a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
+ | i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
+ | i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
+
+ move.l d2,d7
+ lsr.l #8,d7
+ eor.l d0,d7
+ and.l d5,d7
+ eor.l d7,d0
+ lsl.l #8,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #8,d7
+ eor.l d1,d7
+ and.l d5,d7
+ eor.l d7,d1
+ lsl.l #8,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
+ | a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
+ | b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
+ | b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
+
+ bra.s c2p1x1_8_tt_start
+
+c2p1x1_8_tt_pix16:
+ move.l (a0)+,d0
+ move.l (a0)+,d1
+ move.l (a0)+,d2
+ move.l (a0)+,d3
+
+ | a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
+ | e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
+ | i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
+ | m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
+
+ move.l d1,d7
+ lsr.l #4,d7
+ move.l a3,(a1)+
+ eor.l d0,d7
+ and.l d4,d7
+ eor.l d7,d0
+ lsl.l #4,d7
+ eor.l d7,d1
+ move.l d3,d7
+ lsr.l #4,d7
+ eor.l d2,d7
+ and.l d4,d7
+ eor.l d7,d2
+ move.l a4,(a1)+
+ lsl.l #4,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
+ | a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
+ | i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
+ | i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
+
+ move.l d2,d7
+ lsr.l #8,d7
+ eor.l d0,d7
+ and.l d5,d7
+ eor.l d7,d0
+ move.l a5,(a1)+
+ lsl.l #8,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #8,d7
+ eor.l d1,d7
+ and.l d5,d7
+ eor.l d7,d1
+ move.l a6,(a1)+
+ lsl.l #8,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
+ | a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
+ | b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
+ | b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
+
+ cmp.l a1,a7 | end of dst line?
+ bne.s c2p1x1_8_tt_start
+
+ add.l (screen_offset,pc),a1
+ add.l (screen_pitch,pc),a7
+
+c2p1x1_8_tt_start:
+ move.l d2,d7
+ lsr.l #1,d7
+ eor.l d0,d7
+ and.l d6,d7
+ eor.l d7,d0
+ add.l d7,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #1,d7
+ eor.l d1,d7
+ and.l d6,d7
+ eor.l d7,d1
+ add.l d7,d7
+ eor.l d7,d3
+
+ | a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5
+ | a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
+ | a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
+ | a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
+
+ move.w d2,d7
+ move.w d0,d2
+ swap d2
+ move.w d2,d0
+ move.w d7,d2
+ move.w d3,d7
+ move.w d1,d3
swap d3
- movew d3,d1
- movew d7,d3
-/*
- d0 = a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4
- d1 = a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
- d2 = c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
- d3 = c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
-*/
- movel d2,d7
- lsrl #2,d7
- eorl d0,d7
- andl #0x33333333,d7
- eorl d7,d0
- lsll #2,d7
- eorl d7,d2
-
- movel d3,d7
- lsrl #2,d7
- eorl d1,d7
- andl #0x33333333,d7
- eorl d7,d1
- lsll #2,d7
- eorl d7,d3
-/*
- d0 = a7b7c7d7e7f7g7h7 i7j7k7l7m7n7o7p7 a6b6c6d6e6f6g6h6 i6j6k6l6m6n6o6p6
- d1 = a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
- d2 = a5b5c5d5e5f5g5h5 i5j5k5l5m5n5o5p5 a4b4c4d4e4f4g4h4 i4j4k4l4m4n4o4p4
- d3 = a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
-*/
+ move.w d3,d1
+ move.w d7,d3
+
+ | a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4
+ | a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
+ | c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
+ | c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
+
+ move.l d2,d7
+ lsr.l #2,d7
+ eor.l d0,d7
+ and.l #0x33333333,d7
+ eor.l d7,d0
+ lsl.l #2,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #2,d7
+ eor.l d1,d7
+ and.l #0x33333333,d7
+ eor.l d7,d1
+ lsl.l #2,d7
+ eor.l d7,d3
+
+ | a7b7c7d7e7f7g7h7 i7j7k7l7m7n7o7p7 a6b6c6d6e6f6g6h6 i6j6k6l6m6n6o6p6
+ | a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
+ | a5b5c5d5e5f5g5h5 i5j5k5l5m5n5o5p5 a4b4c4d4e4f4g4h4 i4j4k4l4m4n4o4p4
+ | a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
+
swap d0
swap d1
swap d2
swap d3
- movel d0,a6
- movel d2,a5
- movel d1,a4
- movel d3,a3
+ move.l d0,a6
+ move.l d2,a5
+ move.l d1,a4
+ move.l d3,a3
- cmpl a0,a2
- bgt SDL_Atari_C2p8_pix16
+ cmp.l a0,a2
+ bne c2p1x1_8_tt_pix16
- movel a3,a1@+
- movel a4,a1@+
- movel a5,a1@+
- movel a6,a1@+
+ move.l a3,(a1)+
+ move.l a4,(a1)+
+ move.l a5,(a1)+
+ move.l a6,(a1)+
- /* Double the line ? */
+ move.l old_sp,sp
+ movem.l (sp)+,d2-d7/a2-a6
+#endif
+ rts
- movel c2p_srcpitch,d0
- movel c2p_dstpitch,d1
- tstl c2p_dblligne
- beqs SDL_Atari_C2p8_nodblline
+| void SDL_Atari_C2pConvert8_rect(const byte *pChunky, const byte *pChunkyEnd, uint32 chunkyWidth, uint32 chunkyPitch, byte *pScreen, uint32 screenPitch);
+SYM(SDL_Atari_C2pConvert8_rect):
+#if !defined(__mcoldfire__)
+ movem.l d2-d7/a2-a6,-(sp) | 6 + 5 = 11 longs
+
+#ifdef __FASTCALL__
+ | a0: chunky
+ move.l a1,chunky_end
+ | d0.l: chunky width
+ move.l (11*4+4,sp),a1 | a1: screen
+ exg d1,d2 | d2.l: chunky pitch
+ | d1.l: screen pitch
+#else
+ move.l (11*4+4,sp),a0 | a0: chunky
+ move.l (11*4+8,sp),chunky_end
+ move.l (11*4+12,sp),d0 | d0.l: chunky width
+ move.l (11*4+16,sp),d2 | d2.l: chunky pitch
+ move.l (11*4+20,sp),a1 | a1: screen
+ move.l (11*4+24,sp),d1 | d1.l: screen pitch
+#endif
- movel c2p_curdst,a0
- movel a0,a1
- addl d1,a1
+ move.l sp,old_sp
+
+ lea (a0,d0.l),a2 | a2: end of first src line
+ lea (a1,d0.l),a7 | a7: end of first dst line
+
+ move.l d1,screen_pitch
+
+ sub.l d0,d1
+ move.l d1,screen_offset
+
+ move.l d2,chunky_pitch
+
+ sub.l d0,d2
+ move.l d2,chunky_offset
+
+ move.l #0x0f0f0f0f,d4
+ move.l #0x00ff00ff,d5
+ move.l #0x55555555,d6
+
+ move.l (a0)+,d0
+ move.l (a0)+,d1
+ move.l (a0)+,d2
+ move.l (a0)+,d3
+
+ | a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
+ | e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
+ | i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
+ | m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
+
+ move.l d1,d7
+ lsr.l #4,d7
+ eor.l d0,d7
+ and.l d4,d7
+ eor.l d7,d0
+ lsl.l #4,d7
+ eor.l d7,d1
+ move.l d3,d7
+ lsr.l #4,d7
+ eor.l d2,d7
+ and.l d4,d7
+ eor.l d7,d2
+ lsl.l #4,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
+ | a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
+ | i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
+ | i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
+
+ move.l d2,d7
+ lsr.l #8,d7
+ eor.l d0,d7
+ and.l d5,d7
+ eor.l d7,d0
+ lsl.l #8,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #8,d7
+ eor.l d1,d7
+ and.l d5,d7
+ eor.l d7,d1
+ lsl.l #8,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
+ | a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
+ | b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
+ | b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
+
+ bra.s c2p1x1_8_rect_start
+
+c2p1x1_8_rect_pix16:
+ move.l (a0)+,d0
+ move.l (a0)+,d1
+ move.l (a0)+,d2
+ move.l (a0)+,d3
+
+ | a7a6a5a4a3a2a1a0 b7b6b5b4b3b2b1b0 c7c6c5c4c3c2c1c0 d7d6d5d4d3d2d1d0
+ | e7e6e5e4e3e2e1e0 f7f6f5f4f3f2f1f0 g7g6g5g4g3g2g1g0 h7h6h5h4h3h2h1h0
+ | i7i6i5i4i3i2i1i0 j7j6j5j4j3j2j1j0 k7k6k5k4k3k2k1k0 l7l6l5l4l3l2l1l0
+ | m7m6m5m4m3m2m1m0 n7n6n5n4n3n2n1n0 o7o6o5o4o3o2o1o0 p7p6p5p4p3p2p1p0
+
+ move.l d1,d7
+ lsr.l #4,d7
+ move.l a3,(a1)+
+ eor.l d0,d7
+ and.l d4,d7
+ eor.l d7,d0
+ lsl.l #4,d7
+ eor.l d7,d1
+ move.l d3,d7
+ lsr.l #4,d7
+ eor.l d2,d7
+ and.l d4,d7
+ eor.l d7,d2
+ move.l a4,(a1)+
+ lsl.l #4,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 b7b6b5b4f7f6f5f4 c7c6c5c4g7g6g5g4 d7d6d5d4h7h6h5h4
+ | a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
+ | i7i6i5i4m7m6m5m4 j7j6j5j4n7n6n5n4 k7k6k5k4o7o6o5o4 l7l6l5l4p7p6p5p4
+ | i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
+
+ move.l d2,d7
+ lsr.l #8,d7
+ eor.l d0,d7
+ and.l d5,d7
+ eor.l d7,d0
+ move.l a5,(a1)+
+ lsl.l #8,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #8,d7
+ eor.l d1,d7
+ and.l d5,d7
+ eor.l d7,d1
+ move.l a6,(a1)+
+ lsl.l #8,d7
+ eor.l d7,d3
+
+ | a7a6a5a4e7e6e5e4 i7i6i5i4m7m6m5m4 c7c6c5c4g7g6g5g4 k7k6k5k4o7o6o5o4
+ | a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
+ | b7b6b5b4f7f6f5f4 j7j6j5j4n7n6n5n4 d7d6d5d4h7h6h5h4 l7l6l5l4p7p6p5p4
+ | b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
+
+ cmp.l a1,a7 | end of dst line?
+ bne.s c2p1x1_8_rect_start
+
+ add.l (screen_offset,pc),a1
+ add.l (screen_pitch,pc),a7
+
+c2p1x1_8_rect_start:
+ move.l d2,d7
+ lsr.l #1,d7
+ eor.l d0,d7
+ and.l d6,d7
+ eor.l d7,d0
+ add.l d7,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #1,d7
+ eor.l d1,d7
+ and.l d6,d7
+ eor.l d7,d1
+ add.l d7,d7
+ eor.l d7,d3
+
+ | a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5
+ | a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
+ | a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
+ | a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
+
+ move.w d2,d7
+ move.w d0,d2
+ swap d2
+ move.w d2,d0
+ move.w d7,d2
+ move.w d3,d7
+ move.w d1,d3
+ swap d3
+ move.w d3,d1
+ move.w d7,d3
+
+ | a7b7a5b5e7f7e5f5 i7j7i5j5m7n7m5n5 a6b6a4b4e6f6e4f4 i6j6i4j4m6n6m4n4
+ | a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
+ | c7d7c5d5g7h7g5h5 k7l7k5l5o7p7o5p5 c6d6c4d4g6h6g4h4 k6l6k4l4o6p6o4p4
+ | c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
+
+ move.l d2,d7
+ lsr.l #2,d7
+ eor.l d0,d7
+ and.l #0x33333333,d7
+ eor.l d7,d0
+ lsl.l #2,d7
+ eor.l d7,d2
+ move.l d3,d7
+ lsr.l #2,d7
+ eor.l d1,d7
+ and.l #0x33333333,d7
+ eor.l d7,d1
+ lsl.l #2,d7
+ eor.l d7,d3
+
+ | a7b7c7d7e7f7g7h7 i7j7k7l7m7n7o7p7 a6b6c6d6e6f6g6h6 i6j6k6l6m6n6o6p6
+ | a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
+ | a5b5c5d5e5f5g5h5 i5j5k5l5m5n5o5p5 a4b4c4d4e4f4g4h4 i4j4k4l4m4n4o4p4
+ | a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
- movew c2p_width+2,d7
- lsrw #4,d7
- subql #1,d7
-SDL_Atari_C2p8_dblloop:
- movel a0@+,a1@+
- movel a0@+,a1@+
- movel a0@+,a1@+
- movel a0@+,a1@+
- dbra d7,SDL_Atari_C2p8_dblloop
+ swap d0
+ swap d1
+ swap d2
+ swap d3
- addl d1,c2p_curdst
+ move.l d0,a6
+ move.l d2,a5
+ move.l d1,a4
+ move.l d3,a3
-SDL_Atari_C2p8_nodblline:
+ cmp.l a0,a2 | end of src line?
+ bne c2p1x1_8_rect_pix16
- /* Next line */
+ cmp.l (chunky_end,pc),a2
+ beq.s c2p1x1_8_rect_done
- addl d0,c2p_cursrc
- addl d1,c2p_curdst
+ add.l (chunky_offset,pc),a0
+ add.l (chunky_pitch,pc),a2
- subqw #1,c2p_row
- bne SDL_Atari_C2p8_rowloop
+ bra c2p1x1_8_rect_pix16
- moveml sp@+,d2-d7/a2-a6
+c2p1x1_8_rect_done:
+ move.l a3,(a1)+
+ move.l a4,(a1)+
+ move.l a5,(a1)+
+ move.l a6,(a1)+
+
+ move.l old_sp,sp
+ movem.l (sp)+,d2-d7/a2-a6
#endif
rts
/* ------------ Conversion C2P, 4 bits ------------ */
+| void SDL_Atari_C2pConvert4(const byte *pChunky, const byte *pChunkyEnd, byte *pScreen);
SYM(SDL_Atari_C2pConvert4):
#if !defined(__mcoldfire__)
- movel sp@(4),c2p_source
- movel sp@(8),c2p_dest
- movel sp@(12),c2p_width
- movel sp@(16),c2p_height
- movel sp@(20),c2p_dblligne
- movel sp@(24),c2p_srcpitch
- movel sp@(28),c2p_dstpitch
-
- moveml d2-d7/a2-a6,sp@-
-
- movel c2p_source,a0
- movel c2p_dest,a1
- lea c2p_table,a2
- movel #0x00070001,d3
-#if defined(__mc68020__)
- moveq #0,d0
-#endif
-
- movel c2p_height,d7
- subql #1,d7
-c2p4_bcly:
- movel a0,a4 | Save start address of source
- movel a1,a5 | Save start address of dest
-
- | Conversion
-
- movel c2p_width,d6
- lsrw #4,d6
- subql #1,d6
-c2p4_bclx:
- | Octets 0-7
-
- moveq #0,d1
- moveq #7,d5
-c2p4_bcl07:
-#if defined(__mc68020__)
- moveb a0@+,d0
- lea a2@(0,d0:w:4),a3
+#ifdef __FASTCALL__
+ | a0: chunky
+ move.l a1,d0 | chunky end
+ move.l 4(sp),a1 | screen
#else
- moveq #0,d0
- moveb a0@+,d0
- lslw #2,d0
- lea a2@(0,d0:w),a3
+ move.l (4,sp),a0 | chunky
+ move.l (8,sp),d0 | chunky end
+ move.l (12,sp),a1 | screen
#endif
- lsll #1,d1
- orl a3@,d1
- dbra d5,c2p4_bcl07
+ movem.l d2-d7/a2-a6,-(sp)
+ move.l d0,a2
+ lea SYM(SDL_Atari_C2pPalette4),a3
+ lea (4,a0),a4
+
+ moveq #0,d3
+ move.l #0x33333333,d4
+ move.l #0x00ff00ff,d5
+ move.l #0x55555555,d6
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d0
+ lsl.b #4,d0
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d0
+ lsl.b #4,d0
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d0
+ lsl.b #4,d0
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d0
+ lsl.b #4,d0
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d0
+ addq.l #4,a0
+ addq.l #4,a4
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d1
+ lsl.b #4,d1
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d1
+ lsl.b #4,d1
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d1
+ lsl.b #4,d1
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d1
+ lsl.b #4,d1
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d1
+ addq.l #4,a0
+ addq.l #4,a4
+
+ bra c2p1x1_4_start
+
+c2p1x1_4_pix16:
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d0
+ lsl.b #4,d0
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d0
+ lsl.b #4,d0
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d0
+ lsl.b #4,d0
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d0
+ lsl.b #4,d0
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d0
+
+ move.l a5,(a1)+
+ addq.l #4,a0
+ addq.l #4,a4
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d1
+ lsl.b #4,d1
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d1
+ lsl.b #4,d1
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d1
+ lsl.b #4,d1
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a3,d3.l),d1
+ lsl.b #4,d1
+ move.b (a4)+,d3
+ or.b (a3,d3.l),d1
+
+ move.l a6,(a1)+
+ addq.l #4,a0
+ addq.l #4,a4
+
+c2p1x1_4_start:
+
+ | a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
+ | i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
+
+ move.l d1,d7
+ lsr.l #8,d7
+ eor.l d0,d7
+ and.l d5,d7
+ eor.l d7,d0
+ lsl.l #8,d7
+ eor.l d7,d1
+
+ | a3a2a1a0e3e2e1e0 i3i2i1i0m3m2m1m0 c3c2c1c0g3g2g1g0 k3k2k1k0o3o2o1o0
+ | b3b2b1b0f3f2f1f0 j3j2j1j0n3n2n1n0 d3d2d1d0h3h2h1h0 l3l2l1l0p3p2p1p0
+
+ move.l d1,d7
+ lsr.l #1,d7
+ eor.l d0,d7
+ and.l d6,d7
+ eor.l d7,d0
+ add.l d7,d7
+ eor.l d7,d1
+
+ | a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1
+ | a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
+
+ move.w d1,d7
+ move.w d0,d1
+ swap d1
+ move.w d1,d0
+ move.w d7,d1
- movepl d1,a1@(0)
- addw d3,a1
- swap d3
-
- | Octets 8-15
-
- moveq #0,d1
- moveq #7,d5
-c2p4_bcl815:
-#if defined(__mc68020__)
- moveb a0@+,d0
- lea a2@(0,d0:w:4),a3
-#else
- moveq #0,d0
- moveb a0@+,d0
- lslw #2,d0
- lea a2@(0,d0:w),a3
-#endif
- lsll #1,d1
- orl a3@,d1
- dbra d5,c2p4_bcl815
+ | a3b3a1b1e3f3e1f1 i3j3i1j1m3n3m1n1 a2b2a0b0e2f2e0f0 i2j2i0j0m2n2m0n0
+ | c3d3c1d1g3h3g1h1 k3l3k1l1o3p3o1p1 c2d2c0d0g2h2g0h0 k2l2k0l0o2p2o0p0
- movepl d1,a1@(0)
- addw d3,a1
- swap d3
+ move.l d1,d7
+ lsr.l #2,d7
+ eor.l d0,d7
+ and.l d4,d7
+ eor.l d7,d0
+ lsl.l #2,d7
+ eor.l d7,d1
- dbra d6,c2p4_bclx
+ | a3b3c3d3e3f3g3h3 i3j3k3l3m3n3o3p3 a2b2c2d2e2f2g2h2 i2j2k2l2m2n2o2p2
+ | a1b1c1d1e1f1g1h1 i1j1k1l1m1n1o1p1 a0b0c0d0e0f0g0h0 i0j0k0l0m0n0o0p0
- | Double line ?
+ swap d0
+ swap d1
- tstl c2p_dblligne
- beqs c2p4_nodblligne
+ move.l d1,a5
+ move.l d0,a6
- movel a5,a6 | src line
- movel a5,a1 | dest line
- addl c2p_dstpitch,a1
+ cmp.l a0,a2
+ bne c2p1x1_4_pix16
- movel c2p_width,d6
- lsrw #3,d6
- subql #1,d6
-c2p4_copydbl:
- movel a6@+,a1@+
- dbra d6,c2p4_copydbl
+ move.l a5,(a1)+
+ move.l a6,(a1)+
- addl c2p_dstpitch,a5
-c2p4_nodblligne:
+ movem.l (sp)+,d2-d7/a2-a6
+#endif
+ rts
- | Next line
- movel a4,a0
- addl c2p_srcpitch,a0
- movel a5,a1
- addl c2p_dstpitch,a1
+| void SDL_Atari_C2pConvert4_rect(const byte *pChunky, const byte *pChunkyEnd, uint32 chunkyWidth, uint32 chunkyPitch, byte *pScreen, uint32 screenPitch);
+SYM(SDL_Atari_C2pConvert4_rect):
+#if !defined(__mcoldfire__)
+ movem.l d2-d7/a2-a6,-(sp) | 6 + 5 = 11 longs
+
+#ifdef __FASTCALL__
+ | a0: chunky
+ move.l a1,chunky_end
+ | d0.l: chunky width
+ move.l (11*4+4,sp),a1 | a1: screen
+ exg d1,d2 | d2.l: chunky pitch
+ | d1.l: screen pitch
+#else
+ move.l (11*4+4,sp),a0 | a0: chunky
+ move.l (11*4+8,sp),chunky_end
+ move.l (11*4+12,sp),d0 | d0.l: chunky width
+ move.l (11*4+16,sp),d2 | d2.l: chunky pitch
+ move.l (11*4+20,sp),a1 | a1: screen
+ move.l (11*4+24,sp),d1 | d1.l: screen pitch
+#endif
- dbra d7,c2p4_bcly
+ move.l d0,d3 | d3.l: screen width
+ lsr.l #1,d3 |
+
+ lea (a0,d0.l),a2 | a2: end of first src line
+ lea (a1,d3.l),a3 | a3: end of first dst line
+ lea SYM(SDL_Atari_C2pPalette4),a4
+
+ move.l d1,screen_pitch
+
+ sub.l d3,d1
+ move.l d1,screen_offset
+
+ move.l d2,chunky_pitch
+
+ sub.l d0,d2
+ move.l d2,chunky_offset
+
+ moveq #0,d3
+ move.l #0x33333333,d4
+ move.l #0x00ff00ff,d5
+ move.l #0x55555555,d6
+
+ lea (a0,4),a5
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d0
+ lsl.b #4,d0
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d0
+ lsl.b #4,d0
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d0
+ lsl.b #4,d0
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d0
+ lsl.b #4,d0
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d0
+
+ addq.l #4,a0
+ addq.l #4,a5
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d1
+ lsl.b #4,d1
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d1
+ lsl.b #4,d1
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d1
+ lsl.b #4,d1
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d1
+ lsl.b #4,d1
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d1
+
+ addq.l #4,a0
+
+ bra c2p1x1_4_rect_start
+
+c2p1x1_4_rect_pix16:
+ move.l a5,(a1)+
+
+ lea (a0,4),a5
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d0
+ lsl.b #4,d0
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d0
+ lsl.b #4,d0
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d0
+ lsl.b #4,d0
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d0
+ lsl.l #8,d0
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d0
+ lsl.b #4,d0
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d0
+
+ move.l a6,(a1)+
+ addq.l #4,a0
+ addq.l #4,a5
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d1
+ lsl.b #4,d1
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d1
+ lsl.b #4,d1
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d1
+ lsl.b #4,d1
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d1
+ lsl.l #8,d1
+
+ move.b (a0)+,d3
+ move.b (a4,d3.l),d1
+ lsl.b #4,d1
+ move.b (a5)+,d3
+ or.b (a4,d3.l),d1
+
+ addq.l #4,a0
+
+ cmp.l a1,a3 | end of dst line?
+ bne.s c2p1x1_4_rect_start
+
+ add.l (screen_offset,pc),a1
+ add.l (screen_pitch,pc),a3
+
+c2p1x1_4_rect_start:
+
+ | a3a2a1a0e3e2e1e0 b3b2b1b0f3f2f1f0 c3c2c1c0g3g2g1g0 d3d2d1d0h3h2h1h0
+ | i3i2i1i0m3m2m1m0 j3j2j1j0n3n2n1n0 k3k2k1k0o3o2o1o0 l3l2l1l0p3p2p1p0
+
+
(Patch may be truncated; please check the link at the top of this post.)