From 257277903e23c3e4cb2565524b34f80b73ac20da Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <[EMAIL REDACTED]>
Date: Tue, 19 Jul 2022 02:16:08 -0400
Subject: [PATCH] audio: first attempt at rewriting the channel converters.
This is not ready for production use!
---
src/audio/SDL_audio_c.h | 2 +-
src/audio/SDL_audiocvt.c | 1626 ++++++++++++++++++++++++++------------
2 files changed, 1132 insertions(+), 496 deletions(-)
diff --git a/src/audio/SDL_audio_c.h b/src/audio/SDL_audio_c.h
index bfa0760de59..3f22cd614a3 100644
--- a/src/audio/SDL_audio_c.h
+++ b/src/audio/SDL_audio_c.h
@@ -25,7 +25,7 @@
#include "../SDL_internal.h"
#ifndef DEBUG_CONVERT
-#define DEBUG_CONVERT 0
+#define DEBUG_CONVERT 1
#endif
#if DEBUG_CONVERT
diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c
index 0b95b417c75..1117420e935 100644
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@@ -63,61 +63,41 @@
# endif
#endif
-#if HAVE_SSE3_INTRINSICS
-/* Convert from stereo to mono. Average left and right. */
-static void SDLCALL
-SDL_ConvertStereoToMono_SSE3(SDL_AudioCVT * cvt, SDL_AudioFormat format)
-{
- const __m128 divby2 = _mm_set1_ps(0.5f);
- float *dst = (float *) cvt->buf;
- const float *src = dst;
- int i = cvt->len_cvt / 8;
-
- LOG_DEBUG_CONVERT("stereo", "mono (using SSE3)");
- SDL_assert(format == AUDIO_F32SYS);
-
- /* Do SSE blocks as long as we have 16 bytes available.
- Just use unaligned load/stores, if the memory at runtime is
- aligned it'll be just as fast on modern processors */
- while (i >= 4) { /* 4 * float32 */
- _mm_storeu_ps(dst, _mm_mul_ps(_mm_hadd_ps(_mm_loadu_ps(src), _mm_loadu_ps(src+4)), divby2));
- i -= 4; src += 8; dst += 4;
- }
-
- /* Finish off any leftovers with scalar operations. */
- while (i) {
- *dst = (src[0] + src[1]) * 0.5f;
- dst++; i--; src += 2;
- }
-
- cvt->len_cvt /= 2;
- if (cvt->filters[++cvt->filter_index]) {
- cvt->filters[cvt->filter_index] (cvt, format);
- }
-}
-#endif
-
-/* Convert from stereo to mono. Average left and right. */
-static void SDLCALL
-SDL_ConvertStereoToMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
-{
- float *dst = (float *) cvt->buf;
- const float *src = dst;
- int i;
-
- LOG_DEBUG_CONVERT("stereo", "mono");
- SDL_assert(format == AUDIO_F32SYS);
-
- for (i = cvt->len_cvt / 8; i; --i, src += 2) {
- *(dst++) = (src[0] + src[1]) * 0.5f;
- }
-
- cvt->len_cvt /= 2;
- if (cvt->filters[++cvt->filter_index]) {
- cvt->filters[cvt->filter_index] (cvt, format);
- }
-}
-
+/*
+ * CHANNEL LAYOUTS AS SDL EXPECTS THEM:
+ *
+ * (Even if the platform expects a different layout, SDL will
+ * swizzle between the app and the platform.)
+ *
+ * Abbreviations:
+ * - FRONT=single mono speaker
+ * - FL=front left speaker
+ * - FR=front right speaker
+ * - FC=front center speaker
+ * - BL=back left speaker
+ * - BR=back right speaker
+ * - SR=side right speaker
+ * - SL=side left speaker
+ * - BC=back center speaker
+ * - LFE=low-frequency speaker
+ *
+ * These are listed in the order they are laid out in
+ * memory, so "FL+FR" means "the front left speaker is
+ * laid out in memory first, then the front right, then
+ * it repeats for the next audio frame".
+ *
+ * 1 channel (mono) layout: FRONT
+ * 2 channels (stereo) layout: FL+FR
+ * 3 channels (2.1) layout: FL+FR+LFE
+ * 4 channels (quad) layout: FL+FR+BL+BR
+ * 5 channels (4.1) layout: FL+FR+LFE+BL+BR
+ * 6 channels (5.1) layout: FL+FR+FC+LFE+BL+BR
+ * 7 channels (6.1) layout: FL+FR+FC+LFE+BC+SL+SR
+ * 8 channels (7.1) layout: FL+FR+FC+LFE+BL+BR+SL+SR
+ */
+
+
+#if 0 /* !!! FIXME: these need to be updated to match the new scalar code. */
#if HAVE_AVX_INTRINSICS
/* MSVC will always accept AVX intrinsics when compiling for x64 */
#if defined(__clang__) || defined(__GNUC__)
@@ -225,258 +205,979 @@ SDL_Convert51ToStereo_SSE(SDL_AudioCVT * cvt, SDL_AudioFormat format)
_mm_storeu_ps(dst, out);
- i -= 2; src += 12; dst += 4;
- }
+ i -= 2; src += 12; dst += 4;
+ }
+
+
+ /* Finish off any leftovers with scalar operations. */
+ while (i) {
+ const float front_center_distributed = src[2] * 0.5f;
+ dst[0] = (src[0] + front_center_distributed + src[4]) * two_fifths_f; /* left */
+ dst[1] = (src[1] + front_center_distributed + src[5]) * two_fifths_f; /* right */
+ i--; src += 6; dst+=2;
+ }
+
+ cvt->len_cvt /= 3;
+ if (cvt->filters[++cvt->filter_index]) {
+ cvt->filters[cvt->filter_index] (cvt, format);
+ }
+}
+#endif
+
+#if HAVE_NEON_INTRINSICS
+/* Convert from 5.1 to stereo. Average left and right, distribute center, discard LFE.
+ *
+ * NEON variant. Works in place: src and dst both start at cvt->buf, and the
+ * write cursor advances 2 floats per frame while the read cursor advances 6.
+ * Per frame: left = (FL + 0.5*FC + BL) * (1/2.5), right = (FR + 0.5*FC + BR) * (1/2.5).
+ * Afterwards cvt->len_cvt is divided by 3 and the next entry in cvt->filters
+ * is called if it is non-NULL.
+ *
+ * NOTE(review): this sits inside the "#if 0" region opened above; its mix
+ * coefficients presumably still have to be brought in line with the scalar
+ * SDL_Convert51ToStereo before it can be re-enabled -- confirm.
+ */
+static void SDLCALL
+SDL_Convert51ToStereo_NEON(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+ float *dst = (float *) cvt->buf;
+ const float *src = dst;
+ int i = cvt->len_cvt / (sizeof (float) * 6); /* number of 6-float input frames */
+ const float two_fifths_f = 1.0f / 2.5f;
+ const float32x4_t two_fifths_v = vdupq_n_f32(two_fifths_f);
+ const float32x4_t half = vdupq_n_f32(0.5f);
+
+ LOG_DEBUG_CONVERT("5.1", "stereo (using NEON)");
+ SDL_assert(format == AUDIO_F32SYS);
+
+ /* SDL's 5.1 layout: FL+FR+FC+LFE+BL+BR */
+
+ /* Just use unaligned load/stores, it's the same NEON instructions and
+ hopefully even unaligned NEON is faster than the scalar fallback. */
+ while (i >= 2) {
+ /* Two 5.1 samples (12 floats) fit nicely in three 128bit */
+ /* registers. Using shuffles they can be rearranged so that */
+ /* the conversion math can be vectorized. */
+ const float32x4_t in0 = vld1q_f32(src); /* 0FL 0FR 0FC 0LF */
+ const float32x4_t in1 = vld1q_f32(src + 4); /* 0BL 0BR 1FL 1FR */
+ const float32x4_t in2 = vld1q_f32(src + 8); /* 1FC 1LF 1BL 1BR */
+
+ /* 0FC 0FC 1FC 1FC (each already scaled by one half) */
+ const float32x4_t fc_distributed = vmulq_f32(half, vcombine_f32(vdup_lane_f32(vget_high_f32(in0), 0), vdup_lane_f32(vget_low_f32(in2), 0)));
+
+ /* 0FL 0FR 1BL 1BR */
+ const float32x4_t blended = vcombine_f32(vget_low_f32(in0), vget_high_f32(in2));
+
+ /* 0FL 0FR 1BL 1BR */
+ /* + 0BL 0BR 1FL 1FR */
+ /* = 0L 0R 1L 1R */
+ float32x4_t out = vaddq_f32(blended, in1);
+ out = vaddq_f32(out, fc_distributed);
+ out = vmulq_f32(out, two_fifths_v);
+
+ vst1q_f32(dst, out);
+
+ i -= 2; src += 12; dst += 4; /* consumed two input frames, produced two stereo frames */
+ }
+
+ /* Finish off any leftovers with scalar operations. */
+ while (i) {
+ const float front_center_distributed = src[2] * 0.5f;
+ dst[0] = (src[0] + front_center_distributed + src[4]) * two_fifths_f; /* left */
+ dst[1] = (src[1] + front_center_distributed + src[5]) * two_fifths_f; /* right */
+ i--; src += 6; dst+=2;
+ }
+
+ cvt->len_cvt /= 3; /* 6 channels in, 2 channels out */
+ if (cvt->filters[++cvt->filter_index]) {
+ cvt->filters[cvt->filter_index] (cvt, format);
+ }
+}
+#endif
+#endif
+
+
+/* Channel conversion is now mostly following this scheme, borrowed from FNA
+ (which is following the scheme of XNA)...
+ https://github.com/FNA-XNA/FAudio/blob/master/src/matrix_defaults.inl */
+
+/* CONVERT FROM MONO... */
+/* Mono duplicates to stereo and all other channels are silenced. */
+
+/* Generates SDL_ConvertMonoTo<toname>(): expands mono float32 data in place to
+   num_channels channels. The mono sample is copied to the first two (front)
+   channels; zeroingcode silences the rest through the local pointer `dst`.
+   Frames are handled last-to-first so that growing the data in place never
+   overwrites a mono sample that has not been read yet. */
+#define CVT_MONO_TO(toname, tonamestr, num_channels, zeroingcode) \
+    static void SDLCALL SDL_ConvertMonoTo##toname(SDL_AudioCVT * cvt, SDL_AudioFormat format) { \
+        float *base = (float *) cvt->buf; \
+        const int num_frames = cvt->len_cvt / sizeof (float); \
+        int frame; \
+        LOG_DEBUG_CONVERT("mono", tonamestr); \
+        SDL_assert(format == AUDIO_F32SYS); \
+        SDL_assert(num_channels >= 2); \
+        for (frame = num_frames - 1; frame >= 0; frame--) { \
+            const float sample = base[frame]; \
+            float *dst = base + (frame * (num_channels)); \
+            dst[1] = sample; \
+            dst[0] = sample; \
+            zeroingcode; \
+        } \
+        cvt->len_cvt *= num_channels; \
+        cvt->filter_index++; \
+        if (cvt->filters[cvt->filter_index]) { \
+            cvt->filters[cvt->filter_index] (cvt, format); \
+        } \
+    }
+CVT_MONO_TO(Stereo, "stereo", 2, {});
+CVT_MONO_TO(21, "2.1", 3, { dst[2] = 0.0f; });
+CVT_MONO_TO(Quad, "quad", 4, { dst[2] = dst[3] = 0.0f; });
+CVT_MONO_TO(41, "4.1", 5, { dst[2] = dst[3] = dst[4] = 0.0f; });
+CVT_MONO_TO(51, "5.1", 6, { dst[2] = dst[3] = dst[4] = dst[5] = 0.0f; });
+CVT_MONO_TO(61, "6.1", 7, { dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = 0.0f; });
+CVT_MONO_TO(71, "7.1", 8, { dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = 0.0f; });
+#undef CVT_MONO_TO
+
+
+
+/* CONVERT FROM STEREO... */
+/* Stereo duplicates to two front speakers and all other channels are silenced. */
+
+#if HAVE_SSE3_INTRINSICS
+/* Stereo -> mono downmix, SSE3 path. Every output sample is the mean of the
+   left and right input samples. Runs in place on cvt->buf, halves
+   cvt->len_cvt, then calls the next filter in the chain if there is one. */
+static void SDLCALL
+SDL_ConvertStereoToMono_SSE3(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    const __m128 half = _mm_set1_ps(0.5f);
+    const float *in = (const float *) cvt->buf;
+    float *out = (float *) cvt->buf;
+    int remaining = cvt->len_cvt / 8;  /* stereo float32 frames still to convert */
+
+    LOG_DEBUG_CONVERT("stereo", "mono (using SSE3)");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    /* Four frames per pass. Unaligned loads/stores are used throughout; on
+       modern processors they cost nothing extra when the memory happens to
+       be aligned. */
+    for (; remaining >= 4; remaining -= 4, in += 8, out += 4) {
+        const __m128 lo = _mm_loadu_ps(in);         /* L0 R0 L1 R1 */
+        const __m128 hi = _mm_loadu_ps(in + 4);     /* L2 R2 L3 R3 */
+        const __m128 sums = _mm_hadd_ps(lo, hi);    /* L0+R0 L1+R1 L2+R2 L3+R3 */
+        _mm_storeu_ps(out, _mm_mul_ps(sums, half));
+    }
+
+    /* Scalar tail for the last one to three frames. */
+    for (; remaining; remaining--, in += 2, out++) {
+        *out = (in[0] + in[1]) * 0.5f;
+    }
+
+    cvt->len_cvt /= 2;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+#endif
+
+/* Stereo -> mono downmix (scalar). Each output sample is the mean of the left
+   and right input samples. Runs in place on cvt->buf, halves cvt->len_cvt,
+   then calls the next filter in the chain if there is one. */
+static void SDLCALL
+SDL_ConvertStereoToMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *samples = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 2);
+    int frame;
+
+    LOG_DEBUG_CONVERT("stereo", "mono");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    /* Front-to-back: output slot `frame` never lies beyond the input frame being read. */
+    for (frame = 0; frame < num_frames; frame++) {
+        const float left = samples[frame * 2];
+        const float right = samples[(frame * 2) + 1];
+        samples[frame] = (left + right) * 0.5f;
+    }
+
+    cvt->len_cvt /= 2;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* Generates SDL_ConvertStereoTo<toname>(): expands stereo float32 data in place
+   to num_channels channels. Left/right go to the first two (front) channels;
+   zeroingcode silences the rest through the local pointer `dst`. Frames are
+   handled last-to-first so the growing output never overwrites unread input. */
+#define CVT_STEREO_TO(toname, tonamestr, num_channels, zeroingcode) \
+    static void SDLCALL SDL_ConvertStereoTo##toname(SDL_AudioCVT * cvt, SDL_AudioFormat format) { \
+        float *base = (float *) cvt->buf; \
+        const int num_frames = cvt->len_cvt / (sizeof (float) * 2); \
+        int frame; \
+        LOG_DEBUG_CONVERT("stereo", tonamestr); \
+        SDL_assert(format == AUDIO_F32SYS); \
+        SDL_assert(num_channels >= 3); \
+        for (frame = num_frames - 1; frame >= 0; frame--) { \
+            const float left = base[frame * 2]; \
+            const float right = base[(frame * 2) + 1]; \
+            float *dst = base + (frame * (num_channels)); \
+            dst[0] = left; \
+            dst[1] = right; \
+            zeroingcode; \
+        } \
+        cvt->len_cvt = (cvt->len_cvt / 2) * num_channels; \
+        cvt->filter_index++; \
+        if (cvt->filters[cvt->filter_index]) { \
+            cvt->filters[cvt->filter_index] (cvt, format); \
+        } \
+    }
+
+CVT_STEREO_TO(21, "2.1", 3, { dst[2] = 0.0f; });
+CVT_STEREO_TO(Quad, "quad", 4, { dst[2] = dst[3] = 0.0f; });
+CVT_STEREO_TO(41, "4.1", 5, { dst[2] = dst[3] = dst[4] = 0.0f; });
+CVT_STEREO_TO(51, "5.1", 6, { dst[2] = dst[3] = dst[4] = dst[5] = 0.0f; });
+CVT_STEREO_TO(61, "6.1", 7, { dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = 0.0f; });
+CVT_STEREO_TO(71, "7.1", 8, { dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = 0.0f; });
+#undef CVT_STEREO_TO
+
+
+
+/* CONVERT FROM 2.1... */
+/* 2.1 duplicates to two front speakers (and LFE when available) and all other channels are silenced. */
+
+/* Convert from 2.1 to mono. Average left and right, drop LFE.
+ *
+ * Runs in place on cvt->buf (float32 samples, layout FL+FR+LFE). On return
+ * cvt->len_cvt is one third of its previous value and the next filter in
+ * cvt->filters, if any, has been invoked.
+ */
+static void SDLCALL
+SDL_Convert21ToMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *dst = (float *) cvt->buf;
+    const float *src = dst;
+    int i;
+
+    LOG_DEBUG_CONVERT("2.1", "mono");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    /* A 2.1 frame is THREE floats, so the frame count is len / (4 * 3).
+       Dividing by two floats per frame would iterate 1.5x too often and
+       read past the end of the valid input. */
+    for (i = cvt->len_cvt / (sizeof (float) * 3); i; --i, src += 3) {
+        *(dst++) = (src[0] + src[1]) * 0.5f;
+    }
+
+    cvt->len_cvt /= 3;
+    if (cvt->filters[++cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* Generates SDL_Convert21To<toname>(): converts 2.1 float32 data (FL+FR+LFE)
+   in place to num_channels channels. FL/FR are copied to the first two
+   channels; customcode fills the remaining ones through the locals `dst`
+   (current output frame) and `lfe` (current input LFE sample).
+
+   Direction matters because input and output share cvt->buf:
+    - when the output frame is no larger than the input frame (stereo), frames
+      are processed front-to-back so output never runs ahead of unread input;
+    - when the output frame is larger, frames are processed back-to-front so
+      the growing output never overwrites unread input.
+   Each input frame is three floats wide and is read completely into locals
+   before anything is written. */
+#define CVT_21_TO(toname, tonamestr, num_channels, customcode) \
+    static void SDLCALL SDL_Convert21To##toname(SDL_AudioCVT * cvt, SDL_AudioFormat format) { \
+        float *base = (float *) cvt->buf; \
+        const int num_frames = cvt->len_cvt / (sizeof (float) * 3); \
+        const int forward = ((num_channels) <= 3); \
+        int i; \
+        LOG_DEBUG_CONVERT("2.1", tonamestr); \
+        SDL_assert(format == AUDIO_F32SYS); \
+        SDL_assert(num_channels >= 2); \
+        for (i = 0; i < num_frames; i++) { \
+            const int frame = forward ? i : (num_frames - 1 - i); \
+            const float *src = base + (frame * 3); \
+            float *dst = base + (frame * (num_channels)); \
+            const float lf = src[0]; \
+            const float rf = src[1]; \
+            const float lfe = src[2]; \
+            dst[0] = lf; \
+            dst[1] = rf; \
+            customcode; \
+        } \
+        cvt->len_cvt = (cvt->len_cvt / 3) * num_channels; \
+        if (cvt->filters[++cvt->filter_index]) { \
+            cvt->filters[cvt->filter_index] (cvt, format); \
+        } \
+    }
+
+CVT_21_TO(Stereo, "stereo", 2, { (void) lfe; });
+CVT_21_TO(Quad, "quad", 4, { (void) lfe; dst[2] = dst[3] = 0.0f; });
+CVT_21_TO(41, "4.1", 5, { dst[2] = lfe; dst[3] = dst[4] = 0.0f; });
+CVT_21_TO(51, "5.1", 6, { dst[2] = 0.0f; dst[3] = lfe; dst[4] = dst[5] = 0.0f; });
+CVT_21_TO(61, "6.1", 7, { dst[2] = 0.0f; dst[3] = lfe; dst[4] = dst[5] = dst[6] = 0.0f; });
+CVT_21_TO(71, "7.1", 8, { dst[2] = 0.0f; dst[3] = lfe; dst[4] = dst[5] = dst[6] = dst[7] = 0.0f; });
+#undef CVT_21_TO
+
+
+/* CONVERT FROM QUAD... */
+
+/* Convert from quad to mono. Average all four channels.
+ *
+ * Runs in place on cvt->buf (float32 samples, layout FL+FR+BL+BR). On return
+ * cvt->len_cvt is one quarter of its previous value and the next filter in
+ * cvt->filters, if any, has been invoked.
+ */
+static void SDLCALL
+SDL_ConvertQuadToMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *dst = (float *) cvt->buf;
+    const float *src = dst;
+    int i;
+
+    LOG_DEBUG_CONVERT("quad", "mono");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    /* !!! FIXME: could benefit from SIMD */
+    for (i = cvt->len_cvt / (sizeof (float) * 4); i; --i, src += 4) {
+        /* Quad has no LFE slot: the back channels are at indices 2 and 3.
+           (Indices 3 and 4 are the 4.1 positions and would skip BL and read
+           into the next frame.) */
+        const float fl = src[0];
+        const float fr = src[1];
+        const float bl = src[2];
+        const float br = src[3];
+        *(dst++) = (fl + fr + bl + br) * 0.25f;
+    }
+
+    cvt->len_cvt /= 4;
+    if (cvt->filters[++cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* Quad (FL+FR+BL+BR) -> stereo downmix, in place on cvt->buf. Each output side
+   is a weighted blend of its own front channel, its own back channel and a
+   smaller share of the opposite back channel. Halves cvt->len_cvt, then calls
+   the next filter in the chain if there is one. */
+static void SDLCALL
+SDL_ConvertQuadToStereo(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 4);
+    int frame;
+
+    LOG_DEBUG_CONVERT("quad", "stereo");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    /* !!! FIXME: could benefit from SIMD */
+    for (frame = 0; frame < num_frames; frame++) {
+        const float *in = base + (frame * 4);
+        float *out = base + (frame * 2);
+        const float fl = in[0];
+        const float fr = in[1];
+        const float bl = in[2];
+        const float br = in[3];
+        /* !!! FIXME: FNA/XNA mixes a little of the back right into the left (and back left into the right)...but this can't possibly be right, right...? */
+        out[0] = (fl * 0.421000004f) + (bl * 0.358999997f) + (br * 0.219999999f);
+        out[1] = (fr * 0.421000004f) + (br * 0.358999997f) + (bl * 0.219999999f);
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 4) * 2;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* Quad (FL+FR+BL+BR) -> 2.1 (FL+FR+LFE) downmix, in place on cvt->buf. The two
+   front outputs use the same blend as the quad-to-stereo conversion; the LFE
+   output is silent. Sets cvt->len_cvt to three quarters of its old value,
+   then calls the next filter in the chain if there is one. */
+static void SDLCALL
+SDL_ConvertQuadTo21(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 4);
+    int frame;
+
+    LOG_DEBUG_CONVERT("quad", "2.1");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    /* !!! FIXME: could benefit from SIMD */
+    for (frame = 0; frame < num_frames; frame++) {
+        const float *in = base + (frame * 4);
+        float *out = base + (frame * 3);
+        const float fl = in[0];
+        const float fr = in[1];
+        const float bl = in[2];
+        const float br = in[3];
+        /* !!! FIXME: FNA/XNA mixes a little of the back right into the left (and back left into the right)...but this can't possibly be right, right...? */
+        out[0] = (fl * 0.421000004f) + (bl * 0.358999997f) + (br * 0.219999999f);
+        out[1] = (fr * 0.421000004f) + (br * 0.358999997f) + (bl * 0.219999999f);
+        out[2] = 0.0f;  /* lfe */
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 4) * 3;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* Quad (FL+FR+BL+BR) -> 4.1 (FL+FR+LFE+BL+BR), in place on cvt->buf. The four
+   speakers are copied through and a silent LFE is inserted. Frames are handled
+   last-to-first so the growing output never overwrites unread input. */
+static void SDLCALL
+SDL_ConvertQuadTo41(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 4);
+    int frame;
+
+    LOG_DEBUG_CONVERT("quad", "4.1");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = num_frames - 1; frame >= 0; frame--) {
+        const float *in = base + (frame * 4);
+        float *out = base + (frame * 5);
+        /* Read the whole input frame first; frame 0 overlaps its own output. */
+        const float fl = in[0];
+        const float fr = in[1];
+        const float bl = in[2];
+        const float br = in[3];
+        out[4] = br;
+        out[3] = bl;
+        out[2] = 0.0f;  /* LFE */
+        out[1] = fr;
+        out[0] = fl;
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 4) * 5;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* Quad (FL+FR+BL+BR) -> 5.1 (FL+FR+FC+LFE+BL+BR), in place on cvt->buf. The
+   four speakers are copied through; FC and LFE are silent. Frames are handled
+   last-to-first so the growing output never overwrites unread input. */
+static void SDLCALL
+SDL_ConvertQuadTo51(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 4);
+    int frame;
+
+    LOG_DEBUG_CONVERT("quad", "5.1");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = num_frames - 1; frame >= 0; frame--) {
+        const float *in = base + (frame * 4);
+        float *out = base + (frame * 6);
+        /* Read the whole input frame first; frame 0 overlaps its own output. */
+        const float fl = in[0];
+        const float fr = in[1];
+        const float bl = in[2];
+        const float br = in[3];
+        out[5] = br;
+        out[4] = bl;
+        out[3] = 0.0f;  /* LFE */
+        out[2] = 0.0f;  /* FC */
+        out[1] = fr;
+        out[0] = fl;
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 4) * 6;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* Quad (FL+FR+BL+BR) -> 6.1 (FL+FR+FC+LFE+BC+SL+SR), in place on cvt->buf.
+   Fronts are attenuated, the back pair feeds the side speakers (attenuated)
+   and their average feeds back-center; FC and LFE are silent. Frames are
+   handled last-to-first so the growing output never overwrites unread input. */
+static void SDLCALL
+SDL_ConvertQuadTo61(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 4);
+    int frame;
+
+    LOG_DEBUG_CONVERT("quad", "6.1");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    /* !!! FIXME: I'm skeptical XNA/FNA's conversion is right, here. */
+
+    for (frame = num_frames - 1; frame >= 0; frame--) {
+        const float *in = base + (frame * 4);
+        float *out = base + (frame * 7);
+        /* Read the whole input frame first; frame 0 overlaps its own output. */
+        const float fl = in[0];
+        const float fr = in[1];
+        const float bl = in[2];
+        const float br = in[3];
+        out[6] = br * 0.796000004f;
+        out[5] = bl * 0.796000004f;
+        out[4] = (bl + br) * 0.5f;  /* average BL+BR to BC */
+        out[3] = 0.0f;  /* LFE */
+        out[2] = 0.0f;  /* FC */
+        out[1] = fr * 0.939999998f;
+        out[0] = fl * 0.939999998f;
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 4) * 7;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* Quad (FL+FR+BL+BR) -> 7.1 (FL+FR+FC+LFE+BL+BR+SL+SR), in place on cvt->buf.
+   The four speakers are copied through; FC, LFE and both sides are silent.
+   Frames are handled last-to-first so the growing output never overwrites
+   unread input. */
+static void SDLCALL
+SDL_ConvertQuadTo71(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 4);
+    int frame;
+
+    LOG_DEBUG_CONVERT("quad", "7.1");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = num_frames - 1; frame >= 0; frame--) {
+        const float *in = base + (frame * 4);
+        float *out = base + (frame * 8);
+        /* Read the whole input frame first; frame 0 overlaps its own output. */
+        const float fl = in[0];
+        const float fr = in[1];
+        const float bl = in[2];
+        const float br = in[3];
+        out[7] = 0.0f;  /* SR */
+        out[6] = 0.0f;  /* SL */
+        out[5] = br;
+        out[4] = bl;
+        out[3] = 0.0f;  /* LFE */
+        out[2] = 0.0f;  /* FC */
+        out[1] = fr;
+        out[0] = fl;
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 4) * 8;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+
+/* CONVERT FROM 4.1... */
+
+/* 4.1 (FL+FR+LFE+BL+BR) -> mono, in place on cvt->buf. Averages the four
+   speakers and ignores the LFE. Divides cvt->len_cvt by five, then calls the
+   next filter in the chain if there is one. */
+static void SDLCALL
+SDL_Convert41ToMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 5);
+    int frame;
+
+    LOG_DEBUG_CONVERT("4.1", "mono");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = 0; frame < num_frames; frame++) {
+        const float *in = base + (frame * 5);
+        const float fl = in[0];
+        const float fr = in[1];
+        const float bl = in[3];
+        const float br = in[4];
+        base[frame] = (fl + fr + bl + br) * 0.25f;
+    }
+
+    cvt->len_cvt /= 5;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* 4.1 (FL+FR+LFE+BL+BR) -> stereo downmix, in place on cvt->buf. Each output
+   side blends its own front channel, its own back channel and a smaller share
+   of the opposite back channel; the LFE is ignored. Sets cvt->len_cvt to two
+   fifths of its old value, then calls the next filter if there is one. */
+static void SDLCALL
+SDL_Convert41ToStereo(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 5);
+    int frame;
+
+    LOG_DEBUG_CONVERT("4.1", "stereo");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = 0; frame < num_frames; frame++) {
+        const float *in = base + (frame * 5);
+        float *out = base + (frame * 2);
+        const float fl = in[0];
+        const float fr = in[1];
+        const float bl = in[3];
+        const float br = in[4];
+        /* !!! FIXME: FNA/XNA mixes a little of the back right into the left (and back left into the right) and a little of the LFE...but this can't possibly be right, right...? */
+        out[0] = (fl * 0.374222219f) + (bl * 0.319111109f) + (br * 0.195555553f);
+        out[1] = (fr * 0.374222219f) + (br * 0.319111109f) + (bl * 0.195555553f);
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 5) * 2;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* 4.1 (FL+FR+LFE+BL+BR) -> 2.1 (FL+FR+LFE) downmix, in place on cvt->buf. The
+   front outputs use the same blend as the 4.1-to-stereo conversion; the LFE is
+   passed through unchanged. Sets cvt->len_cvt to three fifths of its old
+   value, then calls the next filter in the chain if there is one. */
+static void SDLCALL
+SDL_Convert41To21(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 5);
+    int frame;
+
+    LOG_DEBUG_CONVERT("4.1", "2.1");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = 0; frame < num_frames; frame++) {
+        const float *in = base + (frame * 5);
+        float *out = base + (frame * 3);
+        const float fl = in[0];
+        const float fr = in[1];
+        const float lfe = in[2];
+        const float bl = in[3];
+        const float br = in[4];
+        /* !!! FIXME: FNA/XNA mixes a little of the back right into the left (and back left into the right) and a little of the LFE...but this can't possibly be right, right...? */
+        out[0] = (fl * 0.374222219f) + (bl * 0.319111109f) + (br * 0.195555553f);
+        out[1] = (fr * 0.374222219f) + (br * 0.319111109f) + (bl * 0.195555553f);
+        out[2] = lfe;
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 5) * 3;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* 4.1 (FL+FR+LFE+BL+BR) -> quad (FL+FR+BL+BR), in place on cvt->buf. The four
+   speakers are copied through and the LFE is dropped. Sets cvt->len_cvt to
+   four fifths of its old value, then calls the next filter if there is one. */
+static void SDLCALL
+SDL_Convert41ToQuad(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 5);
+    int frame;
+
+    LOG_DEBUG_CONVERT("4.1", "quad");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = 0; frame < num_frames; frame++) {
+        const float *in = base + (frame * 5);
+        float *out = base + (frame * 4);
+        /* !!! FIXME: FNA/XNA mixes a little of the LFE into every channel...but this can't possibly be right, right...? I just drop the LFE and copy the channels. */
+        const float fl = in[0];
+        const float fr = in[1];
+        const float bl = in[3];
+        const float br = in[4];
+        out[0] = fl;
+        out[1] = fr;
+        out[2] = bl;
+        out[3] = br;
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 5) * 4;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* 4.1 (FL+FR+LFE+BL+BR) -> 5.1 (FL+FR+FC+LFE+BL+BR), in place on cvt->buf. All
+   five input channels are copied through and a silent FC is inserted. Frames
+   are handled last-to-first so the growing output never overwrites unread
+   input. */
+static void SDLCALL
+SDL_Convert41To51(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 5);
+    int frame;
+
+    LOG_DEBUG_CONVERT("4.1", "5.1");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = num_frames - 1; frame >= 0; frame--) {
+        const float *in = base + (frame * 5);
+        float *out = base + (frame * 6);
+        /* Read the whole input frame first; frame 0 overlaps its own output. */
+        const float fl = in[0];
+        const float fr = in[1];
+        const float lfe = in[2];
+        const float bl = in[3];
+        const float br = in[4];
+        out[5] = br;
+        out[4] = bl;
+        out[3] = lfe;
+        out[2] = 0.0f;  /* FC */
+        out[1] = fr;
+        out[0] = fl;
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 5) * 6;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* Convert from 4.1 (FL+FR+LFE+BL+BR) to 6.1 (FL+FR+FC+LFE+BC+SL+SR).
+ *
+ * Runs in place on cvt->buf (float32 samples). Fronts are attenuated, the
+ * back pair feeds the side speakers (attenuated) and their average feeds
+ * back-center, the LFE is passed through and FC is silent. On return
+ * cvt->len_cvt is 7/5 of its previous value and the next filter in
+ * cvt->filters, if any, has been invoked.
+ */
+static void SDLCALL
+SDL_Convert41To61(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    const float *src = ((const float *) (cvt->buf + cvt->len_cvt)) - 5;
+    /* The input frame is FIVE floats wide, so the end of the output is
+       (len / 5) * 7 bytes in. Dividing by 4 (the quad frame size) would place
+       the output past where the converted data is supposed to end. */
+    float *dst = ((float *) (cvt->buf + ((cvt->len_cvt / 5) * 7))) - 7;
+    int i;
+
+    LOG_DEBUG_CONVERT("4.1", "6.1");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    /* !!! FIXME: I'm skeptical XNA/FNA's conversion is right, here. */
+
+    /* Walk back-to-front, writing high channels first, so the growing output
+       never overwrites input that has not been read yet. */
+    for (i = cvt->len_cvt / (sizeof (float) * 5); i; --i, src -= 5, dst -= 7) {
+        const float bl = src[3];
+        const float br = src[4];
+        dst[6] = br * 0.796000004f;
+        dst[5] = bl * 0.796000004f;
+        dst[4] = (bl + br) * 0.5f; /* average BL+BR to BC */
+        dst[3] = src[2]; /* LFE */
+        dst[2] = 0.0f; /* FC */
+        dst[1] = src[1] * 0.939999998f;
+        dst[0] = src[0] * 0.939999998f;
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 5) * 7;
+    if (cvt->filters[++cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* 4.1 (FL+FR+LFE+BL+BR) -> 7.1 (FL+FR+FC+LFE+BL+BR+SL+SR), in place on
+   cvt->buf. All five input channels are copied through; FC and both sides are
+   silent. Frames are handled last-to-first so the growing output never
+   overwrites unread input. */
+static void SDLCALL
+SDL_Convert41To71(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 5);
+    int frame;
+
+    LOG_DEBUG_CONVERT("4.1", "7.1");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = num_frames - 1; frame >= 0; frame--) {
+        const float *in = base + (frame * 5);
+        float *out = base + (frame * 8);
+        /* Read the whole input frame first; frame 0 overlaps its own output. */
+        const float fl = in[0];
+        const float fr = in[1];
+        const float lfe = in[2];
+        const float bl = in[3];
+        const float br = in[4];
+        out[7] = 0.0f;  /* SR */
+        out[6] = 0.0f;  /* SL */
+        out[5] = br;
+        out[4] = bl;
+        out[3] = lfe;
+        out[2] = 0.0f;  /* FC */
+        out[1] = fr;
+        out[0] = fl;
+    }
+
+    cvt->len_cvt = (cvt->len_cvt / 5) * 8;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+
+
+/* CONVERT FROM 5.1... */
+
+/* 5.1 (FL+FR+FC+LFE+BL+BR) -> mono, in place on cvt->buf. Averages the five
+   speakers and ignores the LFE. Divides cvt->len_cvt by six, then calls the
+   next filter in the chain if there is one. */
+static void SDLCALL
+SDL_Convert51ToMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *base = (float *) cvt->buf;
+    const int num_frames = cvt->len_cvt / (sizeof (float) * 6);
+    int frame;
+
+    LOG_DEBUG_CONVERT("5.1", "mono");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    for (frame = 0; frame < num_frames; frame++) {
+        const float *in = base + (frame * 6);
+        const float fl = in[0];
+        const float fr = in[1];
+        const float fc = in[2];
+        const float bl = in[4];
+        const float br = in[5];
+        base[frame] = (fl + fr + fc + bl + br) * 0.200000003f;
+    }
+
+    cvt->len_cvt /= 6;
+    cvt->filter_index++;
+    if (cvt->filters[cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+static void SDLCALL
+SDL_Convert51ToStereo(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+ float *dst = (float *) cvt->buf;
+ const float *src = dst;
+ int i;
+
+ LOG_DEBUG_CONVERT("5.1", "stereo");
+ SDL_assert(format == AUDIO_F32SYS);
+ for (i = cvt->len_cvt / (sizeof (float) * 6); i; --i, src += 6) {
+ const float fl = src[0];
+ const float fr = src[1];
+ const float fc = src[2];
+ const float bl = src[4];
+ const float br = src[5];
+ const float extra = 0.090909094f / 4.0f; /* this was the LFE distribution, we'll just split it between the other channels for now. */
- /* Finish off any leftovers with scalar operations. */
- while (i) {
- const float front_center_distributed = src[2] * 0.5f;
- dst[0] = (src[0] + front_center_distributed + src[4]) * two_fifths_f; /* left */
- dst[1] = (src[1] + front_center_distributed + src[5]) * two_fifths_f; /* right */
- i--; src += 6; dst+=2;
+ /* !!! FIXME: FNA/XNA mixes a little of the back right into the left (and back left into the right) and a little of the LFE...but this can't possibly be right, right...? */
+ *(dst++) = (fl * (0.294545442f+extra)) + (fc * (0.208181813f+extra)) + (bl * (0.251818180f+extra)) + (br * (0.154545456f+extra));
+ *(dst++) = (fr * (0.294545442f+extra)) + (fc * (0.208181813f+extra)) + (br * (0.251818180f+extra)) + (bl * (0.154545456f+extra));
}
- cvt->len_cvt /= 3;
+ cvt->len_cvt = (cvt->len_cvt / 6) * 2;
if (cvt->filters[++cvt->filter_index]) {
cvt->filters[cvt->filter_index] (cvt, format);
}
}
-#endif
-#if HAVE_NEON_INTRINSICS
-/* Convert from 5.1 to stereo. Average left and right, distribute center, discard LFE. */
static void SDLCALL
-SDL_Convert51ToStereo_NEON(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+SDL_Convert51To21(SDL_AudioCVT * cvt, SDL_AudioFormat format)
{
float *dst = (float *) cvt->buf;
const float *src = dst;
- int i = cvt->len_cvt / (sizeof (float) * 6);
- const float two_fifths_f = 1.0f / 2.5f;
- const float32x4_t two_fifths_v = vdupq_n_f32(two_fifths_f);
- const float32x4_t half = vdupq_n_f32(0.5f);
+ int i;
- LOG_DEBUG_CONVERT("5.1", "stereo (using NEON)");
+ LOG_DEBUG_CONVERT("5.1", "2.1");
SDL_assert(format == AUDIO_F32SYS);
- /* SDL's 5.1 layout: FL+FR+FC+LFE+BL+BR */
-
- /* Just use unaligned load/stores, it's the same NEON instructions and
- hopefully even unaligned NEON is faster than the scalar fallback. */
- while (i >= 2) {
- /* Two 5.1 samples (12 floats) fit nicely in three 128bit */
- /* registers. Using shuffles they can be rearranged so that */
- /* the conversion math can be vectorized. */
- const float32x4_t in0 = vld1q_f32(src); /* 0FL 0FR 0FC 0LF */
- const float32x4_t in1 = vld1q_f32(src + 4); /* 0BL 0BR 1FL 1FR */
- const float32x4_t in2 = vld1q_f32(src + 8); /* 1FC 1LF 1BL 1BR */
-
- /* 0FC 0FC 1FC 1FC */
- const float32x4_t fc_distributed = vmulq_f32(half, vcombine_f32(vdup_lane_f32(vget_high_f32(in0), 0), vdup_lane_f32(vget_low_f32(in2), 0)));
+ for (i = cvt->len_cvt / (sizeof (float) * 6); i; --i, src += 6) {
+ const float fl = src[0];
+ const float fr = src[1];
+ const float fc = src[2];
+ const float lfe = src[3];
+ const float bl = src[4];
+ const float br = src[5];
- /* 0FL 0FR 1BL 1BR */
- const float32x4_t blended = vcombine_f32(vget_low_f32(in0), vget_high_f32(in2));
+ /* !!! FIXME: FNA/XNA mixes a little of the back right into the left (and back left into the right) and a little of the LFE...but this can't possibly be right, right...? */
+ *(dst++) = (fl * 0.324000001f) + (fc * 0.229000002f) + (bl * 0.277000010f) + (br * 0.170000002f);
+ *(dst++) = (fr * 0.324000001f) + (fc * 0.229000002f) + (br * 0.277000010f) + (bl * 0.170000002f);
+ *(dst++) = lfe;
+ }
- /* 0FL 0FR 1BL 1BR */
- /* + 0BL 0BR 1FL 1FR */
- /* = 0L 0R 1L 1R */
- float32x4_t out = vaddq_f32(blended, in1);
- out = vaddq_f32(out, fc_distributed);
- out = vmulq_f32(out, two_fifths_v);
+ cvt->len_cvt = (cvt->len_cvt / 6) * 3;
+ if (cvt->filters[++cvt->filter_index]) {
+ cvt->filters[cvt->filter_index] (cvt, format);
+ }
+}
- vst1q_f32(dst, out);
+static void SDLCALL
+SDL_Convert51ToQuad(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+ float *dst = (float *) cvt->buf;
+ const float *src = dst;
+ int i;
- i -= 2; src += 12; dst += 4;
- }
+ LOG_DEBUG_CONVERT("5.1", "quad");
+ SDL_assert(format == AUDIO_F32SYS);
- /* Finish off any leftovers with scalar operations. */
- while (i) {
- const float front_center_distributed = src[2] * 0.5f;
- dst[0] = (src[0] + front_center_distributed + src[4]) * two_fifths_f; /* left */
- dst[1] = (src[1] + front_center_distributed + src[5]) * two_fifths_f; /* right */
- i--; src += 6; dst+=2;
+ for (i = cvt->len_cvt / (sizeof (float) * 6); i; --i, src += 6) {
+ const float fl = src[0];
+ const float fr =
(Patch may be truncated, please check the link at the top of this post.)