aom: add aom_highbd_10_mse16x16_avx2

From 72bbef26ed929fc3a5ea6dae3a4069135a6aad99 Mon Sep 17 00:00:00 2001
From: James Zern <[EMAIL REDACTED]>
Date: Fri, 22 Mar 2024 15:14:27 -0700
Subject: [PATCH] add aom_highbd_10_mse16x16_avx2

highbd_10_variance_avx2 has been available since:
f2b7da03c3 Add avx2 variants of highbd 8x8 and 16x16 var modules

but the corresponding mse function wasn't created. Only 16x16 is added
in this change, as an avx2 8x8 is slower than the sse2 version. There's
no sse2 implementation for 8x16 or 16x8; one should be explored before
adding avx2 for those sizes.

The 8-bit and 12-bit variants are also omitted to keep the library size
down.

Change-Id: If701627d31059ea413aba63a422fa89e0ea33e74
---
 aom_dsp/aom_dsp_rtcd_defs.pl       |  5 +++++
 aom_dsp/x86/highbd_variance_avx2.c | 11 +++++++++++
 test/variance_test.cc              |  5 +++++
 3 files changed, 21 insertions(+)

diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 7d8cfb948..7e746e9cb 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -1355,6 +1355,11 @@ ()
         specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/;
         specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/;
         specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/;
+      } elsif ($bd eq 10) {
+        specialize "aom_highbd_${bd}_mse16x16", qw/avx2 sse2 neon sve/;
+        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
+        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
+        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
       } else {
         specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/;
         specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
diff --git a/aom_dsp/x86/highbd_variance_avx2.c b/aom_dsp/x86/highbd_variance_avx2.c
index adbb736cf..21e9e8b28 100644
--- a/aom_dsp/x86/highbd_variance_avx2.c
+++ b/aom_dsp/x86/highbd_variance_avx2.c
@@ -741,6 +741,17 @@ VAR_FN(8, 32, 8, 8)
 
 #undef VAR_FN
 
+unsigned int aom_highbd_10_mse16x16_avx2(const uint8_t *src8, int src_stride,
+                                         const uint8_t *ref8, int ref_stride,
+                                         unsigned int *sse) {
+  int sum;  // Passed to the helper by address below; never read here.
+  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  highbd_10_variance_avx2(src, src_stride, ref, ref_stride, 16, 16, sse, &sum,
+                          highbd_calc16x16var_avx2, 16);
+  return *sse;  // Returns the value |*sse| holds after the call above.
+}
+
 #define SSE2_HEIGHT(H)                                                 \
   uint32_t aom_highbd_10_sub_pixel_variance8x##H##_sse2(               \
       const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 11859034d..4afc7ce62 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -2828,6 +2828,11 @@ INSTANTIATE_TEST_SUITE_P(
                       MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10),
                       MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8),
                       MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8)));
+#if HAVE_AVX2  // Runs the 10-bit 16x16 AVX2 MSE through AvxHBDMseTest.
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_10_mse16x16_avx2, 10)));
+#endif  // HAVE_AVX2
 
 const VarianceParams kArrayHBDVariance_sse2[] = {
   VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12),