aom: *variance*.c: make some functions static

From 1f03a0dbb8a749d80b9d8c5058d8e2a904cbc55d Mon Sep 17 00:00:00 2001
From: James Zern <[EMAIL REDACTED]>
Date: Fri, 16 Feb 2024 19:17:11 -0800
Subject: [PATCH] *variance*.c: make some functions static

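Fixes some -Wmissing-prototypes warnings. The affected helper functions
are only called from within the file that defines them, so they are made
static and their aom_ prefix is dropped.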

Bug: aomedia:3416
Change-Id: If89d559126eaf84951c8de98e22e38b6723a4eab
---
 aom_dsp/variance.c                 | 124 ++++++++++++++---------------
 aom_dsp/x86/highbd_variance_avx2.c |  50 ++++++------
 aom_dsp/x86/highbd_variance_sse2.c |  12 +--
 aom_dsp/x86/variance_avx2.c        |  26 +++---
 aom_dsp/x86/variance_impl_avx2.c   |   6 +-
 aom_dsp/x86/variance_sse2.c        |  12 +--
 6 files changed, 114 insertions(+), 116 deletions(-)

diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c
index f02c3077ae..1b8ced4a01 100644
--- a/aom_dsp/variance.c
+++ b/aom_dsp/variance.c
@@ -70,12 +70,10 @@ uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
 // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
 // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
 // It defines the offset required to move from one input to the next.
-void aom_var_filter_block2d_bil_first_pass_c(const uint8_t *a, uint16_t *b,
-                                             unsigned int src_pixels_per_line,
-                                             unsigned int pixel_step,
-                                             unsigned int output_height,
-                                             unsigned int output_width,
-                                             const uint8_t *filter) {
+static void var_filter_block2d_bil_first_pass_c(
+    const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
+    unsigned int pixel_step, unsigned int output_height,
+    unsigned int output_width, const uint8_t *filter) {
   unsigned int i, j;
 
   for (i = 0; i < output_height; ++i) {
@@ -100,12 +98,10 @@ void aom_var_filter_block2d_bil_first_pass_c(const uint8_t *a, uint16_t *b,
 // filter is applied horizontally (pixel_step = 1) or vertically
 // (pixel_step = stride). It defines the offset required to move from one input
 // to the next. Output is 8-bit.
-void aom_var_filter_block2d_bil_second_pass_c(const uint16_t *a, uint8_t *b,
-                                              unsigned int src_pixels_per_line,
-                                              unsigned int pixel_step,
-                                              unsigned int output_height,
-                                              unsigned int output_width,
-                                              const uint8_t *filter) {
+static void var_filter_block2d_bil_second_pass_c(
+    const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
+    unsigned int pixel_step, unsigned int output_height,
+    unsigned int output_width, const uint8_t *filter) {
   unsigned int i, j;
 
   for (i = 0; i < output_height; ++i) {
@@ -129,19 +125,19 @@ void aom_var_filter_block2d_bil_second_pass_c(const uint16_t *a, uint8_t *b,
     return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
   }
 
-#define SUBPIX_VAR(W, H)                                                      \
-  uint32_t aom_sub_pixel_variance##W##x##H##_c(                               \
-      const uint8_t *a, int a_stride, int xoffset, int yoffset,               \
-      const uint8_t *b, int b_stride, uint32_t *sse) {                        \
-    uint16_t fdata3[(H + 1) * W];                                             \
-    uint8_t temp2[H * W];                                                     \
-                                                                              \
-    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
-                                            bilinear_filters_2t[xoffset]);    \
-    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,       \
-                                             bilinear_filters_2t[yoffset]);   \
-                                                                              \
-    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);             \
+#define SUBPIX_VAR(W, H)                                                  \
+  uint32_t aom_sub_pixel_variance##W##x##H##_c(                           \
+      const uint8_t *a, int a_stride, int xoffset, int yoffset,           \
+      const uint8_t *b, int b_stride, uint32_t *sse) {                    \
+    uint16_t fdata3[(H + 1) * W];                                         \
+    uint8_t temp2[H * W];                                                 \
+                                                                          \
+    var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
+                                        bilinear_filters_2t[xoffset]);    \
+    var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,       \
+                                         bilinear_filters_2t[yoffset]);   \
+                                                                          \
+    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);         \
   }
 
 #define SUBPIX_AVG_VAR(W, H)                                                   \
@@ -153,10 +149,10 @@ void aom_var_filter_block2d_bil_second_pass_c(const uint16_t *a, uint8_t *b,
     uint8_t temp2[H * W];                                                      \
     DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                                \
                                                                                \
-    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W,  \
-                                            bilinear_filters_2t[xoffset]);     \
-    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
-                                             bilinear_filters_2t[yoffset]);    \
+    var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W,      \
+                                        bilinear_filters_2t[xoffset]);         \
+    var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,            \
+                                         bilinear_filters_2t[yoffset]);        \
                                                                                \
     aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);                     \
                                                                                \
@@ -170,10 +166,10 @@ void aom_var_filter_block2d_bil_second_pass_c(const uint16_t *a, uint8_t *b,
     uint8_t temp2[H * W];                                                      \
     DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                                \
                                                                                \
-    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W,  \
-                                            bilinear_filters_2t[xoffset]);     \
-    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
-                                             bilinear_filters_2t[yoffset]);    \
+    var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W,      \
+                                        bilinear_filters_2t[xoffset]);         \
+    var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,            \
+                                         bilinear_filters_2t[yoffset]);        \
                                                                                \
     aom_dist_wtd_comp_avg_pred(temp3, second_pred, W, H, temp2, W, jcp_param); \
                                                                                \
@@ -730,24 +726,24 @@ void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
   }
 }
 
-#define MASK_SUBPIX_VAR(W, H)                                                  \
-  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                     \
-      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
-      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
-      const uint8_t *msk, int msk_stride, int invert_mask,                     \
-      unsigned int *sse) {                                                     \
-    uint16_t fdata3[(H + 1) * W];                                              \
-    uint8_t temp2[H * W];                                                      \
-    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                                \
-                                                                               \
-    aom_var_filter_block2d_bil_first_pass_c(src, fdata3, src_stride, 1, H + 1, \
-                                            W, bilinear_filters_2t[xoffset]);  \
-    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
-                                             bilinear_filters_2t[yoffset]);    \
-                                                                               \
-    aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride,  \
-                         invert_mask);                                         \
-    return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse);          \
+#define MASK_SUBPIX_VAR(W, H)                                                 \
+  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                    \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,         \
+      const uint8_t *msk, int msk_stride, int invert_mask,                    \
+      unsigned int *sse) {                                                    \
+    uint16_t fdata3[(H + 1) * W];                                             \
+    uint8_t temp2[H * W];                                                     \
+    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                               \
+                                                                              \
+    var_filter_block2d_bil_first_pass_c(src, fdata3, src_stride, 1, H + 1, W, \
+                                        bilinear_filters_2t[xoffset]);        \
+    var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,           \
+                                         bilinear_filters_2t[yoffset]);       \
+                                                                              \
+    aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride, \
+                         invert_mask);                                        \
+    return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse);         \
   }
 
 MASK_SUBPIX_VAR(4, 4)
@@ -924,19 +920,19 @@ static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
     return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
   }
 
-#define OBMC_SUBPIX_VAR(W, H)                                                  \
-  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                       \
-      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
-      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
-    uint16_t fdata3[(H + 1) * W];                                              \
-    uint8_t temp2[H * W];                                                      \
-                                                                               \
-    aom_var_filter_block2d_bil_first_pass_c(pre, fdata3, pre_stride, 1, H + 1, \
-                                            W, bilinear_filters_2t[xoffset]);  \
-    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
-                                             bilinear_filters_2t[yoffset]);    \
-                                                                               \
-    return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse);          \
+#define OBMC_SUBPIX_VAR(W, H)                                                 \
+  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                      \
+      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,           \
+      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {          \
+    uint16_t fdata3[(H + 1) * W];                                             \
+    uint8_t temp2[H * W];                                                     \
+                                                                              \
+    var_filter_block2d_bil_first_pass_c(pre, fdata3, pre_stride, 1, H + 1, W, \
+                                        bilinear_filters_2t[xoffset]);        \
+    var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,           \
+                                         bilinear_filters_2t[yoffset]);       \
+                                                                              \
+    return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse);         \
   }
 
 OBMC_VAR(4, 4)
diff --git a/aom_dsp/x86/highbd_variance_avx2.c b/aom_dsp/x86/highbd_variance_avx2.c
index b4ff91d856..c39c238604 100644
--- a/aom_dsp/x86/highbd_variance_avx2.c
+++ b/aom_dsp/x86/highbd_variance_avx2.c
@@ -618,9 +618,9 @@ static uint32_t aom_highbd_var_filter_block2d_bil_avx2(
   return (var > 0) ? var : 0;
 }
 
-void aom_highbd_calc8x8var_avx2(const uint16_t *src, int src_stride,
-                                const uint16_t *ref, int ref_stride,
-                                uint32_t *sse, int *sum) {
+static void highbd_calc8x8var_avx2(const uint16_t *src, int src_stride,
+                                   const uint16_t *ref, int ref_stride,
+                                   uint32_t *sse, int *sum) {
   __m256i v_sum_d = _mm256_setzero_si256();
   __m256i v_sse_d = _mm256_setzero_si256();
   for (int i = 0; i < 8; i += 2) {
@@ -653,9 +653,9 @@ void aom_highbd_calc8x8var_avx2(const uint16_t *src, int src_stride,
   *sse = _mm_extract_epi32(v_d, 1);
 }
 
-void aom_highbd_calc16x16var_avx2(const uint16_t *src, int src_stride,
-                                  const uint16_t *ref, int ref_stride,
-                                  uint32_t *sse, int *sum) {
+static void highbd_calc16x16var_avx2(const uint16_t *src, int src_stride,
+                                     const uint16_t *ref, int ref_stride,
+                                     uint32_t *sse, int *sum) {
   __m256i v_sum_d = _mm256_setzero_si256();
   __m256i v_sse_d = _mm256_setzero_si256();
   const __m256i one = _mm256_set1_epi16(1);
@@ -703,19 +703,19 @@ static void highbd_10_variance_avx2(const uint16_t *src, int src_stride,
   *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
 }
 
-#define VAR_FN(w, h, block_size, shift)                                    \
-  uint32_t aom_highbd_10_variance##w##x##h##_avx2(                         \
-      const uint8_t *src8, int src_stride, const uint8_t *ref8,            \
-      int ref_stride, uint32_t *sse) {                                     \
-    int sum;                                                               \
-    int64_t var;                                                           \
-    uint16_t *src = CONVERT_TO_SHORTPTR(src8);                             \
-    uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);                             \
-    highbd_10_variance_avx2(                                               \
-        src, src_stride, ref, ref_stride, w, h, sse, &sum,                 \
-        aom_highbd_calc##block_size##x##block_size##var_avx2, block_size); \
-    var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift);               \
-    return (var >= 0) ? (uint32_t)var : 0;                                 \
+#define VAR_FN(w, h, block_size, shift)                                        \
+  uint32_t aom_highbd_10_variance##w##x##h##_avx2(                             \
+      const uint8_t *src8, int src_stride, const uint8_t *ref8,                \
+      int ref_stride, uint32_t *sse) {                                         \
+    int sum;                                                                   \
+    int64_t var;                                                               \
+    uint16_t *src = CONVERT_TO_SHORTPTR(src8);                                 \
+    uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);                                 \
+    highbd_10_variance_avx2(src, src_stride, ref, ref_stride, w, h, sse, &sum, \
+                            highbd_calc##block_size##x##block_size##var_avx2,  \
+                            block_size);                                       \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift);                   \
+    return (var >= 0) ? (uint32_t)var : 0;                                     \
   }
 
 VAR_FN(128, 128, 16, 14)
@@ -782,8 +782,8 @@ HIGHBD_SUBPIX_VAR(8, 8)
 
 #undef HIGHBD_SUBPIX_VAR
 
-uint64_t aom_mse_4xh_16bit_highbd_avx2(uint16_t *dst, int dstride,
-                                       uint16_t *src, int sstride, int h) {
+static uint64_t mse_4xh_16bit_highbd_avx2(uint16_t *dst, int dstride,
+                                          uint16_t *src, int sstride, int h) {
   uint64_t sum = 0;
   __m128i reg0_4x16, reg1_4x16, reg2_4x16, reg3_4x16;
   __m256i src0_8x16, src1_8x16, src_16x16;
@@ -840,8 +840,8 @@ uint64_t aom_mse_4xh_16bit_highbd_avx2(uint16_t *dst, int dstride,
   return sum;
 }
 
-uint64_t aom_mse_8xh_16bit_highbd_avx2(uint16_t *dst, int dstride,
-                                       uint16_t *src, int sstride, int h) {
+static uint64_t mse_8xh_16bit_highbd_avx2(uint16_t *dst, int dstride,
+                                          uint16_t *src, int sstride, int h) {
   uint64_t sum = 0;
   __m256i src0_8x16, src1_8x16, src_16x16;
   __m256i dst0_8x16, dst1_8x16, dst_16x16;
@@ -897,8 +897,8 @@ uint64_t aom_mse_wxh_16bit_highbd_avx2(uint16_t *dst, int dstride,
   assert((w == 8 || w == 4) && (h == 8 || h == 4) &&
          "w=8/4 and h=8/4 must satisfy");
   switch (w) {
-    case 4: return aom_mse_4xh_16bit_highbd_avx2(dst, dstride, src, sstride, h);
-    case 8: return aom_mse_8xh_16bit_highbd_avx2(dst, dstride, src, sstride, h);
+    case 4: return mse_4xh_16bit_highbd_avx2(dst, dstride, src, sstride, h);
+    case 8: return mse_8xh_16bit_highbd_avx2(dst, dstride, src, sstride, h);
     default: assert(0 && "unsupported width"); return -1;
   }
 }
diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c
index e897aab645..2fc2e1c0dd 100644
--- a/aom_dsp/x86/highbd_variance_sse2.c
+++ b/aom_dsp/x86/highbd_variance_sse2.c
@@ -637,8 +637,8 @@ void aom_highbd_dist_wtd_comp_avg_pred_sse2(
   }
 }
 
-uint64_t aom_mse_4xh_16bit_highbd_sse2(uint16_t *dst, int dstride,
-                                       uint16_t *src, int sstride, int h) {
+static uint64_t mse_4xh_16bit_highbd_sse2(uint16_t *dst, int dstride,
+                                          uint16_t *src, int sstride, int h) {
   uint64_t sum = 0;
   __m128i reg0_4x16, reg1_4x16;
   __m128i src_8x16;
@@ -682,8 +682,8 @@ uint64_t aom_mse_4xh_16bit_highbd_sse2(uint16_t *dst, int dstride,
   return sum;
 }
 
-uint64_t aom_mse_8xh_16bit_highbd_sse2(uint16_t *dst, int dstride,
-                                       uint16_t *src, int sstride, int h) {
+static uint64_t mse_8xh_16bit_highbd_sse2(uint16_t *dst, int dstride,
+                                          uint16_t *src, int sstride, int h) {
   uint64_t sum = 0;
   __m128i src_8x16;
   __m128i dst_8x16;
@@ -728,8 +728,8 @@ uint64_t aom_mse_wxh_16bit_highbd_sse2(uint16_t *dst, int dstride,
   assert((w == 8 || w == 4) && (h == 8 || h == 4) &&
          "w=8/4 and h=8/4 must satisfy");
   switch (w) {
-    case 4: return aom_mse_4xh_16bit_highbd_sse2(dst, dstride, src, sstride, h);
-    case 8: return aom_mse_8xh_16bit_highbd_sse2(dst, dstride, src, sstride, h);
+    case 4: return mse_4xh_16bit_highbd_sse2(dst, dstride, src, sstride, h);
+    case 8: return mse_8xh_16bit_highbd_sse2(dst, dstride, src, sstride, h);
     default: assert(0 && "unsupported width"); return -1;
   }
 }
diff --git a/aom_dsp/x86/variance_avx2.c b/aom_dsp/x86/variance_avx2.c
index 046d6f10f8..0f872fc392 100644
--- a/aom_dsp/x86/variance_avx2.c
+++ b/aom_dsp/x86/variance_avx2.c
@@ -518,8 +518,8 @@ void aom_highbd_comp_mask_pred_avx2(uint8_t *comp_pred8, const uint8_t *pred8,
   }
 }
 
-uint64_t aom_mse_4xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
-                                int sstride, int h) {
+static uint64_t mse_4xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
+                                   int sstride, int h) {
   uint64_t sum = 0;
   __m128i dst0_4x8, dst1_4x8, dst2_4x8, dst3_4x8, dst_16x8;
   __m128i src0_4x16, src1_4x16, src2_4x16, src3_4x16;
@@ -575,8 +575,9 @@ uint64_t aom_mse_4xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
 // In src buffer, each 4x4 block in a 32x32 filter block is stored sequentially.
 // Hence src_blk_stride is same as block width. Whereas dst buffer is a frame
 // buffer, thus dstride is a frame level stride.
-uint64_t aom_mse_4xh_quad_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
-                                     int src_blk_stride, int h) {
+static uint64_t mse_4xh_quad_16bit_avx2(uint8_t *dst, int dstride,
+                                        uint16_t *src, int src_blk_stride,
+                                        int h) {
   uint64_t sum = 0;
   __m128i dst0_16x8, dst1_16x8, dst2_16x8, dst3_16x8;
   __m256i dst0_16x16, dst1_16x16, dst2_16x16, dst3_16x16;
@@ -665,8 +666,8 @@ uint64_t aom_mse_4xh_quad_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
   return sum;
 }
 
-uint64_t aom_mse_8xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
-                                int sstride, int h) {
+static uint64_t mse_8xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
+                                   int sstride, int h) {
   uint64_t sum = 0;
   __m128i dst0_8x8, dst1_8x8, dst3_16x8;
   __m256i src0_8x16, src1_8x16, src_16x16, dst_16x16;
@@ -715,8 +716,9 @@ uint64_t aom_mse_8xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
 // In src buffer, each 8x8 block in a 64x64 filter block is stored sequentially.
 // Hence src_blk_stride is same as block width. Whereas dst buffer is a frame
 // buffer, thus dstride is a frame level stride.
-uint64_t aom_mse_8xh_dual_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
-                                     int src_blk_stride, int h) {
+static uint64_t mse_8xh_dual_16bit_avx2(uint8_t *dst, int dstride,
+                                        uint16_t *src, int src_blk_stride,
+                                        int h) {
   uint64_t sum = 0;
   __m128i dst0_16x8, dst1_16x8;
   __m256i dst0_16x16, dst1_16x16;
@@ -780,8 +782,8 @@ uint64_t aom_mse_wxh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
   assert((w == 8 || w == 4) && (h == 8 || h == 4) &&
          "w=8/4 and h=8/4 must be satisfied");
   switch (w) {
-    case 4: return aom_mse_4xh_16bit_avx2(dst, dstride, src, sstride, h);
-    case 8: return aom_mse_8xh_16bit_avx2(dst, dstride, src, sstride, h);
+    case 4: return mse_4xh_16bit_avx2(dst, dstride, src, sstride, h);
+    case 8: return mse_8xh_16bit_avx2(dst, dstride, src, sstride, h);
     default: assert(0 && "unsupported width"); return -1;
   }
 }
@@ -795,8 +797,8 @@ uint64_t aom_mse_16xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
   assert((w == 8 || w == 4) && (h == 8 || h == 4) &&
          "w=8/4 and h=8/4 must be satisfied");
   switch (w) {
-    case 4: return aom_mse_4xh_quad_16bit_avx2(dst, dstride, src, w * h, h);
-    case 8: return aom_mse_8xh_dual_16bit_avx2(dst, dstride, src, w * h, h);
+    case 4: return mse_4xh_quad_16bit_avx2(dst, dstride, src, w * h, h);
+    case 8: return mse_8xh_dual_16bit_avx2(dst, dstride, src, w * h, h);
     default: assert(0 && "unsupported width"); return -1;
   }
 }
diff --git a/aom_dsp/x86/variance_impl_avx2.c b/aom_dsp/x86/variance_impl_avx2.c
index 9e9e70ea01..57a1cee781 100644
--- a/aom_dsp/x86/variance_impl_avx2.c
+++ b/aom_dsp/x86/variance_impl_avx2.c
@@ -648,7 +648,7 @@ MAKE_SUB_PIXEL_VAR_16XH(4, 2)
 #endif
 
 #define MAKE_SUB_PIXEL_AVG_VAR_32XH(height, log2height)                       \
-  int aom_sub_pixel_avg_variance32x##height##_imp_avx2(                       \
+  static int sub_pixel_avg_variance32x##height##_imp_avx2(                    \
       const uint8_t *src, int src_stride, int x_offset, int y_offset,         \
       const uint8_t *dst, int dst_stride, const uint8_t *sec, int sec_stride, \
       unsigned int *sse) {                                                    \
@@ -876,7 +876,7 @@ MAKE_SUB_PIXEL_VAR_16XH(4, 2)
       const uint8_t *src, int src_stride, int x_offset, int y_offset,         \
       const uint8_t *dst, int dst_stride, unsigned int *sse,                  \
       const uint8_t *sec_ptr) {                                               \
-    const int sum = aom_sub_pixel_avg_variance32x##height##_imp_avx2(         \
+    const int sum = sub_pixel_avg_variance32x##height##_imp_avx2(             \
         src, src_stride, x_offset, y_offset, dst, dst_stride, sec_ptr, 32,    \
         sse);                                                                 \
     return *sse - (unsigned int)(((int64_t)sum * sum) >> (5 + log2height));   \
@@ -899,7 +899,7 @@ MAKE_SUB_PIXEL_AVG_VAR_32XH(16, 4)
       const uint8_t *sec_ptr = sec;                                       \
       for (int j = 0; j < (h / hf); ++j) {                                \
         unsigned int sse2;                                                \
-        const int se2 = aom_sub_pixel_avg_variance##wf##x##hf##_imp_avx2( \
+        const int se2 = sub_pixel_avg_variance##wf##x##hf##_imp_avx2(     \
             src_ptr, src_stride, x_offset, y_offset, dst_ptr, dst_stride, \
             sec_ptr, w, &sse2);                                           \
         dst_ptr += hf * dst_stride;                                       \
diff --git a/aom_dsp/x86/variance_sse2.c b/aom_dsp/x86/variance_sse2.c
index 9800b1117d..81b30072a5 100644
--- a/aom_dsp/x86/variance_sse2.c
+++ b/aom_dsp/x86/variance_sse2.c
@@ -706,8 +706,8 @@ void aom_highbd_comp_mask_pred_sse2(uint8_t *comp_pred8, const uint8_t *pred8,
   }
 }
 
-uint64_t aom_mse_4xh_16bit_sse2(uint8_t *dst, int dstride, uint16_t *src,
-                                int sstride, int h) {
+static uint64_t mse_4xh_16bit_sse2(uint8_t *dst, int dstride, uint16_t *src,
+                                   int sstride, int h) {
   uint64_t sum = 0;
   __m128i dst0_8x8, dst1_8x8, dst_16x8;
   __m128i src0_16x4, src1_16x4, src_16x8;
@@ -740,8 +740,8 @@ uint64_t aom_mse_4xh_16bit_sse2(uint8_t *dst, int dstride, uint16_t *src,
   return sum;
 }
 
-uint64_t aom_mse_8xh_16bit_sse2(uint8_t *dst, int dstride, uint16_t *src,
-                                int sstride, int h) {
+static uint64_t mse_8xh_16bit_sse2(uint8_t *dst, int dstride, uint16_t *src,
+                                   int sstride, int h) {
   uint64_t sum = 0;
   __m128i dst_8x8, dst_16x8;
   __m128i src_16x8;
@@ -777,8 +777,8 @@ uint64_t aom_mse_wxh_16bit_sse2(uint8_t *dst, int dstride, uint16_t *src,
   assert((w == 8 || w == 4) && (h == 8 || h == 4) &&
          "w=8/4 and h=8/4 must satisfy");
   switch (w) {
-    case 4: return aom_mse_4xh_16bit_sse2(dst, dstride, src, sstride, h);
-    case 8: return aom_mse_8xh_16bit_sse2(dst, dstride, src, sstride, h);
+    case 4: return mse_4xh_16bit_sse2(dst, dstride, src, sstride, h);
+    case 8: return mse_8xh_16bit_sse2(dst, dstride, src, sstride, h);
     default: assert(0 && "unsupported width"); return -1;
   }
 }