aom: remove aom_dist_wtd_comp_avg_upsampled_pred*()

From 68bc71348beb562d1a83b18d36ae875bc45a585e Mon Sep 17 00:00:00 2001
From: James Zern <[EMAIL REDACTED]>
Date: Thu, 15 Aug 2024 12:30:36 -0700
Subject: [PATCH] remove aom_dist_wtd_comp_avg_upsampled_pred*()

This function was renamed from aom_jnt_comp_avg_upsampled_pred() in:
  0c96c11e58 Rename jnt_comp convolves to dist_wtd_comp
but had been unused since:
  c05147803f use_jnt_comp_avg should be 0 in motion search

Bug: aomedia:3416
Change-Id: I5cc92a2165bea54a1354004bd213c56c2ab95edf
---
 av1/av1.cmake                          |   9 --
 av1/common/av1_rtcd_defs.pl            |   6 -
 av1/encoder/arm/reconinter_enc_neon.c  |  13 --
 av1/encoder/reconinter_enc.c           |  24 ----
 av1/encoder/x86/reconinter_enc_ssse3.c |  67 -----------
 test/comp_avg_pred_test.cc             | 160 -------------------------
 6 files changed, 279 deletions(-)
 delete mode 100644 av1/encoder/x86/reconinter_enc_ssse3.c

diff --git a/av1/av1.cmake b/av1/av1.cmake
index e67ac8dff4..cdb97afc3f 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -330,9 +330,6 @@ if(NOT CONFIG_EXCLUDE_SIMD_MISMATCH)
               "${AOM_ROOT}/av1/encoder/x86/ml_sse3.h")
 endif()
 
-list(APPEND AOM_AV1_ENCODER_INTRIN_SSSE3
-            "${AOM_ROOT}/av1/encoder/x86/reconinter_enc_ssse3.c")
-
 list(APPEND AOM_AV1_ENCODER_ASM_SSSE3_X86_64
             "${AOM_ROOT}/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm")
 
@@ -667,12 +664,6 @@ function(setup_av1_targets)
                                       "AOM_AV1_DECODER_INTRIN_SSSE3")
       endif()
     endif()
-    if(CONFIG_AV1_ENCODER)
-      if(AOM_AV1_ENCODER_INTRIN_SSSE3)
-        add_intrinsics_object_library("-mssse3" "ssse3" "aom_av1_encoder"
-                                      "AOM_AV1_ENCODER_INTRIN_SSSE3")
-      endif()
-    endif()
   endif()
 
   if(HAVE_SSE4_1)
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 94a5171080..1254715f83 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -270,12 +270,6 @@ ()
                                                    int ref_stride, int subpel_search";
   specialize qw/aom_comp_avg_upsampled_pred sse2 neon/;
 
-  add_proto qw/void aom_dist_wtd_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
-                                                       const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
-                                                       int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
-                                                       int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search";
-  specialize qw/aom_dist_wtd_comp_avg_upsampled_pred ssse3 neon/;
-
   if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
     add_proto qw/void aom_highbd_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
                                                    const MV *const mv, uint8_t *comp_pred8, int width, int height, int subpel_x_q3,
diff --git a/av1/encoder/arm/reconinter_enc_neon.c b/av1/encoder/arm/reconinter_enc_neon.c
index 4ebb34cb08..87e91460ab 100644
--- a/av1/encoder/arm/reconinter_enc_neon.c
+++ b/av1/encoder/arm/reconinter_enc_neon.c
@@ -138,19 +138,6 @@ void aom_comp_avg_upsampled_pred_neon(MACROBLOCKD *xd,
   aom_comp_avg_pred_neon(comp_pred, pred, width, height, comp_pred, width);
 }
 
-void aom_dist_wtd_comp_avg_upsampled_pred_neon(
-    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
-    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
-    int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search) {
-  aom_upsampled_pred_neon(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
-                          subpel_x_q3, subpel_y_q3, ref, ref_stride,
-                          subpel_search);
-
-  aom_dist_wtd_comp_avg_pred_neon(comp_pred, pred, width, height, comp_pred,
-                                  width, jcp_param);
-}
-
 #if CONFIG_AV1_HIGHBITDEPTH
 void aom_highbd_upsampled_pred_neon(MACROBLOCKD *xd,
                                     const struct AV1Common *const cm,
diff --git a/av1/encoder/reconinter_enc.c b/av1/encoder/reconinter_enc.c
index 0396603ca1..4150ea4069 100644
--- a/av1/encoder/reconinter_enc.c
+++ b/av1/encoder/reconinter_enc.c
@@ -534,30 +534,6 @@ void aom_comp_mask_upsampled_pred(MACROBLOCKD *xd, const AV1_COMMON *const cm,
                      mask_stride, invert_mask);
 }
 
-void aom_dist_wtd_comp_avg_upsampled_pred_c(
-    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
-    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
-    int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search) {
-  int i, j;
-  const int fwd_offset = jcp_param->fwd_offset;
-  const int bck_offset = jcp_param->bck_offset;
-
-  aom_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
-                       subpel_x_q3, subpel_y_q3, ref, ref_stride,
-                       subpel_search);
-
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      int tmp = pred[j] * bck_offset + comp_pred[j] * fwd_offset;
-      tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
-      comp_pred[j] = (uint8_t)tmp;
-    }
-    comp_pred += width;
-    pred += width;
-  }
-}
-
 #if CONFIG_AV1_HIGHBITDEPTH
 void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd,
                                  const struct AV1Common *const cm, int mi_row,
diff --git a/av1/encoder/x86/reconinter_enc_ssse3.c b/av1/encoder/x86/reconinter_enc_ssse3.c
deleted file mode 100644
index f31c0eaa7e..0000000000
--- a/av1/encoder/x86/reconinter_enc_ssse3.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2021, Alliance for Open Media. All rights reserved.
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h>  // SSE2
-#include <tmmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/x86/synonyms.h"
-
-static inline void compute_dist_wtd_avg(__m128i *p0, __m128i *p1,
-                                        const __m128i *w, const __m128i *r,
-                                        void *const result) {
-  __m128i p_lo = _mm_unpacklo_epi8(*p0, *p1);
-  __m128i mult_lo = _mm_maddubs_epi16(p_lo, *w);
-  __m128i round_lo = _mm_add_epi16(mult_lo, *r);
-  __m128i shift_lo = _mm_srai_epi16(round_lo, DIST_PRECISION_BITS);
-
-  __m128i p_hi = _mm_unpackhi_epi8(*p0, *p1);
-  __m128i mult_hi = _mm_maddubs_epi16(p_hi, *w);
-  __m128i round_hi = _mm_add_epi16(mult_hi, *r);
-  __m128i shift_hi = _mm_srai_epi16(round_hi, DIST_PRECISION_BITS);
-
-  xx_storeu_128(result, _mm_packus_epi16(shift_lo, shift_hi));
-}
-
-void aom_dist_wtd_comp_avg_upsampled_pred_ssse3(
-    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
-    const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
-    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
-    int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search) {
-  int n;
-  int i;
-  aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
-                     subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
-  /*The total number of pixels must be a multiple of 16 (e.g., 4x4).*/
-  assert(!(width * height & 15));
-  n = width * height >> 4;
-
-  const int8_t w0 = (int8_t)jcp_param->fwd_offset;
-  const int8_t w1 = (int8_t)jcp_param->bck_offset;
-  const __m128i w = _mm_set_epi8(w1, w0, w1, w0, w1, w0, w1, w0, w1, w0, w1, w0,
-                                 w1, w0, w1, w0);
-  const int16_t round = (int16_t)((1 << DIST_PRECISION_BITS) >> 1);
-  const __m128i r = _mm_set1_epi16(round);
-
-  for (i = 0; i < n; i++) {
-    __m128i p0 = xx_loadu_128(comp_pred);
-    __m128i p1 = xx_loadu_128(pred);
-
-    compute_dist_wtd_avg(&p0, &p1, &w, &r, comp_pred);
-
-    comp_pred += 16;
-    pred += 16;
-  }
-}
diff --git a/test/comp_avg_pred_test.cc b/test/comp_avg_pred_test.cc
index 18c077b1ae..02e0210ad5 100644
--- a/test/comp_avg_pred_test.cc
+++ b/test/comp_avg_pred_test.cc
@@ -47,9 +47,6 @@ typedef void (*DistWtdCompAvgFunc)(uint8_t *comp_pred, const uint8_t *pred,
 
 typedef std::tuple<distwtdcompavg_func, BLOCK_SIZE> AV1DistWtdCompAvgParam;
 
-typedef std::tuple<distwtdcompavgupsampled_func, BLOCK_SIZE>
-    AV1DistWtdCompAvgUpsampledParam;
-
 typedef std::tuple<int, int, DistWtdCompAvgFunc, int> DistWtdCompAvgParam;
 
 #if CONFIG_AV1_HIGHBITDEPTH
@@ -92,14 +89,6 @@ ::testing::internal::ParamGenerator<AV1DistWtdCompAvgParam> BuildParams(
 }
 #endif  // HAVE_SSSE3
 
-#if HAVE_SSSE3 || HAVE_NEON
-::testing::internal::ParamGenerator<AV1DistWtdCompAvgUpsampledParam>
-BuildParams(distwtdcompavgupsampled_func filter) {
-  return ::testing::Combine(::testing::Values(filter),
-                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
-}
-#endif  // HAVE_SSSE3 || HAVE_NEON
-
 class AV1DistWtdCompAvgTest
     : public ::testing::TestWithParam<AV1DistWtdCompAvgParam> {
  public:
@@ -205,135 +194,6 @@ class AV1DistWtdCompAvgTest
 
 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DistWtdCompAvgTest);
 
-class AV1DistWtdCompAvgUpsampledTest
-    : public ::testing::TestWithParam<AV1DistWtdCompAvgUpsampledParam> {
- public:
-  ~AV1DistWtdCompAvgUpsampledTest() override = default;
-  void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
-
- protected:
-  void RunCheckOutput(distwtdcompavgupsampled_func test_impl) {
-    const int w = kMaxSize, h = kMaxSize;
-    const int block_idx = GET_PARAM(1);
-
-    uint8_t pred8[kMaxSize * kMaxSize];
-    uint8_t ref8[kMaxSize * kMaxSize];
-    DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
-    DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
-
-    for (int i = 0; i < h; ++i)
-      for (int j = 0; j < w; ++j) {
-        pred8[i * w + j] = rnd_.Rand8();
-        ref8[i * w + j] = rnd_.Rand8();
-      }
-    const int in_w = block_size_wide[block_idx];
-    const int in_h = block_size_high[block_idx];
-
-    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
-    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
-    int sub_x_q3, sub_y_q3;
-    int subpel_search;
-    for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
-         ++subpel_search) {
-      for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
-        for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
-          for (int ii = 0; ii < 2; ii++) {
-            for (int jj = 0; jj < 4; jj++) {
-              dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
-              dist_wtd_comp_params.bck_offset =
-                  quant_dist_lookup_table[jj][1 - ii];
-
-              const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
-              const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
-
-              aom_dist_wtd_comp_avg_upsampled_pred_c(
-                  nullptr, nullptr, 0, 0, nullptr, output,
-                  pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
-                  sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
-                  &dist_wtd_comp_params, subpel_search);
-              test_impl(nullptr, nullptr, 0, 0, nullptr, output2,
-                        pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
-                        sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
-                        &dist_wtd_comp_params, subpel_search);
-
-              for (int i = 0; i < in_h; ++i) {
-                for (int j = 0; j < in_w; ++j) {
-                  int idx = i * in_w + j;
-                  ASSERT_EQ(output[idx], output2[idx])
-                      << "Mismatch at unit tests for "
-                         "AV1DistWtdCompAvgUpsampledTest\n"
-                      << in_w << "x" << in_h << " Pixel mismatch at index "
-                      << idx << " = (" << i << ", " << j
-                      << "), sub pixel offset = (" << sub_y_q3 << ", "
-                      << sub_x_q3 << ")";
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-  void RunSpeedTest(distwtdcompavgupsampled_func test_impl) {
-    const int w = kMaxSize, h = kMaxSize;
-    const int block_idx = GET_PARAM(1);
-
-    uint8_t pred8[kMaxSize * kMaxSize];
-    uint8_t ref8[kMaxSize * kMaxSize];
-    DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
-    DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
-
-    for (int i = 0; i < h; ++i)
-      for (int j = 0; j < w; ++j) {
-        pred8[i * w + j] = rnd_.Rand8();
-        ref8[i * w + j] = rnd_.Rand8();
-      }
-    const int in_w = block_size_wide[block_idx];
-    const int in_h = block_size_high[block_idx];
-
-    DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
-    dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
-
-    dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
-    dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
-
-    int sub_x_q3 = 0;
-    int sub_y_q3 = 0;
-
-    const int num_loops = 1000000000 / (in_w + in_h);
-    aom_usec_timer timer;
-    aom_usec_timer_start(&timer);
-    int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
-
-    for (int i = 0; i < num_loops; ++i)
-      aom_dist_wtd_comp_avg_upsampled_pred_c(
-          nullptr, nullptr, 0, 0, nullptr, output, pred8, in_w, in_h, sub_x_q3,
-          sub_y_q3, ref8, in_w, &dist_wtd_comp_params, subpel_search);
-
-    aom_usec_timer_mark(&timer);
-    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
-    printf("distwtdcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
-           1000.0 * elapsed_time / num_loops);
-
-    aom_usec_timer timer1;
-    aom_usec_timer_start(&timer1);
-
-    for (int i = 0; i < num_loops; ++i)
-      test_impl(nullptr, nullptr, 0, 0, nullptr, output2, pred8, in_w, in_h,
-                sub_x_q3, sub_y_q3, ref8, in_w, &dist_wtd_comp_params,
-                subpel_search);
-
-    aom_usec_timer_mark(&timer1);
-    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
-    printf("distwtdcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
-           1000.0 * elapsed_time1 / num_loops);
-  }
-
-  libaom_test::ACMRandom rnd_;
-};  // class AV1DistWtdCompAvgUpsampledTest
-
-GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DistWtdCompAvgUpsampledTest);
-
 class DistWtdCompAvgTest
     : public ::testing::WithParamInterface<DistWtdCompAvgParam>,
       public ::testing::Test {
@@ -790,26 +650,6 @@ INSTANTIATE_TEST_SUITE_P(SSSE3, AV1DistWtdCompAvgTest,
                          BuildParams(aom_dist_wtd_comp_avg_pred_ssse3));
 #endif
 
-TEST_P(AV1DistWtdCompAvgUpsampledTest, DISABLED_Speed) {
-  RunSpeedTest(GET_PARAM(0));
-}
-
-TEST_P(AV1DistWtdCompAvgUpsampledTest, CheckOutput) {
-  RunCheckOutput(GET_PARAM(0));
-}
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_SUITE_P(
-    SSSE3, AV1DistWtdCompAvgUpsampledTest,
-    BuildParams(aom_dist_wtd_comp_avg_upsampled_pred_ssse3));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_SUITE_P(
-    NEON, AV1DistWtdCompAvgUpsampledTest,
-    BuildParams(aom_dist_wtd_comp_avg_upsampled_pred_neon));
-#endif  // HAVE_NEON
-
 TEST_P(DistWtdCompAvgTest, MaxRef) {
   FillConstant(reference_data_, reference_stride_, mask_);
   FillConstant(second_pred_, width_, 0);