From 68bc71348beb562d1a83b18d36ae875bc45a585e Mon Sep 17 00:00:00 2001
From: James Zern <[EMAIL REDACTED]>
Date: Thu, 15 Aug 2024 12:30:36 -0700
Subject: [PATCH] remove aom_dist_wtd_comp_avg_upsampled_pred*()

This function was renamed from aom_jnt_comp_avg_upsampled_pred() in:
0c96c11e58 Rename jnt_comp convolves to dist_wtd_comp
but had been unused since:
c05147803f use_jnt_comp_avg should be 0 in motion search

Bug: aomedia:3416
Change-Id: I5cc92a2165bea54a1354004bd213c56c2ab95edf
---
av1/av1.cmake | 9 --
av1/common/av1_rtcd_defs.pl | 6 -
av1/encoder/arm/reconinter_enc_neon.c | 13 --
av1/encoder/reconinter_enc.c | 24 ----
av1/encoder/x86/reconinter_enc_ssse3.c | 67 -----------
test/comp_avg_pred_test.cc | 160 -------------------------
6 files changed, 279 deletions(-)
delete mode 100644 av1/encoder/x86/reconinter_enc_ssse3.c
diff --git a/av1/av1.cmake b/av1/av1.cmake
index e67ac8dff4..cdb97afc3f 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -330,9 +330,6 @@ if(NOT CONFIG_EXCLUDE_SIMD_MISMATCH)
"${AOM_ROOT}/av1/encoder/x86/ml_sse3.h")
endif()
-list(APPEND AOM_AV1_ENCODER_INTRIN_SSSE3
- "${AOM_ROOT}/av1/encoder/x86/reconinter_enc_ssse3.c")
-
list(APPEND AOM_AV1_ENCODER_ASM_SSSE3_X86_64
"${AOM_ROOT}/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm")
@@ -667,12 +664,6 @@ function(setup_av1_targets)
"AOM_AV1_DECODER_INTRIN_SSSE3")
endif()
endif()
- if(CONFIG_AV1_ENCODER)
- if(AOM_AV1_ENCODER_INTRIN_SSSE3)
- add_intrinsics_object_library("-mssse3" "ssse3" "aom_av1_encoder"
- "AOM_AV1_ENCODER_INTRIN_SSSE3")
- endif()
- endif()
endif()
if(HAVE_SSE4_1)
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 94a5171080..1254715f83 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -270,12 +270,6 @@ ()
int ref_stride, int subpel_search";
specialize qw/aom_comp_avg_upsampled_pred sse2 neon/;
- add_proto qw/void aom_dist_wtd_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search";
- specialize qw/aom_dist_wtd_comp_avg_upsampled_pred ssse3 neon/;
-
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint8_t *comp_pred8, int width, int height, int subpel_x_q3,
diff --git a/av1/encoder/arm/reconinter_enc_neon.c b/av1/encoder/arm/reconinter_enc_neon.c
index 4ebb34cb08..87e91460ab 100644
--- a/av1/encoder/arm/reconinter_enc_neon.c
+++ b/av1/encoder/arm/reconinter_enc_neon.c
@@ -138,19 +138,6 @@ void aom_comp_avg_upsampled_pred_neon(MACROBLOCKD *xd,
aom_comp_avg_pred_neon(comp_pred, pred, width, height, comp_pred, width);
}
-void aom_dist_wtd_comp_avg_upsampled_pred_neon(
- MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search) {
- aom_upsampled_pred_neon(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride,
- subpel_search);
-
- aom_dist_wtd_comp_avg_pred_neon(comp_pred, pred, width, height, comp_pred,
- width, jcp_param);
-}
-
#if CONFIG_AV1_HIGHBITDEPTH
void aom_highbd_upsampled_pred_neon(MACROBLOCKD *xd,
const struct AV1Common *const cm,
diff --git a/av1/encoder/reconinter_enc.c b/av1/encoder/reconinter_enc.c
index 0396603ca1..4150ea4069 100644
--- a/av1/encoder/reconinter_enc.c
+++ b/av1/encoder/reconinter_enc.c
@@ -534,30 +534,6 @@ void aom_comp_mask_upsampled_pred(MACROBLOCKD *xd, const AV1_COMMON *const cm,
mask_stride, invert_mask);
}
-void aom_dist_wtd_comp_avg_upsampled_pred_c(
- MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search) {
- int i, j;
- const int fwd_offset = jcp_param->fwd_offset;
- const int bck_offset = jcp_param->bck_offset;
-
- aom_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride,
- subpel_search);
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- int tmp = pred[j] * bck_offset + comp_pred[j] * fwd_offset;
- tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
- comp_pred[j] = (uint8_t)tmp;
- }
- comp_pred += width;
- pred += width;
- }
-}
-
#if CONFIG_AV1_HIGHBITDEPTH
void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd,
const struct AV1Common *const cm, int mi_row,
diff --git a/av1/encoder/x86/reconinter_enc_ssse3.c b/av1/encoder/x86/reconinter_enc_ssse3.c
deleted file mode 100644
index f31c0eaa7e..0000000000
--- a/av1/encoder/x86/reconinter_enc_ssse3.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2021, Alliance for Open Media. All rights reserved.
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-#include <tmmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/x86/synonyms.h"
-
-static inline void compute_dist_wtd_avg(__m128i *p0, __m128i *p1,
- const __m128i *w, const __m128i *r,
- void *const result) {
- __m128i p_lo = _mm_unpacklo_epi8(*p0, *p1);
- __m128i mult_lo = _mm_maddubs_epi16(p_lo, *w);
- __m128i round_lo = _mm_add_epi16(mult_lo, *r);
- __m128i shift_lo = _mm_srai_epi16(round_lo, DIST_PRECISION_BITS);
-
- __m128i p_hi = _mm_unpackhi_epi8(*p0, *p1);
- __m128i mult_hi = _mm_maddubs_epi16(p_hi, *w);
- __m128i round_hi = _mm_add_epi16(mult_hi, *r);
- __m128i shift_hi = _mm_srai_epi16(round_hi, DIST_PRECISION_BITS);
-
- xx_storeu_128(result, _mm_packus_epi16(shift_lo, shift_hi));
-}
-
-void aom_dist_wtd_comp_avg_upsampled_pred_ssse3(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search) {
- int n;
- int i;
- aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
- /*The total number of pixels must be a multiple of 16 (e.g., 4x4).*/
- assert(!(width * height & 15));
- n = width * height >> 4;
-
- const int8_t w0 = (int8_t)jcp_param->fwd_offset;
- const int8_t w1 = (int8_t)jcp_param->bck_offset;
- const __m128i w = _mm_set_epi8(w1, w0, w1, w0, w1, w0, w1, w0, w1, w0, w1, w0,
- w1, w0, w1, w0);
- const int16_t round = (int16_t)((1 << DIST_PRECISION_BITS) >> 1);
- const __m128i r = _mm_set1_epi16(round);
-
- for (i = 0; i < n; i++) {
- __m128i p0 = xx_loadu_128(comp_pred);
- __m128i p1 = xx_loadu_128(pred);
-
- compute_dist_wtd_avg(&p0, &p1, &w, &r, comp_pred);
-
- comp_pred += 16;
- pred += 16;
- }
-}
diff --git a/test/comp_avg_pred_test.cc b/test/comp_avg_pred_test.cc
index 18c077b1ae..02e0210ad5 100644
--- a/test/comp_avg_pred_test.cc
+++ b/test/comp_avg_pred_test.cc
@@ -47,9 +47,6 @@ typedef void (*DistWtdCompAvgFunc)(uint8_t *comp_pred, const uint8_t *pred,
typedef std::tuple<distwtdcompavg_func, BLOCK_SIZE> AV1DistWtdCompAvgParam;
-typedef std::tuple<distwtdcompavgupsampled_func, BLOCK_SIZE>
- AV1DistWtdCompAvgUpsampledParam;
-
typedef std::tuple<int, int, DistWtdCompAvgFunc, int> DistWtdCompAvgParam;
#if CONFIG_AV1_HIGHBITDEPTH
@@ -92,14 +89,6 @@ ::testing::internal::ParamGenerator<AV1DistWtdCompAvgParam> BuildParams(
}
#endif // HAVE_SSSE3
-#if HAVE_SSSE3 || HAVE_NEON
-::testing::internal::ParamGenerator<AV1DistWtdCompAvgUpsampledParam>
-BuildParams(distwtdcompavgupsampled_func filter) {
- return ::testing::Combine(::testing::Values(filter),
- ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
-}
-#endif // HAVE_SSSE3 || HAVE_NEON
-
class AV1DistWtdCompAvgTest
: public ::testing::TestWithParam<AV1DistWtdCompAvgParam> {
public:
@@ -205,135 +194,6 @@ class AV1DistWtdCompAvgTest
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DistWtdCompAvgTest);
-class AV1DistWtdCompAvgUpsampledTest
- : public ::testing::TestWithParam<AV1DistWtdCompAvgUpsampledParam> {
- public:
- ~AV1DistWtdCompAvgUpsampledTest() override = default;
- void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
-
- protected:
- void RunCheckOutput(distwtdcompavgupsampled_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(1);
-
- uint8_t pred8[kMaxSize * kMaxSize];
- uint8_t ref8[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand8();
- ref8[i * w + j] = rnd_.Rand8();
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
- dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
- int sub_x_q3, sub_y_q3;
- int subpel_search;
- for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
- ++subpel_search) {
- for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
- for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
- for (int ii = 0; ii < 2; ii++) {
- for (int jj = 0; jj < 4; jj++) {
- dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
- dist_wtd_comp_params.bck_offset =
- quant_dist_lookup_table[jj][1 - ii];
-
- const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
-
- aom_dist_wtd_comp_avg_upsampled_pred_c(
- nullptr, nullptr, 0, 0, nullptr, output,
- pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
- sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
- &dist_wtd_comp_params, subpel_search);
- test_impl(nullptr, nullptr, 0, 0, nullptr, output2,
- pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
- sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
- &dist_wtd_comp_params, subpel_search);
-
- for (int i = 0; i < in_h; ++i) {
- for (int j = 0; j < in_w; ++j) {
- int idx = i * in_w + j;
- ASSERT_EQ(output[idx], output2[idx])
- << "Mismatch at unit tests for "
- "AV1DistWtdCompAvgUpsampledTest\n"
- << in_w << "x" << in_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << sub_y_q3 << ", "
- << sub_x_q3 << ")";
- }
- }
- }
- }
- }
- }
- }
- }
- void RunSpeedTest(distwtdcompavgupsampled_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(1);
-
- uint8_t pred8[kMaxSize * kMaxSize];
- uint8_t ref8[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand8();
- ref8[i * w + j] = rnd_.Rand8();
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
- dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
-
- dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
- dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
-
- int sub_x_q3 = 0;
- int sub_y_q3 = 0;
-
- const int num_loops = 1000000000 / (in_w + in_h);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- int subpel_search = USE_8_TAPS; // set to USE_4_TAPS to test 4-tap filter.
-
- for (int i = 0; i < num_loops; ++i)
- aom_dist_wtd_comp_avg_upsampled_pred_c(
- nullptr, nullptr, 0, 0, nullptr, output, pred8, in_w, in_h, sub_x_q3,
- sub_y_q3, ref8, in_w, &dist_wtd_comp_params, subpel_search);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("distwtdcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
- 1000.0 * elapsed_time / num_loops);
-
- aom_usec_timer timer1;
- aom_usec_timer_start(&timer1);
-
- for (int i = 0; i < num_loops; ++i)
- test_impl(nullptr, nullptr, 0, 0, nullptr, output2, pred8, in_w, in_h,
- sub_x_q3, sub_y_q3, ref8, in_w, &dist_wtd_comp_params,
- subpel_search);
-
- aom_usec_timer_mark(&timer1);
- const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
- printf("distwtdcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
- 1000.0 * elapsed_time1 / num_loops);
- }
-
- libaom_test::ACMRandom rnd_;
-}; // class AV1DistWtdCompAvgUpsampledTest
-
-GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DistWtdCompAvgUpsampledTest);
-
class DistWtdCompAvgTest
: public ::testing::WithParamInterface<DistWtdCompAvgParam>,
public ::testing::Test {
@@ -790,26 +650,6 @@ INSTANTIATE_TEST_SUITE_P(SSSE3, AV1DistWtdCompAvgTest,
BuildParams(aom_dist_wtd_comp_avg_pred_ssse3));
#endif
-TEST_P(AV1DistWtdCompAvgUpsampledTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(0));
-}
-
-TEST_P(AV1DistWtdCompAvgUpsampledTest, CheckOutput) {
- RunCheckOutput(GET_PARAM(0));
-}
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_SUITE_P(
- SSSE3, AV1DistWtdCompAvgUpsampledTest,
- BuildParams(aom_dist_wtd_comp_avg_upsampled_pred_ssse3));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_SUITE_P(
- NEON, AV1DistWtdCompAvgUpsampledTest,
- BuildParams(aom_dist_wtd_comp_avg_upsampled_pred_neon));
-#endif // HAVE_NEON
-
TEST_P(DistWtdCompAvgTest, MaxRef) {
FillConstant(reference_data_, reference_stride_, mask_);
FillConstant(second_pred_, width_, 0);