aom: cfl: add missing prototypes

From 2a3f8ed51db1e5885acd83d0f17d431e6641f4d2 Mon Sep 17 00:00:00 2001
From: James Zern <[EMAIL REDACTED]>
Date: Fri, 26 Apr 2024 11:31:59 -0700
Subject: [PATCH] cfl: add missing prototypes

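Fixes -Wmissing-prototypes warnings: the CFL helper macros in cfl.h now
declare each function immediately before defining it, cfl_predict_hbd_c
becomes static INLINE, and the prototypes for the 4xN C subtract-average
functions move from cfl.h into ppc/cfl_ppc.c, their only external user.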

Bug: aomedia:3416
Change-Id: I7661fce3f4c2ccaaef59f144030079dd5c126a13
---
 av1/common/cfl.c         |  5 +++--
 av1/common/cfl.h         | 27 +++++++++++++++------------
 av1/common/ppc/cfl_ppc.c |  4 ++++
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 0e37d45980..bd11c4a6a0 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -159,8 +159,9 @@ static INLINE void cfl_predict_lbd_c(const int16_t *ac_buf_q3, uint8_t *dst,
 CFL_PREDICT_FN(c, lbd)
 
 #if CONFIG_AV1_HIGHBITDEPTH
-void cfl_predict_hbd_c(const int16_t *ac_buf_q3, uint16_t *dst, int dst_stride,
-                       int alpha_q3, int bit_depth, int width, int height) {
+static INLINE void cfl_predict_hbd_c(const int16_t *ac_buf_q3, uint16_t *dst,
+                                     int dst_stride, int alpha_q3,
+                                     int bit_depth, int width, int height) {
   for (int j = 0; j < height; j++) {
     for (int i = 0; i < width; i++) {
       dst[i] = clip_pixel_highbd(
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index dcaa87bd48..dbb94d665b 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -95,6 +95,8 @@ void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
 // will be constant allowing for loop unrolling and other constant propagated
 // goodness.
 #define CFL_SUBSAMPLE(arch, sub, bd, width, height)                       \
+  void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch(          \
+      const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3);      \
   void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch(          \
       const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) {     \
     cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride,    \
@@ -170,6 +172,8 @@ void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
 // will be constant allowing for loop unrolling and other constant propagated
 // goodness.
 #define CFL_SUB_AVG_X(arch, width, height, round_offset, num_pel_log2)       \
+  void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \
+                                                        int16_t *dst);       \
   void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \
                                                         int16_t *dst) {      \
     subtract_average_##arch(src, dst, width, height, round_offset,           \
@@ -220,22 +224,21 @@ void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
     return sub_avg[tx_size % TX_SIZES_ALL];                               \
   }
 
-// For VSX SIMD optimization, the C versions of width == 4 subtract are
-// faster than the VSX. As such, the VSX code calls the C versions.
-void cfl_subtract_average_4x4_c(const uint16_t *src, int16_t *dst);
-void cfl_subtract_average_4x8_c(const uint16_t *src, int16_t *dst);
-void cfl_subtract_average_4x16_c(const uint16_t *src, int16_t *dst);
-
-#define CFL_PREDICT_lbd(arch, width, height)                              \
-  void cfl_predict_lbd_##width##x##height##_##arch(                       \
-      const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride,           \
-      int alpha_q3) {                                                     \
-    cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \
-                           height);                                       \
+#define CFL_PREDICT_lbd(arch, width, height)                                   \
+  void cfl_predict_lbd_##width##x##height##_##arch(                            \
+      const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride, int alpha_q3); \
+  void cfl_predict_lbd_##width##x##height##_##arch(                            \
+      const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride,                \
+      int alpha_q3) {                                                          \
+    cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width,      \
+                           height);                                            \
   }
 
 #if CONFIG_AV1_HIGHBITDEPTH
 #define CFL_PREDICT_hbd(arch, width, height)                                   \
+  void cfl_predict_hbd_##width##x##height##_##arch(                            \
+      const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride, int alpha_q3, \
+      int bd);                                                                 \
   void cfl_predict_hbd_##width##x##height##_##arch(                            \
       const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride, int alpha_q3, \
       int bd) {                                                                \
diff --git a/av1/common/ppc/cfl_ppc.c b/av1/common/ppc/cfl_ppc.c
index 6f88768f2f..27a7f07a0d 100644
--- a/av1/common/ppc/cfl_ppc.c
+++ b/av1/common/ppc/cfl_ppc.c
@@ -124,6 +124,10 @@ CFL_SUB_AVG_X(vsx, 32, 32, 512, 10)
 
 // Based on observation, for small blocks VSX does not outperform C (no 64bit
 // load and store intrinsics). So we call the C code for block widths 4.
+extern void cfl_subtract_average_4x4_c(const uint16_t *src, int16_t *dst);
+extern void cfl_subtract_average_4x8_c(const uint16_t *src, int16_t *dst);
+extern void cfl_subtract_average_4x16_c(const uint16_t *src, int16_t *dst);
+
 cfl_subtract_average_fn cfl_get_subtract_average_fn_vsx(TX_SIZE tx_size) {
   static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = {
     cfl_subtract_average_4x4_c,     /* 4x4 */