aom: Rework downsampling pyramid setup

From 72e349ed3de821683edf8f279de384ff143a4bee Mon Sep 17 00:00:00 2001
From: Rachel Barker <[EMAIL REDACTED]>
Date: Tue, 13 Feb 2024 14:31:10 +0000
Subject: [PATCH] Rework downsampling pyramid setup

Instead of requesting the desired number of pyramid levels when
allocating, allocate all levels up front and fill them in on demand.
This allows much more flexibility in how pyramids are used.

Memory increase is minimal - higher pyramid levels are much smaller
than lower levels, and any unused levels are never touched and so
do not need to be assigned pages by the operating system.

Change-Id: Ib62b75d483438e0fe7b95d39989fdd925667deb7
---
 aom_dsp/flow_estimation/corner_match.c    |   4 +-
 aom_dsp/flow_estimation/disflow.c         |  24 +--
 aom_dsp/flow_estimation/flow_estimation.c |   8 -
 aom_dsp/flow_estimation/flow_estimation.h |   5 -
 aom_dsp/pyramid.c                         | 179 +++++++++++++---------
 aom_dsp/pyramid.h                         |  59 ++++---
 aom_scale/aom_scale_rtcd.pl               |   4 +-
 aom_scale/generic/yv12config.c            |  29 ++--
 aom_scale/generic/yv12extend.c            |   6 +-
 aom_scale/yv12config.h                    |  20 ++-
 av1/av1_cx_iface.c                        |   2 +-
 av1/common/resize.c                       |  13 +-
 av1/common/resize.h                       |   5 +-
 av1/common/restoration.c                  |   3 +-
 av1/decoder/decodeframe.c                 |   6 +-
 av1/decoder/obu.c                         |   2 +-
 av1/encoder/allintra_vis.c                |   2 +-
 av1/encoder/av1_temporal_denoiser.c       |   8 +-
 av1/encoder/encode_strategy.c             |   4 +-
 av1/encoder/encodeframe.c                 |   2 +-
 av1/encoder/encoder.c                     |  28 ++--
 av1/encoder/encoder.h                     |   4 +-
 av1/encoder/encoder_alloc.h               |   3 +-
 av1/encoder/encoder_utils.c               |   8 +-
 av1/encoder/lookahead.c                   |   8 +-
 av1/encoder/lookahead.h                   |  20 +--
 av1/encoder/picklpf.c                     |   2 +-
 av1/encoder/pickrst.c                     |   2 +-
 av1/encoder/superres_scale.c              |   2 +-
 av1/encoder/temporal_filter.c             |   2 +-
 av1/encoder/tpl_model.c                   |   2 +-
 av1/encoder/tune_butteraugli.c            |  10 +-
 av1/encoder/tune_vmaf.c                   |  36 ++---
 test/hbd_metrics_test.cc                  |   8 +-
 34 files changed, 272 insertions(+), 248 deletions(-)

diff --git a/aom_dsp/flow_estimation/corner_match.c b/aom_dsp/flow_estimation/corner_match.c
index 7b2b9fc33c..5f995aeb6b 100644
--- a/aom_dsp/flow_estimation/corner_match.c
+++ b/aom_dsp/flow_estimation/corner_match.c
@@ -263,7 +263,7 @@ bool av1_compute_global_motion_feature_match(
   CornerList *ref_corners = ref->corners;
 
   // Precompute information we will need about each frame
-  if (!aom_compute_pyramid(src, bit_depth, src_pyramid)) {
+  if (aom_compute_pyramid(src, bit_depth, 1, src_pyramid) < 0) {
     *mem_alloc_failed = true;
     return false;
   }
@@ -271,7 +271,7 @@ bool av1_compute_global_motion_feature_match(
     *mem_alloc_failed = true;
     return false;
   }
-  if (!aom_compute_pyramid(ref, bit_depth, ref_pyramid)) {
+  if (aom_compute_pyramid(ref, bit_depth, 1, ref_pyramid) < 0) {
     *mem_alloc_failed = true;
     return false;
   }
diff --git a/aom_dsp/flow_estimation/disflow.c b/aom_dsp/flow_estimation/disflow.c
index 82b531c729..eb2bb47f38 100644
--- a/aom_dsp/flow_estimation/disflow.c
+++ b/aom_dsp/flow_estimation/disflow.c
@@ -603,9 +603,9 @@ static void upscale_flow_component(double *flow, int cur_width, int cur_height,
 
 // make sure flow_u and flow_v start at 0
 static bool compute_flow_field(const ImagePyramid *src_pyr,
-                               const ImagePyramid *ref_pyr, FlowField *flow) {
+                               const ImagePyramid *ref_pyr, int n_levels,
+                               FlowField *flow) {
   bool mem_status = true;
-  assert(src_pyr->n_levels == ref_pyr->n_levels);
 
   double *flow_u = flow->u;
   double *flow_v = flow->v;
@@ -613,7 +613,7 @@ static bool compute_flow_field(const ImagePyramid *src_pyr,
   double *tmpbuf0;
   double *tmpbuf;
 
-  if (src_pyr->n_levels < 2) {
+  if (n_levels < 2) {
     // tmpbuf not needed
     tmpbuf0 = NULL;
     tmpbuf = NULL;
@@ -639,7 +639,7 @@ static bool compute_flow_field(const ImagePyramid *src_pyr,
   // correspondences by interpolating this flow field, and then refine the
   // correspondences themselves. This is both faster and gives better output
   // compared to refining the flow field at level 0 and then interpolating.
-  for (int level = src_pyr->n_levels - 1; level >= 1; --level) {
+  for (int level = n_levels - 1; level >= 1; --level) {
     const PyramidLayer *cur_layer = &src_pyr->layers[level];
     const int cur_width = cur_layer->width;
     const int cur_height = cur_layer->height;
@@ -772,7 +772,13 @@ bool av1_compute_global_motion_disflow(TransformationType type,
   ImagePyramid *src_pyramid = src->y_pyramid;
   CornerList *src_corners = src->corners;
   ImagePyramid *ref_pyramid = ref->y_pyramid;
-  if (!aom_compute_pyramid(src, bit_depth, src_pyramid)) {
+
+  const int src_layers =
+      aom_compute_pyramid(src, bit_depth, DISFLOW_PYRAMID_LEVELS, src_pyramid);
+  const int ref_layers =
+      aom_compute_pyramid(ref, bit_depth, DISFLOW_PYRAMID_LEVELS, ref_pyramid);
+
+  if (src_layers < 0 || ref_layers < 0) {
     *mem_alloc_failed = true;
     return false;
   }
@@ -780,10 +786,8 @@ bool av1_compute_global_motion_disflow(TransformationType type,
     *mem_alloc_failed = true;
     return false;
   }
-  if (!aom_compute_pyramid(ref, bit_depth, ref_pyramid)) {
-    *mem_alloc_failed = true;
-    return false;
-  }
+
+  assert(src_layers == ref_layers);
 
   const int src_width = src_pyramid->layers[0].width;
   const int src_height = src_pyramid->layers[0].height;
@@ -796,7 +800,7 @@ bool av1_compute_global_motion_disflow(TransformationType type,
     return false;
   }
 
-  if (!compute_flow_field(src_pyramid, ref_pyramid, flow)) {
+  if (!compute_flow_field(src_pyramid, ref_pyramid, src_layers, flow)) {
     *mem_alloc_failed = true;
     free_flow_field(flow);
     return false;
diff --git a/aom_dsp/flow_estimation/flow_estimation.c b/aom_dsp/flow_estimation/flow_estimation.c
index 0f47f86f55..5eb36a2341 100644
--- a/aom_dsp/flow_estimation/flow_estimation.c
+++ b/aom_dsp/flow_estimation/flow_estimation.c
@@ -18,14 +18,6 @@
 #include "aom_ports/mem.h"
 #include "aom_scale/yv12config.h"
 
-// For each global motion method, how many pyramid levels should we allocate?
-// Note that this is a maximum, and fewer levels will be allocated if the frame
-// is not large enough to need all of the specified levels
-const int global_motion_pyr_levels[GLOBAL_MOTION_METHODS] = {
-  1,   // GLOBAL_MOTION_METHOD_FEATURE_MATCH
-  16,  // GLOBAL_MOTION_METHOD_DISFLOW
-};
-
 // clang-format off
 const double kIdentityParams[MAX_PARAMDIM] = {
   0.0, 0.0, 1.0, 0.0, 0.0, 1.0
diff --git a/aom_dsp/flow_estimation/flow_estimation.h b/aom_dsp/flow_estimation/flow_estimation.h
index 2dfae24980..ec5e217e08 100644
--- a/aom_dsp/flow_estimation/flow_estimation.h
+++ b/aom_dsp/flow_estimation/flow_estimation.h
@@ -61,11 +61,6 @@ typedef struct {
   double rx, ry;
 } Correspondence;
 
-// For each global motion method, how many pyramid levels should we allocate?
-// Note that this is a maximum, and fewer levels will be allocated if the frame
-// is not large enough to need all of the specified levels
-extern const int global_motion_pyr_levels[GLOBAL_MOTION_METHODS];
-
 // Which global motion method should we use in practice?
 // Disflow is both faster and gives better results than feature matching in
 // practically all cases, so we use disflow by default
diff --git a/aom_dsp/pyramid.c b/aom_dsp/pyramid.c
index 94c970b78a..5de001dbd5 100644
--- a/aom_dsp/pyramid.c
+++ b/aom_dsp/pyramid.c
@@ -26,18 +26,16 @@
 //   levels. This is counted in the size checked against the max allocation
 //   limit
 // * Then calls aom_alloc_pyramid() to actually create the pyramid
-// * Pyramid is initially marked as invalid (no data)
-// * Whenever pyramid is needed, we check the valid flag. If set, use existing
-//   data. If not set, compute full pyramid
-// * Whenever frame buffer is reused, clear the valid flag
+// * Pyramid is initially marked as containing no valid data
+// * Each pyramid layer is computed on-demand, the first time it is requested
+// * Whenever frame buffer is reused, reset the counter of filled levels.
+//   This invalidates all of the existing pyramid levels.
 // * Whenever frame buffer is resized, reallocate pyramid
 
-size_t aom_get_pyramid_alloc_size(int width, int height, int n_levels,
-                                  bool image_is_16bit) {
-  // Limit number of levels on small frames
+size_t aom_get_pyramid_alloc_size(int width, int height, bool image_is_16bit) {
+  // Allocate the maximum possible number of layers for this width and height
   const int msb = get_msb(AOMMIN(width, height));
-  const int max_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);
-  n_levels = AOMMIN(n_levels, max_levels);
+  const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);
 
   size_t alloc_size = 0;
   alloc_size += sizeof(ImagePyramid);
@@ -100,12 +98,10 @@ size_t aom_get_pyramid_alloc_size(int width, int height, int n_levels,
   return alloc_size;
 }
 
-ImagePyramid *aom_alloc_pyramid(int width, int height, int n_levels,
-                                bool image_is_16bit) {
-  // Limit number of levels on small frames
+ImagePyramid *aom_alloc_pyramid(int width, int height, bool image_is_16bit) {
+  // Allocate the maximum possible number of layers for this width and height
   const int msb = get_msb(AOMMIN(width, height));
-  const int max_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);
-  n_levels = AOMMIN(n_levels, max_levels);
+  const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);
 
   ImagePyramid *pyr = aom_calloc(1, sizeof(*pyr));
   if (!pyr) {
@@ -118,8 +114,8 @@ ImagePyramid *aom_alloc_pyramid(int width, int height, int n_levels,
     return NULL;
   }
 
-  pyr->valid = false;
-  pyr->n_levels = n_levels;
+  pyr->max_levels = n_levels;
+  pyr->filled_levels = 0;
 
   // Compute sizes and offsets for each pyramid level
   // These are gathered up first, so that we can allocate all pyramid levels
@@ -248,46 +244,67 @@ static INLINE void fill_border(uint8_t *img_buf, const int width,
   }
 }
 
-// Compute coarse to fine pyramids for a frame
+// Compute downsampling pyramid for a frame
+//
+// This function will ensure that the first `n_levels` levels of the pyramid
+// are filled, unless the frame is too small to have this many levels.
+// In that case, we will fill all available levels and then stop.
+//
+// Returns the actual number of levels filled, capped at n_levels,
+// or -1 on error.
+//
 // This must only be called while holding frame_pyr->mutex
-static INLINE bool fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
-                                ImagePyramid *frame_pyr) {
-  int n_levels = frame_pyr->n_levels;
+static INLINE int fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
+                               int n_levels, ImagePyramid *frame_pyr) {
+  int already_filled_levels = frame_pyr->filled_levels;
+
+  // This condition should already be enforced by aom_compute_pyramid
+  assert(n_levels <= frame_pyr->max_levels);
+
+  if (already_filled_levels >= n_levels) {
+    return n_levels;
+  }
+
   const int frame_width = frame->y_crop_width;
   const int frame_height = frame->y_crop_height;
   const int frame_stride = frame->y_stride;
   assert((frame_width >> n_levels) >= 0);
   assert((frame_height >> n_levels) >= 0);
 
-  PyramidLayer *first_layer = &frame_pyr->layers[0];
-  if (frame->flags & YV12_FLAG_HIGHBITDEPTH) {
-    // For frames stored in a 16-bit buffer, we need to downconvert to 8 bits
-    assert(first_layer->width == frame_width);
-    assert(first_layer->height == frame_height);
-
-    uint16_t *frame_buffer = CONVERT_TO_SHORTPTR(frame->y_buffer);
-    uint8_t *pyr_buffer = first_layer->buffer;
-    int pyr_stride = first_layer->stride;
-    for (int y = 0; y < frame_height; y++) {
-      uint16_t *frame_row = frame_buffer + y * frame_stride;
-      uint8_t *pyr_row = pyr_buffer + y * pyr_stride;
-      for (int x = 0; x < frame_width; x++) {
-        pyr_row[x] = frame_row[x] >> (bit_depth - 8);
+  if (already_filled_levels == 0) {
+    // Fill in largest level from the original image
+    PyramidLayer *first_layer = &frame_pyr->layers[0];
+    if (frame->flags & YV12_FLAG_HIGHBITDEPTH) {
+      // For frames stored in a 16-bit buffer, we need to downconvert to 8 bits
+      assert(first_layer->width == frame_width);
+      assert(first_layer->height == frame_height);
+
+      uint16_t *frame_buffer = CONVERT_TO_SHORTPTR(frame->y_buffer);
+      uint8_t *pyr_buffer = first_layer->buffer;
+      int pyr_stride = first_layer->stride;
+      for (int y = 0; y < frame_height; y++) {
+        uint16_t *frame_row = frame_buffer + y * frame_stride;
+        uint8_t *pyr_row = pyr_buffer + y * pyr_stride;
+        for (int x = 0; x < frame_width; x++) {
+          pyr_row[x] = frame_row[x] >> (bit_depth - 8);
+        }
       }
+
+      fill_border(pyr_buffer, frame_width, frame_height, pyr_stride);
+    } else {
+      // For frames stored in an 8-bit buffer, we don't need to copy anything -
+      // we can just reference the original image buffer
+      first_layer->buffer = frame->y_buffer;
+      first_layer->width = frame_width;
+      first_layer->height = frame_height;
+      first_layer->stride = frame_stride;
     }
 
-    fill_border(pyr_buffer, frame_width, frame_height, pyr_stride);
-  } else {
-    // For frames stored in an 8-bit buffer, we need to configure the first
-    // pyramid layer to point at the original image buffer
-    first_layer->buffer = frame->y_buffer;
-    first_layer->width = frame_width;
-    first_layer->height = frame_height;
-    first_layer->stride = frame_stride;
+    already_filled_levels = 1;
   }
 
   // Fill in the remaining levels through progressive downsampling
-  for (int level = 1; level < n_levels; ++level) {
+  for (int level = already_filled_levels; level < n_levels; ++level) {
     PyramidLayer *prev_layer = &frame_pyr->layers[level - 1];
     uint8_t *prev_buffer = prev_layer->buffer;
     int prev_stride = prev_layer->stride;
@@ -314,11 +331,16 @@ static INLINE bool fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
     //    TODO(rachelbarker): Use optimized downsample-by-2 function
     if (!av1_resize_plane(prev_buffer, this_height << 1, this_width << 1,
                           prev_stride, this_buffer, this_height, this_width,
-                          this_stride))
-      return false;
+                          this_stride)) {
+      // If we can't allocate memory, we'll have to terminate early
+      frame_pyr->filled_levels = n_levels;
+      return -1;
+    }
     fill_border(this_buffer, this_width, this_height, this_stride);
   }
-  return true;
+
+  frame_pyr->filled_levels = n_levels;
+  return n_levels;
 }
 
 // Fill out a downsampling pyramid for a given frame.
@@ -327,63 +349,72 @@ static INLINE bool fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
 // regardless of the input bit depth. Additional levels are then downscaled
 // by powers of 2.
 //
-// For small input frames, the number of levels actually constructed
-// will be limited so that the smallest image is at least MIN_PYRAMID_SIZE
-// pixels along each side.
+// This function will ensure that the first `n_levels` levels of the pyramid
+// are filled, unless the frame is too small to have this many levels.
+// In that case, we will fill all available levels and then stop.
+// No matter how small the frame is, at least one level is guaranteed
+// to be filled.
 //
-// However, if the input frame has a side of length < MIN_PYRAMID_SIZE,
-// we will still construct the top level.
-bool aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
-                         ImagePyramid *pyr) {
+// Returns the actual number of levels filled, capped at n_levels,
+// or -1 on error.
+int aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
+                        int n_levels, ImagePyramid *pyr) {
   assert(pyr);
 
   // Per the comments in the ImagePyramid struct, we must take this mutex
-  // before reading or writing the "valid" flag, and hold it while computing
-  // the pyramid, to ensure proper behaviour if multiple threads call this
-  // function simultaneously
+  // before reading or writing the filled_levels field, and hold it while
+  // computing any additional pyramid levels, to ensure proper behaviour
+  // when multithreading is used
 #if CONFIG_MULTITHREAD
   pthread_mutex_lock(&pyr->mutex);
 #endif  // CONFIG_MULTITHREAD
 
-  if (!pyr->valid) {
-    pyr->valid = fill_pyramid(frame, bit_depth, pyr);
+  n_levels = AOMMIN(n_levels, pyr->max_levels);
+  int result = n_levels;
+  if (pyr->filled_levels < n_levels) {
+    // Compute any missing levels that we need
+    result = fill_pyramid(frame, bit_depth, n_levels, pyr);
   }
-  bool valid = pyr->valid;
-
-  // At this point, the pyramid is guaranteed to be valid, and can be safely
-  // read from without holding the mutex any more
 
+  // At this point, as long as result >= 0, the requested number of pyramid
+  // levels are guaranteed to be valid, and can be safely read from without
+  // holding the mutex any further
+  assert(IMPLIES(result >= 0, pyr->filled_levels >= n_levels));
 #if CONFIG_MULTITHREAD
   pthread_mutex_unlock(&pyr->mutex);
 #endif  // CONFIG_MULTITHREAD
-  return valid;
+  return result;
 }
 
 #ifndef NDEBUG
-// Check if a pyramid has already been computed.
+// Check if a pyramid has already been computed to at least n levels
 // This is mostly a debug helper - as it is necessary to hold pyr->mutex
-// while reading the valid flag, we cannot just write:
-//   assert(pyr->valid);
+// while reading the number of already-computed levels, we cannot just write:
+//   assert(pyr->filled_levels >= n_levels);
 // This function allows the check to be correctly written as:
-//   assert(aom_is_pyramid_valid(pyr));
-bool aom_is_pyramid_valid(ImagePyramid *pyr) {
+//   assert(aom_is_pyramid_valid(pyr, n_levels));
+//
+// Note: This deliberately does not restrict n_levels based on the maximum
+// number of permitted levels for the frame size. This allows the check to
+// catch cases where the caller forgets to handle the case where
+// max_levels is less than the requested number of levels
+bool aom_is_pyramid_valid(ImagePyramid *pyr, int n_levels) {
   assert(pyr);
 
   // Per the comments in the ImagePyramid struct, we must take this mutex
-  // before reading or writing the "valid" flag, and hold it while computing
-  // the pyramid, to ensure proper behaviour if multiple threads call this
-  // function simultaneously
+  // before reading or writing the filled_levels field, to ensure proper
+  // behaviour when multithreading is used
 #if CONFIG_MULTITHREAD
   pthread_mutex_lock(&pyr->mutex);
 #endif  // CONFIG_MULTITHREAD
 
-  bool valid = pyr->valid;
+  bool result = (pyr->filled_levels >= n_levels);
 
 #if CONFIG_MULTITHREAD
   pthread_mutex_unlock(&pyr->mutex);
 #endif  // CONFIG_MULTITHREAD
 
-  return valid;
+  return result;
 }
 #endif
 
@@ -394,7 +425,7 @@ void aom_invalidate_pyramid(ImagePyramid *pyr) {
 #if CONFIG_MULTITHREAD
     pthread_mutex_lock(&pyr->mutex);
 #endif  // CONFIG_MULTITHREAD
-    pyr->valid = false;
+    pyr->filled_levels = 0;
 #if CONFIG_MULTITHREAD
     pthread_mutex_unlock(&pyr->mutex);
 #endif  // CONFIG_MULTITHREAD
diff --git a/aom_dsp/pyramid.h b/aom_dsp/pyramid.h
index a3d63d338b..745bb7e525 100644
--- a/aom_dsp/pyramid.h
+++ b/aom_dsp/pyramid.h
@@ -57,23 +57,31 @@ typedef struct image_pyramid {
   // same time
   //
   // Semantics:
-  // * This mutex must be held whenever reading or writing the `valid` flag
+  // * This mutex must be held whenever reading or writing the
+  //   `filled_levels` field
   //
   // * This mutex must also be held while computing the image pyramid,
   //   to ensure that only one thread may do so at a time.
   //
-  // * However, once you have read the valid flag and seen a true value,
-  //   it is safe to drop the mutex and read from the remaining fields.
-  //   This is because, once the image pyramid is computed, its contents
+  // * However, once you have read the filled_levels field and observed
+  //   a value N, it is safe to drop the mutex and read from the remaining
+  //   fields, including the first N pyramid levels (but no higher).
+  //   Note that filled_levels must be read once and cached in a local variable
+  //   in order for this to be safe - it cannot be re-read without retaking
+  //   the mutex.
+  //
+  //   This works because, once the image pyramid is computed, its contents
   //   will not be changed until the parent frame buffer is recycled,
   //   which will not happen until there are no more outstanding references
   //   to the frame buffer.
   pthread_mutex_t mutex;
 #endif
-  // Flag indicating whether the pyramid contains valid data
-  bool valid;
-  // Number of allocated/filled levels in this pyramid
-  int n_levels;
+  // Maximum number of levels for the given frame size
+  // We always allocate enough memory for this many levels, as the memory
+  // cost of higher levels of the pyramid is minimal.
+  int max_levels;
+  // Number of levels which currently hold valid data
+  int filled_levels;
   // Pointer to allocated buffer
   uint8_t *buffer_alloc;
   // Data for each level
@@ -82,11 +90,9 @@ typedef struct image_pyramid {
   PyramidLayer *layers;
 } ImagePyramid;
 
-size_t aom_get_pyramid_alloc_size(int width, int height, int n_levels,
-                                  bool image_is_16bit);
+size_t aom_get_pyramid_alloc_size(int width, int height, bool image_is_16bit);
 
-ImagePyramid *aom_alloc_pyramid(int width, int height, int n_levels,
-                                bool image_is_16bit);
+ImagePyramid *aom_alloc_pyramid(int width, int height, bool image_is_16bit);
 
 // Fill out a downsampling pyramid for a given frame.
 //
@@ -94,23 +100,28 @@ ImagePyramid *aom_alloc_pyramid(int width, int height, int n_levels,
 // regardless of the input bit depth. Additional levels are then downscaled
 // by powers of 2.
 //
-// For small input frames, the number of levels actually constructed
-// will be limited so that the smallest image is at least MIN_PYRAMID_SIZE
-// pixels along each side.
+// This function will ensure that the first `n_levels` levels of the pyramid
+// are filled, unless the frame is too small to have this many levels.
+// In that case, we will fill all available levels and then stop.
 //
-// However, if the input frame has a side of length < MIN_PYRAMID_SIZE,
-// we will still construct the top level.
-bool aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
-                         ImagePyramid *pyr);
+// Returns the actual number of levels filled, capped at n_levels,
+// or -1 on error.
+int aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
+                        int n_levels, ImagePyramid *pyr);
 
 #ifndef NDEBUG
-// Check if a pyramid has already been computed.
+// Check if a pyramid has already been computed to at least n levels
 // This is mostly a debug helper - as it is necessary to hold pyr->mutex
-// while reading the valid flag, we cannot just write:
-//   assert(pyr->valid);
+// while reading the number of already-computed levels, we cannot just write:
+//   assert(pyr->filled_levels >= n_levels);
 // This function allows the check to be correctly written as:
-//   assert(aom_is_pyramid_valid(pyr));
-bool aom_is_pyramid_valid(ImagePyramid *pyr);
+//   assert(aom_is_pyramid_valid(pyr, n_levels));
+//
+// Note: This deliberately does not restrict n_levels based on the maximum
+// number of permitted levels for the frame size. This allows the check to
+// catch cases where the caller forgets to handle the case where
+// max_levels is less than the requested number of levels
+bool aom_is_pyramid_valid(ImagePyramid *pyr, int n_levels);
 #endif
 
 // Mark a pyramid as no longer containing valid data.
diff --git a/aom_scale/aom_scale_rtcd.pl b/aom_scale/aom_scale_rtcd.pl
index 0e65f54ce1..43c2b59424 100644
--- a/aom_scale/aom_scale_rtcd.pl
+++ b/aom_scale/aom_scale_rtcd.pl
@@ -10,6 +10,8 @@
 ##
 sub aom_scale_forward_decls() {
 print <<EOF
+#include <stdbool.h>
+
 struct yv12_buffer_config;
 EOF
 }
@@ -26,7 +28,7 @@ ()
   add_proto qw/void aom_vertical_band_2_1_scale_i/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
 }
 
-add_proto qw/int aom_yv12_realloc_with_new_border/, "struct yv12_buffer_config *ybf, int new_border, int byte_alignment, int num_pyramid_levels, int num_planes";
+add_proto qw/int aom_yv12_realloc_with_new_border/, "struct yv12_buffer_config *ybf, int new_border, int byte_alignment, bool alloc_pyramid, int num_planes";
 
 add_proto qw/void aom_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
 
diff --git a/aom_scale/generic/yv12config.c b/aom_scale/generic/yv12config.c
index 94b400b9e0..63a1e4e747 100644
--- a/aom_scale/generic/yv12config.c
+++ b/aom_scale/generic/yv12config.c
@@ -60,7 +60,7 @@ static int realloc_frame_buffer_aligned(
     const uint64_t uvplane_size, const int aligned_width,
     const int aligned_height, const int uv_width, const int uv_height,
     const int uv_stride, const int uv_border_w, const int uv_border_h,
-    int num_pyramid_levels, int alloc_y_plane_only) {
+    bool alloc_pyramid, int alloc_y_plane_only) {
   if (ybf) {
     const int aom_byte_align = (byte_alignment == 0) ? 1 : byte_alignment;
     const uint64_t frame_size =
@@ -71,8 +71,8 @@ static int realloc_frame_buffer_aligned(
 #if CONFIG_REALTIME_ONLY || !CONFIG_AV1_ENCODER
     // We should only need an 8-bit version of the source frame if we are
     // encoding in non-realtime mode
-    (void)num_pyramid_levels;
-    assert(num_pyramid_levels == 0);
+    (void)alloc_pyramid;
+    assert(!alloc_pyramid);
 #endif  // CONFIG_REALTIME_ONLY || !CONFIG_AV1_ENCODER
 
 #if defined AOM_MAX_ALLOCABLE_MEMORY
@@ -80,9 +80,8 @@ static int realloc_frame_buffer_aligned(
     uint64_t alloc_size = frame_size;
 #if CONFIG_AV1_ENCODER && !CONFIG_REALTIME_ONLY
     // The size of ybf->y_pyramid
-    if (num_pyramid_levels > 0) {
-      alloc_size += aom_get_pyramid_alloc_size(
-          width, height, num_pyramid_levels, use_highbitdepth);
+    if (alloc_pyramid) {
+      alloc_size += aom_get_pyramid_alloc_size(width, height, use_highbitdepth);
       alloc_size += av1_get_corner_list_size();
     }
 #endif  // CONFIG_AV1_ENCODER && !CONFIG_REALTIME_ONLY
@@ -190,9 +189,8 @@ static int realloc_frame_buffer_aligned(
       av1_free_corner_list(ybf->corners);
       ybf->corners = NULL;
     }
-    if (num_pyramid_levels > 0) {
-      ybf->y_pyramid = aom_alloc_pyramid(width, height, num_pyramid_levels,
-                                         use_highbitdepth);
+    if (alloc_pyramid) {
+      ybf->y_pyramid = aom_alloc_pyramid(width, height, use_highbitdepth);
       if (!ybf->y_pyramid) return AOM_CODEC_MEM_ERROR;
       ybf->corners = av1_alloc_corner_list();
       if (!ybf->corners) return AOM_CODEC_MEM_ERROR;
@@ -237,7 +235,7 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
                              int border, int byte_alignment,
                              aom_codec_frame_buffer_t *fb,
                              aom_get_frame_buffer_cb_fn_t cb, void *cb_priv,
-                             int num_pyramid_levels, int alloc_y_plane_only) {
+                             bool alloc_pyramid, int alloc_y_plane_only) {
 #if CONFIG_SIZE_LIMIT
   if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT)
     return AOM_CODEC_MEM_ERROR;
@@ -264,21 +262,20 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
         ybf, width, height, ss_x, ss_y, use_highbitdepth, border,
         byte_alignment, fb, cb, cb_priv, y_stride, yplane_size, uvplane_size,
         aligned_width, aligned_height, uv_width, uv_height, uv_stride,
-        uv_border_w, uv_border_h, num_pyramid_levels, alloc_y_plane_only);
+        uv_border_w, uv_border_h, alloc_pyramid, alloc_y_plane_only);
   }
   return AOM_CODEC_MEM_ERROR;
 }
 
 int aom_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
                            int ss_x, int ss_y, int use_highbitdepth, int border,
-                           int byte_alignment, int num_pyramid_levels,
+                           int byte_alignment, bool alloc_pyramid,
                            int alloc_y_plane_only) {
   if (ybf) {
     aom_free_frame_buffer(ybf);
-    return aom_realloc_frame_buffer(ybf, width, height, ss_x, ss_y,
-                                    use_highbitdepth, border, byte_alignment,
-                                    NULL, NULL, NULL, num_pyramid_levels,
-                                    alloc_y_plane_only);
+    return aom_realloc_frame_buffer(
+        ybf, width, height, ss_x, ss_y, use_highbitdepth, border,
+        byte_alignment, NULL, NULL, NULL, alloc_pyramid, alloc_y_plane_only);
   }
   return AOM_CODEC_MEM_ERROR;
 }
diff --git a/aom_scale/generic/yv12extend.c b/aom_scale/generic/yv12extend.c
index 727a99e053..384b72c21e 100644
--- a/aom_scale/generic/yv12extend.c
+++ b/aom_scale/generic/yv12extend.c
@@ -497,8 +497,8 @@ void aom_yv12_partial_coloc_copy_v_c(const YV12_BUFFER_CONFIG *src_bc,
 }
 
 int aom_yv12_realloc_with_new_border_c(YV12_BUFFER_CONFIG *ybf, int new_border,
-                                       int byte_alignment,
-                                       int num_pyramid_levels, int num_planes) {
+                                       int byte_alignment, bool alloc_pyramid,
+                                       int num_planes) {
   if (ybf) {
     if (new_border == ybf->border) return 0;
     YV12_BUFFER_CONFIG new_buf;
@@ -506,7 +506,7 @@ int aom_yv12_realloc_with_new_border_c(YV12_BUFFER_CONFIG *ybf, int new_border,
     const int error = aom_alloc_frame_buffer(
         &new_buf, ybf->y_crop_width, ybf->y_crop_height, ybf->subsampling_x,
         ybf->subsampling_y, ybf->flags & YV12_FLAG_HIGHBITDEPTH, new_border,
-        byte_alignment, num_pyramid_levels, 0);
+        byte_alignment, alloc_pyramid, 0);
     if (error) return error;
     // Copy image buffer
     aom_yv12_copy_frame(ybf, &new_buf, num_planes);
diff --git a/aom_scale/yv12config.h b/aom_scale/yv12config.h
index ebc318b9a7..bc05de2102 100644
--- a/aom_scale/yv12config.h
+++ b/aom_scale/yv12config.h
@@ -16,6 +16,8 @@
 extern "C" {
 #endif
 
+#include <stdbool.h>
+
 #include "config/aom_config.h"
 
 #include "aom/aom_codec.h"
@@ -150,7 +152,7 @@ typedef struct yv12_buffer_config {
 // available return values.
 int aom_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
                            int ss_x, int ss_y, int use_highbitdepth, int border,
-                           int byte_alignment, int num_pyramid_levels,
+                           int byte_alignment, bool alloc_pyramid,
                            int alloc_y_plane_only);
 
 // Updates the yv12 buffer config with the frame buffer. |byte_alignment| must
@@ -160,15 +162,11 @@ int aom_alloc_frame_b

(Patch may be truncated, please check the link at the top of this post.)