1 files changed, 247 insertions, 21 deletions
diff --git a/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c b/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c
index caa15c21e2..18ae0f28f4 100644
--- a/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c
+++ b/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c
@@ -351,8 +351,14 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
   const uint32_t scale_vert = plane_ptr->subsampling_y;
   uint8_t *const dst_ptr = plane_ptr->dst.buf;
   const int dst_stride = plane_ptr->dst.stride;
-  const int y_range = (MAX_MIB_SIZE >> scale_vert);
-  const int x_range = (MAX_MIB_SIZE >> scale_horz);
+  const int plane_mi_rows =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
+  const int plane_mi_cols =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+  const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
+                             (MAX_MIB_SIZE >> scale_vert));
+  const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
+                             (MAX_MIB_SIZE >> scale_horz));
   for (int y = 0; y < y_range; y++) {
     uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
     for (int x = 0; x < x_range;) {
@@ -376,8 +382,8 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
       }
 
 #if CONFIG_AV1_HIGHBITDEPTH
-      const int use_highbitdepth = cm->seq_params.use_highbitdepth;
-      const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
+      const int use_highbitdepth = cm->seq_params->use_highbitdepth;
+      const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth;
       switch (params.filter_length) {
         // apply 4-tap filtering
         case 4:
@@ -456,6 +462,84 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
   }
 }
 
+void av1_filter_block_plane_vert_rt(const AV1_COMMON *const cm,
+                                    const MACROBLOCKD *const xd,
+                                    const int plane,
+                                    const MACROBLOCKD_PLANE *const plane_ptr,
+                                    const uint32_t mi_row,
+                                    const uint32_t mi_col) {
+  const uint32_t scale_horz = plane_ptr->subsampling_x;
+  const uint32_t scale_vert = plane_ptr->subsampling_y;
+  uint8_t *const dst_ptr = plane_ptr->dst.buf;
+  const int dst_stride = plane_ptr->dst.stride;
+  const int plane_mi_rows =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
+  const int plane_mi_cols =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+  const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
+                             (MAX_MIB_SIZE >> scale_vert));
+  const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
+                             (MAX_MIB_SIZE >> scale_horz));
+  assert(!plane);
+  assert(!(y_range % 2));
+  for (int y = 0; y < y_range; y += 2) {
+    uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
+    for (int x = 0; x < x_range;) {
+      // inner loop always filter vertical edges in a MI block. If MI size
+      // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
+      // If 4x4 transform is used, it will then filter the internal edge
+      //  aligned with a 4x4 block
+      const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
+      const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
+      uint32_t advance_units;
+      TX_SIZE tx_size;
+      AV1_DEBLOCKING_PARAMETERS params;
+      memset(&params, 0, sizeof(params));
+
+      tx_size =
+          set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, xd,
+                             VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
+      if (tx_size == TX_INVALID) {
+        params.filter_length = 0;
+        tx_size = TX_4X4;
+      }
+
+      switch (params.filter_length) {
+        // apply 4-tap filtering
+        case 4:
+          aom_lpf_vertical_4_dual(p, dst_stride, params.mblim, params.lim,
+                                  params.hev_thr, params.mblim, params.lim,
+                                  params.hev_thr);
+          break;
+        case 6:  // apply 6-tap filter for chroma plane only
+          assert(plane != 0);
+          aom_lpf_vertical_6_dual(p, dst_stride, params.mblim, params.lim,
+                                  params.hev_thr, params.mblim, params.lim,
+                                  params.hev_thr);
+          break;
+        // apply 8-tap filtering
+        case 8:
+          aom_lpf_vertical_8_dual(p, dst_stride, params.mblim, params.lim,
+                                  params.hev_thr, params.mblim, params.lim,
+                                  params.hev_thr);
+          break;
+        // apply 14-tap filtering
+        case 14:
+          aom_lpf_vertical_14_dual(p, dst_stride, params.mblim, params.lim,
+                                   params.hev_thr, params.mblim, params.lim,
+                                   params.hev_thr);
+          break;
+        // no filtering
+        default: break;
+      }
+      // advance the destination pointer
+      advance_units = tx_size_wide_unit[tx_size];
+      x += advance_units;
+      p += advance_units * MI_SIZE;
+    }
+  }
+}
+
 void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
                                  const MACROBLOCKD *const xd, const int plane,
                                  const MACROBLOCKD_PLANE *const plane_ptr,
@@ -464,8 +548,14 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
   const uint32_t scale_vert = plane_ptr->subsampling_y;
   uint8_t *const dst_ptr = plane_ptr->dst.buf;
   const int dst_stride = plane_ptr->dst.stride;
-  const int y_range = (MAX_MIB_SIZE >> scale_vert);
-  const int x_range = (MAX_MIB_SIZE >> scale_horz);
+  const int plane_mi_rows =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
+  const int plane_mi_cols =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+  const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
+                             (MAX_MIB_SIZE >> scale_vert));
+  const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
+                             (MAX_MIB_SIZE >> scale_horz));
   for (int x = 0; x < x_range; x++) {
     uint8_t *p = dst_ptr + x * MI_SIZE;
     for (int y = 0; y < y_range;) {
@@ -489,8 +579,8 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
       }
 
 #if CONFIG_AV1_HIGHBITDEPTH
-      const int use_highbitdepth = cm->seq_params.use_highbitdepth;
-      const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
+      const int use_highbitdepth = cm->seq_params->use_highbitdepth;
+      const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth;
       switch (params.filter_length) {
         // apply 4-tap filtering
         case 4:
@@ -572,6 +662,84 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
   }
 }
 
+void av1_filter_block_plane_horz_rt(const AV1_COMMON *const cm,
+                                    const MACROBLOCKD *const xd,
+                                    const int plane,
+                                    const MACROBLOCKD_PLANE *const plane_ptr,
+                                    const uint32_t mi_row,
+                                    const uint32_t mi_col) {
+  const uint32_t scale_horz = plane_ptr->subsampling_x;
+  const uint32_t scale_vert = plane_ptr->subsampling_y;
+  uint8_t *const dst_ptr = plane_ptr->dst.buf;
+  const int dst_stride = plane_ptr->dst.stride;
+  const int plane_mi_rows =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
+  const int plane_mi_cols =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+  const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
+                             (MAX_MIB_SIZE >> scale_vert));
+  const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
+                             (MAX_MIB_SIZE >> scale_horz));
+  assert(!plane);
+  for (int x = 0; x < x_range; x += 2) {
+    uint8_t *p = dst_ptr + x * MI_SIZE;
+    for (int y = 0; y < y_range;) {
+      // inner loop always filter vertical edges in a MI block. If MI size
+      // is 8x8, it will first filter the vertical edge aligned with a 8x8
+      // block. If 4x4 transform is used, it will then filter the internal
+      // edge aligned with a 4x4 block
+      const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
+      const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
+      uint32_t advance_units;
+      TX_SIZE tx_size;
+      AV1_DEBLOCKING_PARAMETERS params;
+      memset(&params, 0, sizeof(params));
+
+      tx_size = set_lpf_parameters(
+          &params, (cm->mi_params.mi_stride << scale_vert), cm, xd, HORZ_EDGE,
+          curr_x, curr_y, plane, plane_ptr);
+      if (tx_size == TX_INVALID) {
+        params.filter_length = 0;
+        tx_size = TX_4X4;
+      }
+
+      switch (params.filter_length) {
+        // apply 4-tap filtering
+        case 4:
+          aom_lpf_horizontal_4_dual(p, dst_stride, params.mblim, params.lim,
+                                    params.hev_thr, params.mblim, params.lim,
+                                    params.hev_thr);
+          break;
+        // apply 6-tap filtering
+        case 6:
+          assert(plane != 0);
+          aom_lpf_horizontal_6_dual(p, dst_stride, params.mblim, params.lim,
+                                    params.hev_thr, params.mblim, params.lim,
+                                    params.hev_thr);
+          break;
+        // apply 8-tap filtering
+        case 8:
+          aom_lpf_horizontal_8_dual(p, dst_stride, params.mblim, params.lim,
+                                    params.hev_thr, params.mblim, params.lim,
+                                    params.hev_thr);
+          break;
+        // apply 14-tap filtering
+        case 14:
+          aom_lpf_horizontal_14_dual(p, dst_stride, params.mblim, params.lim,
+                                     params.hev_thr, params.mblim, params.lim,
+                                     params.hev_thr);
+          break;
+        // no filtering
+        default: break;
+      }
+      // advance the destination pointer
+      advance_units = tx_size_high_unit[tx_size];
+      y += advance_units;
+      p += advance_units * dst_stride * MI_SIZE;
+    }
+  }
+}
+
 void av1_filter_block_plane_vert_test(const AV1_COMMON *const cm,
                                       const MACROBLOCKD *const xd,
                                       const int plane,
@@ -661,7 +829,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
 #if CONFIG_LPF_MASK
                              int is_decoding,
 #endif
-                             int plane_start, int plane_end) {
+                             int plane_start, int plane_end, int is_realtime) {
   struct macroblockd_plane *pd = xd->plane;
   const int col_start = 0;
   const int col_end = cm->mi_params.mi_cols;
@@ -679,7 +847,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
       else if (plane == 2 && !(cm->lf.filter_level_v))
         continue;
 
-      av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, 0, 0,
+      av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, 0, 0,
                            plane, plane + 1);
 
       av1_build_bitmask_vert_info(cm, &pd[plane], plane);
@@ -716,49 +884,106 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
       continue;
     else if (plane == 2 && !(cm->lf.filter_level_v))
       continue;
-
     if (cm->lf.combine_vert_horz_lf) {
       // filter all vertical and horizontal edges in every 128x128 super block
       for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
         for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
           // filter vertical edges
-          av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
-                               mi_col, plane, plane + 1);
+          av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer,
+                               mi_row, mi_col, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+          (void)is_realtime;
           av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
                                       mi_col);
+#else
+          if (is_realtime && !plane) {
+            av1_filter_block_plane_vert_rt(cm, xd, plane, &pd[plane], mi_row,
+                                           mi_col);
+
+          } else {
+            av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
+                                        mi_col);
+          }
+#endif
           // filter horizontal edges
           if (mi_col - MAX_MIB_SIZE >= 0) {
-            av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer,
+            av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer,
                                  mi_row, mi_col - MAX_MIB_SIZE, plane,
                                  plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+            (void)is_realtime;
             av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
                                         mi_col - MAX_MIB_SIZE);
+#else
+            if (is_realtime && !plane) {
+              av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row,
+                                             mi_col - MAX_MIB_SIZE);
+            } else {
+              av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+                                          mi_col - MAX_MIB_SIZE);
+            }
+#endif
           }
         }
         // filter horizontal edges
-        av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+        av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, mi_row,
                              mi_col - MAX_MIB_SIZE, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+        (void)is_realtime;
         av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
                                     mi_col - MAX_MIB_SIZE);
+#else
+        if (is_realtime && !plane) {
+          av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row,
+                                         mi_col - MAX_MIB_SIZE);
+
+        } else {
+          av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+                                      mi_col - MAX_MIB_SIZE);
+        }
+#endif
       }
     } else {
       // filter all vertical edges in every 128x128 super block
       for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
         for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
-          av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
-                               mi_col, plane, plane + 1);
+          av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer,
+                               mi_row, mi_col, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+          (void)is_realtime;
           av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
                                       mi_col);
+#else
+          if (is_realtime && !plane) {
+            av1_filter_block_plane_vert_rt(cm, xd, plane, &pd[plane], mi_row,
+                                           mi_col);
+          } else {
+            av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
+                                        mi_col);
+          }
+#endif
         }
       }
 
       // filter all horizontal edges in every 128x128 super block
       for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
         for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
-          av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
-                               mi_col, plane, plane + 1);
+          av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer,
+                               mi_row, mi_col, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+          (void)is_realtime;
           av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
                                       mi_col);
+#else
+          if (is_realtime && !plane) {
+            av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row,
+                                           mi_col);
+
+          } else {
+            av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+                                        mi_col);
+          }
+#endif
         }
       }
     }
@@ -770,7 +995,8 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
 #if CONFIG_LPF_MASK
                            int is_decoding,
 #endif
-                           int plane_start, int plane_end, int partial_frame) {
+                           int plane_start, int plane_end, int partial_frame,
+                           int is_realtime) {
   int start_mi_row, end_mi_row, mi_rows_to_filter;
 
   start_mi_row = 0;
@@ -786,5 +1012,5 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
 #if CONFIG_LPF_MASK
                    is_decoding,
 #endif
-                   plane_start, plane_end);
+                   plane_start, plane_end, is_realtime);
 }