1 files changed, 171 insertions, 77 deletions
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index ab27d1f88..a953a6fb4 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -383,14 +383,16 @@ static AOM_INLINE void fill_variance_4x4avg(const uint8_t *s, int sp,
                                             int highbd_flag,
 #endif
                                             int pixels_wide, int pixels_high,
-                                            int is_key_frame) {
+                                            int is_key_frame,
+                                            int border_offset_4x4) {
   int k;
   for (k = 0; k < 4; k++) {
     int x4_idx = x8_idx + ((k & 1) << 2);
     int y4_idx = y8_idx + ((k >> 1) << 2);
     unsigned int sse = 0;
     int sum = 0;
-    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
+    if (x4_idx < pixels_wide - border_offset_4x4 &&
+        y4_idx < pixels_high - border_offset_4x4) {
       int s_avg;
       int d_avg = 128;
 #if CONFIG_AV1_HIGHBITDEPTH
@@ -429,8 +431,7 @@ static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
 }
 
 static AOM_INLINE void tune_thresh_based_on_qindex_window(
-    int qindex, int th, int source_sad, int ag_idx, int64_t thresholds[]) {
-  const int win = 45;
+    int qindex, int th, int win, int fac, int64_t thresholds[]) {
   double weight;
 
   if (qindex < th - win)
@@ -443,7 +444,6 @@ static AOM_INLINE void tune_thresh_based_on_qindex_window(
       (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
   thresholds[2] =
       (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
-  const int fac = (!ag_idx && source_sad != kLowSad) ? 1 : 2;
   thresholds[3] =
       (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]);
 }
@@ -451,7 +451,9 @@ static AOM_INLINE void tune_thresh_based_on_qindex_window(
 static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
                                           int q, int content_lowsumdiff,
                                           int source_sad_nonrd,
-                                          int source_sad_rd, int segment_id) {
+                                          int source_sad_rd, int segment_id,
+                                          uint64_t blk_sad,
+                                          int lighting_change) {
   AV1_COMMON *const cm = &cpi->common;
   const int is_key_frame = frame_is_intra_only(cm);
   const int threshold_multiplier = is_key_frame ? 120 : 1;
@@ -510,12 +512,12 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
   else
     threshold_base =
         scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
-                                  cm->height, cpi->svc.non_reference_frame);
+                                  cm->height, cpi->ppi->rtc_ref.non_reference_frame);
 #else
   // Increase base variance threshold based on content_state/sum_diff level.
-  threshold_base =
-      scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
-                                cm->height, cpi->svc.non_reference_frame);
+  threshold_base = scale_part_thresh_content(
+      threshold_base, cpi->oxcf.speed, cm->width, cm->height,
+      cpi->ppi->rtc_ref.non_reference_frame);
 #endif
   thresholds[0] = threshold_base >> 1;
   thresholds[1] = threshold_base;
@@ -565,11 +567,13 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
     thresholds[2] = (5 * threshold_base) >> 2;
   } else if (cm->width < 1920 && cm->height < 1080) {
     thresholds[2] = threshold_base << 1;
-  } else {
+  } else if (cm->width < 2560 && cm->height < 1440) {
     thresholds[2] = (5 * threshold_base) >> 1;
+  } else {
+    thresholds[2] = (7 * threshold_base) >> 1;
   }
   // Tune thresholds less or more aggressively to prefer larger partitions
-  if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 4) {
+  if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) {
     double weight;
     const int win = 20;
     if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
@@ -585,14 +589,23 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
       }
     }
     if (cm->width * cm->height <= 352 * 288) {
-      thresholds[3] = INT32_MAX;
+      thresholds[3] = INT64_MAX;
       if (segment_id == 0) {
         thresholds[1] <<= 2;
-        thresholds[2] <<= (source_sad_nonrd == kLowSad) ? 5 : 4;
+        thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4;
       } else {
         thresholds[1] <<= 1;
         thresholds[2] <<= 3;
       }
+      // Allow for split to 8x8 for superblocks where part of it has
+      // moving boundary. So allow for sb with source_sad above threshold,
+      // and avoid very large source_sad or high source content, to avoid
+      // too many 8x8 within superblock.
+      if (segment_id == 0 && cpi->rc.avg_source_sad < 25000 &&
+          blk_sad > 25000 && blk_sad < 50000 && !lighting_change) {
+        thresholds[2] = (3 * thresholds[2]) >> 2;
+        thresholds[3] = thresholds[2] << 3;
+      }
       // Condition the increase of partition thresholds on the segment
       // and the content. Avoid the increase for superblocks which have
       // high source sad, unless the whole frame has very high motion
@@ -602,7 +615,7 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
                (source_sad_nonrd != kHighSad ||
                 cpi->rc.avg_source_sad > 50000)) {
       thresholds[0] = (3 * thresholds[0]) >> 1;
-      thresholds[3] = INT32_MAX;
+      thresholds[3] = INT64_MAX;
       if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
         thresholds[1] =
             (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
@@ -616,16 +629,16 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
           (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
       thresholds[2] =
           (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
-      thresholds[3] = INT32_MAX;
+      thresholds[3] = INT64_MAX;
     }
   } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) {
-    tune_thresh_based_on_qindex_window(
-        current_qindex, QINDEX_LARGE_BLOCK_THR, source_sad_nonrd,
-        cpi->sf.rt_sf.prefer_large_partition_blocks - 2, thresholds);
+    thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0;
+    thresholds[2] =
+        (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2];
   } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) {
-    thresholds[3] <<= 2;
-    thresholds[1] <<= (source_sad_nonrd == kLowSad) ? 1 : 0;
-    thresholds[2] <<= (source_sad_nonrd == kLowSad) ? 1 : 0;
+    const int fac = (source_sad_nonrd <= kLowSad) ? 2 : 1;
+    tune_thresh_based_on_qindex_window(current_qindex, QINDEX_LARGE_BLOCK_THR,
+                                       45, fac, thresholds);
   }
   if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128))
     thresholds[3] = INT64_MAX;
@@ -916,7 +929,7 @@ void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
     return;
   } else {
     set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, q, content_lowsumdiff, 0,
-                       0, 0);
+                       0, 0, 0, 0);
     // The threshold below is not changed locally.
     cpi->vbp_info.threshold_minmax = 15 + (q >> 3);
   }
@@ -1010,13 +1023,23 @@ static void fill_variance_tree_leaves(
   const int compute_minmax_variance = 0;
   const int segment_id = xd->mi[0]->segment_id;
   int pixels_wide = 128, pixels_high = 128;
-
+  int border_offset_4x4 = 0;
+  int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf;
   if (is_small_sb) {
     pixels_wide = 64;
     pixels_high = 64;
   }
   if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
   if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
+#if CONFIG_AV1_TEMPORAL_DENOISING
+  temporal_denoising |= cpi->oxcf.noise_sensitivity;
+#endif
+  // For temporal filtering or temporal denoiser enabled: since the source
+  // is modified we need to avoid 4x4 avg along superblock boundary, since
+  // simd code will load 8 pixels for 4x4 avg and so can access source
+  // data outside superblock (while it's being modified by temporal filter).
+  // Temporal filtering is never done on key frames.
+  if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4;
   for (int m = 0; m < num_64x64_blocks; m++) {
     const int x64_idx = ((m & 1) << 6);
     const int y64_idx = ((m >> 1) << 6);
@@ -1096,12 +1119,12 @@ static void fill_variance_tree_leaves(
             int x8_idx = x16_idx + ((k & 1) << 3);
             int y8_idx = y16_idx + ((k >> 1) << 3);
             VP8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
-            fill_variance_4x4avg(src, src_stride, dst, dst_stride, x8_idx,
-                                 y8_idx, vst2,
+            fill_variance_4x4avg(
+                src, src_stride, dst, dst_stride, x8_idx, y8_idx, vst2,
 #if CONFIG_AV1_HIGHBITDEPTH
-                                 xd->cur_buf->flags,
+                xd->cur_buf->flags,
 #endif
-                                 pixels_wide, pixels_high, is_key_frame);
+                pixels_wide, pixels_high, is_key_frame, border_offset_4x4);
           }
         }
       }
@@ -1110,7 +1133,8 @@ static void fill_variance_tree_leaves(
 }
 
 static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
-                         unsigned int *y_sad_g, unsigned int *y_sad_last,
+                         unsigned int *y_sad_g, unsigned int *y_sad_alt,
+                         unsigned int *y_sad_last,
                          MV_REFERENCE_FRAME *ref_frame_partition, int mi_row,
                          int mi_col) {
   AV1_COMMON *const cm = &cpi->common;
@@ -1118,17 +1142,24 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
   const int num_planes = av1_num_planes(cm);
   const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
   BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
-  // TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it
-  // is!!
   MB_MODE_INFO *mi = xd->mi[0];
   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
   assert(yv12 != NULL);
   const YV12_BUFFER_CONFIG *yv12_g = NULL;
-
-  // For non-SVC GOLDEN is another temporal reference. Check if it should be
-  // used as reference for partitioning.
-  if (!cpi->ppi->use_svc && (cpi->ref_frame_flags & AOM_GOLD_FLAG) &&
-      x->content_state_sb.source_sad_nonrd != kZeroSad) {
+  const YV12_BUFFER_CONFIG *yv12_alt = NULL;
+  // Check if LAST is a reference. For spatial layers always use it as the
+  // reference: scaling (golden or altref being lower resolution) is not
+  // handled/checked here.
+  int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) ||
+                     cpi->svc.number_spatial_layers > 1;
+  int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG;
+  int use_alt_ref = cpi->ppi->rtc_ref.set_ref_frame_config ||
+                    cpi->sf.rt_sf.use_nonrd_altref_frame;
+
+  // For 1 spatial layer: GOLDEN is another temporal reference.
+  // Check if it should be used as reference for partitioning.
+  if (cpi->svc.number_spatial_layers == 1 && use_golden_ref &&
+      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
     yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
     if (yv12_g && yv12_g != yv12) {
       av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
@@ -1139,30 +1170,47 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
     }
   }
 
-  av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
-                       get_ref_scale_factors(cm, LAST_FRAME), num_planes);
-  mi->ref_frame[0] = LAST_FRAME;
-  mi->ref_frame[1] = NONE_FRAME;
-  mi->bsize = cm->seq_params->sb_size;
-  mi->mv[0].as_int = 0;
-  mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
-  if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition) {
-    if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
-      const MV dummy_mv = { 0, 0 };
-      *y_sad = av1_int_pro_motion_estimation(cpi, x, cm->seq_params->sb_size,
-                                             mi_row, mi_col, &dummy_mv);
+  // For 1 spatial layer: ALTREF is another temporal reference.
+  // Check if it should be used as reference for partitioning.
+  if (cpi->svc.number_spatial_layers == 1 && use_alt_ref &&
+      (cpi->ref_frame_flags & AOM_ALT_FLAG) &&
+      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
+    yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
+    if (yv12_alt && yv12_alt != yv12) {
+      av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col,
+                           get_ref_scale_factors(cm, ALTREF_FRAME), num_planes);
+      *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf(
+          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+          xd->plane[0].pre[0].stride);
     }
   }
-  if (*y_sad == UINT_MAX) {
-    *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
-        x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
-        xd->plane[0].pre[0].stride);
+
+  if (use_last_ref) {
+    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+                         get_ref_scale_factors(cm, LAST_FRAME), num_planes);
+    mi->ref_frame[0] = LAST_FRAME;
+    mi->ref_frame[1] = NONE_FRAME;
+    mi->bsize = cm->seq_params->sb_size;
+    mi->mv[0].as_int = 0;
+    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
+    if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition) {
+      if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
+        const MV dummy_mv = { 0, 0 };
+        *y_sad = av1_int_pro_motion_estimation(cpi, x, cm->seq_params->sb_size,
+                                               mi_row, mi_col, &dummy_mv);
+      }
+    }
+    if (*y_sad == UINT_MAX) {
+      *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
+          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+          xd->plane[0].pre[0].stride);
+    }
+    *y_sad_last = *y_sad;
   }
-  *y_sad_last = *y_sad;
 
-  // Pick the ref frame for partitioning, use golden frame only if its
-  // lower sad.
-  if (*y_sad_g < 0.9 * *y_sad) {
+  // Pick the ref frame for partitioning, use golden or altref frame only if
+  // it has lower sad; bias toward LAST with factor 0.9.
+  if (*y_sad_g < 0.9 * *y_sad && *y_sad_g < *y_sad_alt) {
     av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                          get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
     mi->ref_frame[0] = GOLDEN_FRAME;
@@ -1170,6 +1218,14 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
     *y_sad = *y_sad_g;
     *ref_frame_partition = GOLDEN_FRAME;
     x->nonrd_prune_ref_frame_search = 0;
+  } else if (*y_sad_alt < 0.9 * *y_sad && *y_sad_alt < *y_sad_g) {
+    av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col,
+                         get_ref_scale_factors(cm, ALTREF_FRAME), num_planes);
+    mi->ref_frame[0] = ALTREF_FRAME;
+    mi->mv[0].as_int = 0;
+    *y_sad = *y_sad_alt;
+    *ref_frame_partition = ALTREF_FRAME;
+    x->nonrd_prune_ref_frame_search = 0;
   } else {
     *ref_frame_partition = LAST_FRAME;
     x->nonrd_prune_ref_frame_search =
@@ -1181,7 +1237,7 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
     set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                   cm->seq_params->sb_size, AOM_PLANE_Y,
-                                  AOM_PLANE_V);
+                                  num_planes - 1);
   }
 }
 
@@ -1205,6 +1261,18 @@ static AOM_INLINE PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
              : PART_EVAL_ONLY_NONE;
 }
 
+static AOM_INLINE bool is_set_force_zeromv_skip_based_on_src_sad(
+    int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
+  // Decide from the speed-feature level and the superblock's source-sad
+  // class whether the forced zero-mv skip may be set. Levels below 1 never
+  // allow it.
+  const int level = set_zeromv_skip_based_on_source_sad;
+  if (level < 1) return false;
+  // Level 1 demands exactly kZeroSad; higher levels take <= kVeryLowSad.
+  return (level == 1) ? (source_sad_nonrd == kZeroSad)
+                      : (source_sad_nonrd <= kVeryLowSad);
+}
+
 int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                       ThreadData *td, MACROBLOCK *x, int mi_row,
                                       int mi_col) {
@@ -1250,6 +1318,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
 
   unsigned int y_sad = UINT_MAX;
   unsigned int y_sad_g = UINT_MAX;
+  unsigned int y_sad_alt = UINT_MAX;
   unsigned int y_sad_last = UINT_MAX;
   BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
 
@@ -1267,6 +1336,17 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
   const int low_res = (cm->width <= 352 && cm->height <= 288);
   int variance4x4downsample[64];
   const int segment_id = xd->mi[0]->segment_id;
+  uint64_t blk_sad = 0;
+  if (cpi->src_sad_blk_64x64 != NULL && !cpi->ppi->use_svc) {
+    const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
+                                  ? (cm->seq_params->mib_size >> 1)
+                                  : cm->seq_params->mib_size;
+    const int sb_cols =
+        (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
+    const int sbi_col = mi_col / sb_size_by_mb;
+    const int sbi_row = mi_row / sb_size_by_mb;
+    blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
+  }
 
   if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
       cyclic_refresh_segment_id_boosted(segment_id)) {
@@ -1274,12 +1354,14 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
         av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
     set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff,
                        x->content_state_sb.source_sad_nonrd,
-                       x->content_state_sb.source_sad_rd, 1);
+                       x->content_state_sb.source_sad_rd, 1, blk_sad,
+                       x->content_state_sb.lighting_change);
   } else {
     set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
                        x->content_state_sb.low_sumdiff,
                        x->content_state_sb.source_sad_nonrd,
-                       x->content_state_sb.source_sad_rd, 0);
+                       x->content_state_sb.source_sad_rd, 0, blk_sad,
+                       x->content_state_sb.lighting_change);
   }
 
   // For non keyframes, disable 4x4 average for low resolution when speed = 8
@@ -1310,8 +1392,8 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
   }
 
   if (!is_key_frame) {
-    setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_last, &ref_frame_partition,
-                 mi_row, mi_col);
+    setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last,
+                 &ref_frame_partition, mi_row, mi_col);
 
     MB_MODE_INFO *mi = xd->mi[0];
     // Use reference SB directly for zero mv.
@@ -1333,32 +1415,40 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
   chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, is_key_frame, zero_motion,
                uv_sad);
 
-  x->force_zeromv_skip = 0;
-  const unsigned int thresh_exit_part =
-      (cm->seq_params->sb_size == BLOCK_64X64) ? 5000 : 10000;
+  x->force_zeromv_skip_for_sb = 0;
+  const bool is_set_force_zeromv_skip =
+      is_set_force_zeromv_skip_based_on_src_sad(
+          cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
+          x->content_state_sb.source_sad_nonrd);
+
   // If the superblock is completely static (zero source sad) and
   // the y_sad (relative to LAST ref) is very small, take the sb_size partition
   // and exit, and force zeromv_last skip mode for nonrd_pickmode.
-  // Only do this when the cyclic refresh is applied, and only on the base
-  // segment (so the QP-boosted segment can still contnue cleaning/ramping
-  // up the quality). Condition on color uv_sad is also added.
+  // Only do this on the base segment (so the QP-boosted segment, if applied,
+  // can still continue cleaning/ramping up the quality).
+  // Condition on color uv_sad is also added.
   if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv &&
-      cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
-      cpi->cyclic_refresh->apply_cyclic_refresh &&
-      segment_id == CR_SEGMENT_ID_BASE &&
-      x->content_state_sb.source_sad_nonrd == kZeroSad &&
-      ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0 &&
-      y_sad < thresh_exit_part && uv_sad[0]<(3 * thresh_exit_part)>> 2 &&
-      uv_sad[1]<(3 * thresh_exit_part)>> 2) {
+      cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE &&
+      is_set_force_zeromv_skip && ref_frame_partition == LAST_FRAME &&
+      xd->mi[0]->mv[0].as_int == 0) {
     const int block_width = mi_size_wide[cm->seq_params->sb_size];
     const int block_height = mi_size_high[cm->seq_params->sb_size];
+    const unsigned int thresh_exit_part_y =
+        cpi->zeromv_skip_thresh_exit_part[bsize];
+    const unsigned int thresh_exit_part_uv =
+        CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y);
     if (mi_col + block_width <= tile->mi_col_end &&
-        mi_row + block_height <= tile->mi_row_end) {
+        mi_row + block_height <= tile->mi_row_end &&
+        y_sad < thresh_exit_part_y && uv_sad[0] < thresh_exit_part_uv &&
+        uv_sad[1] < thresh_exit_part_uv) {
       set_block_size(cpi, mi_row, mi_col, bsize);
-      x->force_zeromv_skip = 1;
+      x->force_zeromv_skip_for_sb = 1;
       if (vt2) aom_free(vt2);
       if (vt) aom_free(vt);
       return 0;
+    } else if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
+               cpi->sf.rt_sf.part_early_exit_zeromv >= 2) {
+      x->force_zeromv_skip_for_sb = 2;
     }
   }
 
@@ -1407,6 +1497,10 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
       // (some threshold of) the average variance over the sub-16x16 blocks,
       // then force this block to split. This also forces a split on the upper
       // (64x64) level.
+      uint64_t frame_sad_thresh = 20000;
+      if (cpi->svc.number_temporal_layers > 2 &&
+          cpi->svc.temporal_layer_id == 0)
+        frame_sad_thresh = frame_sad_thresh << 1;
       if (force_split[5 + m2 + i] == PART_EVAL_ALL) {
         get_variance(&vt->split[m].split[i].part_variances.none);
         var_32x32 = vt->split[m].split[i].part_variances.none.variance;
@@ -1428,7 +1522,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                      maxvar_16x16[m][i] > thresholds[2]) ||
                     (cpi->sf.rt_sf.prefer_large_partition_blocks &&
                      x->content_state_sb.source_sad_nonrd > kLowSad &&
-                     cpi->rc.frame_source_sad < 20000 &&
+                     cpi->rc.frame_source_sad < frame_sad_thresh &&
                      maxvar_16x16[m][i] > (thresholds[2] >> 4) &&
                      maxvar_16x16[m][i] > (minvar_16x16[m][i] << 2)))) {
           force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;