aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWan-Teh Chang <wtc@google.com>2024-02-29 14:23:42 -0800
committerWan-Teh Chang <wtc@google.com>2024-03-04 20:37:19 +0000
commitfd059efcfecfac7be90b3aed3c2159a38fa3cd5d (patch)
treec1bce1bca3d7fe8656033c185d926b3beec7227f
parentbff87d33a9ac9ffe08f679beea2b565bd1b2a388 (diff)
downloadlibaom-fd059efcfecfac7be90b3aed3c2159a38fa3cd5d.tar.gz
Optimize multiply_and_scale() by precomputation
multiply_and_scale() is called in nested loops. Precompute the components of w (w1 and w2) and pass the components of w to multiply_and_scale(). Bug: b:319140742 Bug: oss-fuzz:66474 Change-Id: I1ecf42dd4a7a552739136cbfc21f16aaba8f4523 (cherry picked from commit 9ae3ff78826cb7ed785ebe999acefc3426d1820f)
-rw-r--r--av1/encoder/pickrst.c35
1 files changed, 27 insertions, 8 deletions
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 6753c9edc..369529a4f 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -1103,13 +1103,26 @@ static INLINE int wrap_index(int i, int wiener_win) {
return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
}
-// Calculates x * w / WIENER_TAP_SCALE_FACTOR. The multiplication may overflow,
-// so we do the multiplication by components and combine it with the division.
-static INLINE int64_t multiply_and_scale(int64_t x, int32_t w) {
- // Let w = w1 * WIENER_TAP_SCALE_FACTOR + w2
- const int32_t w1 = w / WIENER_TAP_SCALE_FACTOR;
- const int32_t w2 = w - w1 * WIENER_TAP_SCALE_FACTOR;
+// Splits each w[i] into smaller components w1[i] and w2[i] such that
+// w[i] = w1[i] * WIENER_TAP_SCALE_FACTOR + w2[i].
+static INLINE void split_wiener_filter_coefficients(int wiener_win,
+ const int32_t *w,
+ int32_t *w1, int32_t *w2) {
+ for (int i = 0; i < wiener_win; i++) {
+ w1[i] = w[i] / WIENER_TAP_SCALE_FACTOR;
+ w2[i] = w[i] - w1[i] * WIENER_TAP_SCALE_FACTOR;
+ assert(w[i] == w1[i] * WIENER_TAP_SCALE_FACTOR + w2[i]);
+ }
+}
+
+// Calculates x * w / WIENER_TAP_SCALE_FACTOR, where
+// w = w1 * WIENER_TAP_SCALE_FACTOR + w2.
+//
+// The multiplication x * w may overflow, so we multiply x by the components of
+// w (w1 and w2) and combine the multiplication with the division.
+static INLINE int64_t multiply_and_scale(int64_t x, int32_t w1, int32_t w2) {
// Let y = x * w / WIENER_TAP_SCALE_FACTOR
+ // = x * (w1 * WIENER_TAP_SCALE_FACTOR + w2) / WIENER_TAP_SCALE_FACTOR
const int64_t y = x * w1 + x * w2 / WIENER_TAP_SCALE_FACTOR;
return y;
}
@@ -1190,6 +1203,7 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
int i, j;
int64_t S[WIENER_WIN];
int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
+ int32_t b1[WIENER_WIN], b2[WIENER_WIN];
const int wiener_win2 = wiener_win * wiener_win;
const int wiener_halfwin1 = (wiener_win >> 1) + 1;
memset(A, 0, sizeof(A));
@@ -1200,6 +1214,7 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
A[jj] += Mc[i][j] * b[i] / WIENER_TAP_SCALE_FACTOR;
}
}
+ split_wiener_filter_coefficients(wiener_win, b, b1, b2);
for (i = 0; i < wiener_win; i++) {
for (j = 0; j < wiener_win; j++) {
@@ -1217,7 +1232,8 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
// multiplication with the last division.
const int64_t x = Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] /
WIENER_TAP_SCALE_FACTOR;
- B[ll * wiener_halfwin1 + kk] += multiply_and_scale(x, b[j]);
+ // b[j] = b1[j] * WIENER_TAP_SCALE_FACTOR + b2[j]
+ B[ll * wiener_halfwin1 + kk] += multiply_and_scale(x, b1[j], b2[j]);
}
}
}
@@ -1257,6 +1273,7 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
int i, j;
int64_t S[WIENER_WIN];
int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
+ int32_t a1[WIENER_WIN], a2[WIENER_WIN];
const int wiener_win2 = wiener_win * wiener_win;
const int wiener_halfwin1 = (wiener_win >> 1) + 1;
memset(A, 0, sizeof(A));
@@ -1267,6 +1284,7 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
A[ii] += Mc[i][j] * a[j] / WIENER_TAP_SCALE_FACTOR;
}
}
+ split_wiener_filter_coefficients(wiener_win, a, a1, a2);
for (i = 0; i < wiener_win; i++) {
const int ii = wrap_index(i, wiener_win);
@@ -1284,7 +1302,8 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
// multiplication with the last division.
const int64_t x = Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
WIENER_TAP_SCALE_FACTOR;
- B[jj * wiener_halfwin1 + ii] += multiply_and_scale(x, a[l]);
+ // a[l] = a1[l] * WIENER_TAP_SCALE_FACTOR + a2[l]
+ B[jj * wiener_halfwin1 + ii] += multiply_and_scale(x, a1[l], a2[l]);
}
}
}