diff options
author | Wan-Teh Chang <wtc@google.com> | 2024-02-29 14:23:42 -0800 |
---|---|---|
committer | Wan-Teh Chang <wtc@google.com> | 2024-03-04 20:37:19 +0000 |
commit | fd059efcfecfac7be90b3aed3c2159a38fa3cd5d (patch) | |
tree | c1bce1bca3d7fe8656033c185d926b3beec7227f | |
parent | bff87d33a9ac9ffe08f679beea2b565bd1b2a388 (diff) | |
download | libaom-fd059efcfecfac7be90b3aed3c2159a38fa3cd5d.tar.gz |
Optimize multiply_and_scale() by precomputation
multiply_and_scale() is called in nested loops. Precompute the
components of w (w1 and w2) and pass the components of w to
multiply_and_scale().
Bug: b:319140742
Bug: oss-fuzz:66474
Change-Id: I1ecf42dd4a7a552739136cbfc21f16aaba8f4523
(cherry picked from commit 9ae3ff78826cb7ed785ebe999acefc3426d1820f)
-rw-r--r-- | av1/encoder/pickrst.c | 35 |
1 files changed, 27 insertions, 8 deletions
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c index 6753c9edc..369529a4f 100644 --- a/av1/encoder/pickrst.c +++ b/av1/encoder/pickrst.c @@ -1103,13 +1103,26 @@ static INLINE int wrap_index(int i, int wiener_win) { return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i); } -// Calculates x * w / WIENER_TAP_SCALE_FACTOR. The multiplication may overflow, -// so we do the multiplication by components and combine it with the division. -static INLINE int64_t multiply_and_scale(int64_t x, int32_t w) { - // Let w = w1 * WIENER_TAP_SCALE_FACTOR + w2 - const int32_t w1 = w / WIENER_TAP_SCALE_FACTOR; - const int32_t w2 = w - w1 * WIENER_TAP_SCALE_FACTOR; +// Splits each w[i] into smaller components w1[i] and w2[i] such that +// w[i] = w1[i] * WIENER_TAP_SCALE_FACTOR + w2[i]. +static INLINE void split_wiener_filter_coefficients(int wiener_win, + const int32_t *w, + int32_t *w1, int32_t *w2) { + for (int i = 0; i < wiener_win; i++) { + w1[i] = w[i] / WIENER_TAP_SCALE_FACTOR; + w2[i] = w[i] - w1[i] * WIENER_TAP_SCALE_FACTOR; + assert(w[i] == w1[i] * WIENER_TAP_SCALE_FACTOR + w2[i]); + } +} + +// Calculates x * w / WIENER_TAP_SCALE_FACTOR, where +// w = w1 * WIENER_TAP_SCALE_FACTOR + w2. +// +// The multiplication x * w may overflow, so we multiply x by the components of +// w (w1 and w2) and combine the multiplication with the division. +static INLINE int64_t multiply_and_scale(int64_t x, int32_t w1, int32_t w2) { // Let y = x * w / WIENER_TAP_SCALE_FACTOR + // = x * (w1 * WIENER_TAP_SCALE_FACTOR + w2) / WIENER_TAP_SCALE_FACTOR const int64_t y = x * w1 + x * w2 / WIENER_TAP_SCALE_FACTOR; return y; } @@ -1190,6 +1203,7 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc, int i, j; int64_t S[WIENER_WIN]; int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1]; + int32_t b1[WIENER_WIN], b2[WIENER_WIN]; const int wiener_win2 = wiener_win * wiener_win; const int wiener_halfwin1 = (wiener_win >> 1) + 1; memset(A, 0, sizeof(A)); @@ -1200,6 +1214,7 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc, A[jj] += Mc[i][j] * b[i] / WIENER_TAP_SCALE_FACTOR; } } + split_wiener_filter_coefficients(wiener_win, b, b1, b2); for (i = 0; i < wiener_win; i++) { for (j = 0; j < wiener_win; j++) { @@ -1217,7 +1232,8 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc, // multiplication with the last division. const int64_t x = Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] / WIENER_TAP_SCALE_FACTOR; - B[ll * wiener_halfwin1 + kk] += multiply_and_scale(x, b[j]); + // b[j] = b1[j] * WIENER_TAP_SCALE_FACTOR + b2[j] + B[ll * wiener_halfwin1 + kk] += multiply_and_scale(x, b1[j], b2[j]); } } } @@ -1257,6 +1273,7 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc, int i, j; int64_t S[WIENER_WIN]; int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1]; + int32_t a1[WIENER_WIN], a2[WIENER_WIN]; const int wiener_win2 = wiener_win * wiener_win; const int wiener_halfwin1 = (wiener_win >> 1) + 1; memset(A, 0, sizeof(A)); @@ -1267,6 +1284,7 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc, A[ii] += Mc[i][j] * a[j] / WIENER_TAP_SCALE_FACTOR; } } + split_wiener_filter_coefficients(wiener_win, a, a1, a2); for (i = 0; i < wiener_win; i++) { const int ii = wrap_index(i, wiener_win); @@ -1284,7 +1302,8 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc, // multiplication with the last division. const int64_t x = Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] / WIENER_TAP_SCALE_FACTOR; - B[jj * wiener_halfwin1 + ii] += multiply_and_scale(x, a[l]); + // a[l] = a1[l] * WIENER_TAP_SCALE_FACTOR + a2[l] + B[jj * wiener_halfwin1 + ii] += multiply_and_scale(x, a1[l], a2[l]); } } } |