diff options
Diffstat (limited to 'vp8/common')
-rw-r--r-- | vp8/common/arm/neon/sixtappredict_neon.c | 9 | ||||
-rw-r--r-- | vp8/common/blockd.h | 2 | ||||
-rw-r--r-- | vp8/common/loongarch/sixtap_filter_lsx.c | 23 | ||||
-rw-r--r-- | vp8/common/mips/msa/sixtap_filter_msa.c | 181 | ||||
-rw-r--r-- | vp8/common/mips/msa/vp8_macros_msa.h | 258 | ||||
-rw-r--r-- | vp8/common/onyx.h | 33 | ||||
-rw-r--r-- | vp8/common/rtcd_defs.pl | 6 | ||||
-rw-r--r-- | vp8/common/vp8_loopfilter.c | 2 |
8 files changed, 271 insertions, 243 deletions
diff --git a/vp8/common/arm/neon/sixtappredict_neon.c b/vp8/common/arm/neon/sixtappredict_neon.c index 48e86d327..ee3c281f0 100644 --- a/vp8/common/arm/neon/sixtappredict_neon.c +++ b/vp8/common/arm/neon/sixtappredict_neon.c @@ -16,7 +16,7 @@ #include "vpx_ports/mem.h" static const int8_t vp8_sub_pel_filters[8][8] = { - { 0, 0, 128, 0, 0, 0, 0, 0 }, /* note that 1/8 pel positionyys are */ + { 0, 0, 128, 0, 0, 0, 0, 0 }, /* note that 1/8 pel positions are */ { 0, -6, 123, 12, -1, 0, 0, 0 }, /* just as per alpha -0.5 bicubic */ { 2, -11, 108, 36, -8, 1, 0, 0 }, /* New 1/4 pel 6 tap filter */ { 0, -9, 93, 50, -6, 0, 0, 0 }, @@ -781,7 +781,6 @@ void vp8_sixtap_predict8x4_neon(unsigned char *src_ptr, int src_pixels_per_line, vst1_u8(dst_ptr, d8u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d9u8); - return; } void vp8_sixtap_predict8x8_neon(unsigned char *src_ptr, int src_pixels_per_line, @@ -1250,7 +1249,6 @@ void vp8_sixtap_predict8x8_neon(unsigned char *src_ptr, int src_pixels_per_line, vst1_u8(dst_ptr, d9u8); dst_ptr += dst_pitch; } - return; } void vp8_sixtap_predict16x16_neon(unsigned char *src_ptr, @@ -1504,7 +1502,9 @@ void vp8_sixtap_predict16x16_neon(unsigned char *src_ptr, src += src_pixels_per_line; d12u8 = vld1_u8(src); d13u8 = vld1_u8(src + 8); - d14u8 = vld1_u8(src + 16); + // Only 5 pixels are needed, avoid a potential out of bounds read. + d14u8 = vld1_u8(src + 13); + d14u8 = vext_u8(d14u8, d14u8, 3); src += src_pixels_per_line; __builtin_prefetch(src); @@ -1726,5 +1726,4 @@ void vp8_sixtap_predict16x16_neon(unsigned char *src_ptr, dst += dst_pitch; } } - return; } diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 405443449..8300aad94 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -251,7 +251,7 @@ typedef struct macroblockd { unsigned char update_mb_segmentation_data; /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */ - unsigned char mb_segement_abs_delta; + unsigned char mb_segment_abs_delta; /* Per frame flags that define which MB level features (such as quantizer or * loop filter level) */ diff --git a/vp8/common/loongarch/sixtap_filter_lsx.c b/vp8/common/loongarch/sixtap_filter_lsx.c index cd7ba5474..986763341 100644 --- a/vp8/common/loongarch/sixtap_filter_lsx.c +++ b/vp8/common/loongarch/sixtap_filter_lsx.c @@ -1706,21 +1706,22 @@ void vp8_sixtap_predict4x4_lsx(uint8_t *RESTRICT src, int32_t src_stride, switch (xoffset) { case 0: { __m128i tp0; - tp0 = __lsx_vinsgr2vr_w(tp0, src, 0); - src += src_stride; - tp0 = __lsx_vinsgr2vr_w(tp0, src, 0); - src += src_stride; - tp0 = __lsx_vinsgr2vr_w(tp0, src, 0); - src += src_stride; - tp0 = __lsx_vinsgr2vr_w(tp0, src, 0); + tp0 = __lsx_vldrepl_w(src, 0); + src += src_stride; __lsx_vstelm_w(tp0, dst, 0, 0); dst += dst_stride; - __lsx_vstelm_w(tp0, dst, 0, 1); + tp0 = __lsx_vldrepl_w(src, 0); + src += src_stride; + __lsx_vstelm_w(tp0, dst, 0, 0); dst += dst_stride; - __lsx_vstelm_w(tp0, dst, 0, 2); + tp0 = __lsx_vldrepl_w(src, 0); + src += src_stride; + __lsx_vstelm_w(tp0, dst, 0, 0); dst += dst_stride; - __lsx_vstelm_w(tp0, dst, 0, 3); + tp0 = __lsx_vldrepl_w(src, 0); + __lsx_vstelm_w(tp0, dst, 0, 0); + break; } case 2: @@ -1865,7 +1866,7 @@ void vp8_sixtap_predict16x16_lsx(uint8_t *RESTRICT src, int32_t src_stride, case 1: Predict16x16Funcs1[3](src, src_stride, dst, dst_stride, - h_filter, v_filter + 1, 16); + h_filter + 1, v_filter + 1, 16); break; } break; diff --git a/vp8/common/mips/msa/sixtap_filter_msa.c b/vp8/common/mips/msa/sixtap_filter_msa.c index b0affcff0..3a1bb7cd5 100644 --- a/vp8/common/mips/msa/sixtap_filter_msa.c +++ b/vp8/common/mips/msa/sixtap_filter_msa.c @@ -35,101 +35,134 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = { #define HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_h0, filt_h1, \ filt_h2) \ ({ \ - v16i8 vec0_m, vec1_m, vec2_m; \ - v8i16 hz_out_m; \ + v16i8 _6tap_vec0_m, _6tap_vec1_m, _6tap_vec2_m; \ + v8i16 _6tap_out_m; \ \ VSHF_B3_SB(src0, src1, src0, src1, src0, src1, mask0, mask1, mask2, \ - vec0_m, vec1_m, vec2_m); \ - hz_out_m = \ - DPADD_SH3_SH(vec0_m, vec1_m, vec2_m, filt_h0, filt_h1, filt_h2); \ + _6tap_vec0_m, _6tap_vec1_m, _6tap_vec2_m); \ + _6tap_out_m = DPADD_SH3_SH(_6tap_vec0_m, _6tap_vec1_m, _6tap_vec2_m, \ + filt_h0, filt_h1, filt_h2); \ \ - hz_out_m = __msa_srari_h(hz_out_m, VP8_FILTER_SHIFT); \ - hz_out_m = __msa_sat_s_h(hz_out_m, 7); \ + _6tap_out_m = __msa_srari_h(_6tap_out_m, VP8_FILTER_SHIFT); \ + _6tap_out_m = __msa_sat_s_h(_6tap_out_m, 7); \ \ - hz_out_m; \ + _6tap_out_m; \ }) #define HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ mask2, filt0, filt1, filt2, out0, out1) \ { \ - v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m; \ + v16i8 _6tap_4wid_vec0_m, _6tap_4wid_vec1_m, _6tap_4wid_vec2_m, \ + _6tap_4wid_vec3_m, _6tap_4wid_vec4_m, _6tap_4wid_vec5_m; \ \ - VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \ - DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1); \ - VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \ - DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1); \ - VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \ - DPADD_SB2_SH(vec4_m, vec5_m, filt2, filt2, out0, out1); \ + VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, _6tap_4wid_vec0_m, \ + _6tap_4wid_vec1_m); \ + DOTP_SB2_SH(_6tap_4wid_vec0_m, _6tap_4wid_vec1_m, filt0, filt0, out0, \ + out1); \ + VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, _6tap_4wid_vec2_m, \ + _6tap_4wid_vec3_m); \ + DPADD_SB2_SH(_6tap_4wid_vec2_m, _6tap_4wid_vec3_m, filt1, filt1, out0, \ + out1); \ + VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, _6tap_4wid_vec4_m, \ + _6tap_4wid_vec5_m); \ + DPADD_SB2_SH(_6tap_4wid_vec4_m, _6tap_4wid_vec5_m, filt2, filt2, out0, \ + out1); \ } -#define HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ - mask2, filt0, filt1, filt2, out0, out1, \ - out2, out3) \ - { \ - v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ - \ - VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \ - VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \ - DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \ - out0, out1, out2, out3); \ - VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m); \ - VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m); \ - VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec4_m, vec5_m); \ - VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec6_m, vec7_m); \ - DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1, \ - out0, out1, out2, out3); \ - DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt2, filt2, filt2, filt2, \ - out0, out1, out2, out3); \ +#define HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ + mask2, filt0, filt1, filt2, out0, out1, \ + out2, out3) \ + { \ + v16i8 _6tap_8wid_vec0_m, _6tap_8wid_vec1_m, _6tap_8wid_vec2_m, \ + _6tap_8wid_vec3_m, _6tap_8wid_vec4_m, _6tap_8wid_vec5_m, \ + _6tap_8wid_vec6_m, _6tap_8wid_vec7_m; \ + \ + VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, _6tap_8wid_vec0_m, \ + _6tap_8wid_vec1_m); \ + VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, _6tap_8wid_vec2_m, \ + _6tap_8wid_vec3_m); \ + DOTP_SB4_SH(_6tap_8wid_vec0_m, _6tap_8wid_vec1_m, _6tap_8wid_vec2_m, \ + _6tap_8wid_vec3_m, filt0, filt0, filt0, filt0, out0, out1, \ + out2, out3); \ + VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, _6tap_8wid_vec0_m, \ + _6tap_8wid_vec1_m); \ + VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, _6tap_8wid_vec2_m, \ + _6tap_8wid_vec3_m); \ + VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, _6tap_8wid_vec4_m, \ + _6tap_8wid_vec5_m); \ + VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, _6tap_8wid_vec6_m, \ + _6tap_8wid_vec7_m); \ + DPADD_SB4_SH(_6tap_8wid_vec0_m, _6tap_8wid_vec1_m, _6tap_8wid_vec2_m, \ + _6tap_8wid_vec3_m, filt1, filt1, filt1, filt1, out0, out1, \ + out2, out3); \ + DPADD_SB4_SH(_6tap_8wid_vec4_m, _6tap_8wid_vec5_m, _6tap_8wid_vec6_m, \ + _6tap_8wid_vec7_m, filt2, filt2, filt2, filt2, out0, out1, \ + out2, out3); \ } -#define FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1) \ - ({ \ - v8i16 tmp0; \ - \ - tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \ - tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)vec1, (v16i8)filt1); \ - \ - tmp0; \ - }) - -#define HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_h0, filt_h1) \ +#define FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1) \ ({ \ - v16i8 vec0_m, vec1_m; \ - v8i16 hz_out_m; \ - \ - VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0_m, vec1_m); \ - hz_out_m = FILT_4TAP_DPADD_S_H(vec0_m, vec1_m, filt_h0, filt_h1); \ + v8i16 _4tap_dpadd_tmp0; \ \ - hz_out_m = __msa_srari_h(hz_out_m, VP8_FILTER_SHIFT); \ - hz_out_m = __msa_sat_s_h(hz_out_m, 7); \ + _4tap_dpadd_tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \ + _4tap_dpadd_tmp0 = \ + __msa_dpadd_s_h(_4tap_dpadd_tmp0, (v16i8)vec1, (v16i8)filt1); \ \ - hz_out_m; \ + _4tap_dpadd_tmp0; \ }) -#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ - filt0, filt1, out0, out1) \ - { \ - v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \ - \ - VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \ - DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1); \ - VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \ - DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1); \ +#define HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_h0, filt_h1) \ + ({ \ + v16i8 _4tap_vec0_m, _4tap_vec1_m; \ + v8i16 _4tap_out_m; \ + \ + VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, _4tap_vec0_m, \ + _4tap_vec1_m); \ + _4tap_out_m = \ + FILT_4TAP_DPADD_S_H(_4tap_vec0_m, _4tap_vec1_m, filt_h0, filt_h1); \ + \ + _4tap_out_m = __msa_srari_h(_4tap_out_m, VP8_FILTER_SHIFT); \ + _4tap_out_m = __msa_sat_s_h(_4tap_out_m, 7); \ + \ + _4tap_out_m; \ + }) + +#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ + filt0, filt1, out0, out1) \ + { \ + v16i8 _4tap_4wid_vec0_m, _4tap_4wid_vec1_m, _4tap_4wid_vec2_m, \ + _4tap_4wid_vec3_m; \ + \ + VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, _4tap_4wid_vec0_m, \ + _4tap_4wid_vec1_m); \ + DOTP_SB2_SH(_4tap_4wid_vec0_m, _4tap_4wid_vec1_m, filt0, filt0, out0, \ + out1); \ + VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, _4tap_4wid_vec2_m, \ + _4tap_4wid_vec3_m); \ + DPADD_SB2_SH(_4tap_4wid_vec2_m, _4tap_4wid_vec3_m, filt1, filt1, out0, \ + out1); \ } -#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ - filt0, filt1, out0, out1, out2, out3) \ - { \ - v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \ - \ - VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \ - VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \ - DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \ - out0, out1, out2, out3); \ - VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m); \ - VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m); \ - DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1, \ - out0, out1, out2, out3); \ +#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ + filt0, filt1, out0, out1, out2, out3) \ + { \ + v16i8 _4tap_8wid_vec0_m, _4tap_8wid_vec1_m, _4tap_8wid_vec2_m, \ + _4tap_8wid_vec3_m; \ + \ + VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, _4tap_8wid_vec0_m, \ + _4tap_8wid_vec1_m); \ + VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, _4tap_8wid_vec2_m, \ + _4tap_8wid_vec3_m); \ + DOTP_SB4_SH(_4tap_8wid_vec0_m, _4tap_8wid_vec1_m, _4tap_8wid_vec2_m, \ + _4tap_8wid_vec3_m, filt0, filt0, filt0, filt0, out0, out1, \ + out2, out3); \ + VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, _4tap_8wid_vec0_m, \ + _4tap_8wid_vec1_m); \ + VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, _4tap_8wid_vec2_m, \ + _4tap_8wid_vec3_m); \ + DPADD_SB4_SH(_4tap_8wid_vec0_m, _4tap_8wid_vec1_m, _4tap_8wid_vec2_m, \ + _4tap_8wid_vec3_m, filt1, filt1, filt1, filt1, out0, out1, \ + out2, out3); \ } static void common_hz_6t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, diff --git a/vp8/common/mips/msa/vp8_macros_msa.h b/vp8/common/mips/msa/vp8_macros_msa.h index 7cb3c9869..cc85b9a1f 100644 --- a/vp8/common/mips/msa/vp8_macros_msa.h +++ b/vp8/common/mips/msa/vp8_macros_msa.h @@ -40,160 +40,160 @@ #define ST_SW(...) ST_W(v4i32, __VA_ARGS__) #if (__mips_isa_rev >= 6) -#define LW(psrc) \ - ({ \ - const uint8_t *psrc_m = (const uint8_t *)(psrc); \ - uint32_t val_m; \ - \ - asm volatile("lw %[val_m], %[psrc_m] \n\t" \ - \ - : [val_m] "=r"(val_m) \ - : [psrc_m] "m"(*psrc_m)); \ - \ - val_m; \ +#define LW(psrc) \ + ({ \ + const uint8_t *lw_psrc_m = (const uint8_t *)(psrc); \ + uint32_t lw_val_m; \ + \ + asm volatile("lw %[lw_val_m], %[lw_psrc_m] \n\t" \ + \ + : [lw_val_m] "=r"(lw_val_m) \ + : [lw_psrc_m] "m"(*lw_psrc_m)); \ + \ + lw_val_m; \ }) #if (__mips == 64) -#define LD(psrc) \ - ({ \ - const uint8_t *psrc_m = (const uint8_t *)(psrc); \ - uint64_t val_m = 0; \ - \ - asm volatile("ld %[val_m], %[psrc_m] \n\t" \ - \ - : [val_m] "=r"(val_m) \ - : [psrc_m] "m"(*psrc_m)); \ - \ - val_m; \ +#define LD(psrc) \ + ({ \ + const uint8_t *ld_psrc_m = (const uint8_t *)(psrc); \ + uint64_t ld_val_m = 0; \ + \ + asm volatile("ld %[ld_val_m], %[ld_psrc_m] \n\t" \ + \ + : [ld_val_m] "=r"(ld_val_m) \ + : [ld_psrc_m] "m"(*ld_psrc_m)); \ + \ + ld_val_m; \ }) #else // !(__mips == 64) -#define LD(psrc) \ - ({ \ - const uint8_t *psrc_ld = (const uint8_t *)(psrc); \ - uint32_t val0_m, val1_m; \ - uint64_t val_m = 0; \ - \ - val0_m = LW(psrc_ld); \ - val1_m = LW(psrc_ld + 4); \ - \ - val_m = (uint64_t)(val1_m); \ - val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \ - val_m = (uint64_t)(val_m | (uint64_t)val0_m); \ - \ - val_m; \ +#define LD(psrc) \ + ({ \ + const uint8_t *ld_psrc_m = (const uint8_t *)(psrc); \ + uint32_t ld_val0_m, ld_val1_m; \ + uint64_t ld_val_m = 0; \ + \ + ld_val0_m = LW(ld_psrc_m); \ + ld_val1_m = LW(ld_psrc_m + 4); \ + \ + ld_val_m = (uint64_t)(ld_val1_m); \ + ld_val_m = (uint64_t)((ld_val_m << 32) & 0xFFFFFFFF00000000); \ + ld_val_m = (uint64_t)(ld_val_m | (uint64_t)ld_val0_m); \ + \ + ld_val_m; \ }) #endif // (__mips == 64) -#define SH(val, pdst) \ - { \ - uint8_t *pdst_m = (uint8_t *)(pdst); \ - const uint16_t val_m = (val); \ - \ - asm volatile("sh %[val_m], %[pdst_m] \n\t" \ - \ - : [pdst_m] "=m"(*pdst_m) \ - : [val_m] "r"(val_m)); \ +#define SH(val, pdst) \ + { \ + uint8_t *sh_pdst_m = (uint8_t *)(pdst); \ + const uint16_t sh_val_m = (val); \ + \ + asm volatile("sh %[sh_val_m], %[sh_pdst_m] \n\t" \ + \ + : [sh_pdst_m] "=m"(*sh_pdst_m) \ + : [sh_val_m] "r"(sh_val_m)); \ } -#define SW(val, pdst) \ - { \ - uint8_t *pdst_m = (uint8_t *)(pdst); \ - const uint32_t val_m = (val); \ - \ - asm volatile("sw %[val_m], %[pdst_m] \n\t" \ - \ - : [pdst_m] "=m"(*pdst_m) \ - : [val_m] "r"(val_m)); \ +#define SW(val, pdst) \ + { \ + uint8_t *sw_pdst_m = (uint8_t *)(pdst); \ + const uint32_t sw_val_m = (val); \ + \ + asm volatile("sw %[sw_val_m], %[sw_pdst_m] \n\t" \ + \ + : [sw_pdst_m] "=m"(*sw_pdst_m) \ + : [sw_val_m] "r"(sw_val_m)); \ } -#define SD(val, pdst) \ - { \ - uint8_t *pdst_m = (uint8_t *)(pdst); \ - const uint64_t val_m = (val); \ - \ - asm volatile("sd %[val_m], %[pdst_m] \n\t" \ - \ - : [pdst_m] "=m"(*pdst_m) \ - : [val_m] "r"(val_m)); \ +#define SD(val, pdst) \ + { \ + uint8_t *sd_pdst_m = (uint8_t *)(pdst); \ + const uint64_t sd_val_m = (val); \ + \ + asm volatile("sd %[sd_val_m], %[sd_pdst_m] \n\t" \ + \ + : [sd_pdst_m] "=m"(*sd_pdst_m) \ + : [sd_val_m] "r"(sd_val_m)); \ } #else // !(__mips_isa_rev >= 6) -#define LW(psrc) \ - ({ \ - const uint8_t *psrc_m = (const uint8_t *)(psrc); \ - uint32_t val_m; \ - \ - asm volatile( \ - "lwr %[val_m], 0(%[psrc_m]) \n\t" \ - "lwl %[val_m], 3(%[psrc_m]) \n\t" \ - : [val_m] "=&r"(val_m) \ - : [psrc_m] "r"(psrc_m)); \ - \ - val_m; \ +#define LW(psrc) \ + ({ \ + const uint8_t *lw_psrc_m = (const uint8_t *)(psrc); \ + uint32_t lw_val_m; \ + \ + asm volatile( \ + "lwr %[lw_val_m], 0(%[lw_psrc_m]) \n\t" \ + "lwl %[lw_val_m], 3(%[lw_psrc_m]) \n\t" \ + : [lw_val_m] "=&r"(lw_val_m) \ + : [lw_psrc_m] "r"(lw_psrc_m)); \ + \ + lw_val_m; \ }) #if (__mips == 64) -#define LD(psrc) \ - ({ \ - const uint8_t *psrc_m = (const uint8_t *)(psrc); \ - uint64_t val_m = 0; \ - \ - asm volatile( \ - "ldr %[val_m], 0(%[psrc_m]) \n\t" \ - "ldl %[val_m], 7(%[psrc_m]) \n\t" \ - : [val_m] "=&r"(val_m) \ - : [psrc_m] "r"(psrc_m)); \ - \ - val_m; \ +#define LD(psrc) \ + ({ \ + const uint8_t *ld_psrc_m = (const uint8_t *)(psrc); \ + uint64_t ld_val_m = 0; \ + \ + asm volatile( \ + "ldr %[ld_val_m], 0(%[ld_psrc_m]) \n\t" \ + "ldl %[ld_val_m], 7(%[ld_psrc_m]) \n\t" \ + : [ld_val_m] "=&r"(ld_val_m) \ + : [ld_psrc_m] "r"(ld_psrc_m)); \ + \ + ld_val_m; \ }) #else // !(__mips == 64) -#define LD(psrc) \ - ({ \ - const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \ - uint32_t val0_m, val1_m; \ - uint64_t val_m = 0; \ - \ - val0_m = LW(psrc_m1); \ - val1_m = LW(psrc_m1 + 4); \ - \ - val_m = (uint64_t)(val1_m); \ - val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \ - val_m = (uint64_t)(val_m | (uint64_t)val0_m); \ - \ - val_m; \ +#define LD(psrc) \ + ({ \ + const uint8_t *ld_psrc_m1 = (const uint8_t *)(psrc); \ + uint32_t ld_val0_m, ld_val1_m; \ + uint64_t ld_val_m = 0; \ + \ + ld_val0_m = LW(ld_psrc_m1); \ + ld_val1_m = LW(ld_psrc_m1 + 4); \ + \ + ld_val_m = (uint64_t)(ld_val1_m); \ + ld_val_m = (uint64_t)((ld_val_m << 32) & 0xFFFFFFFF00000000); \ + ld_val_m = (uint64_t)(ld_val_m | (uint64_t)ld_val0_m); \ + \ + ld_val_m; \ }) #endif // (__mips == 64) -#define SH(val, pdst) \ - { \ - uint8_t *pdst_m = (uint8_t *)(pdst); \ - const uint16_t val_m = (val); \ - \ - asm volatile("ush %[val_m], %[pdst_m] \n\t" \ - \ - : [pdst_m] "=m"(*pdst_m) \ - : [val_m] "r"(val_m)); \ +#define SH(val, pdst) \ + { \ + uint8_t *sh_pdst_m = (uint8_t *)(pdst); \ + const uint16_t sh_val_m = (val); \ + \ + asm volatile("ush %[sh_val_m], %[sh_pdst_m] \n\t" \ + \ + : [sh_pdst_m] "=m"(*sh_pdst_m) \ + : [sh_val_m] "r"(sh_val_m)); \ } -#define SW(val, pdst) \ - { \ - uint8_t *pdst_m = (uint8_t *)(pdst); \ - const uint32_t val_m = (val); \ - \ - asm volatile("usw %[val_m], %[pdst_m] \n\t" \ - \ - : [pdst_m] "=m"(*pdst_m) \ - : [val_m] "r"(val_m)); \ +#define SW(val, pdst) \ + { \ + uint8_t *sw_pdst_m = (uint8_t *)(pdst); \ + const uint32_t sw_val_m = (val); \ + \ + asm volatile("usw %[sw_val_m], %[sw_pdst_m] \n\t" \ + \ + : [sw_pdst_m] "=m"(*sw_pdst_m) \ + : [sw_val_m] "r"(sw_val_m)); \ } -#define SD(val, pdst) \ - { \ - uint8_t *pdst_m1 = (uint8_t *)(pdst); \ - uint32_t val0_m, val1_m; \ - \ - val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ - val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ - \ - SW(val0_m, pdst_m1); \ - SW(val1_m, pdst_m1 + 4); \ +#define SD(val, pdst) \ + { \ + uint8_t *sd_pdst_m1 = (uint8_t *)(pdst); \ + uint32_t sd_val0_m, sd_val1_m; \ + \ + sd_val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ + sd_val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ + \ + SW(sd_val0_m, sd_pdst_m1); \ + SW(sd_val1_m, sd_pdst_m1 + 4); \ } #endif // (__mips_isa_rev >= 6) diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index 05c72df3f..1b70ea5db 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -27,13 +27,6 @@ struct VP8_COMP; /* Create/destroy static data structures. */ typedef enum { - NORMAL = 0, - FOURFIVE = 1, - THREEFIVE = 2, - ONETWO = 3 -} VPX_SCALING; - -typedef enum { USAGE_LOCAL_FILE_PLAYBACK = 0x0, USAGE_STREAM_FROM_SERVER = 0x1, USAGE_CONSTRAINED_QUALITY = 0x2, @@ -58,19 +51,19 @@ typedef enum { #include <assert.h> static INLINE void Scale2Ratio(int mode, int *hr, int *hs) { switch (mode) { - case NORMAL: + case VP8E_NORMAL: *hr = 1; *hs = 1; break; - case FOURFIVE: + case VP8E_FOURFIVE: *hr = 4; *hs = 5; break; - case THREEFIVE: + case VP8E_THREEFIVE: *hr = 3; *hs = 5; break; - case ONETWO: + case VP8E_ONETWO: *hr = 1; *hs = 2; break; @@ -90,7 +83,14 @@ typedef struct { int Width; int Height; struct vpx_rational timebase; - unsigned int target_bandwidth; /* kilobits per second */ + /* In either kilobits per second or bits per second, depending on which + * copy of oxcf this is in. + * - ctx->oxcf.target_bandwidth is in kilobits per second. See + * set_vp8e_config(). + * - ctx->cpi->oxcf.target_bandwidth in is bits per second. See + * vp8_change_config(). + */ + unsigned int target_bandwidth; /* Parameter used for applying denoiser. * For temporal denoiser: noise_sensitivity = 0 means off, @@ -221,6 +221,7 @@ typedef struct { /* Temporal scaling parameters */ unsigned int number_of_layers; + /* kilobits per second */ unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY]; unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY]; unsigned int periodicity; @@ -243,11 +244,11 @@ typedef struct { void vp8_initialize(); -struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf); +struct VP8_COMP *vp8_create_compressor(const VP8_CONFIG *oxcf); void vp8_remove_compressor(struct VP8_COMP **comp); void vp8_init_config(struct VP8_COMP *onyx, VP8_CONFIG *oxcf); -void vp8_change_config(struct VP8_COMP *cpi, VP8_CONFIG *oxcf); +void vp8_change_config(struct VP8_COMP *cpi, const VP8_CONFIG *oxcf); int vp8_receive_raw_frame(struct VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, @@ -273,8 +274,8 @@ int vp8_set_roimap(struct VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int threshold[4]); int vp8_set_active_map(struct VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols); -int vp8_set_internal_size(struct VP8_COMP *cpi, VPX_SCALING horiz_mode, - VPX_SCALING vert_mode); +int vp8_set_internal_size(struct VP8_COMP *cpi, VPX_SCALING_MODE horiz_mode, + VPX_SCALING_MODE vert_mode); int vp8_get_quantizer(struct VP8_COMP *cpi); #ifdef __cplusplus diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl index 739a61284..12b474d93 100644 --- a/vp8/common/rtcd_defs.pl +++ b/vp8/common/rtcd_defs.pl @@ -127,12 +127,6 @@ specialize qw/vp8_copy_mem8x4 mmx neon dspr2 msa mmi/; # if (vpx_config("CONFIG_POSTPROC") eq "yes") { - add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y_1, int u_1, int v_1, int alpha, int stride"; - - add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y_1, int u_1, int v_1, int alpha, int stride"; - - add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y_1, int u_1, int v_1, int alpha, int stride"; - add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; specialize qw/vp8_filter_by_weight16x16 sse2 msa/; diff --git a/vp8/common/vp8_loopfilter.c b/vp8/common/vp8_loopfilter.c index 9c9e5f351..4576c1853 100644 --- a/vp8/common/vp8_loopfilter.c +++ b/vp8/common/vp8_loopfilter.c @@ -111,7 +111,7 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm, MACROBLOCKD *mbd, /* Note the baseline filter values for each segment */ if (mbd->segmentation_enabled) { - if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) { + if (mbd->mb_segment_abs_delta == SEGMENT_ABSDATA) { lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; } else { /* Delta Value */ lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; |