aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--common/arm/ih264_inter_pred_chroma_a9q.s4
-rw-r--r--common/arm/ih264_intra_pred_luma_16x16_a9q.s2
-rw-r--r--common/arm/ih264_mem_fns_neon.s10
-rw-r--r--common/arm/ih264_padding_neon.s2
-rw-r--r--common/arm/ih264_weighted_bi_pred_a9q.s4
-rw-r--r--common/arm/ih264_weighted_pred_a9q.s4
-rw-r--r--common/armv8/ih264_deblk_chroma_av8.s78
-rw-r--r--common/armv8/ih264_deblk_luma_av8.s35
-rw-r--r--common/armv8/ih264_default_weighted_pred_av8.s34
-rw-r--r--common/armv8/ih264_inter_pred_chroma_av8.s22
-rw-r--r--common/armv8/ih264_inter_pred_filters_luma_horz_av8.s12
-rw-r--r--common/armv8/ih264_inter_pred_filters_luma_vert_av8.s12
-rw-r--r--common/armv8/ih264_inter_pred_luma_copy_av8.s18
-rw-r--r--common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s12
-rw-r--r--common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s19
-rw-r--r--common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s14
-rw-r--r--common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s18
-rw-r--r--common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s16
-rw-r--r--common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s14
-rw-r--r--common/armv8/ih264_intra_pred_chroma_av8.s48
-rw-r--r--common/armv8/ih264_intra_pred_luma_16x16_av8.s104
-rw-r--r--common/armv8/ih264_intra_pred_luma_4x4_av8.s110
-rw-r--r--common/armv8/ih264_intra_pred_luma_8x8_av8.s100
-rw-r--r--common/armv8/ih264_iquant_itrans_recon_av8.s24
-rw-r--r--common/armv8/ih264_iquant_itrans_recon_dc_av8.s24
-rw-r--r--common/armv8/ih264_mem_fns_neon_av8.s68
-rw-r--r--common/armv8/ih264_padding_neon_av8.s120
-rw-r--r--common/armv8/ih264_resi_trans_quant_av8.s93
-rw-r--r--common/armv8/ih264_weighted_bi_pred_av8.s110
-rw-r--r--common/armv8/ih264_weighted_pred_av8.s58
-rw-r--r--decoder.arm64.mk6
-rw-r--r--decoder/ih264d_api.c61
-rw-r--r--decoder/ih264d_dpb_mgr.c5
-rw-r--r--decoder/ih264d_parse_headers.c34
-rw-r--r--decoder/ih264d_parse_islice.c4
-rw-r--r--decoder/ih264d_parse_mb_header.c41
-rw-r--r--decoder/ih264d_structs.h4
-rw-r--r--decoder/ih264d_utils.c7
-rw-r--r--encoder.arm64.mk7
-rw-r--r--encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s24
-rw-r--r--encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s28
-rw-r--r--encoder/armv8/ih264e_half_pel_av8.s4
-rw-r--r--encoder/armv8/ime_distortion_metrics_av8.s44
-rw-r--r--encoder/ih264e_api.c2
-rw-r--r--encoder/ih264e_cabac_encode.c125
-rw-r--r--encoder/ih264e_cabac_init.c10
-rw-r--r--encoder/ih264e_cavlc.c91
-rw-r--r--encoder/ih264e_defs.h6
-rw-r--r--encoder/ih264e_encode_header.c9
-rw-r--r--encoder/ih264e_process.c79
-rw-r--r--encoder/ih264e_structs.h178
-rw-r--r--encoder/irc_rate_control_api.c10
-rw-r--r--test/decoder.mk2
-rw-r--r--test/decoder/dec.cfg12
-rw-r--r--test/encoder/enc.cfg47
55 files changed, 1127 insertions, 902 deletions
diff --git a/common/arm/ih264_inter_pred_chroma_a9q.s b/common/arm/ih264_inter_pred_chroma_a9q.s
index 6681a7c..e2b8c99 100644
--- a/common/arm/ih264_inter_pred_chroma_a9q.s
+++ b/common/arm/ih264_inter_pred_chroma_a9q.s
@@ -91,8 +91,8 @@
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@ WORD32 dst_strd,
-@ UWORD8 u1_dx,
-@ UWORD8 u1_dy,
+@ WORD32 u1_dx,
+@ WORD32 u1_dy,
@ WORD32 ht,
@ WORD32 wd)
@**************Variables Vs Registers*****************************************
diff --git a/common/arm/ih264_intra_pred_luma_16x16_a9q.s b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
index 0dd82f3..7597444 100644
--- a/common/arm/ih264_intra_pred_luma_16x16_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
@@ -413,7 +413,7 @@ scrlbl1:
add r7, r0, r4, lsl #3
sub r0, r7, r4, lsl #1
- rsb lr, r4, #0x0
+ neg lr, r4
vpadd.s16 d0, d0, d1
diff --git a/common/arm/ih264_mem_fns_neon.s b/common/arm/ih264_mem_fns_neon.s
index 39ad9b3..b9595d7 100644
--- a/common/arm/ih264_mem_fns_neon.s
+++ b/common/arm/ih264_mem_fns_neon.s
@@ -68,7 +68,7 @@
@*
@void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
@ UWORD8 *pu1_src,
-@ UWORD8 num_bytes)
+@ UWORD32 num_bytes)
@**************Variables Vs Registers*************************
@ r0 => *pu1_dst
@ r1 => *pu1_src
@@ -97,7 +97,7 @@ loop_neon_memcpy_mul_8:
@*
@void ih264_memcpy(UWORD8 *pu1_dst,
@ UWORD8 *pu1_src,
-@ UWORD8 num_bytes)
+@ UWORD32 num_bytes)
@**************Variables Vs Registers*************************
@ r0 => *pu1_dst
@ r1 => *pu1_src
@@ -135,7 +135,7 @@ loop_memcpy:
@void ih264_memset_mul_8(UWORD8 *pu1_dst,
@ UWORD8 value,
-@ UWORD8 num_bytes)
+@ UWORD32 num_bytes)
@**************Variables Vs Registers*************************
@ r0 => *pu1_dst
@ r1 => value
@@ -202,7 +202,7 @@ loop_memset:
@void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
@ UWORD16 value,
-@ UWORD8 num_words)
+@ UWORD32 num_words)
@**************Variables Vs Registers*************************
@ r0 => *pu2_dst
@ r1 => value
@@ -234,7 +234,7 @@ loop_memset_16bit_mul_8:
@void ih264_memset_16bit(UWORD16 *pu2_dst,
@ UWORD16 value,
-@ UWORD8 num_words)
+@ UWORD32 num_words)
@**************Variables Vs Registers*************************
@ r0 => *pu2_dst
@ r1 => value
diff --git a/common/arm/ih264_padding_neon.s b/common/arm/ih264_padding_neon.s
index e7a1f91..819b0b3 100644
--- a/common/arm/ih264_padding_neon.s
+++ b/common/arm/ih264_padding_neon.s
@@ -88,7 +88,7 @@ ih264_pad_top_a9q:
stmfd sp!, {r4-r11, lr} @stack stores the values of the arguments
sub r5, r0, r1
- rsb r6, r1, #0
+ neg r6, r1
loop_neon_memcpy_mul_16:
@ Load 16 bytes
diff --git a/common/arm/ih264_weighted_bi_pred_a9q.s b/common/arm/ih264_weighted_bi_pred_a9q.s
index 33859e6..304bd8a 100644
--- a/common/arm/ih264_weighted_bi_pred_a9q.s
+++ b/common/arm/ih264_weighted_bi_pred_a9q.s
@@ -144,7 +144,7 @@ ih264_weighted_bi_pred_luma_a9q:
ldr r4, [sp, #40] @Load src_strd2 in r4
ldr r5, [sp, #44] @Load dst_strd in r5
sxtb r9, r9 @sign-extend 8-bit ofst1 to 32-bit
- rsb r10, r6, #0 @r13 = -(log_wd + 1)
+ neg r10, r6 @r10 = -(log_wd + 1)
ldr r11, [sp, #68] @Load ht in r11
ldr r12, [sp, #72] @Load wd in r12
vdup.16 q0, r10 @Q0 = -(log_wd + 1) (16-bit)
@@ -456,7 +456,7 @@ ih264_weighted_bi_pred_chroma_a9q:
ldr r9, [sp, #60] @Load ofst1 in r9
ldr r10, [sp, #64] @Load ofst2 in r10
- rsb r12, r6, #0 @r12 = -(log_wd + 1)
+ neg r12, r6 @r12 = -(log_wd + 1)
ldr r4, [sp, #40] @Load src_strd2 in r4
ldr r5, [sp, #44] @Load dst_strd in r5
vdup.16 q0, r12 @Q0 = -(log_wd + 1) (16-bit)
diff --git a/common/arm/ih264_weighted_pred_a9q.s b/common/arm/ih264_weighted_pred_a9q.s
index 81d26d4..80c2c6d 100644
--- a/common/arm/ih264_weighted_pred_a9q.s
+++ b/common/arm/ih264_weighted_pred_a9q.s
@@ -122,7 +122,7 @@ ih264_weighted_pred_luma_a9q:
vpush {d8-d15}
vdup.16 d2, r5 @D2 = wt (16-bit)
- rsb r9, r4, #0 @r9 = -log_wd
+ neg r9, r4 @r9 = -log_wd
vdup.8 d3, r6 @D3 = ofst (8-bit)
cmp r8, #16 @check if wd is 16
vdup.16 q0, r9 @Q0 = -log_wd (16-bit)
@@ -349,7 +349,7 @@ ih264_weighted_pred_chroma_a9q:
ldr r6, [sp, #36] @Load ofst = {ofst_u (8-bit), ofst_v (8-bit)}
ldr r8, [sp, #44] @Load wd
- rsb r9, r4, #0 @r9 = -log_wd
+ neg r9, r4 @r9 = -log_wd
vdup.32 q1, r5 @Q1 = {wt_u (16-bit), wt_v (16-bit)}
ldr r7, [sp, #40] @Load ht
vpush {d8-d15}
diff --git a/common/armv8/ih264_deblk_chroma_av8.s b/common/armv8/ih264_deblk_chroma_av8.s
index a4dbd23..b7f2d58 100644
--- a/common/armv8/ih264_deblk_chroma_av8.s
+++ b/common/armv8/ih264_deblk_chroma_av8.s
@@ -56,19 +56,19 @@
//* @param[in] x0 - pu1_src
//* Pointer to the src sample q0
//*
-//* @param[in] x1 - src_strd
+//* @param[in] w1 - src_strd
//* Source stride
//*
-//* @param[in] x2 - alpha_cb
+//* @param[in] w2 - alpha_cb
//* Alpha Value for the boundary in U
//*
-//* @param[in] x3 - beta_cb
+//* @param[in] w3 - beta_cb
//* Beta Value for the boundary in U
//*
-//* @param[in] sp(0) - alpha_cr
+//* @param[in] w4 - alpha_cr
//* Alpha Value for the boundary in V
//*
-//* @param[in] sp(4) - beta_cr
+//* @param[in] w5 - beta_cr
//* Beta Value for the boundary in V
//*
//* @returns
@@ -87,6 +87,7 @@ ih264_deblk_chroma_horz_bs4_av8:
// STMFD sp!,{x4-x6,x14} //
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x1, w1
mov x6, x5
mov x5, x4
sub x0, x0, x1, lsl #1 //x0 = uc_edgePixel pointing to p1 of chroma
@@ -155,19 +156,19 @@ ih264_deblk_chroma_horz_bs4_av8:
//* @param[in] x0 - pu1_src
//* Pointer to the src sample q0
//*
-//* @param[in] x1 - src_strd
+//* @param[in] w1 - src_strd
//* Source stride
//*
-//* @param[in] x2 - alpha_cb
+//* @param[in] w2 - alpha_cb
//* Alpha Value for the boundary in U
//*
-//* @param[in] x3 - beta_cb
+//* @param[in] w3 - beta_cb
//* Beta Value for the boundary in U
//*
-//* @param[in] sp(0) - alpha_cr
+//* @param[in] w4 - alpha_cr
//* Alpha Value for the boundary in V
//*
-//* @param[in] sp(4) - beta_cr
+//* @param[in] w5 - beta_cr
//* Beta Value for the boundary in V
//*
//* @returns
@@ -186,12 +187,13 @@ ih264_deblk_chroma_vert_bs4_av8:
// STMFD sp!,{x4,x5,x12,x14}
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x1, w1
sub x0, x0, #4 //point x0 to p1u of row0.
mov x12, x0 //keep a back up of x0 for buffer write
- add x2, x2, x4, lsl #8 //x2 = (alpha_cr,alpha_cb)
- add x3, x3, x5, lsl #8 //x3 = (beta_cr,beta_cb)
+ add w2, w2, w4, lsl #8 //w2 = (alpha_cr,alpha_cb)
+ add w3, w3, w5, lsl #8 //w3 = (beta_cr,beta_cb)
ld4 {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1
ld4 {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1
@@ -292,28 +294,28 @@ ih264_deblk_chroma_vert_bs4_av8:
//* @param[in] x0 - pu1_src
//* Pointer to the src sample q0
//*
-//* @param[in] x1 - src_strd
+//* @param[in] w1 - src_strd
//* Source stride
//*
-//* @param[in] x2 - alpha_cb
+//* @param[in] w2 - alpha_cb
//* Alpha Value for the boundary in U
//*
-//* @param[in] x3 - beta_cb
+//* @param[in] w3 - beta_cb
//* Beta Value for the boundary in U
//*
-//* @param[in] sp(0) - alpha_cr
+//* @param[in] w4 - alpha_cr
//* Alpha Value for the boundary in V
//*
-//* @param[in] sp(4) - beta_cr
+//* @param[in] w5 - beta_cr
//* Beta Value for the boundary in V
//*
-//* @param[in] sp(8) - u4_bs
+//* @param[in] w6 - u4_bs
//* Packed Boundary strength array
//*
-//* @param[in] sp(12) - pu1_cliptab_cb
+//* @param[in] x7 - pu1_cliptab_cb
//* tc0_table for U
//*
-//* @param[in] sp(16) - pu1_cliptab_cr
+//* @param[in] sp(0) - pu1_cliptab_cr
//* tc0_table for V
//*
//* @returns
@@ -332,14 +334,13 @@ ih264_deblk_chroma_horz_bslt4_av8:
// STMFD sp!,{x4-x9,x14} //
push_v_regs
stp x19, x20, [sp, #-16]!
- mov x8, x7
- mov x7, x6
- ldr x9, [sp, #80]
+ sxtw x1, w1
+ ldr x8, [sp, #80]
sub x0, x0, x1, lsl #1 //x0 = uc_edgePixelU pointing to p1 of chroma U
- rev w7, w7 //
- mov v12.s[0], w7 //D12[0] = ui_Bs
- ld1 {v16.s}[0], [x8] //D16[0] contains cliptab_cb
- ld1 {v17.s}[0], [x9] //D17[0] contains cliptab_cr
+ rev w6, w6 //
+ mov v12.s[0], w6 //D12[0] = ui_Bs
+ ld1 {v16.s}[0], [x7] //D16[0] contains cliptab_cb
+ ld1 {v17.s}[0], [x8] //D17[0] contains cliptab_cr
ld2 {v6.8b, v7.8b}, [x0], x1 //Q3=p1
tbl v14.8b, {v16.16b}, v12.8b //Retrieving cliptab values for U
tbl v28.8b, {v17.16b}, v12.8b //Retrieving cliptab values for V
@@ -428,28 +429,28 @@ ih264_deblk_chroma_horz_bslt4_av8:
//* @param[in] x0 - pu1_src
//* Pointer to the src sample q0
//*
-//* @param[in] x1 - src_strd
+//* @param[in] w1 - src_strd
//* Source stride
//*
-//* @param[in] x2 - alpha_cb
+//* @param[in] w2 - alpha_cb
//* Alpha Value for the boundary in U
//*
-//* @param[in] x3 - beta_cb
+//* @param[in] w3 - beta_cb
//* Beta Value for the boundary in U
//*
-//* @param[in] sp(0) - alpha_cr
+//* @param[in] w4 - alpha_cr
//* Alpha Value for the boundary in V
//*
-//* @param[in] sp(4) - beta_cr
+//* @param[in] w5 - beta_cr
//* Beta Value for the boundary in V
//*
-//* @param[in] sp(8) - u4_bs
+//* @param[in] w6 - u4_bs
//* Packed Boundary strength array
//*
-//* @param[in] sp(12) - pu1_cliptab_cb
+//* @param[in] x7 - pu1_cliptab_cb
//* tc0_table for U
//*
-//* @param[in] sp(16) - pu1_cliptab_cr
+//* @param[in] sp(0) - pu1_cliptab_cr
//* tc0_table for V
//*
//* @returns
@@ -468,11 +469,12 @@ ih264_deblk_chroma_vert_bslt4_av8:
// STMFD sp!,{x4-x7,x10-x12,x14}
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x1, w1
mov x10, x7
- ldr x11, [sp, #80] //x6 = u4_bs
+ ldr x11, [sp, #80] //x11 = pu1_cliptab_cr
sub x0, x0, #4 //point x0 to p1u of row0.
- add x2, x2, x4, lsl #8
- add x3, x3, x5, lsl #8
+ add w2, w2, w4, lsl #8
+ add w3, w3, w5, lsl #8
mov x12, x0 //keep a back up of x0 for buffer write
ld4 {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1
ld4 {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1
diff --git a/common/armv8/ih264_deblk_luma_av8.s b/common/armv8/ih264_deblk_luma_av8.s
index 1b3950d..7705df2 100644
--- a/common/armv8/ih264_deblk_luma_av8.s
+++ b/common/armv8/ih264_deblk_luma_av8.s
@@ -60,19 +60,19 @@
//* @param[in] x0 - pu1_src
//* Pointer to the src sample q0
//*
-//* @param[in] x1 - src_strd
+//* @param[in] w1 - src_strd
//* Source stride
//*
-//* @param[in] x2 - alpha
+//* @param[in] w2 - alpha
//* Alpha Value for the boundary
//*
-//* @param[in] x3 - beta
+//* @param[in] w3 - beta
//* Beta Value for the boundary
//*
-//* @param[in] sp(0) - u4_bs
+//* @param[in] w4 - u4_bs
//* Packed Boundary strength array
//*
-//* @param[in] sp(4) - pu1_cliptab
+//* @param[in] x5 - pu1_cliptab
//* tc0_table
//*
//* @returns
@@ -90,6 +90,7 @@ ih264_deblk_luma_horz_bslt4_av8:
// STMFD sp!,{x4-x7,x14}
push_v_regs
+ sxtw x1, w1
stp x19, x20, [sp, #-16]!
//LDRD x4,x5,[SP,#0x14] //x4 = ui_Bs , x5 = *puc_ClpTab
@@ -214,13 +215,13 @@ ih264_deblk_luma_horz_bslt4_av8:
//* @param[in] x0 - pu1_src
//* Pointer to the src sample q0
//*
-//* @param[in] x1 - src_strd
+//* @param[in] w1 - src_strd
//* Source stride
//*
-//* @param[in] x2 - alpha
+//* @param[in] w2 - alpha
//* Alpha Value for the boundary
//*
-//* @param[in] x3 - beta
+//* @param[in] w3 - beta
//* Beta Value for the boundary
//*
//* @returns
@@ -240,6 +241,7 @@ ih264_deblk_luma_horz_bs4_av8:
// STMFD sp!,{x12,x14}
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x1, w1
// Init
dup v0.16b, w2 //duplicate alpha
@@ -401,19 +403,19 @@ ih264_deblk_luma_horz_bs4_av8:
//* @param[in] x0 - pu1_src
//* Pointer to the src sample q0
//*
-//* @param[in] x1 - src_strd
+//* @param[in] w1 - src_strd
//* Source stride
//*
-//* @param[in] x2 - alpha
+//* @param[in] w2 - alpha
//* Alpha Value for the boundary
//*
-//* @param[in] x3 - beta
+//* @param[in] w3 - beta
//* Beta Value for the boundary
//*
-//* @param[in] sp(0) - u4_bs
+//* @param[in] w4 - u4_bs
//* Packed Boundary strength array
//*
-//* @param[in] sp(4) - pu1_cliptab
+//* @param[in] x5 - pu1_cliptab
//* tc0_table
//*
//* @returns
@@ -432,6 +434,7 @@ ih264_deblk_luma_vert_bslt4_av8:
// STMFD sp!,{x12,x14}
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x1, w1
sub x0, x0, #4 //pointer uc_edgePixel-4
mov x12, x4
@@ -743,13 +746,13 @@ ih264_deblk_luma_vert_bslt4_av8:
//* @param[in] x0 - pu1_src
//* Pointer to the src sample q0
//*
-//* @param[in] x1 - src_strd
+//* @param[in] w1 - src_strd
//* Source stride
//*
-//* @param[in] x2 - alpha
+//* @param[in] w2 - alpha
//* Alpha Value for the boundary
//*
-//* @param[in] x3 - beta
+//* @param[in] w3 - beta
//* Beta Value for the boundary
//*
//* @returns
diff --git a/common/armv8/ih264_default_weighted_pred_av8.s b/common/armv8/ih264_default_weighted_pred_av8.s
index 6823015..d10047e 100644
--- a/common/armv8/ih264_default_weighted_pred_av8.s
+++ b/common/armv8/ih264_default_weighted_pred_av8.s
@@ -88,18 +88,18 @@
// WORD32 src_strd1,
// WORD32 src_strd2,
// WORD32 dst_strd,
-// UWORD8 ht,
-// UWORD8 wd)
+// WORD32 ht,
+// WORD32 wd)
//
//**************Variables Vs Registers*****************************************
// x0 => puc_src1
// x1 => puc_src2
// x2 => puc_dst
-// x3 => src_strd1
-// [sp] => src_strd2 (x4)
-// [sp+4] => dst_strd (x5)
-// [sp+8] => ht (x6)
-// [sp+12] => wd (x7)
+// w3 => src_strd1
+// w4 => src_strd2
+// w5 => dst_strd
+// w6 => ht
+// w7 => wd
//
.text
.p2align 2
@@ -113,6 +113,9 @@ ih264_default_weighted_pred_luma_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
cmp w7, #16
beq loop_16 //branch if wd is 16
cmp w7, #8
@@ -263,18 +266,18 @@ end_loops:
// WORD32 src_strd1,
// WORD32 src_strd2,
// WORD32 dst_strd,
-// UWORD8 ht,
-// UWORD8 wd)
+// WORD32 ht,
+// WORD32 wd)
//
//**************Variables Vs Registers*****************************************
// x0 => puc_src1
// x1 => puc_src2
// x2 => puc_dst
-// x3 => src_strd1
-// [sp] => src_strd2 (x4)
-// [sp+4] => dst_strd (x5)
-// [sp+8] => ht (x6)
-// [sp+12] => wd (x7)
+// w3 => src_strd1
+// w4 => src_strd2
+// w5 => dst_strd
+// w6 => ht
+// w7 => wd
//
@@ -286,6 +289,9 @@ ih264_default_weighted_pred_chroma_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
cmp w7, #8
beq loop_8_uv //branch if wd is 8
cmp w7, #4
diff --git a/common/armv8/ih264_inter_pred_chroma_av8.s b/common/armv8/ih264_inter_pred_chroma_av8.s
index 714e271..f6aef40 100644
--- a/common/armv8/ih264_inter_pred_chroma_av8.s
+++ b/common/armv8/ih264_inter_pred_chroma_av8.s
@@ -91,19 +91,19 @@
// UWORD8 *pu1_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
-// UWORD8 u1_dx,
-// UWORD8 u1_dy,
+// WORD32 u1_dx,
+// WORD32 u1_dy,
// WORD32 ht,
// WORD32 wd)
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => u1_dx
-// x5 => u1_dy
-// x6 => height
-// x7 => width
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => u1_dx
+// w5 => u1_dy
+// w6 => height
+// w7 => width
//
.text
.p2align 2
@@ -120,6 +120,12 @@ ih264_inter_pred_chroma_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
+ sxtw x6, w6
+ sxtw x7, w7
diff --git a/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s b/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
index 6ad463a..e7c9f86 100644
--- a/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
+++ b/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
@@ -89,10 +89,10 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ht
-// x5 => wd
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ht
+// w5 => wd
.text
.p2align 2
@@ -111,6 +111,10 @@ ih264_inter_pred_luma_horz_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
sub x0, x0, #2 //pu1_src-2
sub x14, x4, #16
movi v0.8b, #5 //filter coeff
diff --git a/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s b/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
index 9564f99..711d73e 100644
--- a/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
+++ b/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
@@ -89,10 +89,10 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ht
-// x5 => wd
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ht
+// w5 => wd
.text
.p2align 2
@@ -108,6 +108,10 @@ ih264_inter_pred_luma_vert_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd
diff --git a/common/armv8/ih264_inter_pred_luma_copy_av8.s b/common/armv8/ih264_inter_pred_luma_copy_av8.s
index 1a76c1c..007df30 100644
--- a/common/armv8/ih264_inter_pred_luma_copy_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_copy_av8.s
@@ -65,10 +65,10 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x7 => ht
-// x12 => wd
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ht
+// w5 => wd
.text
.p2align 2
@@ -82,6 +82,10 @@ ih264_inter_pred_luma_copy_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
mov x12, x5
mov x7, x4
@@ -228,14 +232,16 @@ end_inner_loop_wd_16:
// Register Usage
// x0 : pi2_src
// x1 : pu1_out
-// x2 : src_strd
-// x3 : out_strd
+// w2 : src_strd
+// w3 : out_strd
// Neon registers d0-d7, d16-d30 are used
// No need for pushing arm and neon registers
.global ih264_interleave_copy_av8
ih264_interleave_copy_av8:
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
ld1 {v2.8b}, [x0], x2 //load src plane 1 => d2 &pred plane 2 => d3
ld1 {v3.8b}, [x0], x2
mov v2.d[1], v3.d[0]
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
index d2897b6..dd4383e 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
@@ -52,10 +52,10 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ht
-// x5 => wd
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ht
+// w5 => wd
.text
@@ -71,6 +71,10 @@ ih264_inter_pred_luma_horz_hpel_vert_hpel_av8:
//store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd
sub x0, x0, #2 //pu1_src-2
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
index 546c807..3563ac0 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
@@ -105,12 +105,12 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ht
-// x5 => wd
-// x7 => dydx
-// x9 => *pu1_tmp
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ht
+// w5 => wd
+// x6 => *pu1_tmp
+// w7 => dydx
.text
.p2align 2
@@ -126,6 +126,10 @@ ih264_inter_pred_luma_horz_hpel_vert_qpel_av8:
// store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
@@ -134,7 +138,8 @@ ih264_inter_pred_luma_horz_hpel_vert_qpel_av8:
mov x9, x6
- lsr x7, x7, #3 // dydx >> 2 followed by dydx & 0x3 and dydx>>1 to obtain the deciding bit
+ // by writing to w7 here, we clear the upper half of x7
+ lsr w7, w7, #3 // dydx >> 2 followed by dydx & 0x3 and dydx>>1 to obtain the deciding bit
add x7, x7, #2
mov x6, #48
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s
index 39e3253..38268c7 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s
@@ -94,11 +94,11 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ht
-// x5 => wd
-// x7 => dydx
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ht
+// w5 => wd
+// w7 => dydx
.text
.p2align 2
@@ -114,6 +114,10 @@ ih264_inter_pred_luma_horz_qpel_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
and x7, x7, #3 //Finds x-offset
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
index 3f3e297..6ccf11f 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
@@ -105,12 +105,12 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ht
-// x5 => wd
-// x6 => dydx
-// x9 => *pu1_tmp
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ht
+// w5 => wd
+// x6 => *pu1_tmp
+// w7 => dydx
.text
.p2align 2
@@ -125,11 +125,15 @@ ih264_inter_pred_luma_horz_qpel_vert_hpel_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd
sub x0, x0, #2 //pu1_src-2
mov x9, x6
- mov x6, x7
+ mov w6, w7
and x6, x6, #2 // dydx & 0x3 followed by dydx>>1 and dydx<<1
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
index ab663d0..a9dfbd1 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
@@ -104,11 +104,11 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ht
-// x5 => wd
-// x6 => dydx
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ht
+// w5 => wd
+// w7 => dydx
.text
.p2align 2
@@ -122,7 +122,11 @@ ih264_inter_pred_luma_horz_qpel_vert_qpel_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
- mov x6, x7
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
+ mov w6, w7
and x7, x6, #3
add x7, x0, x7, lsr #1 //pu1_pred_vert = pu1_src + (x_offset>>1)
diff --git a/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
index 9d19a2d..014faca 100644
--- a/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
@@ -94,11 +94,11 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ht
-// x5 => wd
-// x7 => dydx
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ht
+// w5 => wd
+// w7 => dydx
.text
.p2align 2
@@ -112,6 +112,10 @@ ih264_inter_pred_luma_vert_qpel_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x2, w2
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
and x7, x7, #12 //Finds y-offset
diff --git a/common/armv8/ih264_intra_pred_chroma_av8.s b/common/armv8/ih264_intra_pred_chroma_av8.s
index 8f0f282..39c0256 100644
--- a/common/armv8/ih264_intra_pred_chroma_av8.s
+++ b/common/armv8/ih264_intra_pred_chroma_av8.s
@@ -100,9 +100,9 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
@@ -113,13 +113,14 @@ ih264_intra_pred_chroma_8x8_mode_dc_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
- mov x19, #5
- ands x6, x4, x19
+ mov w19, #5
+ ands w6, w4, w19
beq none_available
- cmp x6, #1
+ cmp w6, #1
beq left_only_available
- cmp x6, #4
+ cmp w6, #4
beq top_only_available
all_available:
@@ -251,9 +252,9 @@ end_func:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_chroma_8x8_mode_horz_av8
@@ -263,6 +264,7 @@ ih264_intra_pred_chroma_8x8_mode_horz_av8:
push_v_regs
+ sxtw x3, w3
ld1 {v0.8h}, [x0]
dup v10.8h, v0.h[7]
@@ -332,9 +334,9 @@ ih264_intra_pred_chroma_8x8_mode_horz_av8:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_chroma_8x8_mode_vert_av8
@@ -342,6 +344,7 @@ ih264_intra_pred_chroma_8x8_mode_horz_av8:
ih264_intra_pred_chroma_8x8_mode_vert_av8:
push_v_regs
+ sxtw x3, w3
add x0, x0, #18
ld1 {v0.8b, v1.8b}, [x0]
@@ -405,15 +408,16 @@ ih264_intra_pred_chroma_8x8_mode_vert_av8:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_chroma_8x8_mode_plane_av8
ih264_intra_pred_chroma_8x8_mode_plane_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
ld1 {v0.2s}, [x0]
add x10, x0, #10
@@ -457,18 +461,14 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
rshrn v13.4h, v26.4s, #6
rshrn v14.4h, v28.4s, #6
ldrb w6, [x0], #1
- sxtw x6, w6
add x10, x0, #31
ldrb w8, [x0], #1
- sxtw x8, w8
ldrb w7, [x10], #1
- sxtw x7, w7
ldrb w9, [x10], #1
- sxtw x9, w9
- add x6, x6, x7
- add x8, x8, x9
- lsl x6, x6, #4
- lsl x8, x8, #4
+ add w6, w6, w7
+ add w8, w8, w9
+ lsl w6, w6, #4
+ lsl w8, w8, #4
dup v0.8h, w6
dup v2.8h, w8
dup v4.8h, v12.h[0]
diff --git a/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
index c1847b5..fa19c12 100644
--- a/common/armv8/ih264_intra_pred_luma_16x16_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
@@ -98,9 +98,9 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_16x16_mode_vert_av8
@@ -108,6 +108,7 @@
ih264_intra_pred_luma_16x16_mode_vert_av8:
push_v_regs
+ sxtw x3, w3
add x0, x0, #17
@@ -181,9 +182,9 @@ ih264_intra_pred_luma_16x16_mode_vert_av8:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_16x16_mode_horz_av8
@@ -192,6 +193,7 @@ ih264_intra_pred_luma_16x16_mode_horz_av8:
push_v_regs
+ sxtw x3, w3
ld1 {v0.16b}, [x0]
@@ -283,9 +285,9 @@ ih264_intra_pred_luma_16x16_mode_horz_av8:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_16x16_mode_dc_av8
@@ -295,18 +297,19 @@ ih264_intra_pred_luma_16x16_mode_dc_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
sub v0.16b, v0.16b, v0.16b
sub v1.16b, v1.16b, v1.16b
mov w10, #0
mov w11 , #3
- ands x6, x4, #0x01
+ ands w6, w4, #0x01
beq top_available //LEFT NOT AVAILABLE
ld1 {v0.16b}, [x0]
add w10, w10, #8
add w11, w11, #1
top_available:
- ands x6, x4, #0x04
+ ands w6, w4, #0x04
beq none_available
add x6, x0, #17
ld1 {v1.16b}, [x6]
@@ -314,7 +317,7 @@ top_available:
add w11, w11, #1
b summation
none_available:
- cmp x4, #0
+ cmp w4, #0
bne summation
mov w15, #128
dup v20.16b, w15
@@ -410,15 +413,16 @@ end_func:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_16x16_mode_plane_av8
ih264_intra_pred_luma_16x16_mode_plane_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
mov x2, x1
add x1, x0, #17
add x0, x0, #15
@@ -440,76 +444,58 @@ ih264_intra_pred_luma_16x16_mode_plane_av8:
uxtl v18.8h, v7.8b
add x7, x0, x4, lsl #3
sub x0, x7, x4, lsl #1
- sub x20, x4, #0x0
- neg x14, x20
+ neg x14, x4
addp v0.8h, v0.8h, v1.8h
ldrb w8, [x7], #-1
- sxtw x8, w8
ldrb w9, [x0], #1
- sxtw x9, w9
saddlp v0.2s, v0.4h
- sub x12, x8, x9
+ sub w12, w8, w9
ldrb w8, [x7], #-1
- sxtw x8, w8
saddlp v0.1d, v0.2s
ldrb w9, [x0], #1
- sxtw x9, w9
- sub x8, x8, x9
+ sub w8, w8, w9
shl v2.2s, v0.2s, #2
- add x12, x12, x8, lsl #1
+ add w12, w12, w8, lsl #1
add v0.2s, v0.2s , v2.2s
ldrb w8, [x7], #-1
- sxtw x8, w8
ldrb w9, [x0], #1
- sxtw x9, w9
srshr v0.2s, v0.2s, #6 // i_b = D0[0]
- sub x8, x8, x9
+ sub w8, w8, w9
ldrb w5, [x7], #-1
- sxtw x5, w5
- add x8, x8, x8, lsl #1
+ add w8, w8, w8, lsl #1
dup v4.8h, v0.h[0]
- add x12, x12, x8
+ add w12, w12, w8
ldrb w9, [x0], #1
- sxtw x9, w9
mul v0.8h, v4.8h , v16.8h
- sub x5, x5, x9
+ sub w5, w5, w9
mul v2.8h, v4.8h , v18.8h
- add x12, x12, x5, lsl #2
+ add w12, w12, w5, lsl #2
ldrb w8, [x7], #-1
- sxtw x8, w8
ldrb w9, [x0], #1
- sxtw x9, w9
- sub x8, x8, x9
+ sub w8, w8, w9
ldrb w5, [x7], #-1
- sxtw x5, w5
- add x8, x8, x8, lsl #2
+ add w8, w8, w8, lsl #2
ldrb w6, [x0], #1
- sxtw x6, w6
- add x12, x12, x8
+ add w12, w12, w8
ldrb w8, [x7], #-1
- sxtw x8, w8
ldrb w9, [x0], #1
- sxtw x9, w9
- sub x5, x5, x6
- sub x8, x8, x9
- add x5, x5, x5, lsl #1
- sub x20, x8, x8, lsl #3
- neg x8, x20
- add x12, x12, x5, lsl #1
+ sub w5, w5, w6
+ sub w8, w8, w9
+ add w5, w5, w5, lsl #1
+ sub w20, w8, w8, lsl #3
+ neg w8, w20
+ add w12, w12, w5, lsl #1
ldrb w5, [x7], #-1
- sxtw x5, w5
ldrb w6, [x10] //top_left
- sxtw x6, w6
- add x12, x12, x8
- sub x9, x5, x6
+ add w12, w12, w8
+ sub w9, w5, w6
ldrb w6, [x1, #7]
- sxtw x6, w6
- add x12, x12, x9, lsl #3 // i_c = x12
- add x8, x5, x6
- add x12, x12, x12, lsl #2
- lsl x8, x8, #4 // i_a = x8
- add x12, x12, #0x20
- lsr x12, x12, #6
+ add w12, w12, w9, lsl #3 // i_c = w12
+ add w8, w5, w6
+ add w12, w12, w12, lsl #2
+ lsl w8, w8, #4 // i_a = w8
+ add w12, w12, #0x20
+ lsr w12, w12, #6
shl v28.8h, v4.8h, #3
dup v6.8h, w12
dup v30.8h, w8
diff --git a/common/armv8/ih264_intra_pred_luma_4x4_av8.s b/common/armv8/ih264_intra_pred_luma_4x4_av8.s
index 62e8cee..1f95131 100644
--- a/common/armv8/ih264_intra_pred_luma_4x4_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_4x4_av8.s
@@ -102,15 +102,16 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_4x4_mode_vert_av8
ih264_intra_pred_luma_4x4_mode_vert_av8:
push_v_regs
+ sxtw x3, w3
add x0, x0, #5
@@ -171,9 +172,9 @@ ih264_intra_pred_luma_4x4_mode_vert_av8:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
@@ -182,6 +183,7 @@ ih264_intra_pred_luma_4x4_mode_vert_av8:
ih264_intra_pred_luma_4x4_mode_horz_av8:
push_v_regs
+ sxtw x3, w3
ld1 {v1.s}[0], [x0]
dup v0.8b, v1.b[3]
@@ -246,9 +248,9 @@ ih264_intra_pred_luma_4x4_mode_horz_av8:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
@@ -261,41 +263,34 @@ ih264_intra_pred_luma_4x4_mode_dc_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
- ands x5, x4, #0x01
+ ands w5, w4, #0x01
beq top_available //LEFT NOT AVAILABLE
add x10, x0, #3
mov x2, #-1
ldrb w5, [x10], #-1
- sxtw x5, w5
ldrb w6, [x10], #-1
- sxtw x6, w6
ldrb w7, [x10], #-1
- sxtw x7, w7
- add x5, x5, x6
+ add w5, w5, w6
ldrb w8, [x10], #-1
- sxtw x8, w8
- add x5, x5, x7
- ands x11, x4, #0x04 // CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
- add x5, x5, x8
+ add w5, w5, w7
+ ands w11, w4, #0x04 // CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
+ add w5, w5, w8
beq left_available
add x10, x0, #5
// BOTH LEFT AND TOP AVAILABLE
ldrb w6, [x10], #1
- sxtw x6, w6
ldrb w7, [x10], #1
- sxtw x7, w7
- add x5, x5, x6
+ add w5, w5, w6
ldrb w8, [x10], #1
- sxtw x8, w8
- add x5, x5, x7
+ add w5, w5, w7
ldrb w9, [x10], #1
- sxtw x9, w9
- add x5, x5, x8
- add x5, x5, x9
- add x5, x5, #4
- lsr x5, x5, #3
+ add w5, w5, w8
+ add w5, w5, w9
+ add w5, w5, #4
+ lsr w5, w5, #3
dup v0.8b, w5
st1 {v0.s}[0], [x1], x3
st1 {v0.s}[0], [x1], x3
@@ -304,23 +299,19 @@ ih264_intra_pred_luma_4x4_mode_dc_av8:
b end_func
top_available: // ONLT TOP AVAILABLE
- ands x11, x4, #0x04 // CHECKING TOP AVAILABILTY OR ELSE BRANCH TO NONE AVAILABLE
+ ands w11, w4, #0x04 // CHECKING TOP AVAILABILTY OR ELSE BRANCH TO NONE AVAILABLE
beq none_available
add x10, x0, #5
ldrb w6, [x10], #1
- sxtw x6, w6
ldrb w7, [x10], #1
- sxtw x7, w7
ldrb w8, [x10], #1
- sxtw x8, w8
- add x5, x6, x7
+ add w5, w6, w7
ldrb w9, [x10], #1
- sxtw x9, w9
- add x5, x5, x8
- add x5, x5, x9
- add x5, x5, #2
- lsr x5, x5, #2
+ add w5, w5, w8
+ add w5, w5, w9
+ add w5, w5, #2
+ lsr w5, w5, #2
dup v0.8b, w5
st1 {v0.s}[0], [x1], x3
st1 {v0.s}[0], [x1], x3
@@ -401,9 +392,9 @@ end_func:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_4x4_mode_diag_dl_av8
@@ -413,6 +404,7 @@ ih264_intra_pred_luma_4x4_mode_diag_dl_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
add x0, x0, #5
sub x5, x3, #2
@@ -488,9 +480,9 @@ end_func_diag_dl:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_4x4_mode_diag_dr_av8
@@ -499,6 +491,7 @@ ih264_intra_pred_luma_4x4_mode_diag_dr_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
ld1 {v0.8b}, [x0]
@@ -571,9 +564,9 @@ end_func_diag_dr:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_4x4_mode_vert_r_av8
@@ -582,6 +575,7 @@ ih264_intra_pred_luma_4x4_mode_vert_r_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
ld1 {v0.8b}, [x0]
@@ -656,9 +650,9 @@ end_func_vert_r:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_4x4_mode_horz_d_av8
@@ -667,6 +661,7 @@ ih264_intra_pred_luma_4x4_mode_horz_d_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
ld1 {v0.8b}, [x0]
add x0, x0, #1
@@ -743,9 +738,9 @@ end_func_horz_d:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_4x4_mode_vert_l_av8
@@ -754,6 +749,7 @@ ih264_intra_pred_luma_4x4_mode_vert_l_av8:
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
add x0, x0, #4
ld1 {v0.8b}, [x0]
add x0, x0, #1
@@ -825,9 +821,9 @@ end_func_vert_l:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_4x4_mode_horz_u_av8
@@ -835,11 +831,11 @@ end_func_vert_l:
ih264_intra_pred_luma_4x4_mode_horz_u_av8:
push_v_regs
+ sxtw x3, w3
stp x19, x20, [sp, #-16]!
mov x10, x0
ld1 {v0.8b}, [x0]
ldrb w9, [x0], #1
- sxtw x9, w9
ext v1.8b, v0.8b , v0.8b , #1
ld1 {v0.b}[7], [x10]
ext v2.8b, v1.8b , v1.8b , #1
diff --git a/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
index bf9a4c1..273aa81 100644
--- a/common/armv8/ih264_intra_pred_luma_8x8_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
@@ -102,9 +102,9 @@
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_vert_av8
@@ -114,6 +114,7 @@ ih264_intra_pred_luma_8x8_mode_vert_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
//stp x19, x20,[sp,#-16]!
+ sxtw x3, w3
add x0, x0, #9
ld1 {v0.8b}, [x0]
@@ -180,9 +181,9 @@ ih264_intra_pred_luma_8x8_mode_vert_av8:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_horz_av8
@@ -194,38 +195,30 @@ ih264_intra_pred_luma_8x8_mode_horz_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
add x0, x0, #7
- mov x2 , #-1
ldrb w5, [x0], #-1
- sxtw x5, w5
ldrb w6, [x0], #-1
- sxtw x6, w6
dup v0.8b, w5
st1 {v0.8b}, [x1], x3
ldrb w7, [x0], #-1
- sxtw x7, w7
dup v1.8b, w6
st1 {v1.8b}, [x1], x3
dup v2.8b, w7
ldrb w8, [x0], #-1
- sxtw x8, w8
dup v3.8b, w8
st1 {v2.8b}, [x1], x3
ldrb w5, [x0], #-1
- sxtw x5, w5
st1 {v3.8b}, [x1], x3
dup v0.8b, w5
ldrb w6, [x0], #-1
- sxtw x6, w6
st1 {v0.8b}, [x1], x3
ldrb w7, [x0], #-1
- sxtw x7, w7
dup v1.8b, w6
dup v2.8b, w7
st1 {v1.8b}, [x1], x3
ldrb w8, [x0], #-1
- sxtw x8, w8
dup v3.8b, w8
st1 {v2.8b}, [x1], x3
st1 {v3.8b}, [x1], x3
@@ -285,9 +278,9 @@ ih264_intra_pred_luma_8x8_mode_horz_av8:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_dc_av8
@@ -298,37 +291,30 @@ ih264_intra_pred_luma_8x8_mode_dc_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
+ sxtw x3, w3
stp x19, x20, [sp, #-16]!
- ands x6, x4, #0x01
+ ands w6, w4, #0x01
beq top_available //LEFT NOT AVAILABLE
add x10, x0, #7
mov x2, #-1
ldrb w5, [x10], -1
- sxtw x5, w5
ldrb w6, [x10], -1
- sxtw x6, w6
ldrb w7, [x10], -1
- sxtw x7, w7
- add x5, x5, x6
+ add w5, w5, w6
ldrb w8, [x10], -1
- sxtw x8, w8
- add x5, x5, x7
+ add w5, w5, w7
ldrb w6, [x10], -1
- sxtw x6, w6
- add x5, x5, x8
+ add w5, w5, w8
ldrb w7, [x10], -1
- sxtw x7, w7
- add x5, x5, x6
+ add w5, w5, w6
ldrb w8, [x10], -1
- sxtw x8, w8
- add x5, x5, x7
- ands x11, x4, #0x04 // CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
- add x5, x5, x8
+ add w5, w5, w7
+ ands w11, w4, #0x04 // CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
+ add w5, w5, w8
ldrb w6, [x10], -1
- sxtw x6, w6
- add x5, x5, x6
+ add w5, w5, w6
beq left_available
add x10, x0, #9
// BOTH LEFT AND TOP AVAILABLE
@@ -351,7 +337,7 @@ ih264_intra_pred_luma_8x8_mode_dc_av8:
b end_func
top_available: // ONLT TOP AVAILABLE
- ands x11, x4, #0x04 // CHECKING TOP AVAILABILTY OR ELSE BRANCH TO NONE AVAILABLE
+ ands w11, w4, #0x04 // CHECKING TOP AVAILABILTY OR ELSE BRANCH TO NONE AVAILABLE
beq none_available
add x10, x0, #9
@@ -452,9 +438,9 @@ end_func:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_diag_dl_av8
@@ -463,6 +449,7 @@ ih264_intra_pred_luma_8x8_mode_diag_dl_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
add x0, x0, #9
sub x5, x3, #4
@@ -554,9 +541,9 @@ end_func_diag_dl:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_diag_dr_av8
@@ -566,6 +553,7 @@ ih264_intra_pred_luma_8x8_mode_diag_dr_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
ld1 { v0.16b}, [x0]
@@ -654,9 +642,9 @@ end_func_diag_dr:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_vert_r_av8
@@ -666,6 +654,7 @@ ih264_intra_pred_luma_8x8_mode_vert_r_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
ld1 { v0.16b}, [x0]
mov v1.d[0], v0.d[1]
@@ -780,9 +769,9 @@ end_func_vert_r:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_horz_d_av8
@@ -791,6 +780,7 @@ ih264_intra_pred_luma_8x8_mode_horz_d_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
ld1 { v0.16b}, [x0]
mov v1.d[0], v0.d[1]
@@ -910,9 +900,9 @@ end_func_horz_d:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_vert_l_av8
@@ -922,6 +912,7 @@ ih264_intra_pred_luma_8x8_mode_vert_l_av8:
// STMFD sp!, {x4-x12, x14} //Restoring registers from stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
add x0, x0, #9
ld1 { v0.16b}, [x0]
mov v1.d[0], v0.d[1]
@@ -1018,9 +1009,9 @@ end_func_vert_l:
//**************Variables Vs Registers*****************************************
// x0 => *pu1_src
// x1 => *pu1_dst
-// x2 => src_strd
-// x3 => dst_strd
-// x4 => ui_neighboravailability
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => ui_neighboravailability
.global ih264_intra_pred_luma_8x8_mode_horz_u_av8
@@ -1029,6 +1020,7 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
+ sxtw x3, w3
ld1 {v0.8b}, [x0]
ld1 {v1.b}[7], [x0]
diff --git a/common/armv8/ih264_iquant_itrans_recon_av8.s b/common/armv8/ih264_iquant_itrans_recon_av8.s
index 4c83036..003ee74 100644
--- a/common/armv8/ih264_iquant_itrans_recon_av8.s
+++ b/common/armv8/ih264_iquant_itrans_recon_av8.s
@@ -103,11 +103,11 @@
//x0 => *pi2_src
//x1 => *pu1_pred
//x2 => *pu1_out
-//x3 => pred_strd
-//x4 => out_strd
+//w3 => pred_strd
+//w4 => out_strd
//x5 => *pu2_iscal_mat
//x6 => *pu2_weigh_mat
-//x7 => u4_qp_div_6
+//w7 => u4_qp_div_6
// => pi4_tmp
// => iq_start_idx
// => pi2_dc_ld_addr
@@ -119,6 +119,8 @@
ih264_iquant_itrans_recon_4x4_av8:
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
dup v30.4s, w7 //Populate the u4_qp_div_6 in Q15
@@ -292,11 +294,11 @@ skip_loading_luma_dc_src:
//x0 => *pi2_src
//x1 => *pu1_pred
//x2 => *pu1_out
-//x3 => pred_strd
-//x4 => out_strd
+//w3 => pred_strd
+//w4 => out_strd
//x5 => *pu2_iscal_mat
//x6 => *pu2_weigh_mat
-//x7 => u4_qp_div_6
+//w7 => u4_qp_div_6
//sp => pi4_tmp
//sp#8 => *pi2_dc_src
@@ -315,6 +317,8 @@ ih264_iquant_itrans_recon_chroma_4x4_av8:
//reduce sp by 64
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
dup v30.4s, w7 //Populate the u4_qp_div_6 in Q15
@@ -512,11 +516,11 @@ ih264_iquant_itrans_recon_chroma_4x4_av8:
//x0 => *pi2_src
//x1 => *pu1_pred
//x2 => *pu1_out
-//x3 => pred_strd
-//x4 => out_strd
+//w3 => pred_strd
+//w4 => out_strd
//x5 => *pu2_iscal_mat
//x6 => *pu2_weigh_mat
-//x7 => u4_qp_div_6
+//w7 => u4_qp_div_6
//NOT USED => pi4_tmp
//NOT USED => iq_start_idx
//NOT USED => pi2_dc_ld_addr
@@ -525,6 +529,8 @@ ih264_iquant_itrans_recon_chroma_4x4_av8:
ih264_iquant_itrans_recon_8x8_av8:
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
ld1 {v8.8h -v11.8h}, [x5], #64
ld1 {v12.8h-v15.8h}, [x5]
diff --git a/common/armv8/ih264_iquant_itrans_recon_dc_av8.s b/common/armv8/ih264_iquant_itrans_recon_dc_av8.s
index 8bb9c32..13061ec 100644
--- a/common/armv8/ih264_iquant_itrans_recon_dc_av8.s
+++ b/common/armv8/ih264_iquant_itrans_recon_dc_av8.s
@@ -104,11 +104,11 @@
//x0 => *pi2_src
//x1 => *pu1_pred
//x2 => *pu1_out
-//x3 => pred_strd
-//x4 => out_strd
+//w3 => pred_strd
+//w4 => out_strd
//x5 => *pu2_iscal_mat
//x6 => *pu2_weigh_mat
-//x7 => u4_qp_div_6
+//w7 => u4_qp_div_6
// => pi4_tmp
// => iq_start_idx
// => pi2_dc_ld_addr
@@ -119,6 +119,8 @@
.global ih264_iquant_itrans_recon_4x4_dc_av8
ih264_iquant_itrans_recon_4x4_dc_av8:
+ sxtw x3, w3
+ sxtw x4, w4
ldr w8, [sp, #8] //Loads iq_start_idx
subs w8, w8, #1 // if x8 == 1 => intra case , so result of subtraction is zero and z flag is set
@@ -209,11 +211,11 @@ donot_use_pi2_src_luma_dc:
// x0 : pi2_src
// x1 : pu1_pred
// x2 : pu1_out
-// x3 : pred_strd
-// x4 : out_strd
+// w3 : pred_strd
+// w4 : out_strd
// x5 : pu2_iscal_mat
// x6 : pu2_weigh_mat
-// x7 : u4_qp_div_6
+// w7 : u4_qp_div_6
// : pi2_tmp
// : pi2_dc_src
// Neon registers d0-d7, d16-d30 are used
@@ -223,6 +225,8 @@ donot_use_pi2_src_luma_dc:
.global ih264_iquant_itrans_recon_chroma_4x4_dc_av8
ih264_iquant_itrans_recon_chroma_4x4_dc_av8:
+ sxtw x3, w3
+ sxtw x4, w4
ldr x0, [sp, #8]
push_v_regs
ld1 {v0.h}[0], [x0]
@@ -327,11 +331,11 @@ ih264_iquant_itrans_recon_chroma_4x4_dc_av8:
//x0 => *pi2_src
//x1 => *pu1_pred
//x2 => *pu1_out
-//x3 => pred_strd
-//x4 => out_strd
+//w3 => pred_strd
+//w4 => out_strd
//x5 => *pu2_iscal_mat
//x6 => *pu2_weigh_mat
-//x7 => u4_qp_div_6
+//w7 => u4_qp_div_6
//NOT USED => pi4_tmp
//NOT USED => iq_start_idx
//NOT USED => pi2_dc_ld_addr
@@ -340,6 +344,8 @@ ih264_iquant_itrans_recon_chroma_4x4_dc_av8:
ih264_iquant_itrans_recon_8x8_dc_av8:
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
ld1 {v1.h}[0], [x5]
ld1 {v2.h}[0], [x6]
diff --git a/common/armv8/ih264_mem_fns_neon_av8.s b/common/armv8/ih264_mem_fns_neon_av8.s
index 4e9020d..802550d 100644
--- a/common/armv8/ih264_mem_fns_neon_av8.s
+++ b/common/armv8/ih264_mem_fns_neon_av8.s
@@ -70,11 +70,11 @@
//*/
//void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
// UWORD8 *pu1_src,
-// UWORD8 num_bytes)
+// UWORD32 num_bytes)
//**************Variables Vs Registers*************************
// x0 => *pu1_dst
// x1 => *pu1_src
-// x2 => num_bytes
+// w2 => num_bytes
@@ -89,7 +89,7 @@ loop_neon_memcpy_mul_8:
ld1 {v0.8b}, [x1], #8
st1 {v0.8b}, [x0], #8
- subs x2, x2, #8
+ subs w2, w2, #8
bne loop_neon_memcpy_mul_8
ret
@@ -99,38 +99,36 @@ loop_neon_memcpy_mul_8:
//*/
//void ih264_memcpy(UWORD8 *pu1_dst,
// UWORD8 *pu1_src,
-// UWORD8 num_bytes)
+// UWORD32 num_bytes)
//**************Variables Vs Registers*************************
// x0 => *pu1_dst
// x1 => *pu1_src
-// x2 => num_bytes
+// w2 => num_bytes
.global ih264_memcpy_av8
ih264_memcpy_av8:
- subs x2, x2, #8
+ subs w2, w2, #8
blt arm_memcpy
loop_neon_memcpy:
// Memcpy 8 bytes
ld1 {v0.8b}, [x1], #8
st1 {v0.8b}, [x0], #8
- subs x2, x2, #8
+ subs w2, w2, #8
bge loop_neon_memcpy
- cmn x2, #8
+ cmn w2, #8
beq end_func1
arm_memcpy:
- add x2, x2, #8
+ add w2, w2, #8
loop_arm_memcpy:
ldrb w3, [x1], #1
- sxtw x3, w3
strb w3, [x0], #1
- sxtw x3, w3
- subs x2, x2, #1
+ subs w2, w2, #1
bne loop_arm_memcpy
ret
end_func1:
@@ -139,7 +137,7 @@ end_func1:
//void ih264_memset_mul_8(UWORD8 *pu1_dst,
// UWORD8 value,
-// UWORD8 num_bytes)
+// UWORD32 num_bytes)
//**************Variables Vs Registers*************************
// x0 => *pu1_dst
// x1 => value
@@ -156,7 +154,7 @@ loop_memset_mul_8:
// Memset 8 bytes
st1 {v0.8b}, [x0], #8
- subs x2, x2, #8
+ subs w2, w2, #8
bne loop_memset_mul_8
ret
@@ -164,36 +162,35 @@ loop_memset_mul_8:
//void ih264_memset(UWORD8 *pu1_dst,
// UWORD8 value,
-// UWORD8 num_bytes)
+// UWORD32 num_bytes)
//**************Variables Vs Registers*************************
// x0 => *pu1_dst
-// x1 => value
-// x2 => num_bytes
+// w1 => value
+// w2 => num_bytes
.global ih264_memset_av8
ih264_memset_av8:
- subs x2, x2, #8
+ subs w2, w2, #8
blt arm_memset
dup v0.8b, w1
loop_neon_memset:
// Memcpy 8 bytes
st1 {v0.8b}, [x0], #8
- subs x2, x2, #8
+ subs w2, w2, #8
bge loop_neon_memset
- cmn x2, #8
+ cmn w2, #8
beq end_func2
arm_memset:
- add x2, x2, #8
+ add w2, w2, #8
loop_arm_memset:
strb w1, [x0], #1
- sxtw x1, w1
- subs x2, x2, #1
+ subs w2, w2, #1
bne loop_arm_memset
ret
end_func2:
@@ -205,11 +202,11 @@ end_func2:
//void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
// UWORD16 value,
-// UWORD8 num_words)
+// UWORD32 num_words)
//**************Variables Vs Registers*************************
// x0 => *pu2_dst
-// x1 => value
-// x2 => num_words
+// w1 => value
+// w2 => num_words
.global ih264_memset_16bit_mul_8_av8
@@ -224,7 +221,7 @@ loop_memset_16bit_mul_8:
st1 {v0.4h}, [x0], #8
st1 {v0.4h}, [x0], #8
- subs x2, x2, #8
+ subs w2, w2, #8
bne loop_memset_16bit_mul_8
ret
@@ -233,18 +230,18 @@ loop_memset_16bit_mul_8:
//void ih264_memset_16bit(UWORD16 *pu2_dst,
// UWORD16 value,
-// UWORD8 num_words)
+// UWORD32 num_words)
//**************Variables Vs Registers*************************
// x0 => *pu2_dst
-// x1 => value
-// x2 => num_words
+// w1 => value
+// w2 => num_words
.global ih264_memset_16bit_av8
ih264_memset_16bit_av8:
- subs x2, x2, #8
+ subs w2, w2, #8
blt arm_memset_16bit
dup v0.4h, w1
loop_neon_memset_16bit:
@@ -252,18 +249,17 @@ loop_neon_memset_16bit:
st1 {v0.4h}, [x0], #8
st1 {v0.4h}, [x0], #8
- subs x2, x2, #8
+ subs w2, w2, #8
bge loop_neon_memset_16bit
- cmn x2, #8
+ cmn w2, #8
beq end_func3
arm_memset_16bit:
- add x2, x2, #8
+ add w2, w2, #8
loop_arm_memset_16bit:
strh w1, [x0], #2
- sxtw x1, w1
- subs x2, x2, #1
+ subs w2, w2, #1
bne loop_arm_memset_16bit
ret
diff --git a/common/armv8/ih264_padding_neon_av8.s b/common/armv8/ih264_padding_neon_av8.s
index 35d9c8a..e03fe2f 100644
--- a/common/armv8/ih264_padding_neon_av8.s
+++ b/common/armv8/ih264_padding_neon_av8.s
@@ -76,9 +76,9 @@
// WORD32 pad_size)
//**************Variables Vs Registers*************************
// x0 => *pu1_src
-// x1 => src_strd
-// x2 => wd
-// x3 => pad_size
+// w1 => src_strd
+// w2 => wd
+// w3 => pad_size
.global ih264_pad_top_av8
@@ -86,25 +86,25 @@ ih264_pad_top_av8:
// STMFD sp!, {x4-x11,x14} //stack stores the values of the arguments
push_v_regs
+ sxtw x1, w1
stp x19, x20, [sp, #-16]!
sub x5, x0, x1
- sub x20, x1, #0
- neg x6, x20
+ neg x6, x1
loop_neon_memcpy_mul_16:
// Load 16 bytes
ld1 {v0.8b, v1.8b}, [x0], #16
mov x4, x5
- mov x7, x3
+ mov w7, w3
add x5, x5, #16
loop_neon_pad_top:
st1 {v0.8b, v1.8b}, [x4], x6
- subs x7, x7, #1
+ subs w7, w7, #1
bne loop_neon_pad_top
- subs x2, x2, #16
+ subs w2, w2, #16
bne loop_neon_memcpy_mul_16
// LDMFD sp!,{x4-x11,pc} //Reload the registers from SP
@@ -160,9 +160,9 @@ loop_neon_pad_top:
// WORD32 pad_size)
//**************Variables Vs Registers*************************
// x0 => *pu1_src
-// x1 => src_strd
-// x2 => ht
-// x3 => pad_size
+// w1 => src_strd
+// w2 => ht
+// w3 => pad_size
@@ -172,6 +172,8 @@ ih264_pad_left_luma_av8:
// STMFD sp!, {x4-x11,x14} //stack stores the values of the arguments
push_v_regs
+ sxtw x1, w1
+ sxtw x3, w3
stp x19, x20, [sp, #-16]!
@@ -182,43 +184,35 @@ ih264_pad_left_luma_av8:
loop_16: // /*hard coded for width=16 ,height =8,16*/
ldrb w8, [x0]
add x0, x0, x1
- sxtw x8, w8
ldrb w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.16b, w8
ldrb w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], x1 // 16 bytes store
dup v2.16b, w9
st1 {v2.16b}, [x4], x1 // 16 bytes store
ldrb w11, [x0]
add x0, x0, x1
- sxtw x11, w11
dup v4.16b, w10
dup v6.16b, w11
st1 {v4.16b}, [x4], x1 // 16 bytes store
ldrb w8, [x0]
add x0, x0, x1
- sxtw x8, w8
st1 {v6.16b}, [x4], x1 // 16 bytes store
ldrb w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.16b, w8
ldrb w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], x1 // 16 bytes store
dup v2.16b, w9
ldrb w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v2.16b}, [x4], x1 // 16 bytes store
dup v4.16b, w10
dup v6.16b, w11
- subs x2, x2, #8
+ subs w2, w2, #8
st1 {v4.16b}, [x4], x1 // 16 bytes store
st1 {v6.16b}, [x4], x1 // 16 bytes store
bne loop_16
@@ -227,14 +221,11 @@ loop_16: // /*hard coded for width=16 ,height =
loop_32: // /*hard coded for width=32 ,height =8,16*/
ldrb w8, [x0]
add x0, x0, x1
- sxtw x8, w8
ldrb w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.16b, w8
ldrb w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.16b, w9
st1 {v0.16b}, [x4], x6
@@ -243,35 +234,30 @@ loop_32: // /*hard coded for width=32 ,height =8
st1 {v2.16b}, [x4], x6 // 16 bytes store
ldrb w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v4.16b}, [x4], #16 // 16 bytes store
dup v6.16b, w11
st1 {v4.16b}, [x4], x6 // 16 bytes store
ldrb w8, [x0]
add x0, x0, x1
- sxtw x8, w8
st1 {v6.16b}, [x4], #16 // 16 bytes store
dup v0.16b, w8
ldrb w9, [x0]
add x0, x0, x1
- sxtw x9, w9
st1 {v6.16b}, [x4], x6 // 16 bytes store
ldrb w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.16b, w9
st1 {v0.16b}, [x4], x6 // 16 bytes store
ldrb w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v2.16b}, [x4], #16 // 16 bytes store
dup v4.16b, w10
st1 {v2.16b}, [x4], x6 // 16 bytes store
st1 {v4.16b}, [x4], #16 // 16 bytes store
dup v6.16b, w11
st1 {v4.16b}, [x4], x6 // 16 bytes store
- subs x2, x2, #8
+ subs w2, w2, #8
st1 {v6.16b}, [x4], #16 // 16 bytes store
st1 {v6.16b}, [x4], x6 // 16 bytes store
bne loop_32
@@ -333,9 +319,9 @@ end_func:
// WORD32 pad_size)
//{
// x0 => *pu1_src
-// x1 => src_strd
-// x2 => ht
-// x3 => pad_size
+// w1 => src_strd
+// w2 => ht
+// w3 => pad_size
@@ -345,6 +331,8 @@ ih264_pad_left_chroma_av8:
// STMFD sp!, {x4-x11, x14} //stack stores the values of the arguments
push_v_regs
+ sxtw x1, w1
+ sxtw x3, w3
stp x19, x20, [sp, #-16]!
sub x4, x0, x3
@@ -354,27 +342,23 @@ ih264_pad_left_chroma_av8:
loop_32_l_c: // /*hard coded for width=32 ,height =4,8,12*/
ldrh w8, [x0]
add x0, x0, x1
- sxtw x8, w8
ldrh w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.8h, w8
ldrh w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.8h, w9
st1 {v0.16b}, [x4], x6 // 16 bytes store
ldrh w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v2.16b}, [x4], #16 // 16 bytes store
dup v4.8h, w10
st1 {v2.16b}, [x4], x6 // 16 bytes store
dup v6.8h, w11
st1 {v4.16b}, [x4], #16 // 16 bytes store
st1 {v4.16b}, [x4], x6 // 16 bytes store
- subs x2, x2, #4
+ subs w2, w2, #4
st1 {v6.16b}, [x4], #16 // 16 bytes store
st1 {v6.16b}, [x4], x6 // 16 bytes store
@@ -383,27 +367,23 @@ loop_32_l_c: // /*hard coded for width=32 ,height =
ldrh w8, [x0]
add x0, x0, x1
- sxtw x8, w8
ldrh w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.8h, w8
ldrh w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.8h, w9
st1 {v0.16b}, [x4], x6
ldrh w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v2.16b}, [x4], #16 // 16 bytes store
dup v4.8h, w10
st1 {v2.16b}, [x4], x6 // 16 bytes store
dup v6.8h, w11
st1 {v4.16b}, [x4], #16 // 16 bytes store
st1 {v4.16b}, [x4], x6 // 16 bytes store
- subs x2, x2, #4
+ subs w2, w2, #4
st1 {v6.16b}, [x4], #16 // 16 bytes store
st1 {v6.16b}, [x4], x6 // 16 bytes store
@@ -412,20 +392,16 @@ loop_32_l_c: // /*hard coded for width=32 ,height =
ldrh w8, [x0]
add x0, x0, x1
- sxtw x8, w8
ldrh w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.8h, w8
ldrh w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.8h, w9
st1 {v0.16b}, [x4], x6
ldrh w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v2.16b}, [x4], #16 // 16 bytes store
dup v4.8h, w10
st1 {v2.16b}, [x4], x6 // 16 bytes store
@@ -500,9 +476,9 @@ end_func_l_c:
//}
//
// x0 => *pu1_src
-// x1 => src_strd
-// x2 => ht
-// x3 => pad_size
+// w1 => src_strd
+// w2 => ht
+// w3 => pad_size
@@ -512,6 +488,8 @@ ih264_pad_right_luma_av8:
// STMFD sp!, {x4-x11, x14} //stack stores the values of the arguments
push_v_regs
+ sxtw x1, w1
+ sxtw x3, w3
stp x19, x20, [sp, #-16]!
mov x4, x0
@@ -522,43 +500,35 @@ ih264_pad_right_luma_av8:
loop_16_r: // /*hard coded for width=16 ,height =8,16*/
ldrb w8, [x0]
add x0, x0, x1
- sxtw x8, w8
ldrb w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.16b, w8
ldrb w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], x1 // 16 bytes store
dup v2.16b, w9
st1 {v2.16b}, [x4], x1 // 16 bytes store
ldrb w11, [x0]
add x0, x0, x1
- sxtw x11, w11
dup v4.16b, w10
dup v6.16b, w11
st1 {v4.16b}, [x4], x1 // 16 bytes store
ldrb w8, [x0]
add x0, x0, x1
- sxtw x8, w8
st1 {v6.16b}, [x4], x1 // 16 bytes store
ldrb w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.16b, w8
ldrb w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], x1 // 16 bytes store
dup v2.16b, w9
ldrb w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v2.16b}, [x4], x1 // 16 bytes store
dup v4.16b, w10
dup v6.16b, w11
- subs x2, x2, #8
+ subs w2, w2, #8
st1 {v4.16b}, [x4], x1 // 16 bytes store
st1 {v6.16b}, [x4], x1 // 16 bytes store
bne loop_16_r
@@ -567,14 +537,11 @@ loop_16_r: // /*hard coded for width=16 ,height =8,16*/
loop_32_r: // /*hard coded for width=32 ,height =8,16*/
ldrb w8, [x0]
add x0, x0, x1
- sxtw x8, w8
ldrb w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.16b, w8
ldrb w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.16b, w9
st1 {v0.16b}, [x4], x6
@@ -583,35 +550,30 @@ loop_32_r: // /*hard coded for width=32 ,height =
st1 {v2.16b}, [x4], x6 // 16 bytes store
ldrb w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v4.16b}, [x4], #16 // 16 bytes store
dup v6.16b, w11
st1 {v4.16b}, [x4], x6 // 16 bytes store
ldrb w8, [x0]
add x0, x0, x1
- sxtw x8, w8
st1 {v6.16b}, [x4], #16 // 16 bytes store
ldrb w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.16b, w8
st1 {v6.16b}, [x4], x6 // 16 bytes store
ldrb w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.16b, w9
st1 {v0.16b}, [x4], x6 // 16 bytes store
ldrb w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v2.16b}, [x4], #16 // 16 bytes store
dup v4.16b, w10
st1 {v2.16b}, [x4], x6 // 16 bytes store
st1 {v4.16b}, [x4], #16 // 16 bytes store
dup v6.16b, w11
st1 {v4.16b}, [x4], x6 // 16 bytes store
- subs x2, x2, #8
+ subs w2, w2, #8
st1 {v6.16b}, [x4], #16 // 16 bytes store
st1 {v6.16b}, [x4], x6 // 16 bytes store
bne loop_32_r
@@ -672,9 +634,9 @@ end_func_r:
// WORD32 ht,
// WORD32 pad_size)
// x0 => *pu1_src
-// x1 => src_strd
-// x2 => ht
-// x3 => pad_size
+// w1 => src_strd
+// w2 => ht
+// w3 => pad_size
@@ -684,6 +646,8 @@ ih264_pad_right_chroma_av8:
// STMFD sp!, {x4-x11, x14} //stack stores the values of the arguments
push_v_regs
+ sxtw x1, w1
+ sxtw x3, w3
stp x19, x20, [sp, #-16]!
mov x4, x0
@@ -692,24 +656,20 @@ ih264_pad_right_chroma_av8:
loop_32_r_c: // /*hard coded for width=32 ,height =8,4*/
ldrh w8, [x0]
add x0, x0, x1
- sxtw x8, w8
ldrh w9, [x0]
add x0, x0, x1
- sxtw x9, w9
dup v0.8h, w8
ldrh w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.8h, w9
st1 {v0.16b}, [x4], x6
st1 {v2.16b}, [x4], #16 // 16 bytes store
dup v4.8h, w10
st1 {v2.16b}, [x4], x6 // 16 bytes store
- subs x2, x2, #4
+ subs w2, w2, #4
ldrh w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v4.16b}, [x4], #16 // 16 bytes store
dup v6.8h, w11
st1 {v4.16b}, [x4], x6 // 16 bytes store
@@ -720,27 +680,23 @@ loop_32_r_c: // /*hard coded for width=32 ,height =8,4*/
ldrh w8, [x0]
add x0, x0, x1
- sxtw x8, w8
dup v0.8h, w8
ldrh w9, [x0]
add x0, x0, x1
- sxtw x9, w9
ldrh w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.8h, w9
st1 {v0.16b}, [x4], x6 // 16 bytes store
ldrh w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v2.16b}, [x4], #16 // 16 bytes store
dup v4.8h, w10
st1 {v2.16b}, [x4], x6 // 16 bytes store
st1 {v4.16b}, [x4], #16 // 16 bytes store
dup v6.8h, w11
st1 {v4.16b}, [x4], x6 // 16 bytes store
- subs x2, x2, #4
+ subs w2, w2, #4
st1 {v6.16b}, [x4], #16 // 16 bytes store
st1 {v6.16b}, [x4], x6 // 16 bytes store
@@ -748,20 +704,16 @@ loop_32_r_c: // /*hard coded for width=32 ,height =8,4*/
bne loop_32_r_c
ldrh w8, [x0]
add x0, x0, x1
- sxtw x8, w8
dup v0.8h, w8
ldrh w9, [x0]
add x0, x0, x1
- sxtw x9, w9
ldrh w10, [x0]
add x0, x0, x1
- sxtw x10, w10
st1 {v0.16b}, [x4], #16 // 16 bytes store
dup v2.8h, w9
st1 {v0.16b}, [x4], x6 // 16 bytes store
ldrh w11, [x0]
add x0, x0, x1
- sxtw x11, w11
st1 {v2.16b}, [x4], #16 // 16 bytes store
dup v4.8h, w10
st1 {v2.16b}, [x4], x6 // 16 bytes store
diff --git a/common/armv8/ih264_resi_trans_quant_av8.s b/common/armv8/ih264_resi_trans_quant_av8.s
index 316c220..d2ba3cf 100644
--- a/common/armv8/ih264_resi_trans_quant_av8.s
+++ b/common/armv8/ih264_resi_trans_quant_av8.s
@@ -45,18 +45,6 @@
//* function name : ih264_resi_trans_quant_4x4
//* description : this function does cf4 of h264
//*
-//* arguments : x0 :pointer to src buffer
-// x1 :pointer to pred buffer
-// x2 :pointer to dst buffer
-// x3 :source stride
-// x4 :pred stride,
-// x5 :dst stride,
-// x6 :pointer to scaling matrix,
-// x7 :pointer to threshold matrix,
-// stack qbits,
-// rounding factor,
-// pointer to store nnz
-// pointer to store non quantized dc value
// values returned : none
//
// register usage :
@@ -77,34 +65,24 @@
.global ih264_resi_trans_quant_4x4_av8
ih264_resi_trans_quant_4x4_av8:
- //x0 :pointer to src buffer
- //x1 :pointer to pred buffer
- //x2 :pointer to dst buffer
- //x3 :source stride
- //x4 :pred stride
- //x5 :dst stride,
- //x6 :scale matirx,
- //x7 :threshold matrix
- // :qbits
- // :round factor
- // :nnz
- // :pointer to store non quantized dc value
push_v_regs
//x0 :pointer to src buffer
//x1 :pointer to pred buffer
//x2 :pointer to dst buffer
- //x3 :source stride
- //x4 :pred stride
- //x5 :scale matirx,
+ //w3 :source stride
+ //w4 :pred stride
+ //x5 :scale matrix,
//x6 :threshold matrix
- //x7 :qbits
- //x8 :round factor
+ //w7 :qbits
+ //w8 :round factor
//x9 :nnz
//x10 :pointer to store non quantized dc value
+ sxtw x3, w3
+ sxtw x4, w4
ldr w8, [sp, #64] //load round factor
ldr x10, [sp, #80] //load addres for non quant val
- neg x7, x7 //negate the qbit value for usiing lsl
+ neg w7, w7 //negate the qbit value for usiing lsl
ldr x9, [sp, #72]
//------------fucntion loading done----------------;
@@ -259,18 +237,6 @@ ih264_resi_trans_quant_4x4_av8:
//* description : this function does residue calculation, forward transform
//* and quantization for 4x4 chroma block.
//*
-//* arguments : x0 :pointer to src buffer
-// x1 :pointer to pred buffer
-// x2 :pointer to dst buffer
-// x3 :source stride
-// x4 :pred stride,
-// x5 :dst stride,
-// x6 :pointer to scaling matrix,
-// x7 :pointer to threshold matrix,
-// stack qbits,
-// rounding factor,
-// pointer to store nnz
-// pointer to store unquantized dc values
// values returned : none
//
// register usage :
@@ -290,33 +256,24 @@ ih264_resi_trans_quant_4x4_av8:
.global ih264_resi_trans_quant_chroma_4x4_av8
ih264_resi_trans_quant_chroma_4x4_av8:
- //x0 :pointer to src buffer
- //x1 :pointer to pred buffer
- //x2 :pointer to dst buffer
- //x3 :source stride
- //stack :pred stride
- // :scale matirx,
- // :threshold matrix
- // :qbits
- // :round factor
- // :nnz
- // :pu1_dc_alt_addr
push_v_regs
//x0 :pointer to src buffer
//x1 :pointer to pred buffer
//x2 :pointer to dst buffer
- //x3 :source stride
- //x4 :pred stride
+ //w3 :source stride
+ //w4 :pred stride
//x5 :scale matirx,
//x6 :threshold matrix
- //x7 :qbits
- //x8 :round factor
+ //w7 :qbits
+ //w8 :round factor
//x9 :nnz
//x10 :pointer to store non quantized dc value
+ sxtw x3, w3
+ sxtw x4, w4
ldr w8, [sp, #64] //load round factor
ldr x10, [sp, #80] //load addres for non quant val
- neg x7, x7 //negate the qbit value for usiing lsl
+ neg w7, w7 //negate the qbit value for usiing lsl
ldr x9, [sp, #72]
//------------fucntion loading done----------------;
@@ -485,10 +442,10 @@ ih264_resi_trans_quant_chroma_4x4_av8:
//* arguments : x0 :pointer to src buffer
// x1 :pointer to dst buffer
// x2 :pu2_scale_matrix
-// x2 :pu2_threshold_matrix
-// x3 :u4_qbits
-// x4 :u4_round_factor
-// x5 :pu1_nnz
+// x3 :pu2_threshold_matrix
+// w4 :u4_qbits
+// w5 :u4_round_factor
+// x6 :pu1_nnz
// values returned : none
//
// register usage :
@@ -516,8 +473,8 @@ ih264_hadamard_quant_4x4_av8:
//x1 :pointer to dst buffer
//x2 :pu2_scale_matrix
//x3 :pu2_threshold_matrix
-//x4 :u4_qbits
-//x5 :u4_round_factor
+//w4 :u4_qbits
+//w5 :u4_round_factor
//x6 :pu1_nnz
push_v_regs
@@ -632,10 +589,10 @@ ih264_hadamard_quant_4x4_av8:
//* arguments : x0 :pointer to src buffer
// x1 :pointer to dst buffer
// x2 :pu2_scale_matrix
-// x2 :pu2_threshold_matrix
-// x3 :u4_qbits
-// x4 :u4_round_factor
-// x5 :pu1_nnz
+// x3 :pu2_threshold_matrix
+// w4 :u4_qbits
+// w5 :u4_round_factor
+// x6 :pu1_nnz
// values returned : none
//
// register usage :
diff --git a/common/armv8/ih264_weighted_bi_pred_av8.s b/common/armv8/ih264_weighted_bi_pred_av8.s
index b039fba..475f690 100644
--- a/common/armv8/ih264_weighted_bi_pred_av8.s
+++ b/common/armv8/ih264_weighted_bi_pred_av8.s
@@ -103,28 +103,28 @@
// WORD32 src_strd1,
// WORD32 src_strd2,
// WORD32 dst_strd,
-// UWORD16 log_WD,
-// UWORD32 wt1,
-// UWORD32 wt2,
-// UWORD16 ofst1,
-// UWORD16 ofst2,
-// UWORD8 ht,
-// UWORD8 wd)
+// WORD32 log_WD,
+// WORD32 wt1,
+// WORD32 wt2,
+// WORD32 ofst1,
+// WORD32 ofst2,
+// WORD32 ht,
+// WORD32 wd)
//
//**************Variables Vs Registers*****************************************
// x0 => puc_src1
// x1 => puc_src2
// x2 => puc_dst
-// x3 => src_strd1
-// [sp] => src_strd2 (x4)
-// [sp+4] => dst_strd (x5)
-// [sp+8] => log_WD (x6)
-// [sp+12] => wt1 (x7)
-// [sp+16] => wt2 (x8)
-// [sp+20] => ofst1 (x9)
-// [sp+24] => ofst2 (x10)
-// [sp+28] => ht (x11)
-// [sp+32] => wd (x12)
+// w3 => src_strd1
+// w4 => src_strd2
+// w5 => dst_strd
+// w6 => log_WD
+// w7 => wt1
+// [sp] => wt2 (w8)
+// [sp+8] => ofst1 (w9)
+// [sp+16] => ofst2 (w10)
+// [sp+24] => ht (w11)
+// [sp+32] => wd (w12)
//
.text
.p2align 2
@@ -138,21 +138,23 @@ ih264_weighted_bi_pred_luma_av8:
// STMFD sp!, {x4-x12,x14} //stack stores the values of the arguments
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
stp x19, x20, [sp, #-16]!
- ldr x8, [sp, #80] //Load wt2 in x8
- ldr x9, [sp, #88] //Load ofst1 in x9
- add x6, x6, #1 //x6 = log_WD + 1
- sub x20, x6, #0 //x13 = -(log_WD + 1)
- neg x10, x20
+ ldr w8, [sp, #80] //Load wt2 in w8
+ ldr w9, [sp, #88] //Load ofst1 in w9
+ add w6, w6, #1 //w6 = log_WD + 1
+ neg w10, w6 //w10 = -(log_WD + 1)
dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit)
- ldr x10, [sp, #96] //Load ofst2 in x10
- ldr x11, [sp, #104] //Load ht in x11
- ldr x12, [sp, #112] //Load wd in x12
- add x9, x9, #1 //x9 = ofst1 + 1
- add x9, x9, x10 //x9 = ofst1 + ofst2 + 1
+ ldr w10, [sp, #96] //Load ofst2 in w10
+ ldr w11, [sp, #104] //Load ht in w11
+ ldr w12, [sp, #112] //Load wd in w12
+ add w9, w9, #1 //w9 = ofst1 + 1
+ add w9, w9, w10 //w9 = ofst1 + ofst2 + 1
mov v2.s[0], w7
mov v2.s[1], w8 //D2 = {wt1(32-bit), wt2(32-bit)}
- asr x9, x9, #1 //x9 = ofst = (ofst1 + ofst2 + 1) >> 1
+ asr w9, w9, #1 //w9 = ofst = (ofst1 + ofst2 + 1) >> 1
dup v3.8b, w9 //D3 = ofst (8-bit)
cmp w12, #16
beq loop_16 //branch if wd is 16
@@ -383,28 +385,28 @@ end_loops:
// WORD32 src_strd1,
// WORD32 src_strd2,
// WORD32 dst_strd,
-// UWORD16 log_WD,
-// UWORD32 wt1,
-// UWORD32 wt2,
-// UWORD16 ofst1,
-// UWORD16 ofst2,
-// UWORD8 ht,
-// UWORD8 wd)
+// WORD32 log_WD,
+// WORD32 wt1,
+// WORD32 wt2,
+// WORD32 ofst1,
+// WORD32 ofst2,
+// WORD32 ht,
+// WORD32 wd)
//
//**************Variables Vs Registers*****************************************
// x0 => puc_src1
// x1 => puc_src2
// x2 => puc_dst
-// x3 => src_strd1
-// [sp] => src_strd2 (x4)
-// [sp+4] => dst_strd (x5)
-// [sp+8] => log_WD (x6)
-// [sp+12] => wt1 (x7)
-// [sp+16] => wt2 (x8)
-// [sp+20] => ofst1 (x9)
-// [sp+24] => ofst2 (x10)
-// [sp+28] => ht (x11)
-// [sp+32] => wd (x12)
+// w3 => src_strd1
+// w4 => src_strd2
+// w5 => dst_strd
+// w6 => log_WD
+// w7 => wt1
+// [sp] => wt2 (w8)
+// [sp+8] => ofst1 (w9)
+// [sp+16] => ofst2 (w10)
+// [sp+24] => ht (w11)
+// [sp+32] => wd (w12)
//
@@ -417,24 +419,22 @@ ih264_weighted_bi_pred_chroma_av8:
// STMFD sp!, {x4-x12,x14} //stack stores the values of the arguments
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
+ sxtw x5, w5
stp x19, x20, [sp, #-16]!
- ldr x8, [sp, #80] //Load wt2 in x8
+ ldr w8, [sp, #80] //Load wt2 in w8
dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit)
dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit)
- add x6, x6, #1 //x6 = log_WD + 1
- ldr w9, [sp, #88] //Load ofst1 in x9
- sxtw x9, w9
- ldr w10, [sp, #96] //Load ofst2 in x10
- sxtw x10, w10
- sub x20, x6, #0 //x12 = -(log_WD + 1)
- neg x20, x20
+ add w6, w6, #1 //w6 = log_WD + 1
+ ldr w9, [sp, #88] //Load ofst1 in w9
+ ldr w10, [sp, #96] //Load ofst2 in w10
+ neg w20, w6 //w20 = -(log_WD + 1)
dup v0.8h, w20 //Q0 = -(log_WD + 1) (16-bit)
ldr w11, [sp, #104] //Load ht in x11
ldr w12, [sp, #112] //Load wd in x12
- sxtw x11, w11
- sxtw x12, w12
dup v20.8h, w9 //0ffset1
dup v21.8h, w10 //0ffset2
srhadd v6.8b, v20.8b, v21.8b
diff --git a/common/armv8/ih264_weighted_pred_av8.s b/common/armv8/ih264_weighted_pred_av8.s
index 69ed3b0..f145217 100644
--- a/common/armv8/ih264_weighted_pred_av8.s
+++ b/common/armv8/ih264_weighted_pred_av8.s
@@ -89,22 +89,22 @@
// UWORD8 *puc_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
-// UWORD8 log_WD,
-// UWORD32 wt,
-// UWORD16 ofst,
-// UWORD8 ht,
-// UWORD8 wd)
+// WORD32 log_WD,
+// WORD32 wt,
+// WORD32 ofst,
+// WORD32 ht,
+// WORD32 wd)
//
//**************Variables Vs Registers*****************************************
// x0 => puc_src
// x1 => puc_dst
-// x2 => src_strd
-// x3 => dst_strd
-// [sp] => log_WD (x4)
-// [sp+4] => wt (x5)
-// [sp+8] => ofst (x6)
-// [sp+12] => ht (x7)
-// [sp+16] => wd (x8)
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => log_WD
+// w5 => wt
+// w6 => ofst
+// w7 => ht
+// [sp] => wd (w8)
//
.text
.p2align 2
@@ -118,13 +118,14 @@ ih264_weighted_pred_luma_av8:
// STMFD sp!, {x4-x9,x14} //stack stores the values of the arguments
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
stp x19, x20, [sp, #-16]!
ldr w8, [sp, #80] //Load wd
sxtw x8, w8
dup v2.4h, w5 //D2 = wt (16-bit)
- sub x20, x4, #0 //x9 = -log_WD
- neg x9, x20
+ neg w9, w4 //w9 = -log_WD
dup v3.8b, w6 //D3 = ofst (8-bit)
cmp w8, #16 //check if wd is 16
dup v0.8h, w9 //Q0 = -log_WD (16-bit)
@@ -318,22 +319,22 @@ end_loops:
// UWORD8 *puc_dst,
// WORD32 src_strd,
// WORD32 dst_strd,
-// UWORD8 log_WD,
-// UWORD32 wt,
-// UWORD16 ofst,
-// UWORD8 ht,
-// UWORD8 wd)
+// WORD32 log_WD,
+// WORD32 wt,
+// WORD32 ofst,
+// WORD32 ht,
+// WORD32 wd)
//
//**************Variables Vs Registers*****************************************
// x0 => puc_src
// x1 => puc_dst
-// x2 => src_strd
-// x3 => dst_strd
-// [sp] => log_WD (x4)
-// [sp+4] => wt (x5)
-// [sp+8] => ofst (x6)
-// [sp+12] => ht (x7)
-// [sp+16] => wd (x8)
+// w2 => src_strd
+// w3 => dst_strd
+// w4 => log_WD
+// w5 => wt
+// w6 => ofst
+// w7 => ht
+// [sp] => wd (w8)
//
@@ -345,13 +346,14 @@ ih264_weighted_pred_chroma_av8:
// STMFD sp!, {x4-x9,x14} //stack stores the values of the arguments
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
stp x19, x20, [sp, #-16]!
ldr w8, [sp, #80] //Load wd
sxtw x8, w8
- sub x20, x4, #0 //x9 = -log_WD
- neg x9, x20
+ neg w9, w4 //w9 = -log_WD
dup v2.4s, w5 //Q1 = {wt_u (16-bit), wt_v (16-bit)}
diff --git a/decoder.arm64.mk b/decoder.arm64.mk
index 2140b94..5ccf70f 100644
--- a/decoder.arm64.mk
+++ b/decoder.arm64.mk
@@ -6,7 +6,6 @@ libavcd_inc_dir_arm64 += $(LOCAL_PATH)/common/armv8
libavcd_srcs_c_arm64 += decoder/arm/ih264d_function_selector.c
-ifeq ($(ARCH_ARM_HAVE_NEON),true)
libavcd_srcs_c_arm64 += decoder/arm/ih264d_function_selector_av8.c
libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_chroma_av8.s
@@ -34,11 +33,6 @@ libavcd_srcs_asm_arm64 += common/armv8/ih264_ihadamard_scaling_av8.s
libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_8x8_av8.s
libavcd_cflags_arm64 += -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC
-else
-libavcd_cflags_arm64 += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
-endif
-
-
LOCAL_SRC_FILES_arm64 += $(libavcd_srcs_c_arm64) $(libavcd_srcs_asm_arm64)
diff --git a/decoder/ih264d_api.c b/decoder/ih264d_api.c
index 2cde456..01deff0 100644
--- a/decoder/ih264d_api.c
+++ b/decoder/ih264d_api.c
@@ -1858,8 +1858,16 @@ WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
}
- if(ps_dec->u1_flushfrm && ps_dec->u1_init_dec_flag)
+ if(ps_dec->u1_flushfrm)
{
+ if(ps_dec->u1_init_dec_flag == 0)
+ {
+ /*Come out of flush mode and return*/
+ ps_dec->u1_flushfrm = 0;
+ return (IV_FAIL);
+ }
+
+
ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer,
&(ps_dec->s_disp_op));
@@ -2634,6 +2642,9 @@ WORD32 ih264d_set_flush_mode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op
ps_ctl_op->u4_error_code = 0;
+ /* Ignore dangling fields during flush */
+ ps_dec->u1_top_bottom_decoded = 0;
+
return IV_SUCCESS;
}
@@ -3031,40 +3042,30 @@ WORD32 ih264d_set_params(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
}
}
- if((0 != ps_dec->u4_app_disp_width)
- && (ps_ctl_ip->u4_disp_wd
- != ps_dec->u4_app_disp_width))
+ if(ps_ctl_ip->u4_disp_wd >= ps_dec->u2_pic_wd)
{
- ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
- ps_ctl_op->u4_error_code |= ERROR_DISP_WIDTH_INVALID;
- ret = IV_FAIL;
+ ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd;
+ }
+ else if(0 == ps_dec->i4_header_decoded)
+ {
+ ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd;
+ }
+ else if(ps_ctl_ip->u4_disp_wd == 0)
+ {
+ ps_dec->u4_app_disp_width = 0;
}
else
{
- if(ps_ctl_ip->u4_disp_wd >= ps_dec->u2_pic_wd)
- {
- ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd;
- }
- else if(0 == ps_dec->i4_header_decoded)
- {
- ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd;
- }
- else if(ps_ctl_ip->u4_disp_wd == 0)
- {
- ps_dec->u4_app_disp_width = 0;
- }
- else
- {
- /*
- * Set the display width to zero. This will ensure that the wrong value we had stored (0xFFFFFFFF)
- * does not propogate.
- */
- ps_dec->u4_app_disp_width = 0;
- ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
- ps_ctl_op->u4_error_code |= ERROR_DISP_WIDTH_INVALID;
- ret = IV_FAIL;
- }
+ /*
+ * Set the display width to zero. This will ensure that the wrong value we had stored (0xFFFFFFFF)
+ * does not propogate.
+ */
+ ps_dec->u4_app_disp_width = 0;
+ ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
+ ps_ctl_op->u4_error_code |= ERROR_DISP_WIDTH_INVALID;
+ ret = IV_FAIL;
}
+
if(ps_ctl_ip->e_vid_dec_mode == IVD_DECODE_FRAME)
ps_dec->i4_decode_header = 0;
else if(ps_ctl_ip->e_vid_dec_mode == IVD_DECODE_HEADER)
diff --git a/decoder/ih264d_dpb_mgr.c b/decoder/ih264d_dpb_mgr.c
index e02cc90..453dcab 100644
--- a/decoder/ih264d_dpb_mgr.c
+++ b/decoder/ih264d_dpb_mgr.c
@@ -17,9 +17,10 @@
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
+#ifdef __ANDROID__
#include "log/log.h"
#include <cutils/log.h>
-
+#endif
#include "ih264_typedefs.h"
#include "ih264_macros.h"
#include "ih264_platform_macros.h"
@@ -888,8 +889,10 @@ WORD32 ih264d_read_mmco_commands(struct _DecStruct * ps_dec)
{
if (j >= MAX_REF_BUFS)
{
+#ifdef __ANDROID__
ALOGE("b/25818142");
android_errorWriteLog(0x534e4554, "25818142");
+#endif
ps_dpb_cmds->u1_num_of_commands = 0;
return -1;
}
diff --git a/decoder/ih264d_parse_headers.c b/decoder/ih264d_parse_headers.c
index 2694114..d8c37a6 100644
--- a/decoder/ih264d_parse_headers.c
+++ b/decoder/ih264d_parse_headers.c
@@ -484,7 +484,7 @@ WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm)
UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
UWORD8 u1_frm, uc_constraint_set0_flag, uc_constraint_set1_flag;
-
+ WORD32 i4_cropped_ht, i4_cropped_wd;
UWORD32 u4_temp;
WORD32 pic_height_in_map_units_minus1 = 0;
UWORD32 u2_pic_wd = 0;
@@ -564,10 +564,11 @@ WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm)
/*--------------------------------------------------------------------*/
ps_seq = ps_dec->pv_scratch_sps_pps;
- *ps_seq = ps_dec->ps_sps[u1_seq_parameter_set_id];
+ if(ps_dec->i4_header_decoded & 1)
+ {
+ *ps_seq = *ps_dec->ps_cur_sps;
+ }
- if(NULL == ps_dec->ps_cur_sps)
- ps_dec->ps_cur_sps = ps_seq;
if((ps_dec->i4_header_decoded & 1) && (ps_seq->u1_profile_idc != u1_profile_idc))
{
@@ -863,7 +864,6 @@ WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm)
UWORD16 u2_btm_ofst = 0;
UWORD8 u1_frm_mbs_flag;
UWORD8 u1_vert_mult_factor;
- WORD32 i4_cropped_ht, i4_cropped_wd;
if(u1_frame_cropping_flag)
{
@@ -925,10 +925,6 @@ WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm)
return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
}
- ps_dec->u2_disp_height = i4_cropped_ht;
-
- ps_dec->u2_disp_width = i4_cropped_wd;
-
}
/* Backup u4_num_reorder_frames if header is already decoded */
@@ -960,6 +956,19 @@ WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm)
return IVD_RES_CHANGED;
}
+ /* In case bitstream read has exceeded the filled size, then
+ return an error */
+ if (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
+ {
+ return ERROR_INV_SPS_PPS_T;
+ }
+
+ /*--------------------------------------------------------------------*/
+ /* All initializations to ps_dec are beyond this point */
+ /*--------------------------------------------------------------------*/
+ ps_dec->u2_disp_height = i4_cropped_ht;
+ ps_dec->u2_disp_width = i4_cropped_wd;
+
ps_dec->u2_pic_wd = u2_pic_wd;
ps_dec->u2_pic_ht = u2_pic_ht;
@@ -978,14 +987,9 @@ WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm)
ps_dec->u2_crop_offset_y = u2_crop_offset_y;
ps_dec->u2_crop_offset_uv = u2_crop_offset_uv;
- /* In case bitstream read has exceeded the filled size, then
- return an error */
- if(ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
- {
- return ERROR_INV_SPS_PPS_T;
- }
ps_seq->u1_is_valid = TRUE;
ps_dec->ps_sps[u1_seq_parameter_set_id] = *ps_seq;
+ ps_dec->ps_cur_sps = &ps_dec->ps_sps[u1_seq_parameter_set_id];
return OK;
}
diff --git a/decoder/ih264d_parse_islice.c b/decoder/ih264d_parse_islice.c
index 504b775..46a87d1 100644
--- a/decoder/ih264d_parse_islice.c
+++ b/decoder/ih264d_parse_islice.c
@@ -509,9 +509,7 @@ WORD32 ih264d_parse_imb_cabac(dec_struct_t * ps_dec,
MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
*((UWORD32 *)ps_dec->pi1_left_ref_idx_ctxt_inc) = 0;
MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
- pi1_buf = p_curr_ctxt->i1_ref_idx;
- pi4_buf = (WORD32 *)pi1_buf;
- *pi4_buf = 0;
+ memset(p_curr_ctxt->i1_ref_idx, 0, 4);
}
if(u1_mb_type == I_4x4_MB)
diff --git a/decoder/ih264d_parse_mb_header.c b/decoder/ih264d_parse_mb_header.c
index f30ad67..9a6a1f9 100644
--- a/decoder/ih264d_parse_mb_header.c
+++ b/decoder/ih264d_parse_mb_header.c
@@ -1172,7 +1172,6 @@ void ih264d_get_mvd_cabac(UWORD8 u1_sub_mb,
/***************************************************************/
/* Store abs_mvd_values cabac contexts */
/***************************************************************/
-#ifndef ARM
{
UWORD8 u1_i;
for(u1_i = 0; u1_i < u1_part_wd; u1_i++, pu1_top_mv_ctxt += 4)
@@ -1187,46 +1186,6 @@ void ih264d_get_mvd_cabac(UWORD8 u1_sub_mb,
pu1_lft_mv_ctxt[1] = u1_abs_mvd_y;
}
}
-#else
- /* Optimising the loop, with Little-Endian Assumption */
- {
- UWORD16 *pu2_top_cxt = (UWORD16 *)pu1_top_mv_ctxt;
- UWORD16 *pu2_lft_cxt = (UWORD16 *)pu1_lft_mv_ctxt;
- UWORD16 u2_pack_mvd = (UWORD16)((u1_abs_mvd_y << 8) | u1_abs_mvd_x);
- UWORD8 u1_wd = u1_part_wd, u1_ht = u1_part_ht;
-
- u1_wd--;
- *pu2_top_cxt = u2_pack_mvd;
- pu2_top_cxt += 2;
- if(u1_wd)
- {
- u1_wd--;
- *pu2_top_cxt = u2_pack_mvd;
- pu2_top_cxt += 2;
- }
- if(u1_wd)
- {
- *pu2_top_cxt = u2_pack_mvd;
- pu2_top_cxt += 2;
- *pu2_top_cxt = u2_pack_mvd;
- }
- u1_ht--;
- *pu2_lft_cxt = u2_pack_mvd;
- pu2_lft_cxt += 2;
- if(u1_ht)
- {
- u1_ht--;
- *pu2_lft_cxt = u2_pack_mvd;
- pu2_lft_cxt += 2;
- }
- if(u1_ht)
- {
- *pu2_lft_cxt = u2_pack_mvd;
- pu2_lft_cxt += 2;
- *pu2_lft_cxt = u2_pack_mvd;
- }
- }
-#endif
}
/*****************************************************************************/
diff --git a/decoder/ih264d_structs.h b/decoder/ih264d_structs.h
index c83c34e..6958a0c 100644
--- a/decoder/ih264d_structs.h
+++ b/decoder/ih264d_structs.h
@@ -1055,7 +1055,6 @@ typedef struct _DecStruct
prev_seq_params_t s_prev_seq_params;
UWORD8 u1_cur_mb_fld_dec_flag; /* current Mb fld or Frm */
- WORD8 pi1_left_pred_mode[8];
UWORD8 u1_topleft_mb_fld;
UWORD8 u1_topleft_mbtype;
UWORD8 u1_topleft_mb_fld_bot;
@@ -1065,6 +1064,9 @@ typedef struct _DecStruct
UWORD16 u2_top_left_mask;
UWORD16 u2_top_right_mask;
dec_err_status_t * ps_dec_err_status;
+ /* Ensure pi1_left_pred_mode is aligned to 4 byte boundary,
+ by declaring this after a pointer or an integer */
+ WORD8 pi1_left_pred_mode[8];
UWORD8 u1_mb_idx_mv;
UWORD16 u2_mv_2mb[2];
diff --git a/decoder/ih264d_utils.c b/decoder/ih264d_utils.c
index 4f6deca..4437832 100644
--- a/decoder/ih264d_utils.c
+++ b/decoder/ih264d_utils.c
@@ -1893,6 +1893,10 @@ WORD16 ih264d_allocate_dynamic_bufs(dec_struct_t * ps_dec)
RETURN_IF((NULL == pv_buf), IV_FAIL);
ps_dec->p_ctxt_inc_mb_map = pv_buf;
+ /* The 0th entry of CtxtIncMbMap always contains the default values
+ for the CABAC context representing an MB that is not available */
+ ps_dec->p_ctxt_inc_mb_map += 1;
+
size = (sizeof(mv_pred_t) * ps_dec->u1_recon_mb_grp
* 16);
pv_buf = ps_dec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
@@ -2073,9 +2077,6 @@ WORD16 ih264d_allocate_dynamic_bufs(dec_struct_t * ps_dec)
RETURN_IF((NULL == pv_buf), IV_FAIL);
ps_dec->pu1_pic_buf_base = pv_buf;
- /* 0th entry of CtxtIncMbMap will be always be containing default values
- for CABAC context representing MB not available */
- ps_dec->p_ctxt_inc_mb_map += 1;
/* Post allocation Increment Actions */
/***************************************************************************/
diff --git a/encoder.arm64.mk b/encoder.arm64.mk
index f95a29f..73cce1b 100644
--- a/encoder.arm64.mk
+++ b/encoder.arm64.mk
@@ -7,7 +7,6 @@ libavce_inc_dir_arm64 += $(LOCAL_PATH)/common/armv8
libavce_srcs_c_arm64 += encoder/arm/ih264e_function_selector.c
-ifeq ($(ARCH_ARM_HAVE_NEON),true)
libavce_srcs_c_arm64 += encoder/arm/ih264e_function_selector_av8.c
libavce_srcs_asm_arm64 += common/armv8/ih264_resi_trans_quant_av8.s
@@ -35,12 +34,6 @@ libavce_srcs_asm_arm64 += encoder/armv8/ih264e_half_pel_av8.s
#ME
libavce_srcs_asm_arm64 += encoder/armv8/ime_distortion_metrics_av8.s
-else
-libavce_cflags_arm64 += -DDISABLE_NEON
-endif
-
-
-
LOCAL_SRC_FILES_arm64 += $(libavce_srcs_c_arm64) $(libavce_srcs_asm_arm64)
LOCAL_C_INCLUDES_arm64 += $(libavce_inc_dir_arm64)
diff --git a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
index df06d41..c23a6ea 100644
--- a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
+++ b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
@@ -82,9 +82,9 @@ ih264e_evaluate_intra16x16_modes_av8:
//x0 = pu1_src,
//x1 = pu1_ngbr_pels_i16,
//x2 = pu1_dst,
-//x3 = src_strd,
-//x4 = dst_strd,
-//x5 = u4_n_avblty,
+//w3 = src_strd,
+//w4 = dst_strd,
+//w5 = u4_n_avblty,
//x6 = u4_intra_mode,
//x7 = pu4_sadmin
@@ -92,9 +92,11 @@ ih264e_evaluate_intra16x16_modes_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
stp x19, x20, [sp, #-16]!
- ldr x16, [sp, #80]
+ ldr w16, [sp, #80]
mov x17, x4
mov x14, x6
mov x15, x7
@@ -105,13 +107,13 @@ ih264e_evaluate_intra16x16_modes_av8:
mov w10, #0
mov w11 , #3
- ands x6, x5, #0x01
+ ands w6, w5, #0x01
beq top_available //LEFT NOT AVAILABLE
ld1 {v0.16b}, [x1]
add w10, w10, #8
add w11, w11, #1
top_available:
- ands x6, x5, #0x04
+ ands w6, w5, #0x04
beq none_available
add x6, x1, #17
ld1 {v1.16b}, [x6]
@@ -119,7 +121,7 @@ top_available:
add w11, w11, #1
b summation
none_available:
- cmp x5, #0
+ cmp w5, #0
bne summation
mov w6, #128
dup v30.16b, w6
@@ -469,16 +471,16 @@ sad_comp:
mov x11, #1
lsl x11, x11, #30
- mov x0, x16
+ mov w0, w16
//--------------------------------------------
- ands x7, x0, #01 // vert mode valid????????????
+ ands w7, w0, #01 // vert mode valid????????????
csel x8, x11, x8, eq
- ands x6, x0, #02 // horz mode valid????????????
+ ands w6, w0, #02 // horz mode valid????????????
csel x9, x11, x9, eq
- ands x6, x0, #04 // dc mode valid????????????
+ ands w6, w0, #04 // dc mode valid????????????
csel x10, x11, x10, eq
diff --git a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
index bb2526d..4014c4f 100644
--- a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
+++ b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
@@ -82,9 +82,9 @@ ih264e_evaluate_intra_chroma_modes_av8:
//x0 = pu1_src,
//x1 = pu1_ngbr_pels_i16,
//x2 = pu1_dst,
-//x3 = src_strd,
-//x4 = dst_strd,
-//x5 = u4_n_avblty,
+//w3 = src_strd,
+//w4 = dst_strd,
+//w5 = u4_n_avblty,
//x6 = u4_intra_mode,
//x7 = pu4_sadmin
@@ -92,20 +92,22 @@ ih264e_evaluate_intra_chroma_modes_av8:
// STMFD sp!, {x4-x12, x14} //store register values to stack
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
stp x19, x20, [sp, #-16]!
//-----------------------
- ldr x16, [sp, #80]
+ ldr w16, [sp, #80]
mov x17, x4
- mov x18, x5
+ mov w18, w5
mov x14, x6
mov x15, x7
- mov x19, #5
- ands x6, x5, x19
+ mov w19, #5
+ ands w6, w5, w19
beq none_available
- cmp x6, #1
+ cmp w6, #1
beq left_only_available
- cmp x6, #4
+ cmp w6, #4
beq top_only_available
all_available:
@@ -368,20 +370,20 @@ sad_comp:
mov x11, #1
//-----------------------
- mov x0, x16 // u4_valid_intra_modes
+ mov w0, w16 // u4_valid_intra_modes
//--------------------------------------------
lsl x11, x11, #30
- ands x7, x0, #04 // vert mode valid????????????
+ ands w7, w0, #04 // vert mode valid????????????
csel x8, x11, x8, eq
- ands x6, x0, #02 // horz mode valid????????????
+ ands w6, w0, #02 // horz mode valid????????????
csel x9, x11, x9, eq
- ands x6, x0, #01 // dc mode valid????????????
+ ands w6, w0, #01 // dc mode valid????????????
csel x10, x11, x10, eq
diff --git a/encoder/armv8/ih264e_half_pel_av8.s b/encoder/armv8/ih264e_half_pel_av8.s
index 8f27104..cdac8da 100644
--- a/encoder/armv8/ih264e_half_pel_av8.s
+++ b/encoder/armv8/ih264e_half_pel_av8.s
@@ -86,6 +86,8 @@
ih264e_sixtapfilter_horz_av8:
// STMFD sp!,{x14}
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
stp x19, x20, [sp, #-16]!
movi v0.8b, #5
@@ -263,6 +265,8 @@ filter_horz_loop:
ih264e_sixtap_filter_2dvh_vert_av8:
// STMFD sp!,{x10,x11,x12,x14}
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
stp x19, x20, [sp, #-16]!
////x0 - pu1_ref
diff --git a/encoder/armv8/ime_distortion_metrics_av8.s b/encoder/armv8/ime_distortion_metrics_av8.s
index 47c3425..00d11c0 100644
--- a/encoder/armv8/ime_distortion_metrics_av8.s
+++ b/encoder/armv8/ime_distortion_metrics_av8.s
@@ -95,6 +95,8 @@
.global ime_compute_sad_16x16_fast_av8
ime_compute_sad_16x16_fast_av8:
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
lsl x2, x2, #1
lsl x3, x3, #1
@@ -179,6 +181,8 @@ ime_compute_sad_16x8_av8:
//chheck what stride incremtn to use
//earlier code did not have this lsl
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
mov x6, #2
movi v30.8h, #0
@@ -255,6 +259,8 @@ core_loop_ime_compute_sad_16x8_av8:
ime_compute_sad_16x16_ea8_av8:
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
movi v30.8h, #0
add x7, x0, x2
@@ -381,9 +387,12 @@ ime_calculate_sad2_prog_av8:
// x0 = ref1 <UWORD8 *>
// x1 = ref2 <UWORD8 *>
// x2 = src <UWORD8 *>
- // x3 = RefBufferWidth <UWORD32>
- // stack = CurBufferWidth <UWORD32>, psad <UWORD32 *>
+ // w3 = RefBufferWidth <UWORD32>
+ // w4 = CurBufferWidth <UWORD32>
+ // x5 = psad <UWORD32 *>
push_v_regs
+ sxtw x3, w3
+ sxtw x4, w4
mov x6, #8
movi v30.8h, #0
movi v31.8h, #0
@@ -459,16 +468,15 @@ ime_calculate_sad3_prog_av8:
// x1 = ref2 <UWORD8 *>
// x2 = ref3 <UWORD8 *>
// x3 = src <UWORD8 *>
- // stack = RefBufferWidth <UWORD32>, CurBufferWidth <UWORD32>, psad <UWORD32 *>
+ // w4 = RefBufferWidth <UWORD32>
+ // w5 = CurBufferWidth <UWORD32>
+ // x6 = psad <UWORD32 *>
- // x0 = ref1 <UWORD8 *>
- // x1 = ref2 <UWORD8 *>
- // x2 = src <UWORD8 *>
- // x3 = RefBufferWidth <UWORD32>
- // stack = CurBufferWidth <UWORD32>, psad <UWORD32 *>
push_v_regs
- mov x6, #16
+ sxtw x4, w4
+ sxtw x5, w5
+ mov x7, #16
movi v29.8h, #0
movi v30.8h, #0
movi v31.8h, #0
@@ -499,15 +507,15 @@ core_loop_ime_calculate_sad3_prog_av8:
uabal v31.8h, v6.8b, v7.8b
uabal2 v31.8h, v6.16b, v7.16b
- subs x6, x6, #1
- bne core_loop_ime_calculate_sad2_prog_av8
+ subs x7, x7, #1
+ bne core_loop_ime_calculate_sad3_prog_av8
addp v30.8h, v30.8h, v31.8h
uaddlp v30.4s, v30.8h
addp v30.2s, v30.2s, v30.2s
shl v30.2s, v30.2s, #1
- st1 {v30.2s}, [x5]
+ st1 {v30.2s}, [x6]
pop_v_regs
ret
@@ -544,6 +552,8 @@ core_loop_ime_calculate_sad3_prog_av8:
.global ime_sub_pel_compute_sad_16x16_av8
ime_sub_pel_compute_sad_16x16_av8:
push_v_regs
+ sxtw x4, w4
+ sxtw x5, w5
sub x7, x1, #1 //x left
sub x8, x2, x5 //y top
sub x9, x3, #1 //xy left
@@ -647,6 +657,8 @@ core_loop_ime_sub_pel_compute_sad_16x16_av8:
.global ime_compute_sad_16x16_av8
ime_compute_sad_16x16_av8:
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
mov x6, #4
movi v30.8h, #0
@@ -702,6 +714,8 @@ core_loop_ime_compute_sad_16x16_av8:
.global ime_calculate_sad4_prog_av8
ime_calculate_sad4_prog_av8:
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
sub x5, x0, #1 //left
add x6, x0, #1 //right
sub x7, x0, x2 //top
@@ -777,13 +791,15 @@ core_loop_ime_calculate_sad4_prog_av8:
ime_compute_satqd_16x16_lumainter_av8:
//x0 :pointer to src buffer
//x1 :pointer to est buffer
- //x2 :Source stride
- //x3 :Pred stride
+ //w2 :Source stride
+ //w3 :Pred stride
//x4 :Threshold pointer
//x5 :Distortion,ie SAD
//x6 :is nonzero
//x7 :loop counter
push_v_regs
+ sxtw x2, w2
+ sxtw x3, w3
stp d8, d9, [sp, #-16]!
stp d10, d11, [sp, #-16]!
stp d12, d13, [sp, #-16]!
diff --git a/encoder/ih264e_api.c b/encoder/ih264e_api.c
index e0c9f83..2ecfdf5 100644
--- a/encoder/ih264e_api.c
+++ b/encoder/ih264e_api.c
@@ -3823,7 +3823,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
UWORD8 *pu1_buf = ps_mem_rec->pv_base;
/* size of header data of 1 mb */
- size = 40;
+ size = sizeof(mb_hdr_t);
/* size for 1 row of mbs */
size = size * max_mb_cols;
diff --git a/encoder/ih264e_cabac_encode.c b/encoder/ih264e_cabac_encode.c
index ecc30f5..e49ab58 100644
--- a/encoder/ih264e_cabac_encode.c
+++ b/encoder/ih264e_cabac_encode.c
@@ -339,7 +339,7 @@ static void ih264e_cabac_enc_4x4mb_modes(cabac_ctxt_t *ps_cabac_ctxt,
for (i = 0; i < 16; i += 2)
{
/* sub blk idx 1 */
- byte = *pu1_intra_4x4_modes++;
+ byte = pu1_intra_4x4_modes[i >> 1];
if (byte & 0x1)
{
ih264e_cabac_encode_bin(ps_cabac_ctxt,
@@ -1540,14 +1540,14 @@ static void ih264e_cabac_enc_mvds_b16x16(cabac_ctxt_t *ps_cabac_ctxt,
u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[1];
u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[0];
u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[1];
- u2_mv = *(pi2_mv_ptr++);
+ u2_mv = pi2_mv_ptr[0];
ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_X,
(UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b),
ps_cabac_ctxt);
u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv));
- u2_mv = *(pi2_mv_ptr++);
+ u2_mv = pi2_mv_ptr[1];
ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_Y,
(UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b),
@@ -1555,6 +1555,7 @@ static void ih264e_cabac_enc_mvds_b16x16(cabac_ctxt_t *ps_cabac_ctxt,
u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv));
}
+
/***************************************************************/
/* Store abs_mvd_values cabac contexts */
/***************************************************************/
@@ -1571,14 +1572,14 @@ static void ih264e_cabac_enc_mvds_b16x16(cabac_ctxt_t *ps_cabac_ctxt,
u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[3];
u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[2];
u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[3];
- u2_mv = *(pi2_mv_ptr++);
+ u2_mv = pi2_mv_ptr[2];
ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_X,
(UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b),
ps_cabac_ctxt);
u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv));
- u2_mv = *(pi2_mv_ptr++);
+ u2_mv = pi2_mv_ptr[3];
ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_Y,
(UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b),
@@ -1624,11 +1625,11 @@ IH264E_ERROR_T ih264e_write_islice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
/* packed header data */
UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+ mb_hdr_common_t *ps_mb_hdr = (mb_hdr_common_t *)ps_ent_ctxt->pv_mb_header_data;
mb_info_ctxt_t *ps_curr_ctxt;
WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
WORD8 mb_qp_delta;
UWORD32 u4_cbp_l, u4_cbp_c;
- WORD32 byte_count = 0;
WORD32 bitstream_start_offset, bitstream_end_offset;
if ((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB)
@@ -1638,12 +1639,10 @@ IH264E_ERROR_T ih264e_write_islice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
}
/* mb header info */
- mb_tpm = *pu1_byte++;
- byte_count++;
- cbp = *pu1_byte++;
- byte_count++;
- mb_qp_delta = *pu1_byte++;
- byte_count++;
+ mb_tpm = ps_mb_hdr->u1_mb_type_mode;
+ cbp = ps_mb_hdr->u1_cbp;
+ mb_qp_delta = ps_mb_hdr->u1_mb_qp_delta;
+
/* mb type */
mb_type = mb_tpm & 0xF;
@@ -1671,9 +1670,10 @@ IH264E_ERROR_T ih264e_write_islice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
MB_TYPE_I_SLICE);
if (mb_type == I4x4)
- { /* Encode 4x4 MB modes */
- ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, pu1_byte);
- byte_count += 8;
+ {
+ /* Encode 4x4 MB modes */
+ mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = (mb_hdr_i4x4_t *)ps_ent_ctxt->pv_mb_header_data;
+ ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, ps_mb_hdr_i4x4->au1_sub_blk_modes);
}
/* Encode chroma mode */
ih264e_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt);
@@ -1731,17 +1731,18 @@ IH264E_ERROR_T ih264e_write_islice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
memset(ps_curr_ctxt->u1_mv, 0, 16);
memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_cbp = cbp;
- ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+
if (mb_type == I16x16)
{
ps_curr_ctxt->u1_mb_type = CAB_I16x16;
-
+ pu1_byte += sizeof(mb_hdr_i16x16_t);
}
else
{
ps_curr_ctxt->u1_mb_type = CAB_I4x4;
-
+ pu1_byte += sizeof(mb_hdr_i4x4_t);
}
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
return IH264E_SUCCESS;
}
@@ -1778,8 +1779,8 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
WORD8 mb_qp_delta;
UWORD32 u4_cbp_l, u4_cbp_c;
- WORD32 byte_count = 0;
UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+ mb_hdr_common_t *ps_mb_hdr = (mb_hdr_common_t *)ps_ent_ctxt->pv_mb_header_data;
if ((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB)
>= ps_bitstream->u4_max_strm_size)
@@ -1788,8 +1789,7 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
}
/* mb header info */
- mb_tpm = *pu1_byte++;
- byte_count++;
+ mb_tpm = ps_mb_hdr->u1_mb_type_mode;
/* mb type */
mb_type = mb_tpm & 0xF;
@@ -1800,10 +1800,8 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
/* if Intra MB */
if (mb_type == I16x16 || mb_type == I4x4)
{
- cbp = *pu1_byte++;
- byte_count++;
- mb_qp_delta = *pu1_byte++;
- byte_count++;
+ cbp = ps_mb_hdr->u1_cbp;
+ mb_qp_delta = ps_mb_hdr->u1_mb_qp_delta;
/* Starting bitstream offset for header in bits */
bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
@@ -1833,9 +1831,10 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
}
if (mb_type == I4x4)
- { /* Intra 4x4 modes */
- ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, pu1_byte);
- byte_count += 8;
+ {
+ /* Intra 4x4 modes */
+ mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = (mb_hdr_i4x4_t *)ps_ent_ctxt->pv_mb_header_data;
+ ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, ps_mb_hdr_i4x4->au1_sub_blk_modes);
}
chroma_intra_mode = (mb_tpm >> 6);
@@ -1901,13 +1900,15 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
if (mb_type == I16x16)
{
ps_curr_ctxt->u1_mb_type = CAB_I16x16;
+ pu1_byte += sizeof(mb_hdr_i16x16_t);
}
else
{
ps_curr_ctxt->u1_mb_type = CAB_I4x4;
+ pu1_byte += sizeof(mb_hdr_i4x4_t);
}
- ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
return IH264E_SUCCESS;
}
@@ -1918,10 +1919,9 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
/* Encoding P16x16 */
if (mb_type != PSKIP)
{
- cbp = *pu1_byte++;
- byte_count++;
- mb_qp_delta = *pu1_byte++;
- byte_count++;
+ mb_hdr_p16x16_t *ps_mb_hdr_p16x16 = (mb_hdr_p16x16_t *)ps_ent_ctxt->pv_mb_header_data;
+ cbp = ps_mb_hdr->u1_cbp;
+ mb_qp_delta = ps_mb_hdr->u1_mb_qp_delta;
/* Encoding mb_skip */
ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_P_SLICE);
@@ -1937,8 +1937,8 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
}
ps_curr_ctxt->u1_mb_type = CAB_P;
{
- WORD16 *pi2_mv_ptr = (WORD16 *) pu1_byte;
- byte_count += 4;
+ WORD16 *pi2_mv_ptr = (WORD16 *) ps_mb_hdr_p16x16->ai2_mv;
+
ps_curr_ctxt->u1_mb_type = (ps_curr_ctxt->u1_mb_type
| CAB_NON_BD16x16);
/* Encoding motion vector for P16x16 */
@@ -1960,6 +1960,8 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
/* Starting bitstream offset for residue */
bitstream_start_offset = bitstream_end_offset;
+ pu1_byte += sizeof(mb_hdr_p16x16_t);
+
}
else/* MB = PSKIP */
{
@@ -1978,6 +1980,7 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
- bitstream_start_offset;
/* Starting bitstream offset for residue */
+ pu1_byte += sizeof(mb_hdr_pskip_t);
}
if (cbp > 0)
@@ -2002,7 +2005,8 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
}
ps_curr_ctxt->u1_intrapred_chroma_mode = 0;
ps_curr_ctxt->u1_cbp = cbp;
- ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
return IH264E_SUCCESS;
}
}
@@ -2066,8 +2070,8 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
WORD8 mb_qp_delta;
UWORD32 u4_cbp_l, u4_cbp_c;
- WORD32 byte_count = 0;
UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+ mb_hdr_common_t *ps_mb_hdr = (mb_hdr_common_t *)ps_ent_ctxt->pv_mb_header_data;
if ((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB)
>= ps_bitstream->u4_max_strm_size)
@@ -2076,8 +2080,7 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
}
/* mb header info */
- mb_tpm = *pu1_byte++;
- byte_count++;
+ mb_tpm = ps_mb_hdr->u1_mb_type_mode;
/* mb type */
mb_type = mb_tpm & 0xF;
@@ -2088,10 +2091,8 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
/* if Intra MB */
if (mb_type == I16x16 || mb_type == I4x4)
{
- cbp = *pu1_byte++;
- byte_count++;
- mb_qp_delta = *pu1_byte++;
- byte_count++;
+ cbp = ps_mb_hdr->u1_cbp;
+ mb_qp_delta = ps_mb_hdr->u1_mb_qp_delta;
/* Starting bitstream offset for header in bits */
bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
@@ -2138,9 +2139,10 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
}
if (mb_type == I4x4)
- { /* Intra 4x4 modes */
- ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, pu1_byte);
- byte_count += 8;
+ {
+ /* Intra 4x4 modes */
+ mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = (mb_hdr_i4x4_t *)ps_ent_ctxt->pv_mb_header_data;
+ ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, ps_mb_hdr_i4x4->au1_sub_blk_modes);
}
chroma_intra_mode = (mb_tpm >> 6);
@@ -2206,13 +2208,15 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
if (mb_type == I16x16)
{
ps_curr_ctxt->u1_mb_type = CAB_I16x16;
+ pu1_byte += sizeof(mb_hdr_i16x16_t);
}
else
{
ps_curr_ctxt->u1_mb_type = CAB_I4x4;
+ pu1_byte += sizeof(mb_hdr_i4x4_t);
}
- ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
return IH264E_SUCCESS;
}
@@ -2224,10 +2228,9 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
/* Encoding B_Direct_16x16 */
if (mb_type == BDIRECT)
{
- cbp = *pu1_byte++;
- byte_count++;
- mb_qp_delta = *pu1_byte++;
- byte_count++;
+ cbp = ps_mb_hdr->u1_cbp;
+ mb_qp_delta = ps_mb_hdr->u1_mb_qp_delta;
+
/* Encoding mb_skip */
ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE);
@@ -2275,6 +2278,7 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
bitstream_start_offset = bitstream_end_offset;
/* Starting bitstream offset for residue */
+ pu1_byte += sizeof(mb_hdr_bdirect_t);
}
else if (mb_type == BSKIP)/* MB = BSKIP */
@@ -2293,17 +2297,18 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
- bitstream_start_offset;
/* Starting bitstream offset for residue */
+ pu1_byte += sizeof(mb_hdr_bskip_t);
}
else /* mbype is B_L0_16x16, B_L1_16x16 or B_Bi_16x16 */
{
+ mb_hdr_b16x16_t *ps_mb_hdr_b16x16 = (mb_hdr_b16x16_t *)ps_ent_ctxt->pv_mb_header_data;
+
WORD32 i4_mb_part_pred_mode = (mb_tpm >> 4);
UWORD32 u4_mb_type = mb_type - B16x16 + B_L0_16x16
+ i4_mb_part_pred_mode;
- cbp = *pu1_byte++;
- byte_count++;
- mb_qp_delta = *pu1_byte++;
- byte_count++;
+ cbp = ps_mb_hdr->u1_cbp;
+ mb_qp_delta = ps_mb_hdr->u1_mb_qp_delta;
/* Encoding mb_skip */
ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE);
@@ -2338,11 +2343,9 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
ps_curr_ctxt->u1_mb_type = CAB_NON_BD16x16;
{
- WORD16 *pi2_mv_ptr = (WORD16 *) pu1_byte;
- /* Get the pred modes */
-
- byte_count += 4 * (1 + (i4_mb_part_pred_mode == PRED_BI));
+ WORD16 *pi2_mv_ptr = (WORD16 *) ps_mb_hdr_b16x16->ai2_mv;
+ /* Get the pred modes */
ps_curr_ctxt->u1_mb_type = (ps_curr_ctxt->u1_mb_type
| CAB_NON_BD16x16);
/* Encoding motion vector for B16x16 */
@@ -2364,6 +2367,8 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
- bitstream_start_offset;
/* Starting bitstream offset for residue */
bitstream_start_offset = bitstream_end_offset;
+
+ pu1_byte += sizeof(mb_hdr_b16x16_t);
}
if (cbp > 0)
@@ -2388,7 +2393,7 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
}
ps_curr_ctxt->u1_intrapred_chroma_mode = 0;
ps_curr_ctxt->u1_cbp = cbp;
- ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
return IH264E_SUCCESS;
}
}
diff --git a/encoder/ih264e_cabac_init.c b/encoder/ih264e_cabac_init.c
index 347842c..7407dcc 100644
--- a/encoder/ih264e_cabac_init.c
+++ b/encoder/ih264e_cabac_init.c
@@ -160,17 +160,13 @@ void ih264e_init_cabac_table(entropy_ctxt_t *ps_ent_ctxt)
/* 0th entry of mb_map_ctxt_inc will be always be containing default values */
/* for CABAC context representing MB not available */
mb_info_ctxt_t *ps_def_ctxt = ps_cabac_ctxt->ps_mb_map_ctxt_inc - 1;
- UWORD32 *pu4_temp;
- WORD8 i;
ps_def_ctxt->u1_mb_type = CAB_SKIP;
ps_def_ctxt->u1_cbp = 0x0f;
ps_def_ctxt->u1_intrapred_chroma_mode = 0;
- pu4_temp = (UWORD32 *)ps_def_ctxt->i1_ref_idx;
- pu4_temp[0] = 0;
- pu4_temp = (UWORD32 *)ps_def_ctxt->u1_mv;
- for (i = 0; i < 4; i++, pu4_temp++)
- (*pu4_temp) = 0;
+
+ memset(ps_def_ctxt->i1_ref_idx, 0, sizeof(ps_def_ctxt->i1_ref_idx));
+ memset(ps_def_ctxt->u1_mv, 0, sizeof(ps_def_ctxt->u1_mv));
ps_cabac_ctxt->ps_def_ctxt_mb_info = ps_def_ctxt;
}
}
diff --git a/encoder/ih264e_cavlc.c b/encoder/ih264e_cavlc.c
index 7491480..ed34a43 100644
--- a/encoder/ih264e_cavlc.c
+++ b/encoder/ih264e_cavlc.c
@@ -959,6 +959,7 @@ IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* packed header data */
UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+ mb_hdr_common_t *ps_mb_hdr = (mb_hdr_common_t *)ps_ent_ctxt->pv_mb_header_data;
/* mb header info */
/*
@@ -986,9 +987,9 @@ IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/********************************************************************/
/* mb header info */
- mb_tpm = *pu1_byte++;
- cbp = *pu1_byte++;
- mb_qp_delta = *pu1_byte++;
+ mb_tpm = ps_mb_hdr->u1_mb_type_mode;
+ cbp = ps_mb_hdr->u1_cbp;
+ mb_qp_delta = ps_mb_hdr->u1_mb_qp_delta;
/* mb type */
mb_type = mb_tpm & 0xF;
@@ -1009,9 +1010,13 @@ IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* intra_chroma_pred_mode */
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+
+ pu1_byte += sizeof(mb_hdr_i16x16_t);
}
else if (mb_type == I4x4)
{
+ mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = (mb_hdr_i4x4_t *)ps_ent_ctxt->pv_mb_header_data;
+
/* mb sub blk modes */
WORD32 intra_pred_mode_flag, rem_intra_mode;
WORD32 byte;
@@ -1024,7 +1029,7 @@ IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
for (i = 0; i < 16; i += 2)
{
/* sub blk idx 1 */
- byte = *pu1_byte++;
+ byte = ps_mb_hdr_i4x4->au1_sub_blk_modes[i >> 1];
intra_pred_mode_flag = byte & 0x1;
@@ -1056,11 +1061,14 @@ IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* intra_chroma_pred_mode */
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+
+ pu1_byte += sizeof(mb_hdr_i4x4_t);
}
else if (mb_type == I8x8)
{
/* transform 8x8 flag */
UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
+ mb_hdr_i8x8_t *ps_mb_hdr_i8x8 = (mb_hdr_i8x8_t *)ps_ent_ctxt->pv_mb_header_data;
/* mb sub blk modes */
WORD32 intra_pred_mode_flag, rem_intra_mode;
@@ -1080,7 +1088,7 @@ IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
for (i = 0; i < 4; i++)
{
/* sub blk idx 1 */
- byte = *pu1_byte++;
+ byte = ps_mb_hdr_i8x8->au1_sub_blk_modes[i >> 1];
intra_pred_mode_flag = byte & 0x1;
@@ -1112,6 +1120,8 @@ IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* intra_chroma_pred_mode */
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+
+ pu1_byte += sizeof(mb_hdr_i8x8_t);
}
else
{
@@ -1181,6 +1191,7 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* packed header data */
UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+ mb_hdr_common_t *ps_mb_hdr = (mb_hdr_common_t *)ps_ent_ctxt->pv_mb_header_data;
/* mb header info */
/*
@@ -1211,7 +1222,7 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/********************************************************************/
/* mb header info */
- mb_tpm = *pu1_byte++;
+ mb_tpm = ps_mb_hdr->u1_mb_type_mode;
/* mb type */
mb_type = mb_tpm & 0xF;
@@ -1227,6 +1238,7 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
(*ps_ent_ctxt->pi4_mb_skip_run)++;
/* store the index of the next mb syntax layer */
+ pu1_byte += sizeof(mb_hdr_pskip_t);
ps_ent_ctxt->pv_mb_header_data = pu1_byte;
/* set nnz to zero */
@@ -1248,8 +1260,8 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
}
/* remaining mb header info */
- cbp = *pu1_byte++;
- mb_qp_delta = *pu1_byte++;
+ cbp = ps_mb_hdr->u1_cbp;
+ mb_qp_delta = ps_mb_hdr->u1_mb_qp_delta;
/* mb skip run */
PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run");
@@ -1278,9 +1290,12 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* intra_chroma_pred_mode */
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ pu1_byte += sizeof(mb_hdr_i16x16_t);
}
else if (mb_type == I4x4)
{
+ mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = (mb_hdr_i4x4_t *)ps_ent_ctxt->pv_mb_header_data;
+
/* mb sub blk modes */
WORD32 intra_pred_mode_flag, rem_intra_mode;
WORD32 byte;
@@ -1296,7 +1311,7 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
for (i = 0; i < 16; i += 2)
{
/* sub blk idx 1 */
- byte = *pu1_byte++;
+ byte = ps_mb_hdr_i4x4->au1_sub_blk_modes[i >> 1];
intra_pred_mode_flag = byte & 0x1;
@@ -1328,9 +1343,13 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* intra_chroma_pred_mode */
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+
+ pu1_byte += sizeof(mb_hdr_i4x4_t);
}
else if (mb_type == I8x8)
{
+ mb_hdr_i8x8_t *ps_mb_hdr_i8x8 = (mb_hdr_i8x8_t *)ps_ent_ctxt->pv_mb_header_data;
+
/* transform 8x8 flag */
UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
@@ -1355,7 +1374,7 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
for (i = 0; i < 4; i++)
{
/* sub blk idx 1 */
- byte = *pu1_byte++;
+ byte = ps_mb_hdr_i8x8->au1_sub_blk_modes[i >> 1];
intra_pred_mode_flag = byte & 0x1;
@@ -1387,14 +1406,18 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* intra_chroma_pred_mode */
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+
+ pu1_byte += sizeof(mb_hdr_i8x8_t);
}
else
{
+ mb_hdr_p16x16_t *ps_mb_hdr_p16x16 = (mb_hdr_p16x16_t *)ps_ent_ctxt->pv_mb_header_data;
+
/* inter macro block partition cnt */
const UWORD8 au1_part_cnt[] = { 1, 2, 2, 4 };
/* mv ptr */
- WORD16 *pi2_mv_ptr = (WORD16 *)pu1_byte;
+ WORD16 *pi2_mv_ptr = (WORD16 *)ps_mb_hdr_p16x16->ai2_mv;
/* number of partitions for the current mb */
UWORD32 u4_part_cnt = au1_part_cnt[mb_type - 3];
@@ -1410,7 +1433,8 @@ IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv y");
}
- pu1_byte = (UWORD8 *)pi2_mv_ptr;
+ pu1_byte += sizeof(mb_hdr_p16x16_t);
+
}
/* coded_block_pattern */
@@ -1479,6 +1503,7 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* packed header data */
UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+ mb_hdr_common_t *ps_mb_hdr = (mb_hdr_common_t *)ps_ent_ctxt->pv_mb_header_data;
/* mb header info */
/*
@@ -1508,7 +1533,7 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* BEGIN HEADER GENERATION */
/********************************************************************/
- mb_tpm = *pu1_byte++;
+ mb_tpm = ps_mb_hdr->u1_mb_type_mode;
/* mb type */
mb_type = mb_tpm & 0xF;
@@ -1524,6 +1549,7 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
(*ps_ent_ctxt->pi4_mb_skip_run)++;
/* store the index of the next mb syntax layer */
+ pu1_byte += sizeof(mb_hdr_bskip_t);
ps_ent_ctxt->pv_mb_header_data = pu1_byte;
/* set nnz to zero */
@@ -1547,8 +1573,8 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* remaining mb header info */
- cbp = *pu1_byte++;
- mb_qp_delta = *pu1_byte++;
+ cbp = ps_mb_hdr->u1_cbp;
+ mb_qp_delta = ps_mb_hdr->u1_mb_qp_delta;
/* mb skip run */
PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run");
@@ -1577,9 +1603,13 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* intra_chroma_pred_mode */
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ pu1_byte += sizeof(mb_hdr_i16x16_t);
+
}
else if (mb_type == I4x4)
{
+ mb_hdr_i4x4_t *ps_mb_hdr_i4x4 = (mb_hdr_i4x4_t *)ps_ent_ctxt->pv_mb_header_data;
+
/* mb sub blk modes */
WORD32 intra_pred_mode_flag, rem_intra_mode;
WORD32 byte;
@@ -1595,7 +1625,7 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
for (i = 0; i < 16; i += 2)
{
/* sub blk idx 1 */
- byte = *pu1_byte++;
+ byte = ps_mb_hdr_i4x4->au1_sub_blk_modes[i >> 1];
intra_pred_mode_flag = byte & 0x1;
@@ -1627,9 +1657,13 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* intra_chroma_pred_mode */
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ pu1_byte += sizeof(mb_hdr_i4x4_t);
+
}
else if (mb_type == I8x8)
{
+ mb_hdr_i8x8_t *ps_mb_hdr_i8x8 = (mb_hdr_i8x8_t *)ps_ent_ctxt->pv_mb_header_data;
+
/* transform 8x8 flag */
UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
@@ -1654,7 +1688,7 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
for (i = 0; i < 4; i++)
{
/* sub blk idx 1 */
- byte = *pu1_byte++;
+ byte = ps_mb_hdr_i8x8->au1_sub_blk_modes[i >> 1];
intra_pred_mode_flag = byte & 0x1;
@@ -1686,21 +1720,24 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
/* intra_chroma_pred_mode */
PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ pu1_byte += sizeof(mb_hdr_i8x8_t);
+
}
else if(mb_type == BDIRECT)
{
is_inter = 1;
/* write mb type */
PUT_BITS_UEV(ps_bitstream, B_DIRECT_16x16, error_status, "mb type");
+ pu1_byte += sizeof(mb_hdr_bdirect_t);
+
}
else /* if mb_type == B16x16 */
{
+ mb_hdr_b16x16_t *ps_mb_hdr_b16x16 = (mb_hdr_b16x16_t *)ps_ent_ctxt->pv_mb_header_data;
+
/* inter macro block partition cnt for 16x16 16x8 8x16 8x8 */
const UWORD8 au1_part_cnt[] = { 1, 2, 2, 4 };
- /* mv ptr */
- WORD16 *pi2_mvd_ptr = (WORD16 *)pu1_byte;
-
/* number of partitions for the current mb */
UWORD32 u4_part_cnt = au1_part_cnt[mb_type - B16x16];
@@ -1718,21 +1755,17 @@ IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
{
if (i4_mb_part_pred_mode != PRED_L1)/* || PRED_BI */
{
- PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr, error_status, "mv l0 x");
- pi2_mvd_ptr++;
- PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr, error_status, "mv l0 y");
- pi2_mvd_ptr++;
+ PUT_BITS_SEV(ps_bitstream, ps_mb_hdr_b16x16->ai2_mv[0][0], error_status, "mv l0 x");
+ PUT_BITS_SEV(ps_bitstream, ps_mb_hdr_b16x16->ai2_mv[0][1], error_status, "mv l0 y");
}
if (i4_mb_part_pred_mode != PRED_L0)/* || PRED_BI */
{
- PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr, error_status, "mv l1 x");
- pi2_mvd_ptr++;
- PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr, error_status, "mv l1 y");
- pi2_mvd_ptr++;
+ PUT_BITS_SEV(ps_bitstream, ps_mb_hdr_b16x16->ai2_mv[1][0], error_status, "mv l1 x");
+ PUT_BITS_SEV(ps_bitstream, ps_mb_hdr_b16x16->ai2_mv[1][1], error_status, "mv l1 y");
}
}
- pu1_byte = (UWORD8 *)pi2_mvd_ptr;
+ pu1_byte += sizeof(mb_hdr_b16x16_t);
}
/* coded_block_pattern */
diff --git a/encoder/ih264e_defs.h b/encoder/ih264e_defs.h
index aee270e..c4e7885 100644
--- a/encoder/ih264e_defs.h
+++ b/encoder/ih264e_defs.h
@@ -66,7 +66,8 @@
* Maximum width supported by codec
*/
-#define MAX_WD 1920
+/* changed by haining@ to support Nexus 6P screen size, was previously 1920 */
+#define MAX_WD 2560
/**
* Minimum height supported by codec
@@ -77,7 +78,8 @@
* Maximum height supported by codec
*/
-#define MAX_HT 1920
+/* changed by haining@ to support Nexus 6P screen size, was previously 1920 */
+#define MAX_HT 2560
/*****************************************************************************/
/* Padding sizes */
diff --git a/encoder/ih264e_encode_header.c b/encoder/ih264e_encode_header.c
index 04bdc14..3626a63 100644
--- a/encoder/ih264e_encode_header.c
+++ b/encoder/ih264e_encode_header.c
@@ -1129,7 +1129,14 @@ IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps)
}
/* direct_8x8_inference_flag */
- ps_sps->i1_direct_8x8_inference_flag = 0;
+ if (ps_sps->u1_level_idc < IH264_LEVEL_30)
+ {
+ ps_sps->i1_direct_8x8_inference_flag = 0;
+ }
+ else
+ {
+ ps_sps->i1_direct_8x8_inference_flag = 1;
+ }
/* cropping params */
/*NOTE : Cropping values depend on the chroma format
diff --git a/encoder/ih264e_process.c b/encoder/ih264e_process.c
index 796c983..5fb0b88 100644
--- a/encoder/ih264e_process.c
+++ b/encoder/ih264e_process.c
@@ -652,18 +652,19 @@ IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
{
/* pointer to mb header storage space */
UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+ mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data;
/* temp var */
WORD32 i4, byte;
/* mb type plus mode */
- *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
+ ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
/* cbp */
- *pu1_ptr++ = ps_proc->u4_cbp;
+ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
/* mb qp delta */
- *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
/* sub mb modes */
for (i4 = 0; i4 < 16; i4 ++)
@@ -710,63 +711,66 @@ IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
}
}
- *pu1_ptr++ = byte;
+ ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] = byte;
}
/* end of mb layer */
+ pu1_ptr += sizeof(mb_hdr_i4x4_t);
ps_proc->pv_mb_header_data = pu1_ptr;
}
else if (u4_mb_type == I16x16)
{
/* pointer to mb header storage space */
UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+ mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data;
/* mb type plus mode */
- *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
+ ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
/* cbp */
- *pu1_ptr++ = ps_proc->u4_cbp;
+ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
/* mb qp delta */
- *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
/* end of mb layer */
+ pu1_ptr += sizeof(mb_hdr_i16x16_t);
ps_proc->pv_mb_header_data = pu1_ptr;
}
else if (u4_mb_type == P16x16)
{
/* pointer to mb header storage space */
UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+ mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data;
- WORD16 *i2_mv_ptr;
-
- /* mb type plus mode */
- *pu1_ptr++ = u4_mb_type;
+ /* mb type */
+ ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
/* cbp */
- *pu1_ptr++ = ps_proc->u4_cbp;
+ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
/* mb qp delta */
- *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
-
- i2_mv_ptr = (WORD16 *)pu1_ptr;
+ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
- *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
+ ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
- *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
+ ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
/* end of mb layer */
- ps_proc->pv_mb_header_data = i2_mv_ptr;
+ pu1_ptr += sizeof(mb_hdr_p16x16_t);
+ ps_proc->pv_mb_header_data = pu1_ptr;
}
else if (u4_mb_type == PSKIP)
{
/* pointer to mb header storage space */
UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+ mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data;
- /* mb type plus mode */
- *pu1_ptr++ = u4_mb_type;
+ /* mb type */
+ ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
/* end of mb layer */
+ pu1_ptr += sizeof(mb_hdr_pskip_t);
ps_proc->pv_mb_header_data = pu1_ptr;
}
else if(u4_mb_type == B16x16)
@@ -774,58 +778,59 @@ IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
/* pointer to mb header storage space */
UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
-
- WORD16 *i2_mv_ptr;
+ mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data;
UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
/* mb type plus mode */
- *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
+ ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
/* cbp */
- *pu1_ptr++ = ps_proc->u4_cbp;
+ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
/* mb qp delta */
- *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
/* l0 & l1 me data */
- i2_mv_ptr = (WORD16 *)pu1_ptr;
-
if (u4_pred_mode != PRED_L1)
{
- *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
+ ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
- ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
- *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
+ ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
- ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
}
if (u4_pred_mode != PRED_L0)
{
- *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
+ ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
- ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
- *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
+ ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
- ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
}
/* end of mb layer */
- ps_proc->pv_mb_header_data = i2_mv_ptr;
+ pu1_ptr += sizeof(mb_hdr_b16x16_t);
+ ps_proc->pv_mb_header_data = pu1_ptr;
}
else if(u4_mb_type == BDIRECT)
{
/* pointer to mb header storage space */
UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+ mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data;
/* mb type plus mode */
- *pu1_ptr++ = u4_mb_type;
+ ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
/* cbp */
- *pu1_ptr++ = ps_proc->u4_cbp;
+ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
/* mb qp delta */
- *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+ /* end of mb layer */
+ pu1_ptr += sizeof(mb_hdr_bdirect_t);
ps_proc->pv_mb_header_data = pu1_ptr;
}
@@ -835,11 +840,13 @@ IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
/* pointer to mb header storage space */
UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+ mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data;
/* mb type plus mode */
- *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
+ ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
/* end of mb layer */
+ pu1_ptr += sizeof(mb_hdr_bskip_t);
ps_proc->pv_mb_header_data = pu1_ptr;
}
diff --git a/encoder/ih264e_structs.h b/encoder/ih264e_structs.h
index 6cbce7c..125db84 100644
--- a/encoder/ih264e_structs.h
+++ b/encoder/ih264e_structs.h
@@ -1151,6 +1151,184 @@ typedef struct
/**
******************************************************************************
+* @brief mb_hdr structures to access first few common elements of above
+* structures
+******************************************************************************
+*/
+
+typedef struct
+{
+ /**
+ * mb type and mode
+ */
+ UWORD8 u1_mb_type_mode;
+
+ /**
+ * CBP
+ */
+ UWORD8 u1_cbp;
+
+ /**
+ * MB qp delta
+ */
+ UWORD8 u1_mb_qp_delta;
+
+ /**
+ * Element to align structure to 2 byte boundary
+ */
+ UWORD8 u1_pad;
+}mb_hdr_common_t;
+
+/**
+******************************************************************************
+* @brief macro block info for I4x4 MB
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Common MB header params
+ */
+ mb_hdr_common_t common;
+
+ /**
+ * Sub block modes, 2 modes per byte
+ */
+ UWORD8 au1_sub_blk_modes[8];
+}mb_hdr_i4x4_t;
+
+/**
+******************************************************************************
+* @brief macro block info for I8x8 MB
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Common MB header params
+ */
+ mb_hdr_common_t common;
+
+
+ /**
+ * Sub block modes, 2 modes per byte
+ */
+ UWORD8 au1_sub_blk_modes[2];
+}mb_hdr_i8x8_t;
+
+/**
+******************************************************************************
+* @brief macro block info for I16x16 MB
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Common MB header params
+ */
+ mb_hdr_common_t common;
+
+}mb_hdr_i16x16_t;
+
+/**
+******************************************************************************
+* @brief macro block info for P16x16 MB
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Common MB header params
+ */
+ mb_hdr_common_t common;
+
+ /**
+ * MV
+ */
+ WORD16 ai2_mv[2];
+}mb_hdr_p16x16_t;
+
+/**
+******************************************************************************
+* @brief macro block info for PSKIP MB
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Common MB header params
+ */
+ mb_hdr_common_t common;
+
+}mb_hdr_pskip_t;
+
+/**
+******************************************************************************
+* @brief macro block info for B16x16 MB
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Common MB header params
+ */
+ mb_hdr_common_t common;
+
+
+ /**
+ * MV
+ */
+ WORD16 ai2_mv[2][2];
+}mb_hdr_b16x16_t;
+
+/**
+******************************************************************************
+* @brief macro block info for BDIRECT MB
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Common MB header params
+ */
+ mb_hdr_common_t common;
+
+}mb_hdr_bdirect_t;
+
+/**
+******************************************************************************
+* @brief macro block info for BSKIP MB
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Common MB header params
+ */
+ mb_hdr_common_t common;
+
+}mb_hdr_bskip_t;
+
+/**
+******************************************************************************
+* @brief Union of mb_hdr structures for size calculation
+* and to access first few common elements
+******************************************************************************
+*/
+
+typedef union
+{
+ mb_hdr_i4x4_t mb_hdr_i4x4;
+ mb_hdr_i8x8_t mb_hdr_i8x8;
+ mb_hdr_i16x16_t mb_hdr_i16x16;
+ mb_hdr_p16x16_t mb_hdr_p16x16;
+ mb_hdr_pskip_t mb_hdr_pskip;
+ mb_hdr_b16x16_t mb_hdr_b16x16;
+ mb_hdr_bdirect_t mb_hdr_bdirect;
+ mb_hdr_bskip_t mb_hdr_bskip;
+}mb_hdr_t;
+/**
+******************************************************************************
* @brief structure presenting the neighbor availability of a mb
* or subblk or any other partition
******************************************************************************
diff --git a/encoder/irc_rate_control_api.c b/encoder/irc_rate_control_api.c
index 95befce..4a64645 100644
--- a/encoder/irc_rate_control_api.c
+++ b/encoder/irc_rate_control_api.c
@@ -756,6 +756,16 @@ void irc_update_frame_level_info(rate_control_api_t *ps_rate_control_api,
{
u1_is_scd = 0;
}
+ /* For frames that contain plain (flat) areas that differ from the reference
+ * frames, the encoder might generate more INTRA MBs because their SAD is lower
+ * than that of INTER MBs. Such cases should not be treated as a scene change.
+ * For such frames the bits consumed will be fewer than the allocated bits.
+ */
+ if(i4_total_frame_bits < ps_rate_control_api->i4_prev_frm_est_bits)
+ {
+ u1_is_scd = 0;
+ }
+
trace_printf((const WORD8*)"i4_total_frame_bits %d\n", i4_total_frame_bits);
if(!i4_is_it_a_skip && !i4_is_pic_handling_done)
diff --git a/test/decoder.mk b/test/decoder.mk
index 1a49a92..0dda948 100644
--- a/test/decoder.mk
+++ b/test/decoder.mk
@@ -9,5 +9,5 @@ LOCAL_CFLAGS := -DPROFILE_ENABLE -DARM -DMD5_DISABLE -fPIC
LOCAL_C_INCLUDES += $(LOCAL_PATH)/../decoder $(LOCAL_PATH)/../common $(LOCAL_PATH)/decoder/
LOCAL_SRC_FILES := decoder/main.c
LOCAL_STATIC_LIBRARIES := libavcdec
-
+LOCAL_SHARED_LIBRARIES := liblog
include $(BUILD_EXECUTABLE)
diff --git a/test/decoder/dec.cfg b/test/decoder/dec.cfg
new file mode 100644
index 0000000..f452ea1
--- /dev/null
+++ b/test/decoder/dec.cfg
@@ -0,0 +1,12 @@
+--input input.h264
+--save_output 0
+--num_frames -1
+--output out.yuv
+--chroma_format YUV_420P
+--share_display_buf 0
+--num_cores 3
+--loopback 0
+--display 0
+--fps 59.94
+--arch ARM_A9Q
+--soc GENERIC
diff --git a/test/encoder/enc.cfg b/test/encoder/enc.cfg
new file mode 100644
index 0000000..ba62199
--- /dev/null
+++ b/test/encoder/enc.cfg
@@ -0,0 +1,47 @@
+--input input_qvga.yuv
+--output output.264
+--recon recon.yuv
+--chksum chksum.md5
+--chksum_enable 0
+--recon_enable 0
+--input_chroma_format YUV_420P
+--recon_chroma_format YUV_420P
+--qp_i 24
+--qp_p 27
+--qp_b 29
+--qp_i_min 4
+--qp_i_max 49
+--qp_p_min 4
+--qp_p_max 49
+--qp_b_min 4
+--qp_b_max 49
+--max_wd 1920
+--max_ht 1080
+--psnr 0
+--slice 0
+--slice_param 0
+--num_frames -1
+--search_range_x 16
+--search_range_y 16
+--width 320
+--height 240
+--src_framerate 30
+--tgt_framerate 30
+--num_cores 4
+--rc 2
+--bitrate 256000
+--vbv_delay 1000
+--disable_deblock_level 0
+--intra_4x4_enable 1
+--i_interval 1000
+--me_speed 100
+--hpel 1
+--fast_sad 0
+--speed NORMAL
+--max_level 41
+--idr_interval 1000
+--entropy 0
+--bframes 0
+--adaptive_intra_refresh 0
+--air_refresh_period 30
+