From 6acb36d25de9b2ac19aa4c34d6821c42e9a84c00 Mon Sep 17 00:00:00 2001 From: Saurabh Sood Date: Fri, 10 May 2019 14:43:15 +0530 Subject: Add push-pop for Neon D8-D15 registers Bug: 68320413 Test: hevcdec with testbench updates to validate D8-D15 Change-Id: Ib62e68b69b016b1ded12cb00d1bb580de8a744c2 --- decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s | 19 +++++++++---------- decoder/arm/ihevcd_itrans_recon_dc_chroma.s | 7 ++++--- decoder/arm/ihevcd_itrans_recon_dc_luma.s | 7 ++++--- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s b/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s index a9a75cb..caf7123 100644 --- a/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s +++ b/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s @@ -68,13 +68,13 @@ @* * @* Register Usage : R0 - R14 * @* * -@* Stack Usage : 40 Bytes * +@* Stack Usage : 104 Bytes * @* * @* Interruptibility : Interruptible * @* * @* Known Limitations * @* Assumptions: Image Width: Assumed to be multiple of 16 and * -@* greater than or equal to 16 * +@* greater than or equal to 16 * @* Image Height: Assumed to be even. * @* * @* Revision History : * @@ -82,6 +82,7 @@ @* 07 06 2010 Varshita Draft * @* 07 06 2010 Naveen Kr T Completed * @* 05 08 2013 Naveen K P Modified for HEVC * +@* 30 10 2018 Saurabh Sood Store D registers to stack * @*****************************************************************************/ .global ihevcd_fmt_conv_420sp_to_rgba8888_a9q .type ihevcd_fmt_conv_420sp_to_rgba8888_a9q, function @@ -89,7 +90,7 @@ ihevcd_fmt_conv_420sp_to_rgba8888_a9q: @// push the registers on the stack STMFD SP!,{R4-R12,LR} - + VPUSH {d8-d15} @//R0 - Y PTR @//R1 - UV PTR @@ -131,12 +132,12 @@ ihevcd_fmt_conv_420sp_to_rgba8888_a9q: @//D0 HAS C1-C2-C3-C4 @// load other parameters from stack - LDR R5,[sp,#40] + LDR R5,[sp,#104] @LDR R4,[sp,#44] - LDR R6,[sp,#44] - LDR R7,[sp,#48] + LDR R6,[sp,#108] + LDR R7,[sp,#112] @LDR R8,[sp,#52] - LDR R9,[sp,#52] + LDR R9,[sp,#116] @// calculate offsets, offset = stride - width SUB R10,R6,R3 @// luma offset @@ -445,10 +446,8 @@ LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP_SKIP: BNE LABEL_YUV420SP_TO_RGB8888_HEIGHT_LOOP @//POP THE REGISTERS + VPOP {d8-d15} LDMFD SP!,{R4-R12,PC} - - .section .note.GNU-stack,"",%progbits - diff --git a/decoder/arm/ihevcd_itrans_recon_dc_chroma.s b/decoder/arm/ihevcd_itrans_recon_dc_chroma.s index 6732ce0..9184b80 100644 --- a/decoder/arm/ihevcd_itrans_recon_dc_chroma.s +++ b/decoder/arm/ihevcd_itrans_recon_dc_chroma.s @@ -58,9 +58,9 @@ ihevcd_itrans_recon_dc_chroma_a9q: push {r0-r11,lr} - ldr r4,[sp,#0x34] @loads log2_trans_size - ldr r5,[sp,#0x38] @ loads i2_coeff_value - + vpush {d8-d15} + ldr r4,[sp,#0x74] @loads log2_trans_size + ldr r5,[sp,#0x78] @ loads i2_coeff_value mov r10,#1 lsl r4,r10,r4 @ trans_size = (1 << log2_trans_size)@ mov r6,#64 @ 1 << (shift1 - 1)@ @@ -188,6 +188,7 @@ col_loop_4chroma: vst1.u32 {d8},[r1] end_loops_chroma: + vpop {d8-d15} pop {r0-r11,pc} diff --git a/decoder/arm/ihevcd_itrans_recon_dc_luma.s b/decoder/arm/ihevcd_itrans_recon_dc_luma.s index 8aee84c..da1f1ad 100644 --- a/decoder/arm/ihevcd_itrans_recon_dc_luma.s +++ b/decoder/arm/ihevcd_itrans_recon_dc_luma.s @@ -59,9 +59,9 @@ ihevcd_itrans_recon_dc_luma_a9q: push {r0-r11,lr} - ldr r4,[sp,#0x34] @loads log2_trans_size - ldr r5,[sp,#0x38] @ loads i2_coeff_value - + vpush {d8-d15} + ldr r4,[sp,#0x74] @loads log2_trans_size + ldr r5,[sp,#0x78] @ loads i2_coeff_value mov r10,#1 lsl r4,r10,r4 @ trans_size = (1 << log2_trans_size)@ mov r6,#64 @ 1 << (shift1 - 1)@ @@ -182,6 +182,7 @@ col_loop_4: vst1.u32 {d5[0]},[r1] end_loops: + vpop {d8-d15} pop {r0-r11,pc} -- cgit v1.2.3