diff options
Diffstat (limited to 'src/common_audio/signal_processing/spl_sqrt_floor.s')
-rw-r--r-- | src/common_audio/signal_processing/spl_sqrt_floor.s | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/src/common_audio/signal_processing/spl_sqrt_floor.s b/src/common_audio/signal_processing/spl_sqrt_floor.s new file mode 100644 index 0000000000..425993dfa6 --- /dev/null +++ b/src/common_audio/signal_processing/spl_sqrt_floor.s @@ -0,0 +1,88 @@ +@ Written by Wilco Dijkstra, 1996. +@ Refer to NOTICE file at the root of git project. +@ +@ Minor modifications in code style for WebRTC, 2012. +@ Output is bit-exact with the reference C code in spl_sqrt_floor.c. + +@ Input : r0 32 bit unsigned integer +@ Output: r0 = INT (SQRT (r0)), precision is 16 bits +@ Registers touched: r1, r2 + +.global WebRtcSpl_SqrtFloor + +.align 2 +WebRtcSpl_SqrtFloor: +.fnstart + mov r1, #3 << 30 + mov r2, #1 << 30 + + @ unroll for i = 0 .. 15 + + cmp r0, r2, ror #2 * 0 + subhs r0, r0, r2, ror #2 * 0 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 1 + subhs r0, r0, r2, ror #2 * 1 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 2 + subhs r0, r0, r2, ror #2 * 2 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 3 + subhs r0, r0, r2, ror #2 * 3 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 4 + subhs r0, r0, r2, ror #2 * 4 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 5 + subhs r0, r0, r2, ror #2 * 5 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 6 + subhs r0, r0, r2, ror #2 * 6 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 7 + subhs r0, r0, r2, ror #2 * 7 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 8 + subhs r0, r0, r2, ror #2 * 8 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 9 + subhs r0, r0, r2, ror #2 * 9 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 10 + subhs r0, r0, r2, ror #2 * 10 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 11 + subhs r0, r0, r2, ror #2 * 11 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 12 + subhs r0, r0, r2, ror #2 * 12 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 13 + subhs r0, r0, r2, ror #2 * 13 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 14 + subhs r0, r0, r2, ror #2 * 14 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 15 + subhs r0, r0, r2, ror #2 * 15 + adc r2, r1, r2, lsl #1 + + bic r0, r2, #3 << 30 @ for rounding add: cmp r0, r2 adc r2, #1 + bx lr + +.fnend |