diff options
author | Jonathan Wright <jonathan.wright@arm.com> | 2018-06-29 13:58:50 +0100 |
---|---|---|
committer | Jonathan Wright <jonathan.wright@arm.com> | 2019-07-31 14:01:48 +0100 |
commit | 81aef9014e059f9bf4838db49ba4fd47fd9d14ce (patch) | |
tree | d6506ae61ec982cc363b7b01b6d8c719440f594d /simd | |
parent | 6f0254d1546343e6d3d008488a1dd465d3f92bd0 (diff) | |
download | libjpeg-turbo-81aef9014e059f9bf4838db49ba4fd47fd9d14ce.tar.gz |
Add Arm NEON implementation of h2v2_upsample
Adds an Arm NEON intrinsics implementation of h2v2_upsample. This is
new code; there was no previous NEON assembly implementation for
either AArch32 or AArch64.
Bug: 922430
Change-Id: Iaed1202e0c77e4d9e6b5dbd522d2fb0c851878b2
Diffstat (limited to 'simd')
-rw-r--r-- | simd/arm/arm/jsimd.c | 13 | ||||
-rw-r--r-- | simd/arm/arm64/jsimd.c | 13 | ||||
-rw-r--r-- | simd/arm/common/jdsample-neon.c | 42 | ||||
-rw-r--r-- | simd/jsimd.h | 3 |
4 files changed, 71 insertions, 0 deletions
diff --git a/simd/arm/arm/jsimd.c b/simd/arm/arm/jsimd.c index 541cdfed..4bf39319 100644 --- a/simd/arm/arm/jsimd.c +++ b/simd/arm/arm/jsimd.c @@ -322,6 +322,17 @@ jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, GLOBAL(int) jsimd_can_h2v2_upsample(void) { + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_NEON) + return 1; + return 0; } @@ -346,6 +357,8 @@ GLOBAL(void) jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { + jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); } GLOBAL(void) diff --git a/simd/arm/arm64/jsimd.c b/simd/arm/arm64/jsimd.c index 5481dad7..7ce66470 100644 --- a/simd/arm/arm64/jsimd.c +++ b/simd/arm/arm64/jsimd.c @@ -397,6 +397,17 @@ jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, GLOBAL(int) jsimd_can_h2v2_upsample(void) { + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_NEON) + return 1; + return 0; } @@ -421,6 +432,8 @@ GLOBAL(void) jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { + jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); } GLOBAL(void) diff --git a/simd/arm/common/jdsample-neon.c b/simd/arm/common/jdsample-neon.c index 8f4fd7c5..e4f5129d 100644 --- a/simd/arm/common/jdsample-neon.c +++ b/simd/arm/common/jdsample-neon.c @@ -513,3 +513,45 @@ void jsimd_h2v1_upsample_neon(int max_v_samp_factor, } } } + + +/* + * The diagram below shows the operation of h2v2 (simple) upsampling. Each + * sample in the row is duplicated to form two output pixel channel values. 
+ * This horizontally-upsampled row is then also duplicated. + * + * p0 p1 p2 p3 + * +-----+-----+ +-----+-----+-----+-----+ + * | s0 | s1 | -> | s0 | s0 | s1 | s1 | + * +-----+-----+ +-----+-----+-----+-----+ + * | s0 | s0 | s1 | s1 | + * +-----+-----+-----+-----+ + */ + +void jsimd_h2v2_upsample_neon(int max_v_samp_factor, + JDIMENSION output_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr, outptr0, outptr1; + + for (int inrow = 0, outrow = 0; outrow < max_v_samp_factor; inrow++) { + inptr = input_data[inrow]; + outptr0 = output_data[outrow++]; + outptr1 = output_data[outrow++]; + + for (unsigned colctr = 0; 2 * colctr < output_width; colctr += 16) { + uint8x16_t samples = vld1q_u8(inptr + colctr); + /* Duplicate the samples - the store interleaves them to produce the */ + /* pattern in the diagram above. */ + uint8x16x2_t output_pixels = { samples, samples }; + /* Store pixel values to memory for both output rows. */ + /* Due to the way sample buffers are allocated, we don't need to worry */ + /* about tail cases when output_width is not a multiple of 32. */ + /* See "Creation of 2-D sample arrays" in jmemmgr.c for details. */ + vst2q_u8(outptr0 + 2 * colctr, output_pixels); + vst2q_u8(outptr1 + 2 * colctr, output_pixels); + } + } +} diff --git a/simd/jsimd.h b/simd/jsimd.h index 48726034..9b5781c2 100644 --- a/simd/jsimd.h +++ b/simd/jsimd.h @@ -565,6 +565,9 @@ EXTERN(void) jsimd_h2v2_upsample_avx2 EXTERN(void) jsimd_h2v1_upsample_neon (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_neon + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v1_upsample_dspr2 (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, |