about summary refs log tree commit diff
path: root/simd
diff options
context:
space:
mode:
author Jonathan Wright <jonathan.wright@arm.com> 2018-06-29 13:58:50 +0100
committer Jonathan Wright <jonathan.wright@arm.com> 2019-07-31 14:01:48 +0100
commit 81aef9014e059f9bf4838db49ba4fd47fd9d14ce (patch)
tree d6506ae61ec982cc363b7b01b6d8c719440f594d /simd
parent 6f0254d1546343e6d3d008488a1dd465d3f92bd0 (diff)
download libjpeg-turbo-81aef9014e059f9bf4838db49ba4fd47fd9d14ce.tar.gz
Add Arm NEON implementation of h2v2_upsample
Adds an Arm NEON intrinsics implementation of h2v2_upsample. This is new code, there was no previous NEON assembly implementation for either AArch32 or AArch64.

Bug: 922430
Change-Id: Iaed1202e0c77e4d9e6b5dbd522d2fb0c851878b2
Diffstat (limited to 'simd')
-rw-r--r-- simd/arm/arm/jsimd.c 13
-rw-r--r-- simd/arm/arm64/jsimd.c 13
-rw-r--r-- simd/arm/common/jdsample-neon.c 42
-rw-r--r-- simd/jsimd.h 3
4 files changed, 71 insertions, 0 deletions
diff --git a/simd/arm/arm/jsimd.c b/simd/arm/arm/jsimd.c
index 541cdfed..4bf39319 100644
--- a/simd/arm/arm/jsimd.c
+++ b/simd/arm/arm/jsimd.c
@@ -322,6 +322,17 @@ jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
GLOBAL(int)
jsimd_can_h2v2_upsample(void)
{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+
+ if (simd_support & JSIMD_NEON)
+ return 1;
+
return 0;
}
@@ -346,6 +357,8 @@ GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
+ input_data, output_data_ptr);
}
GLOBAL(void)
diff --git a/simd/arm/arm64/jsimd.c b/simd/arm/arm64/jsimd.c
index 5481dad7..7ce66470 100644
--- a/simd/arm/arm64/jsimd.c
+++ b/simd/arm/arm64/jsimd.c
@@ -397,6 +397,17 @@ jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
GLOBAL(int)
jsimd_can_h2v2_upsample(void)
{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+
+ if (simd_support & JSIMD_NEON)
+ return 1;
+
return 0;
}
@@ -421,6 +432,8 @@ GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
+ input_data, output_data_ptr);
}
GLOBAL(void)
diff --git a/simd/arm/common/jdsample-neon.c b/simd/arm/common/jdsample-neon.c
index 8f4fd7c5..e4f5129d 100644
--- a/simd/arm/common/jdsample-neon.c
+++ b/simd/arm/common/jdsample-neon.c
@@ -513,3 +513,45 @@ void jsimd_h2v1_upsample_neon(int max_v_samp_factor,
}
}
}
+
+
+/*
+ * The diagram below shows the operation of h2v2 (simple) upsampling. Each
+ * sample in the row is duplicated to form two output pixel channel values.
+ * This horizontally-upsampled row is then also duplicated.
+ *
+ * p0 p1 p2 p3
+ * +-----+-----+ +-----+-----+-----+-----+
+ * | s0 | s1 | -> | s0 | s0 | s1 | s1 |
+ * +-----+-----+ +-----+-----+-----+-----+
+ * | s0 | s0 | s1 | s1 |
+ * +-----+-----+-----+-----+
+ */
+
+void jsimd_h2v2_upsample_neon(int max_v_samp_factor,
+ JDIMENSION output_width,
+ JSAMPARRAY input_data,
+ JSAMPARRAY *output_data_ptr)
+{
+ JSAMPARRAY output_data = *output_data_ptr;
+ JSAMPROW inptr, outptr0, outptr1;
+
+ for (int inrow = 0, outrow = 0; outrow < max_v_samp_factor; inrow++) {
+ inptr = input_data[inrow];
+ outptr0 = output_data[outrow++];
+ outptr1 = output_data[outrow++];
+
+ for (unsigned colctr = 0; 2 * colctr < output_width; colctr += 16) {
+ uint8x16_t samples = vld1q_u8(inptr + colctr);
+ /* Duplicate the samples - the store interleaves them to produce the */
+ /* pattern in the diagram above. */
+ uint8x16x2_t output_pixels = { samples, samples };
+ /* Store pixel values to memory for both output rows. */
+ /* Due to the way sample buffers are allocated, we don't need to worry */
+ /* about tail cases when output_width is not a multiple of 32. */
+ /* See "Creation of 2-D sample arrays" in jmemmgr.c for details. */
+ vst2q_u8(outptr0 + 2 * colctr, output_pixels);
+ vst2q_u8(outptr1 + 2 * colctr, output_pixels);
+ }
+ }
+}
diff --git a/simd/jsimd.h b/simd/jsimd.h
index 48726034..9b5781c2 100644
--- a/simd/jsimd.h
+++ b/simd/jsimd.h
@@ -565,6 +565,9 @@ EXTERN(void) jsimd_h2v2_upsample_avx2
EXTERN(void) jsimd_h2v1_upsample_neon
(int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
JSAMPARRAY *output_data_ptr);
+EXTERN(void) jsimd_h2v2_upsample_neon
+ (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
+ JSAMPARRAY *output_data_ptr);
EXTERN(void) jsimd_h2v1_upsample_dspr2
(int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,