about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: DRC <dcommander@users.sourceforge.net> 2014-05-15 18:26:01 +0000
committer: DRC <dcommander@users.sourceforge.net> 2014-05-15 18:26:01 +0000
commit: 1b3fd7eead3ae2c83f88df43078878f0883aeea1 (patch)
tree: f0d3e9cfbeb32da2030be33859e603e0c84d7018
parent: a3c3bbc2f43074900fb4c5062c0e1b9286f113e7 (diff)
download: libjpeg-turbo-1b3fd7eead3ae2c83f88df43078878f0883aeea1.tar.gz
SIMD-accelerated NULL convert routine for MIPS DSPr2
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1304 632fc199-4ca6-4c93-a231-07263d6284db
-rw-r--r-- jccolor.c 71
-rw-r--r-- jsimd.h 5
-rw-r--r-- jsimd_none.c 13
-rw-r--r-- simd/jsimd.h 5
-rw-r--r-- simd/jsimd_mips.c 26
-rw-r--r-- simd/jsimd_mips_dspr2.S 104
6 files changed, 201 insertions, 23 deletions
diff --git a/jccolor.c b/jccolor.c
index fe9422a9..f32dcfa1 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -587,19 +587,24 @@ jinit_color_converter (j_compress_ptr cinfo)
if (rgb_red[cinfo->in_color_space] == 0 &&
rgb_green[cinfo->in_color_space] == 1 &&
rgb_blue[cinfo->in_color_space] == 2 &&
- rgb_pixelsize[cinfo->in_color_space] == 3)
- cconvert->pub.color_convert = null_convert;
- else if (cinfo->in_color_space == JCS_RGB ||
- cinfo->in_color_space == JCS_EXT_RGB ||
- cinfo->in_color_space == JCS_EXT_RGBX ||
- cinfo->in_color_space == JCS_EXT_BGR ||
- cinfo->in_color_space == JCS_EXT_BGRX ||
- cinfo->in_color_space == JCS_EXT_XBGR ||
- cinfo->in_color_space == JCS_EXT_XRGB ||
- cinfo->in_color_space == JCS_EXT_RGBA ||
- cinfo->in_color_space == JCS_EXT_BGRA ||
- cinfo->in_color_space == JCS_EXT_ABGR ||
- cinfo->in_color_space == JCS_EXT_ARGB)
+ rgb_pixelsize[cinfo->in_color_space] == 3) {
+#if defined(__mips__)
+ if (jsimd_c_can_null_convert())
+ cconvert->pub.color_convert = jsimd_c_null_convert;
+ else
+#endif
+ cconvert->pub.color_convert = null_convert;
+ } else if (cinfo->in_color_space == JCS_RGB ||
+ cinfo->in_color_space == JCS_EXT_RGB ||
+ cinfo->in_color_space == JCS_EXT_RGBX ||
+ cinfo->in_color_space == JCS_EXT_BGR ||
+ cinfo->in_color_space == JCS_EXT_BGRX ||
+ cinfo->in_color_space == JCS_EXT_XBGR ||
+ cinfo->in_color_space == JCS_EXT_XRGB ||
+ cinfo->in_color_space == JCS_EXT_RGBA ||
+ cinfo->in_color_space == JCS_EXT_BGRA ||
+ cinfo->in_color_space == JCS_EXT_ABGR ||
+ cinfo->in_color_space == JCS_EXT_ARGB)
cconvert->pub.color_convert = rgb_rgb_convert;
else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
@@ -625,18 +630,28 @@ jinit_color_converter (j_compress_ptr cinfo)
cconvert->pub.start_pass = rgb_ycc_start;
cconvert->pub.color_convert = rgb_ycc_convert;
}
- } else if (cinfo->in_color_space == JCS_YCbCr)
- cconvert->pub.color_convert = null_convert;
- else
+ } else if (cinfo->in_color_space == JCS_YCbCr) {
+#if defined(__mips__)
+ if (jsimd_c_can_null_convert())
+ cconvert->pub.color_convert = jsimd_c_null_convert;
+ else
+#endif
+ cconvert->pub.color_convert = null_convert;
+ } else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
break;
case JCS_CMYK:
if (cinfo->num_components != 4)
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
- if (cinfo->in_color_space == JCS_CMYK)
- cconvert->pub.color_convert = null_convert;
- else
+ if (cinfo->in_color_space == JCS_CMYK) {
+#if defined(__mips__)
+ if (jsimd_c_can_null_convert())
+ cconvert->pub.color_convert = jsimd_c_null_convert;
+ else
+#endif
+ cconvert->pub.color_convert = null_convert;
+ } else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
break;
@@ -646,9 +661,14 @@ jinit_color_converter (j_compress_ptr cinfo)
if (cinfo->in_color_space == JCS_CMYK) {
cconvert->pub.start_pass = rgb_ycc_start;
cconvert->pub.color_convert = cmyk_ycck_convert;
- } else if (cinfo->in_color_space == JCS_YCCK)
- cconvert->pub.color_convert = null_convert;
- else
+ } else if (cinfo->in_color_space == JCS_YCCK) {
+#if defined(__mips__)
+ if (jsimd_c_can_null_convert())
+ cconvert->pub.color_convert = jsimd_c_null_convert;
+ else
+#endif
+ cconvert->pub.color_convert = null_convert;
+ } else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
break;
@@ -656,7 +676,12 @@ jinit_color_converter (j_compress_ptr cinfo)
if (cinfo->jpeg_color_space != cinfo->in_color_space ||
cinfo->num_components != cinfo->input_components)
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
- cconvert->pub.color_convert = null_convert;
+#if defined(__mips__)
+ if (jsimd_c_can_null_convert())
+ cconvert->pub.color_convert = jsimd_c_null_convert;
+ else
+#endif
+ cconvert->pub.color_convert = null_convert;
break;
}
}
diff --git a/jsimd.h b/jsimd.h
index 2817137d..2b683e8a 100644
--- a/jsimd.h
+++ b/jsimd.h
@@ -40,6 +40,7 @@
EXTERN(int) jsimd_can_rgb_ycc JPP((void));
EXTERN(int) jsimd_can_rgb_gray JPP((void));
EXTERN(int) jsimd_can_ycc_rgb JPP((void));
+EXTERN(int) jsimd_c_can_null_convert JPP((void));
EXTERN(void) jsimd_rgb_ycc_convert
JPP((j_compress_ptr cinfo,
@@ -53,6 +54,10 @@ EXTERN(void) jsimd_ycc_rgb_convert
JPP((j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows));
+/* SIMD replacement for the compressor's null_convert color converter.
+ * jccolor.c installs it as cconvert->pub.color_convert only after
+ * jsimd_c_can_null_convert() has returned nonzero. */
+EXTERN(void) jsimd_c_null_convert
+ JPP((j_compress_ptr cinfo,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
EXTERN(int) jsimd_can_h2v2_downsample JPP((void));
EXTERN(int) jsimd_can_h2v1_downsample JPP((void));
diff --git a/jsimd_none.c b/jsimd_none.c
index a6e82759..ac126e01 100644
--- a/jsimd_none.c
+++ b/jsimd_none.c
@@ -36,6 +36,12 @@ jsimd_can_ycc_rgb (void)
return 0;
}
+/* jsimd_none.c stub: always reports that no SIMD null converter is
+ * available, so a caller that checks it keeps the plain C null_convert. */
+GLOBAL(int)
+jsimd_c_can_null_convert (void)
+{
+ return 0;
+}
+
GLOBAL(void)
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
@@ -57,6 +63,13 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
{
}
+/* jsimd_none.c stub: intentionally empty.  It only exists so the symbol
+ * resolves; jsimd_c_can_null_convert() in this file returns 0, so
+ * jccolor.c never selects this routine. */
+GLOBAL(void)
+jsimd_c_null_convert (j_compress_ptr cinfo,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows)
+{
+}
+
GLOBAL(int)
jsimd_can_h2v2_downsample (void)
{
diff --git a/simd/jsimd.h b/simd/jsimd.h
index 7067a2d7..504429cb 100644
--- a/simd/jsimd.h
+++ b/simd/jsimd.h
@@ -475,6 +475,11 @@ EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows));
+/* MIPS DSPr2 assembly implementation of the compression-side null color
+ * conversion (defined in simd/jsimd_mips_dspr2.S).  Unlike the generic
+ * jsimd_c_null_convert() wrapper it does not take cinfo: the image width
+ * and the component count are passed explicitly as the first and last
+ * arguments. */
+EXTERN(void) jsimd_c_null_convert_mips_dspr2
+ JPP((JDIMENSION img_width, JSAMPARRAY input_buf,
+ JSAMPIMAGE output_buf, JDIMENSION output_row,
+ int num_rows, int num_components));
+
/* SIMD Downsample */
EXTERN(void) jsimd_h2v2_downsample_mmx
JPP((JDIMENSION image_width, int max_v_samp_factor,
diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c
index aebd549a..62244ffb 100644
--- a/simd/jsimd_mips.c
+++ b/simd/jsimd_mips.c
@@ -139,6 +139,22 @@ jsimd_can_ycc_rgb (void)
return 0;
}
+/*
+ * Report whether the MIPS DSPr2 null color converter may be used.
+ * Returns 1 when it is usable, 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_c_can_null_convert (void)
+{
+  init_simd();
+
+  /* The assembly routine only handles 8-bit samples and 4-byte
+   * JDIMENSION values; refuse any other build configuration. */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* Usable only when DSPr2 support is flagged in simd_support. */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
GLOBAL(void)
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
@@ -262,6 +278,16 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
input_row, output_buf, num_rows);
}
+/*
+ * SIMD null color conversion for compression.
+ *
+ * Forwards to the MIPS DSPr2 assembly routine, passing the image width and
+ * component count taken from cinfo.  Does nothing when DSPr2 support is not
+ * flagged in simd_support; jccolor.c only installs this routine after
+ * jsimd_c_can_null_convert() has returned nonzero.
+ *
+ * The callee name must match the prototype in simd/jsimd.h and the entry
+ * point in simd/jsimd_mips_dspr2.S: jsimd_c_null_convert_mips_dspr2.
+ */
+GLOBAL(void)
+jsimd_c_null_convert (j_compress_ptr cinfo,
+                      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                      JDIMENSION output_row, int num_rows)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)
+    jsimd_c_null_convert_mips_dspr2 (cinfo->image_width, input_buf,
+                                     output_buf, output_row, num_rows,
+                                     cinfo->num_components);
+}
+
GLOBAL(int)
jsimd_can_h2v2_downsample (void)
{
diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S
index fa8f0c23..532966a5 100644
--- a/simd/jsimd_mips_dspr2.S
+++ b/simd/jsimd_mips_dspr2.S
@@ -25,6 +25,110 @@
#include "jsimd_mips_dspr2_asm.h"
/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_c_null_convert_mips_dspr2)
+/*
+ * a0     - cinfo->image_width
+ * a1     - input_buf
+ * a2     - output_buf
+ * a3     - output_row
+ * 16(sp) - num_rows
+ * 20(sp) - cinfo->num_components
+ *
+ * Null conversion for compression
+ *
+ * For every input row and every component ci, the bytes at
+ * (*input_buf)[ci + k * num_components], k = 0 .. image_width - 1, are
+ * stored consecutively into output_buf[ci][output_row].  After each row,
+ * input_buf advances by one pointer (4 bytes) and output_row by one.
+ *
+ * The row loop exists twice: labels 0-3 serve widths that are not a
+ * multiple of 4 (a byte-at-a-time loop for the image_width & 3 leftover
+ * samples followed by a 4x unrolled loop), labels 4-6 serve widths that are
+ * a multiple of 4 and use the unrolled loop only.
+ */
+
+    SAVE_REGS_ON_STACK 8, s0, s1
+
+    // The stack arguments are read at +8 relative to the entry offsets
+    // listed above, presumably because SAVE_REGS_ON_STACK 8 lowers sp by 8.
+    lw        t9, 24(sp)          // t9 = num_rows
+    lw        s0, 28(sp)          // s0 = cinfo->num_components
+    andi      t0, a0, 3           // t0 = cinfo->image_width & 3
+    beqz      t0, 4f              // no residual
+     nop
+0:
+    addiu     t9, t9, -1
+    bltz      t9, 7f              // all rows done
+     li       t1, 0               // t1 = ci = 0
+1:
+    sll       t3, t1, 2
+    lwx       t5, t3(a2)          // t5 = outptr = output_buf[ci]
+    lw        t2, 0(a1)           // t2 = inptr = *input_buf
+    sll       t4, a3, 2
+    lwx       t5, t4(t5)          // t5 = outptr = output_buf[ci][output_row]
+    addu      t2, t2, t1          // inptr += ci
+    addu      s1, t5, a0          // s1 = outptr + image_width (row end)
+    addu      t6, t5, t0          // t6 = outptr + residual count
+2:
+    lbu       t3, 0(t2)
+    addiu     t5, t5, 1
+    sb        t3, -1(t5)
+    bne       t6, t5, 2b
+     addu     t2, t2, s0          // inptr += num_components
+    // When image_width < 4 the residual loop has already written the whole
+    // row (t5 == s1).  The unrolled loop below tests its exit condition only
+    // after storing 4 bytes, so entering it here would run past the end of
+    // the row and never terminate; skip it.
+    beq       s1, t5, 8f
+     nop
+3:
+    lbu       t3, 0(t2)
+    addu      t4, t2, s0
+    addu      t7, t4, s0
+    addu      t8, t7, s0
+    addu      t2, t8, s0          // inptr += 4 * num_components
+    lbu       t4, 0(t4)
+    lbu       t7, 0(t7)
+    lbu       t8, 0(t8)
+    addiu     t5, t5, 4
+    sb        t3, -4(t5)
+    sb        t4, -3(t5)
+    sb        t7, -2(t5)
+    bne       s1, t5, 3b
+     sb       t8, -1(t5)
+8:
+    addiu     t1, t1, 1           // next component
+    bne       t1, s0, 1b
+     nop
+    addiu     a1, a1, 4           // input_buf++
+    bgez      t9, 0b
+     addiu    a3, a3, 1           // output_row++
+    b         7f
+     nop
+4:
+    addiu     t9, t9, -1
+    bltz      t9, 7f              // all rows done
+     li       t1, 0               // t1 = ci = 0
+5:
+    sll       t3, t1, 2
+    lwx       t5, t3(a2)          // t5 = outptr = output_buf[ci]
+    lw        t2, 0(a1)           // t2 = inptr = *input_buf
+    sll       t4, a3, 2
+    lwx       t5, t4(t5)          // t5 = outptr = output_buf[ci][output_row]
+    addu      t2, t2, t1          // inptr += ci
+    addu      s1, t5, a0          // s1 = outptr + image_width (row end)
+6:
+    lbu       t3, 0(t2)
+    addu      t4, t2, s0
+    addu      t7, t4, s0
+    addu      t8, t7, s0
+    addu      t2, t8, s0          // inptr += 4 * num_components
+    lbu       t4, 0(t4)
+    lbu       t7, 0(t7)
+    lbu       t8, 0(t8)
+    addiu     t5, t5, 4
+    sb        t3, -4(t5)
+    sb        t4, -3(t5)
+    sb        t7, -2(t5)
+    bne       s1, t5, 6b
+     sb       t8, -1(t5)
+    addiu     t1, t1, 1           // next component
+    bne       t1, s0, 5b
+     nop
+    addiu     a1, a1, 4           // input_buf++
+    bgez      t9, 4b
+     addiu    a3, a3, 1           // output_row++
+7:
+    RESTORE_REGS_FROM_STACK 8, s0, s1
+
+    j         ra
+     nop
+
+END(jsimd_c_null_convert_mips_dspr2)
+
+/*****************************************************************************/
/*
* jsimd_extrgb_ycc_convert_mips_dspr2
* jsimd_extbgr_ycc_convert_mips_dspr2