diff options
author | DRC <dcommander@users.sourceforge.net> | 2014-05-15 18:26:01 +0000 |
---|---|---|
committer | DRC <dcommander@users.sourceforge.net> | 2014-05-15 18:26:01 +0000 |
commit | 1b3fd7eead3ae2c83f88df43078878f0883aeea1 (patch) | |
tree | f0d3e9cfbeb32da2030be33859e603e0c84d7018 | |
parent | a3c3bbc2f43074900fb4c5062c0e1b9286f113e7 (diff) | |
download | libjpeg-turbo-1b3fd7eead3ae2c83f88df43078878f0883aeea1.tar.gz |
SIMD-accelerated NULL convert routine for MIPS DSPr2
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1304 632fc199-4ca6-4c93-a231-07263d6284db
-rw-r--r-- | jccolor.c | 71 | ||||
-rw-r--r-- | jsimd.h | 5 | ||||
-rw-r--r-- | jsimd_none.c | 13 | ||||
-rw-r--r-- | simd/jsimd.h | 5 | ||||
-rw-r--r-- | simd/jsimd_mips.c | 26 | ||||
-rw-r--r-- | simd/jsimd_mips_dspr2.S | 104 |
6 files changed, 201 insertions, 23 deletions
@@ -587,19 +587,24 @@ jinit_color_converter (j_compress_ptr cinfo) if (rgb_red[cinfo->in_color_space] == 0 && rgb_green[cinfo->in_color_space] == 1 && rgb_blue[cinfo->in_color_space] == 2 && - rgb_pixelsize[cinfo->in_color_space] == 3) - cconvert->pub.color_convert = null_convert; - else if (cinfo->in_color_space == JCS_RGB || - cinfo->in_color_space == JCS_EXT_RGB || - cinfo->in_color_space == JCS_EXT_RGBX || - cinfo->in_color_space == JCS_EXT_BGR || - cinfo->in_color_space == JCS_EXT_BGRX || - cinfo->in_color_space == JCS_EXT_XBGR || - cinfo->in_color_space == JCS_EXT_XRGB || - cinfo->in_color_space == JCS_EXT_RGBA || - cinfo->in_color_space == JCS_EXT_BGRA || - cinfo->in_color_space == JCS_EXT_ABGR || - cinfo->in_color_space == JCS_EXT_ARGB) + rgb_pixelsize[cinfo->in_color_space] == 3) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else if (cinfo->in_color_space == JCS_RGB || + cinfo->in_color_space == JCS_EXT_RGB || + cinfo->in_color_space == JCS_EXT_RGBX || + cinfo->in_color_space == JCS_EXT_BGR || + cinfo->in_color_space == JCS_EXT_BGRX || + cinfo->in_color_space == JCS_EXT_XBGR || + cinfo->in_color_space == JCS_EXT_XRGB || + cinfo->in_color_space == JCS_EXT_RGBA || + cinfo->in_color_space == JCS_EXT_BGRA || + cinfo->in_color_space == JCS_EXT_ABGR || + cinfo->in_color_space == JCS_EXT_ARGB) cconvert->pub.color_convert = rgb_rgb_convert; else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); @@ -625,18 +630,28 @@ jinit_color_converter (j_compress_ptr cinfo) cconvert->pub.start_pass = rgb_ycc_start; cconvert->pub.color_convert = rgb_ycc_convert; } - } else if (cinfo->in_color_space == JCS_YCbCr) - cconvert->pub.color_convert = null_convert; - else + } else if (cinfo->in_color_space == JCS_YCbCr) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + 
cconvert->pub.color_convert = null_convert; + } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; case JCS_CMYK: if (cinfo->num_components != 4) ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); - if (cinfo->in_color_space == JCS_CMYK) - cconvert->pub.color_convert = null_convert; - else + if (cinfo->in_color_space == JCS_CMYK) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; @@ -646,9 +661,14 @@ jinit_color_converter (j_compress_ptr cinfo) if (cinfo->in_color_space == JCS_CMYK) { cconvert->pub.start_pass = rgb_ycc_start; cconvert->pub.color_convert = cmyk_ycck_convert; - } else if (cinfo->in_color_space == JCS_YCCK) - cconvert->pub.color_convert = null_convert; - else + } else if (cinfo->in_color_space == JCS_YCCK) { +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; + } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; @@ -656,7 +676,12 @@ jinit_color_converter (j_compress_ptr cinfo) if (cinfo->jpeg_color_space != cinfo->in_color_space || cinfo->num_components != cinfo->input_components) ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); - cconvert->pub.color_convert = null_convert; +#if defined(__mips__) + if (jsimd_c_can_null_convert()) + cconvert->pub.color_convert = jsimd_c_null_convert; + else +#endif + cconvert->pub.color_convert = null_convert; break; } } @@ -40,6 +40,7 @@ EXTERN(int) jsimd_can_rgb_ycc JPP((void)); EXTERN(int) jsimd_can_rgb_gray JPP((void)); EXTERN(int) jsimd_can_ycc_rgb JPP((void)); +EXTERN(int) jsimd_c_can_null_convert JPP((void)); EXTERN(void) jsimd_rgb_ycc_convert JPP((j_compress_ptr cinfo, @@ -53,6 +54,10 @@ EXTERN(void) jsimd_ycc_rgb_convert JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)); 
+EXTERN(void) jsimd_c_null_convert + JPP((j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); EXTERN(int) jsimd_can_h2v2_downsample JPP((void)); EXTERN(int) jsimd_can_h2v1_downsample JPP((void)); diff --git a/jsimd_none.c b/jsimd_none.c index a6e82759..ac126e01 100644 --- a/jsimd_none.c +++ b/jsimd_none.c @@ -36,6 +36,12 @@ jsimd_can_ycc_rgb (void) return 0; } +GLOBAL(int) +jsimd_c_can_null_convert (void) +{ + return 0; +} + GLOBAL(void) jsimd_rgb_ycc_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, @@ -57,6 +63,13 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, { } +GLOBAL(void) +jsimd_c_null_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + GLOBAL(int) jsimd_can_h2v2_downsample (void) { diff --git a/simd/jsimd.h b/simd/jsimd.h index 7067a2d7..504429cb 100644 --- a/simd/jsimd.h +++ b/simd/jsimd.h @@ -475,6 +475,11 @@ EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2 JSAMPIMAGE input_buf, JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)); +EXTERN(void) jsimd_c_null_convert_mips_dspr2 + JPP((JDIMENSION img_width, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows, int num_components)); + /* SIMD Downsample */ EXTERN(void) jsimd_h2v2_downsample_mmx JPP((JDIMENSION image_width, int max_v_samp_factor, diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c index aebd549a..62244ffb 100644 --- a/simd/jsimd_mips.c +++ b/simd/jsimd_mips.c @@ -139,6 +139,22 @@ jsimd_can_ycc_rgb (void) return 0; } +GLOBAL(int) +jsimd_c_can_null_convert (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + + return 0; +} + GLOBAL(void) jsimd_rgb_ycc_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, @@ -262,6 
+278,16 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, input_row, output_buf, num_rows); } +GLOBAL(void) +jsimd_c_null_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_c_null_convert_mips_dspr2 (cinfo->image_width, input_buf, + output_buf, output_row, num_rows, cinfo->num_components); +} + GLOBAL(int) jsimd_can_h2v2_downsample (void) { diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S index fa8f0c23..532966a5 100644 --- a/simd/jsimd_mips_dspr2.S +++ b/simd/jsimd_mips_dspr2.S @@ -25,6 +25,110 @@ #include "jsimd_mips_dspr2_asm.h" /*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_c_null_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - output_buf + * a3 - output_row + * 16(sp) - num_rows + * 20(sp) - cinfo->num_components + * + * Null conversion for compression + */ + + SAVE_REGS_ON_STACK 8, s0, s1 + + lw t9, 24(sp) // t9 = num_rows + lw s0, 28(sp) // s0 = cinfo->num_components + andi t0, a0, 3 // t0 = cinfo->image_width & 3 + beqz t0, 4f // no residual + nop +0: + addiu t9, t9, -1 + bltz t9, 7f + li t1, 0 +1: + sll t3, t1, 2 + lwx t5, t3(a2) // t5 = outptr = output_buf[ci] + lw t2, 0(a1) // t2 = inptr = *input_buf + sll t4, a3, 2 + lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row] + addu t2, t2, t1 + addu s1, t5, a0 + addu t6, t5, t0 +2: + lbu t3, 0(t2) + addiu t5, t5, 1 + sb t3, -1(t5) + bne t6, t5, 2b + addu t2, t2, s0 +3: + lbu t3, 0(t2) + addu t4, t2, s0 + addu t7, t4, s0 + addu t8, t7, s0 + addu t2, t8, s0 + lbu t4, 0(t4) + lbu t7, 0(t7) + lbu t8, 0(t8) + addiu t5, t5, 4 + sb t3, -4(t5) + sb t4, -3(t5) + sb t7, -2(t5) + bne s1, t5, 3b + sb t8, -1(t5) + addiu t1, t1, 1 + bne t1, s0, 1b + nop + addiu a1, a1, 4 + bgez t9, 0b + addiu a3, a3, 1 + b 7f + nop +4: + addiu t9, t9, -1 + bltz t9, 7f + li t1, 0 +5: + sll t3, t1, 2 + lwx t5, t3(a2)
// t5 = outptr = output_buf[ci] + lw t2, 0(a1) // t2 = inptr = *input_buf + sll t4, a3, 2 + lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row] + addu t2, t2, t1 + addu s1, t5, a0 + addu t6, t5, t0 +6: + lbu t3, 0(t2) + addu t4, t2, s0 + addu t7, t4, s0 + addu t8, t7, s0 + addu t2, t8, s0 + lbu t4, 0(t4) + lbu t7, 0(t7) + lbu t8, 0(t8) + addiu t5, t5, 4 + sb t3, -4(t5) + sb t4, -3(t5) + sb t7, -2(t5) + bne s1, t5, 6b + sb t8, -1(t5) + addiu t1, t1, 1 + bne t1, s0, 5b + nop + addiu a1, a1, 4 + bgez t9, 4b + addiu a3, a3, 1 +7: + RESTORE_REGS_FROM_STACK 8, s0, s1 + + j ra + nop + +END(jsimd_c_null_convert_mips_dspr2) + +/*****************************************************************************/ /* * jsimd_extrgb_ycc_convert_mips_dspr2 * jsimd_extbgr_ycc_convert_mips_dspr2 |