diff options
author | DRC <dcommander@users.sourceforge.net> | 2015-01-20 10:33:32 +0000 |
---|---|---|
committer | DRC <dcommander@users.sourceforge.net> | 2015-01-20 10:33:32 +0000 |
commit | 0e94025ac72a481ca822b9c876dcada6db977d8b (patch) | |
tree | 7a7268bc915487a10c06f72430ae2bb793318168 | |
parent | c60d662ee469712047b4159f829b8d7c886802bd (diff) | |
download | libjpeg-turbo-0e94025ac72a481ca822b9c876dcada6db977d8b.tar.gz |
Introduce fast paths to speed up NULL color conversion somewhat, particularly when using 64-bit code; on the decompression side, the "slow path" now also uses an approach similar to that of the compression side (with the component loop outside of the column loop rather than inside). This is faster when using 32-bit code.
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.4.x@1521 632fc199-4ca6-4c93-a231-07263d6284db
-rw-r--r-- | ChangeLog.txt | 5 | ||||
-rw-r--r-- | jccolor.c | 52 | ||||
-rw-r--r-- | jdcolor.c | 54 |
3 files changed, 88 insertions, 23 deletions
diff --git a/ChangeLog.txt b/ChangeLog.txt index 885a3268..5e05a384 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -22,6 +22,11 @@ See the comments in Makefile.am for information on how to re-enable the tests and to specify an expected result for them based on the particulars of your platform. +[4] The NULL color conversion routines have been significantly optimized, +which speeds up the compression of RGB and CMYK JPEGs by 5-20% when using +64-bit code and 0-3% when using 32-bit code, and the decompression of those +images by 10-30% when using 64-bit code and 3-12% when using 32-bit code. + 1.4.0 ===== @@ -5,7 +5,7 @@ * Copyright (C) 1991-1996, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB - * Copyright (C) 2009-2012, D. R. Commander. + * Copyright (C) 2009-2012, 2015 D. R. Commander. * Copyright (C) 2014, MIPS Technologies, Inc., California * For conditions of distribution and use, see the accompanying README file. * @@ -464,24 +464,54 @@ null_convert (j_compress_ptr cinfo, JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; - register JSAMPROW outptr; + register JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3; register JDIMENSION col; register int ci; int nc = cinfo->num_components; JDIMENSION num_cols = cinfo->image_width; - while (--num_rows >= 0) { - /* It seems fastest to make a separate pass for each component. 
*/ - for (ci = 0; ci < nc; ci++) { - inptr = *input_buf; - outptr = output_buf[ci][output_row]; + if (nc == 3) { + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ - inptr += nc; + outptr0[col] = *inptr++; + outptr1[col] = *inptr++; + outptr2[col] = *inptr++; } } - input_buf++; - output_row++; + } else if (nc == 4) { + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + outptr3 = output_buf[3][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + outptr0[col] = *inptr++; + outptr1[col] = *inptr++; + outptr2[col] = *inptr++; + outptr3[col] = *inptr++; + } + } + } else { + while (--num_rows >= 0) { + /* It seems fastest to make a separate pass for each component. */ + for (ci = 0; ci < nc; ci++) { + inptr = *input_buf; + outptr = output_buf[ci][output_row]; + for (col = 0; col < num_cols; col++) { + outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ + inptr += nc; + } + } + input_buf++; + output_row++; + } } } @@ -6,7 +6,7 @@ * Modified 2011 by Guido Vollbeding. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB - * Copyright (C) 2009, 2011-2012, 2014, D. R. Commander. + * Copyright (C) 2009, 2011-2012, 2014-2015, D. R. Commander. * Copyright (C) 2013, Linaro Limited. * For conditions of distribution and use, see the accompanying README file. 
* @@ -364,23 +364,53 @@ null_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { - register JSAMPROW inptr, outptr; - register JDIMENSION count; + register JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr; + register JDIMENSION col; register int num_components = cinfo->num_components; JDIMENSION num_cols = cinfo->output_width; int ci; - while (--num_rows >= 0) { - for (ci = 0; ci < num_components; ci++) { - inptr = input_buf[ci][input_row]; - outptr = output_buf[0] + ci; - for (count = num_cols; count > 0; count--) { - *outptr = *inptr++; /* needn't bother with GETJSAMPLE() here */ - outptr += num_components; + if (num_components == 3) { + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + *outptr++ = inptr0[col]; + *outptr++ = inptr1[col]; + *outptr++ = inptr2[col]; } } - input_row++; - output_buf++; + } else if (num_components == 4) { + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + inptr3 = input_buf[3][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + *outptr++ = inptr0[col]; + *outptr++ = inptr1[col]; + *outptr++ = inptr2[col]; + *outptr++ = inptr3[col]; + } + } + } else { + while (--num_rows >= 0) { + for (ci = 0; ci < num_components; ci++) { + inptr = input_buf[ci][input_row]; + outptr = *output_buf; + for (col = 0; col < num_cols; col++) { + outptr[ci] = inptr[col]; + outptr += num_components; + } + } + output_buf++; + input_row++; + } } } |