aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: DRC <dcommander@users.sourceforge.net> 2015-01-20 10:33:32 +0000
committer: DRC <dcommander@users.sourceforge.net> 2015-01-20 10:33:32 +0000
commit: 0e94025ac72a481ca822b9c876dcada6db977d8b (patch)
tree: 7a7268bc915487a10c06f72430ae2bb793318168
parent: c60d662ee469712047b4159f829b8d7c886802bd (diff)
download: libjpeg-turbo-0e94025ac72a481ca822b9c876dcada6db977d8b.tar.gz
Introduce fast paths to speed up NULL color conversion somewhat, particularly when using 64-bit code; on the decompression side, the "slow path" now also uses an approach similar to that of the compression side (with the component loop outside of the column loop rather than inside). This is faster when using 32-bit code.
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.4.x@1521 632fc199-4ca6-4c93-a231-07263d6284db
-rw-r--r-- ChangeLog.txt 5
-rw-r--r-- jccolor.c 52
-rw-r--r-- jdcolor.c 54
3 files changed, 88 insertions, 23 deletions
diff --git a/ChangeLog.txt b/ChangeLog.txt
index 885a3268..5e05a384 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -22,6 +22,11 @@ See the comments in Makefile.am for information on how to re-enable the tests
and to specify an expected result for them based on the particulars of your
platform.
+[4] The NULL color conversion routines have been significantly optimized,
+which speeds up the compression of RGB and CMYK JPEGs by 5-20% when using
+64-bit code and 0-3% when using 32-bit code, and the decompression of those
+images by 10-30% when using 64-bit code and 3-12% when using 32-bit code.
+
1.4.0
=====
diff --git a/jccolor.c b/jccolor.c
index 4be75f71..34ea23b8 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -5,7 +5,7 @@
* Copyright (C) 1991-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2012, D. R. Commander.
+ * Copyright (C) 2009-2012, 2015 D. R. Commander.
* Copyright (C) 2014, MIPS Technologies, Inc., California
* For conditions of distribution and use, see the accompanying README file.
*
@@ -464,24 +464,54 @@ null_convert (j_compress_ptr cinfo,
JDIMENSION output_row, int num_rows)
{
register JSAMPROW inptr;
- register JSAMPROW outptr;
+ register JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3;
register JDIMENSION col;
register int ci;
int nc = cinfo->num_components;
JDIMENSION num_cols = cinfo->image_width;
- while (--num_rows >= 0) {
- /* It seems fastest to make a separate pass for each component. */
- for (ci = 0; ci < nc; ci++) {
- inptr = *input_buf;
- outptr = output_buf[ci][output_row];
+ if (nc == 3) {
+ while (--num_rows >= 0) {
+ inptr = *input_buf++;
+ outptr0 = output_buf[0][output_row];
+ outptr1 = output_buf[1][output_row];
+ outptr2 = output_buf[2][output_row];
+ output_row++;
for (col = 0; col < num_cols; col++) {
- outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
- inptr += nc;
+ outptr0[col] = *inptr++;
+ outptr1[col] = *inptr++;
+ outptr2[col] = *inptr++;
}
}
- input_buf++;
- output_row++;
+ } else if (nc == 4) {
+ while (--num_rows >= 0) {
+ inptr = *input_buf++;
+ outptr0 = output_buf[0][output_row];
+ outptr1 = output_buf[1][output_row];
+ outptr2 = output_buf[2][output_row];
+ outptr3 = output_buf[3][output_row];
+ output_row++;
+ for (col = 0; col < num_cols; col++) {
+ outptr0[col] = *inptr++;
+ outptr1[col] = *inptr++;
+ outptr2[col] = *inptr++;
+ outptr3[col] = *inptr++;
+ }
+ }
+ } else {
+ while (--num_rows >= 0) {
+ /* It seems fastest to make a separate pass for each component. */
+ for (ci = 0; ci < nc; ci++) {
+ inptr = *input_buf;
+ outptr = output_buf[ci][output_row];
+ for (col = 0; col < num_cols; col++) {
+ outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
+ inptr += nc;
+ }
+ }
+ input_buf++;
+ output_row++;
+ }
}
}
diff --git a/jdcolor.c b/jdcolor.c
index 779fa51f..38db90f2 100644
--- a/jdcolor.c
+++ b/jdcolor.c
@@ -6,7 +6,7 @@
* Modified 2011 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009, 2011-2012, 2014, D. R. Commander.
+ * Copyright (C) 2009, 2011-2012, 2014-2015, D. R. Commander.
* Copyright (C) 2013, Linaro Limited.
* For conditions of distribution and use, see the accompanying README file.
*
@@ -364,23 +364,53 @@ null_convert (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows)
{
- register JSAMPROW inptr, outptr;
- register JDIMENSION count;
+ register JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr;
+ register JDIMENSION col;
register int num_components = cinfo->num_components;
JDIMENSION num_cols = cinfo->output_width;
int ci;
- while (--num_rows >= 0) {
- for (ci = 0; ci < num_components; ci++) {
- inptr = input_buf[ci][input_row];
- outptr = output_buf[0] + ci;
- for (count = num_cols; count > 0; count--) {
- *outptr = *inptr++; /* needn't bother with GETJSAMPLE() here */
- outptr += num_components;
+ if (num_components == 3) {
+ while (--num_rows >= 0) {
+ inptr0 = input_buf[0][input_row];
+ inptr1 = input_buf[1][input_row];
+ inptr2 = input_buf[2][input_row];
+ input_row++;
+ outptr = *output_buf++;
+ for (col = 0; col < num_cols; col++) {
+ *outptr++ = inptr0[col];
+ *outptr++ = inptr1[col];
+ *outptr++ = inptr2[col];
}
}
- input_row++;
- output_buf++;
+ } else if (num_components == 4) {
+ while (--num_rows >= 0) {
+ inptr0 = input_buf[0][input_row];
+ inptr1 = input_buf[1][input_row];
+ inptr2 = input_buf[2][input_row];
+ inptr3 = input_buf[3][input_row];
+ input_row++;
+ outptr = *output_buf++;
+ for (col = 0; col < num_cols; col++) {
+ *outptr++ = inptr0[col];
+ *outptr++ = inptr1[col];
+ *outptr++ = inptr2[col];
+ *outptr++ = inptr3[col];
+ }
+ }
+ } else {
+ while (--num_rows >= 0) {
+ for (ci = 0; ci < num_components; ci++) {
+ inptr = input_buf[ci][input_row];
+ outptr = *output_buf;
+ for (col = 0; col < num_cols; col++) {
+ outptr[ci] = inptr[col];
+ outptr += num_components;
+ }
+ }
+ output_buf++;
+ input_row++;
+ }
}
}