aboutsummaryrefslogtreecommitdiff
path: root/jccolor.c
diff options
context:
space:
mode:
Diffstat (limited to 'jccolor.c')
-rw-r--r--jccolor.c68
1 files changed, 68 insertions, 0 deletions
diff --git a/jccolor.c b/jccolor.c
index 0a8a4b5..57a76c3 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -12,6 +12,8 @@
#include "jinclude.h"
#include "jpeglib.h"
+// this enables unrolling null_convert's loop, and reading/write ints for speed
+#define ENABLE_ANDROID_NULL_CONVERT
/* Private subobject */
@@ -298,6 +300,36 @@ grayscale_convert (j_compress_ptr cinfo,
}
}
+#ifdef ENABLE_ANDROID_NULL_CONVERT
+
+typedef unsigned long UINT32;
+
+#define B0(n) ((n) & 0xFF)
+#define B1(n) (((n) >> 8) & 0xFF)
+#define B2(n) (((n) >> 16) & 0xFF)
+#define B3(n) ((n) >> 24)
+
+#define PACK(a, b, c, d) ((a) | ((b) << 8) | ((c) << 16) | ((d) << 24))
+
+static int ptr_is_quad(const void* p)
+{
+ return (((const char*)p - (const char*)0) & 3) == 0;
+}
+
+static void copyquads(const UINT32 in[], UINT32 out0[], UINT32 out1[], UINT32 out2[], int col4)
+{
+ do {
+ UINT32 src0 = *in++;
+ UINT32 src1 = *in++;
+ UINT32 src2 = *in++;
+ // LEndian
+ *out0++ = PACK(B0(src0), B3(src0), B2(src1), B1(src2));
+ *out1++ = PACK(B1(src0), B0(src1), B3(src1), B2(src2));
+ *out2++ = PACK(B2(src0), B1(src1), B0(src2), B3(src2));
+ } while (--col4 != 0);
+}
+
+#endif
/*
* Convert some rows of samples to the JPEG colorspace.
@@ -317,6 +349,42 @@ null_convert (j_compress_ptr cinfo,
int nc = cinfo->num_components;
JDIMENSION num_cols = cinfo->image_width;
+#ifdef ENABLE_ANDROID_NULL_CONVERT
+ if (1 == num_rows && 3 == nc && num_cols > 0) {
+ JSAMPROW inptr = *input_buf;
+ JSAMPROW outptr0 = output_buf[0][output_row];
+ JSAMPROW outptr1 = output_buf[1][output_row];
+ JSAMPROW outptr2 = output_buf[2][output_row];
+
+ int col = num_cols;
+ int col4 = col >> 2;
+ if (col4 > 0 && ptr_is_quad(inptr) && ptr_is_quad(outptr0) &&
+ ptr_is_quad(outptr1) && ptr_is_quad(outptr2)) {
+
+ const UINT32* in = (const UINT32*)inptr;
+ UINT32* out0 = (UINT32*)outptr0;
+ UINT32* out1 = (UINT32*)outptr1;
+ UINT32* out2 = (UINT32*)outptr2;
+ copyquads(in, out0, out1, out2, col4);
+ col &= 3;
+ if (0 == col)
+ return;
+ col4 <<= 2;
+ inptr += col4 * 3; /* we read this 3 times per in copyquads */
+ outptr0 += col4;
+ outptr1 += col4;
+ outptr2 += col4;
+ /* fall through to while-loop */
+ }
+ do {
+ *outptr0++ = *inptr++;
+ *outptr1++ = *inptr++;
+ *outptr2++ = *inptr++;
+ } while (--col != 0);
+ return;
+ }
+SLOW:
+#endif
while (--num_rows >= 0) {
/* It seems fastest to make a separate pass for each component. */
for (ci = 0; ci < nc; ci++) {