about | summary | refs | log | tree | commit | diff
path: root/lib/src
diff options
context:
space:
mode:
author: DichenZhang1 <140119224+DichenZhang1@users.noreply.github.com> 2024-06-03 11:31:09 -0700
committer: GitHub <noreply@github.com> 2024-06-03 11:31:09 -0700
commit: 0f97353d1dd4b474b1f5dc6fea6d3ed1ff768f41 (patch)
tree: 93e1e8e4340c504cc44c239f11fec39f8625e905 /lib/src
parent: 22bca349e37736960a5fcc4b917b695d36e716e2 (diff)
download: libultrahdr-0f97353d1dd4b474b1f5dc6fea6d3ed1ff768f41.tar.gz
Multi-threading for tone mapping algorithm (#158)
Diffstat (limited to 'lib/src')
-rw-r--r-- lib/src/jpegr.cpp 101
1 files changed, 61 insertions, 40 deletions
diff --git a/lib/src/jpegr.cpp b/lib/src/jpegr.cpp
index 0ce70b5..bf631c9 100644
--- a/lib/src/jpegr.cpp
+++ b/lib/src/jpegr.cpp
@@ -1759,7 +1759,6 @@ status_t JpegR::toneMap(jr_uncompressed_ptr src, jr_uncompressed_ptr dest,
dest->colorGamut = ULTRAHDR_COLORGAMUT_P3;
- size_t width = src->width;
size_t height = src->height;
ColorTransformFn hdrYuvToRgbFn = nullptr;
@@ -1810,52 +1809,74 @@ status_t JpegR::toneMap(jr_uncompressed_ptr src, jr_uncompressed_ptr dest,
uint8_t* luma_data = reinterpret_cast<uint8_t*>(dest->data);
uint8_t* chroma_data = reinterpret_cast<uint8_t*>(dest->chroma_data);
- float u_max = 0.0f;
-
- for (unsigned y = 0; y < height; y += 2) {
- for (unsigned x = 0; x < width; x += 2) {
- // We assume the input is P010, and output is YUV420
- float sdr_u_gamma = 0.0f;
- float sdr_v_gamma = 0.0f;
- for (int i = 0; i < 2; i++) {
- for (int j = 0; j < 2; j++) {
- Color hdr_yuv_gamma = getP010Pixel(src, x + j, y + i);
- Color hdr_rgb_gamma = hdrYuvToRgbFn(hdr_yuv_gamma);
-
- Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
-
- GlobalTonemapOutputs tonemap_outputs =
- hlgGlobalTonemap({hdr_rgb.r, hdr_rgb.g, hdr_rgb.b}, kHlgHeadroom);
- Color sdr_rgb_linear_bt2100 = {{{tonemap_outputs.rgb_out[0], tonemap_outputs.rgb_out[1],
- tonemap_outputs.rgb_out[2]}}};
- Color sdr_rgb = hdrGamutConversionFn(sdr_rgb_linear_bt2100);
-
- // Hard clip out-of-gamut values;
- sdr_rgb = clampPixelFloat(sdr_rgb);
-
- Color sdr_rgb_gamma = srgbOetf(sdr_rgb);
- Color sdr_yuv_gamma = p3RgbToYuv(sdr_rgb_gamma);
-
- sdr_yuv_gamma += {{{0.0f, 0.5f, 0.5f}}};
+ const int threads = (std::min)(GetCPUCoreCount(), 4);
+ size_t rowStep = threads == 1 ? height : kJobSzInRows;
+ JobQueue jobQueue;
+ std::function<void()> toneMapInternal;
- if (u_max < hdr_yuv_gamma.u) {
- u_max = hdr_yuv_gamma.u;
+ toneMapInternal = [src, dest, luma_data, chroma_data, hdrInvOetf, hdrGamutConversionFn,
+ hdrYuvToRgbFn, luma_stride, chroma_stride, &jobQueue]() -> void {
+ size_t rowStart, rowEnd;
+ while (jobQueue.dequeueJob(rowStart, rowEnd)) {
+ for (size_t y = rowStart; y < rowEnd; y += 2) {
+ for (size_t x = 0; x < dest->width; x += 2) {
+ // We assume the input is P010, and output is YUV420
+ float sdr_u_gamma = 0.0f;
+ float sdr_v_gamma = 0.0f;
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 2; j++) {
+ Color hdr_yuv_gamma = getP010Pixel(src, x + j, y + i);
+ Color hdr_rgb_gamma = hdrYuvToRgbFn(hdr_yuv_gamma);
+
+ Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
+
+ GlobalTonemapOutputs tonemap_outputs =
+ hlgGlobalTonemap({hdr_rgb.r, hdr_rgb.g, hdr_rgb.b}, kHlgHeadroom);
+ Color sdr_rgb_linear_bt2100 = {{{tonemap_outputs.rgb_out[0],
+ tonemap_outputs.rgb_out[1],
+ tonemap_outputs.rgb_out[2]}}};
+ Color sdr_rgb = hdrGamutConversionFn(sdr_rgb_linear_bt2100);
+
+ // Hard clip out-of-gamut values;
+ sdr_rgb = clampPixelFloat(sdr_rgb);
+
+ Color sdr_rgb_gamma = srgbOetf(sdr_rgb);
+ Color sdr_yuv_gamma = srgbRgbToYuv(sdr_rgb_gamma);
+
+ sdr_yuv_gamma += {{{0.0f, 0.5f, 0.5f}}};
+
+ size_t out_y_idx = (y + i) * luma_stride + x + j;
+ luma_data[out_y_idx] = ScaleTo8Bit(sdr_yuv_gamma.y);
+
+ sdr_u_gamma += sdr_yuv_gamma.u * 0.25f;
+ sdr_v_gamma += sdr_yuv_gamma.v * 0.25f;
+ }
}
-
- size_t out_y_idx = (y + i) * luma_stride + x + j;
- luma_data[out_y_idx] = ScaleTo8Bit(sdr_yuv_gamma.y);
-
- sdr_u_gamma += sdr_yuv_gamma.u * 0.25f;
- sdr_v_gamma += sdr_yuv_gamma.v * 0.25f;
+ size_t out_chroma_idx = x / 2 + (y / 2) * chroma_stride;
+ size_t offset_cr = chroma_stride * (dest->height / 2);
+ chroma_data[out_chroma_idx] = ScaleTo8Bit(sdr_u_gamma);
+ chroma_data[out_chroma_idx + offset_cr] = ScaleTo8Bit(sdr_v_gamma);
}
}
- size_t out_chroma_idx = x / 2 + (y / 2) * chroma_stride;
- size_t offset_cr = chroma_stride * (dest->height / 2);
- chroma_data[out_chroma_idx] = ScaleTo8Bit(sdr_u_gamma);
- chroma_data[out_chroma_idx + offset_cr] = ScaleTo8Bit(sdr_v_gamma);
}
+ };
+
+ // tone map
+ std::vector<std::thread> workers;
+ for (int th = 0; th < threads - 1; th++) {
+ workers.push_back(std::thread(toneMapInternal));
}
+ rowStep = (threads == 1 ? height : kJobSzInRows) / kMapDimensionScaleFactor;
+ for (size_t rowStart = 0; rowStart < height;) {
+ size_t rowEnd = (std::min)(rowStart + rowStep, height);
+ jobQueue.enqueueJob(rowStart, rowEnd);
+ rowStart = rowEnd;
+ }
+ jobQueue.markQueueForEnd();
+ toneMapInternal();
+ std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });
+
return JPEGR_NO_ERROR;
}