diff options
author | DichenZhang1 <140119224+DichenZhang1@users.noreply.github.com> | 2024-06-03 11:31:09 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-03 11:31:09 -0700 |
commit | 0f97353d1dd4b474b1f5dc6fea6d3ed1ff768f41 (patch) | |
tree | 93e1e8e4340c504cc44c239f11fec39f8625e905 /lib/src | |
parent | 22bca349e37736960a5fcc4b917b695d36e716e2 (diff) | |
download | libultrahdr-0f97353d1dd4b474b1f5dc6fea6d3ed1ff768f41.tar.gz |
Multi-threading for tone mapping algorithm (#158)
Diffstat (limited to 'lib/src')
-rw-r--r-- | lib/src/jpegr.cpp | 101 |
1 file changed, 61 insertions, 40 deletions
diff --git a/lib/src/jpegr.cpp b/lib/src/jpegr.cpp index 0ce70b5..bf631c9 100644 --- a/lib/src/jpegr.cpp +++ b/lib/src/jpegr.cpp @@ -1759,7 +1759,6 @@ status_t JpegR::toneMap(jr_uncompressed_ptr src, jr_uncompressed_ptr dest, dest->colorGamut = ULTRAHDR_COLORGAMUT_P3; - size_t width = src->width; size_t height = src->height; ColorTransformFn hdrYuvToRgbFn = nullptr; @@ -1810,52 +1809,74 @@ status_t JpegR::toneMap(jr_uncompressed_ptr src, jr_uncompressed_ptr dest, uint8_t* luma_data = reinterpret_cast<uint8_t*>(dest->data); uint8_t* chroma_data = reinterpret_cast<uint8_t*>(dest->chroma_data); - float u_max = 0.0f; - - for (unsigned y = 0; y < height; y += 2) { - for (unsigned x = 0; x < width; x += 2) { - // We assume the input is P010, and output is YUV420 - float sdr_u_gamma = 0.0f; - float sdr_v_gamma = 0.0f; - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 2; j++) { - Color hdr_yuv_gamma = getP010Pixel(src, x + j, y + i); - Color hdr_rgb_gamma = hdrYuvToRgbFn(hdr_yuv_gamma); - - Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma); - - GlobalTonemapOutputs tonemap_outputs = - hlgGlobalTonemap({hdr_rgb.r, hdr_rgb.g, hdr_rgb.b}, kHlgHeadroom); - Color sdr_rgb_linear_bt2100 = {{{tonemap_outputs.rgb_out[0], tonemap_outputs.rgb_out[1], - tonemap_outputs.rgb_out[2]}}}; - Color sdr_rgb = hdrGamutConversionFn(sdr_rgb_linear_bt2100); - - // Hard clip out-of-gamut values; - sdr_rgb = clampPixelFloat(sdr_rgb); - - Color sdr_rgb_gamma = srgbOetf(sdr_rgb); - Color sdr_yuv_gamma = p3RgbToYuv(sdr_rgb_gamma); - - sdr_yuv_gamma += {{{0.0f, 0.5f, 0.5f}}}; + const int threads = (std::min)(GetCPUCoreCount(), 4); + size_t rowStep = threads == 1 ? 
height : kJobSzInRows; + JobQueue jobQueue; + std::function<void()> toneMapInternal; - if (u_max < hdr_yuv_gamma.u) { - u_max = hdr_yuv_gamma.u; + toneMapInternal = [src, dest, luma_data, chroma_data, hdrInvOetf, hdrGamutConversionFn, + hdrYuvToRgbFn, luma_stride, chroma_stride, &jobQueue]() -> void { + size_t rowStart, rowEnd; + while (jobQueue.dequeueJob(rowStart, rowEnd)) { + for (size_t y = rowStart; y < rowEnd; y += 2) { + for (size_t x = 0; x < dest->width; x += 2) { + // We assume the input is P010, and output is YUV420 + float sdr_u_gamma = 0.0f; + float sdr_v_gamma = 0.0f; + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + Color hdr_yuv_gamma = getP010Pixel(src, x + j, y + i); + Color hdr_rgb_gamma = hdrYuvToRgbFn(hdr_yuv_gamma); + + Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma); + + GlobalTonemapOutputs tonemap_outputs = + hlgGlobalTonemap({hdr_rgb.r, hdr_rgb.g, hdr_rgb.b}, kHlgHeadroom); + Color sdr_rgb_linear_bt2100 = {{{tonemap_outputs.rgb_out[0], + tonemap_outputs.rgb_out[1], + tonemap_outputs.rgb_out[2]}}}; + Color sdr_rgb = hdrGamutConversionFn(sdr_rgb_linear_bt2100); + + // Hard clip out-of-gamut values; + sdr_rgb = clampPixelFloat(sdr_rgb); + + Color sdr_rgb_gamma = srgbOetf(sdr_rgb); + Color sdr_yuv_gamma = srgbRgbToYuv(sdr_rgb_gamma); + + sdr_yuv_gamma += {{{0.0f, 0.5f, 0.5f}}}; + + size_t out_y_idx = (y + i) * luma_stride + x + j; + luma_data[out_y_idx] = ScaleTo8Bit(sdr_yuv_gamma.y); + + sdr_u_gamma += sdr_yuv_gamma.u * 0.25f; + sdr_v_gamma += sdr_yuv_gamma.v * 0.25f; + } } - - size_t out_y_idx = (y + i) * luma_stride + x + j; - luma_data[out_y_idx] = ScaleTo8Bit(sdr_yuv_gamma.y); - - sdr_u_gamma += sdr_yuv_gamma.u * 0.25f; - sdr_v_gamma += sdr_yuv_gamma.v * 0.25f; + size_t out_chroma_idx = x / 2 + (y / 2) * chroma_stride; + size_t offset_cr = chroma_stride * (dest->height / 2); + chroma_data[out_chroma_idx] = ScaleTo8Bit(sdr_u_gamma); + chroma_data[out_chroma_idx + offset_cr] = ScaleTo8Bit(sdr_v_gamma); } } - size_t out_chroma_idx 
= x / 2 + (y / 2) * chroma_stride; - size_t offset_cr = chroma_stride * (dest->height / 2); - chroma_data[out_chroma_idx] = ScaleTo8Bit(sdr_u_gamma); - chroma_data[out_chroma_idx + offset_cr] = ScaleTo8Bit(sdr_v_gamma); } + }; + + // tone map + std::vector<std::thread> workers; + for (int th = 0; th < threads - 1; th++) { + workers.push_back(std::thread(toneMapInternal)); } + rowStep = (threads == 1 ? height : kJobSzInRows) / kMapDimensionScaleFactor; + for (size_t rowStart = 0; rowStart < height;) { + size_t rowEnd = (std::min)(rowStart + rowStep, height); + jobQueue.enqueueJob(rowStart, rowEnd); + rowStart = rowEnd; + } + jobQueue.markQueueForEnd(); + toneMapInternal(); + std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); }); + return JPEGR_NO_ERROR; } |