diff options
author | Jason Macnak <natsu@google.com> | 2024-02-21 13:10:41 -0800 |
---|---|---|
committer | swiftshader-scoped@luci-project-accounts.iam.gserviceaccount.com <swiftshader-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2024-02-26 18:50:19 +0000 |
commit | bbe6452b420c5ddc4b0fd421b0a3ce271262f4ca (patch) | |
tree | d47db43706c69888a2bda00eb991c3c1efe0d6b7 | |
parent | 0f69b790c7a491e103802870b2f670c5936b9930 (diff) | |
download | swiftshader-bbe6452b420c5ddc4b0fd421b0a3ce271262f4ca.tar.gz |
Support ycbcr conversion linear filtering
Updates SwiftShader to perform separate sampling and filtering for luma and
chroma to avoid using the wrong offsets for interpolation.
Bug: b/324625557
Test: android.media.decoder.cts.DecodeAccuracyTest
Test: dEQP-VK.*
Change-Id: I017586a19f24ccfab18fba457be0942d31ec9bf8
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/73128
Tested-by: Jason Macnak <natsu@google.com>
Commit-Queue: Jason Macnak <natsu@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Presubmit-Ready: Jason Macnak <natsu@google.com>
-rw-r--r-- | src/Device/Sampler.hpp | 3 | ||||
-rw-r--r-- | src/Pipeline/SamplerCore.cpp | 840 | ||||
-rw-r--r-- | src/Pipeline/SamplerCore.hpp | 6 | ||||
-rw-r--r-- | src/Pipeline/SpirvShaderSampling.cpp | 3 | ||||
-rw-r--r-- | src/Reactor/Print.hpp | 18 | ||||
-rw-r--r-- | src/Reactor/Reactor.hpp | 3 | ||||
-rw-r--r-- | src/Vulkan/VkPhysicalDevice.cpp | 1 | ||||
-rw-r--r-- | src/Vulkan/VkSampler.cpp | 3 | ||||
-rw-r--r-- | src/Vulkan/VkSampler.hpp | 3 |
9 files changed, 515 insertions, 365 deletions
diff --git a/src/Device/Sampler.hpp b/src/Device/Sampler.hpp index 7402b5c87..95973471b 100644 --- a/src/Device/Sampler.hpp +++ b/src/Device/Sampler.hpp @@ -105,6 +105,9 @@ struct Sampler VkSamplerYcbcrModelConversion ycbcrModel; bool studioSwing; // Narrow range bool swappedChroma; // Cb/Cr components in reverse order + FilterType chromaFilter; + VkChromaLocation chromaXOffset; + VkChromaLocation chromaYOffset; float mipLodBias = 0.0f; float maxAnisotropy = 0.0f; diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp index ab55c036a..855ac22b1 100644 --- a/src/Pipeline/SamplerCore.cpp +++ b/src/Pipeline/SamplerCore.cpp @@ -277,7 +277,7 @@ Float4 SamplerCore::applySwizzle(const Vector4f &c, VkComponentSwizzle swizzle, Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod) { - Short4 offset = *Pointer<Short4>(mipmap + halfOffset); + Short4 offset = *Pointer<UShort4>(mipmap + halfOffset); if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT) { @@ -477,11 +477,198 @@ Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, F } } +void SamplerCore::bilinearInterpolateFloat(Vector4f &output, const Short4 &uuuu0, const Short4 &vvvv0, Vector4f &c00, Vector4f &c01, Vector4f &c10, Vector4f &c11, const Pointer<Byte> &mipmap, bool interpolateComponent0, bool interpolateComponent1, bool interpolateComponent2, bool interpolateComponent3) +{ + int componentCount = textureComponentCount(); + + Float4 unnormalizedUUUU0 = (Float4(uuuu0) / Float4(1 << 16)) * Float4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, width))); + Float4 unnormalizedVVVV0 = (Float4(vvvv0) / Float4(1 << 16)) * Float4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, height))); + + Float4 frac0u = Frac(unnormalizedUUUU0); + Float4 frac0v = Frac(unnormalizedVVVV0); + + if(interpolateComponent0 && componentCount >= 1) + { + c00.x = Mix(c00.x, c10.x, frac0u); + c01.x = Mix(c01.x, c11.x, frac0u); + output.x = Mix(c00.x, c01.x, 
frac0v); + } + if(interpolateComponent1 && componentCount >= 2) + { + c00.y = Mix(c00.y, c10.y, frac0u); + c01.y = Mix(c01.y, c11.y, frac0u); + output.y = Mix(c00.y, c01.y, frac0v); + } + if(interpolateComponent2 && componentCount >= 3) + { + c00.z = Mix(c00.z, c10.z, frac0u); + c01.z = Mix(c01.z, c11.z, frac0u); + output.z = Mix(c00.z, c01.z, frac0v); + } + if(interpolateComponent3 && componentCount >= 4) + { + c00.w = Mix(c00.w, c10.w, frac0u); + c01.w = Mix(c01.w, c11.w, frac0u); + output.w = Mix(c00.w, c01.w, frac0v); + } +} + +void SamplerCore::bilinearInterpolate(Vector4s &output, const Short4 &uuuu0, const Short4 &vvvv0, Vector4s &c00, Vector4s &c01, Vector4s &c10, Vector4s &c11, const Pointer<Byte> &mipmap) +{ + int componentCount = textureComponentCount(); + + // Fractions + UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, width))); + UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, height))); + + UShort4 f1u = ~f0u; + UShort4 f1v = ~f0v; + + UShort4 f0u0v = MulHigh(f0u, f0v); + UShort4 f1u0v = MulHigh(f1u, f0v); + UShort4 f0u1v = MulHigh(f0u, f1v); + UShort4 f1u1v = MulHigh(f1u, f1v); + + // Signed fractions + Short4 f1u1vs; + Short4 f0u1vs; + Short4 f1u0vs; + Short4 f0u0vs; + + if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3)) + { + f1u1vs = f1u1v >> 1; + f0u1vs = f0u1v >> 1; + f1u0vs = f1u0v >> 1; + f0u0vs = f0u0v >> 1; + } + + // Bilinear interpolation + if(componentCount >= 1) + { + if(has16bitTextureComponents() && hasUnsignedTextureComponent(0)) + { + c00.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0u) + MulHigh(As<UShort4>(c10.x), f0u); + c01.x = As<UShort4>(c01.x) - MulHigh(As<UShort4>(c01.x), f0u) + MulHigh(As<UShort4>(c11.x), f0u); + output.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v); + } + else + { + 
if(hasUnsignedTextureComponent(0)) + { + c00.x = MulHigh(As<UShort4>(c00.x), f1u1v); + c10.x = MulHigh(As<UShort4>(c10.x), f0u1v); + c01.x = MulHigh(As<UShort4>(c01.x), f1u0v); + c11.x = MulHigh(As<UShort4>(c11.x), f0u0v); + } + else + { + c00.x = MulHigh(c00.x, f1u1vs); + c10.x = MulHigh(c10.x, f0u1vs); + c01.x = MulHigh(c01.x, f1u0vs); + c11.x = MulHigh(c11.x, f0u0vs); + } + + output.x = (c00.x + c10.x) + (c01.x + c11.x); + if(!hasUnsignedTextureComponent(0)) output.x = AddSat(output.x, output.x); // Correct for signed fractions + } + } + + if(componentCount >= 2) + { + if(has16bitTextureComponents() && hasUnsignedTextureComponent(1)) + { + c00.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0u) + MulHigh(As<UShort4>(c10.y), f0u); + c01.y = As<UShort4>(c01.y) - MulHigh(As<UShort4>(c01.y), f0u) + MulHigh(As<UShort4>(c11.y), f0u); + output.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v); + } + else + { + if(hasUnsignedTextureComponent(1)) + { + c00.y = MulHigh(As<UShort4>(c00.y), f1u1v); + c10.y = MulHigh(As<UShort4>(c10.y), f0u1v); + c01.y = MulHigh(As<UShort4>(c01.y), f1u0v); + c11.y = MulHigh(As<UShort4>(c11.y), f0u0v); + } + else + { + c00.y = MulHigh(c00.y, f1u1vs); + c10.y = MulHigh(c10.y, f0u1vs); + c01.y = MulHigh(c01.y, f1u0vs); + c11.y = MulHigh(c11.y, f0u0vs); + } + + output.y = (c00.y + c10.y) + (c01.y + c11.y); + if(!hasUnsignedTextureComponent(1)) output.y = AddSat(output.y, output.y); // Correct for signed fractions + } + } + + if(componentCount >= 3) + { + if(has16bitTextureComponents() && hasUnsignedTextureComponent(2)) + { + c00.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0u) + MulHigh(As<UShort4>(c10.z), f0u); + c01.z = As<UShort4>(c01.z) - MulHigh(As<UShort4>(c01.z), f0u) + MulHigh(As<UShort4>(c11.z), f0u); + output.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v); + } + else + { + if(hasUnsignedTextureComponent(2)) + { + c00.z = 
MulHigh(As<UShort4>(c00.z), f1u1v); + c10.z = MulHigh(As<UShort4>(c10.z), f0u1v); + c01.z = MulHigh(As<UShort4>(c01.z), f1u0v); + c11.z = MulHigh(As<UShort4>(c11.z), f0u0v); + } + else + { + c00.z = MulHigh(c00.z, f1u1vs); + c10.z = MulHigh(c10.z, f0u1vs); + c01.z = MulHigh(c01.z, f1u0vs); + c11.z = MulHigh(c11.z, f0u0vs); + } + + output.z = (c00.z + c10.z) + (c01.z + c11.z); + if(!hasUnsignedTextureComponent(2)) output.z = AddSat(output.z, output.z); // Correct for signed fractions + } + } + + if(componentCount >= 4) + { + if(has16bitTextureComponents() && hasUnsignedTextureComponent(3)) + { + c00.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0u) + MulHigh(As<UShort4>(c10.w), f0u); + c01.w = As<UShort4>(c01.w) - MulHigh(As<UShort4>(c01.w), f0u) + MulHigh(As<UShort4>(c11.w), f0u); + output.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v); + } + else + { + if(hasUnsignedTextureComponent(3)) + { + c00.w = MulHigh(As<UShort4>(c00.w), f1u1v); + c10.w = MulHigh(As<UShort4>(c10.w), f0u1v); + c01.w = MulHigh(As<UShort4>(c01.w), f1u0v); + c11.w = MulHigh(As<UShort4>(c11.w), f0u0v); + } + else + { + c00.w = MulHigh(c00.w, f1u1vs); + c10.w = MulHigh(c10.w, f0u1vs); + c01.w = MulHigh(c01.w, f1u0vs); + c11.w = MulHigh(c11.w, f0u0vs); + } + + output.w = (c00.w + c10.w) + (c01.w + c11.w); + if(!hasUnsignedTextureComponent(3)) output.w = AddSat(output.w, output.w); // Correct for signed fractions + } + } +} + Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Vector4i &offset, const Int4 &sample, Float &lod, bool secondLOD) { Vector4s c; - int componentCount = textureComponentCount(); bool gather = (state.textureFilter == FILTER_GATHER); Pointer<Byte> mipmap = selectMipmap(texture, lod, secondLOD); @@ -489,191 +676,230 @@ Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, applyOffset(u, v, w, offset, mipmap); - Short4 uuuu = address(u, 
state.addressingModeU, mipmap); - Short4 vvvv = address(v, state.addressingModeV, mipmap); - Short4 wwww = address(w, state.addressingModeW, mipmap); + Short4 uuuu = address(u, state.addressingModeU); + Short4 vvvv = address(v, state.addressingModeV); + Short4 wwww = address(w, state.addressingModeW); Short4 layerIndex = computeLayerIndex16(a, mipmap); - if(state.textureFilter == FILTER_POINT) - { - c = sampleTexel(uuuu, vvvv, wwww, layerIndex, sample, mipmap, buffer); - } - else + if(isYcbcrFormat()) { - Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod); - Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod); - Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod); - Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod); + uint8_t lumaBits = 8; + uint8_t chromaBits = 8; + switch(state.textureFormat) + { + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + lumaBits = 8; + chromaBits = 8; + break; + case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: + lumaBits = 10; + chromaBits = 10; + break; + default: + UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat); + break; + } - Vector4s c00 = sampleTexel(uuuu0, vvvv0, wwww, layerIndex, sample, mipmap, buffer); - Vector4s c10 = sampleTexel(uuuu1, vvvv0, wwww, layerIndex, sample, mipmap, buffer); - Vector4s c01 = sampleTexel(uuuu0, vvvv1, wwww, layerIndex, sample, mipmap, buffer); - Vector4s c11 = sampleTexel(uuuu1, vvvv1, wwww, layerIndex, sample, mipmap, buffer); + // TODO: investigate apparent precision losses in dEQP-VK.ycbcr when sampling and interpolating with Short4. - if(!gather) // Blend + // Unnnormalized YUV values in [0, 255] for 8-bit formats, [0, 1023] for 10-bit formats. 
+ Vector4f yuv; + Vector4f yuv00; + Vector4f yuv10; + Vector4f yuv01; + Vector4f yuv11; + + if(state.textureFilter == FILTER_POINT) { - // Fractions - UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, width))); - UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, height))); + sampleLumaTexel(yuv, uuuu, vvvv, wwww, layerIndex, sample, mipmap, buffer); + } + else + { + Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod); + Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod); + Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod); + Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod); + + sampleLumaTexel(yuv00, uuuu0, vvvv0, wwww, layerIndex, sample, mipmap, buffer); + sampleLumaTexel(yuv01, uuuu0, vvvv1, wwww, layerIndex, sample, mipmap, buffer); + sampleLumaTexel(yuv10, uuuu1, vvvv0, wwww, layerIndex, sample, mipmap, buffer); + sampleLumaTexel(yuv11, uuuu1, vvvv1, wwww, layerIndex, sample, mipmap, buffer); - UShort4 f1u = ~f0u; - UShort4 f1v = ~f0v; + bilinearInterpolateFloat(yuv, uuuu0, vvvv0, yuv00, yuv01, yuv10, yuv11, mipmap, false, true, false, false); + } + + // Pointers to the planes of YCbCr images are stored in consecutive mipmap levels. + Pointer<Byte> mipmapU = Pointer<Byte>(mipmap + 1 * sizeof(Mipmap)); + Pointer<Byte> mipmapV = Pointer<Byte>(mipmap + 2 * sizeof(Mipmap)); + Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmapU + OFFSET(Mipmap, buffer)); // U/V for 2-plane interleaved formats. + Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmapV + OFFSET(Mipmap, buffer)); + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#textures-implict-reconstruction + // but using normalized coordinates. 
+ Float4 chromaU = u; + Float4 chromaV = v; + if(state.chromaXOffset == VK_CHROMA_LOCATION_COSITED_EVEN) + { + chromaU += (Float4(0.25f) / Float4(*Pointer<UInt4>(mipmapU + OFFSET(Mipmap, width)))); + } + if(state.chromaYOffset == VK_CHROMA_LOCATION_COSITED_EVEN) + { + chromaV += (Float4(0.25f) / Float4(*Pointer<UInt4>(mipmapU + OFFSET(Mipmap, height)))); + } - UShort4 f0u0v = MulHigh(f0u, f0v); - UShort4 f1u0v = MulHigh(f1u, f0v); - UShort4 f0u1v = MulHigh(f0u, f1v); - UShort4 f1u1v = MulHigh(f1u, f1v); + Short4 chromaUUUU = address(chromaU, state.addressingModeU); + Short4 chromaVVVV = address(chromaV, state.addressingModeV); + + if(state.chromaFilter == FILTER_POINT) + { + sampleChromaTexel(yuv, chromaUUUU, chromaVVVV, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV); + } + else + { + Short4 chromaUUUU0 = offsetSample(chromaUUUU, mipmapU, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod); + Short4 chromaVVVV0 = offsetSample(chromaVVVV, mipmapU, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod); + Short4 chromaUUUU1 = offsetSample(chromaUUUU, mipmapU, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod); + Short4 chromaVVVV1 = offsetSample(chromaVVVV, mipmapU, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod); + + sampleChromaTexel(yuv00, chromaUUUU0, chromaVVVV0, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV); + sampleChromaTexel(yuv01, chromaUUUU0, chromaVVVV1, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV); + sampleChromaTexel(yuv10, chromaUUUU1, chromaVVVV0, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV); + sampleChromaTexel(yuv11, chromaUUUU1, chromaVVVV1, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV); + + bilinearInterpolateFloat(yuv, chromaUUUU0, chromaVVVV0, yuv00, yuv01, yuv10, yuv11, mipmapU, true, false, true, false); + } + + if(state.swappedChroma) + { + std::swap(yuv.x, yuv.z); + } + + 
if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + { + // Scale to the output 15-bit. + c.x = UShort4(yuv.x) << (15 - chromaBits); + c.y = UShort4(yuv.y) << (15 - lumaBits); + c.z = UShort4(yuv.z) << (15 - chromaBits); + } + else + { + const float twoPowLumaBits = static_cast<float>(0x1u << lumaBits); + const float twoPowLumaBitsMinus8 = static_cast<float>(0x1u << (lumaBits - 8)); + const float twoPowChromaBits = static_cast<float>(0x1u << chromaBits); + const float twoPowChromaBitsMinus1 = static_cast<float>(0x1u << (chromaBits - 1)); + const float twoPowChromaBitsMinus8 = static_cast<float>(0x1u << (chromaBits - 8)); - // Signed fractions - Short4 f1u1vs; - Short4 f0u1vs; - Short4 f1u0vs; - Short4 f0u0vs; + Float4 y = Float4(yuv.y); + Float4 u = Float4(yuv.z); + Float4 v = Float4(yuv.x); - if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3)) + if(state.studioSwing) { - f1u1vs = f1u1v >> 1; - f0u1vs = f0u1v >> 1; - f1u0vs = f1u0v >> 1; - f0u0vs = f0u0v >> 1; + // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_NARROW + y = ((y / Float4(twoPowLumaBitsMinus8)) - Float4(16.0f)) / Float4(219.0f); + u = ((u / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f); + v = ((v / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f); } - - // Bilinear interpolation - if(componentCount >= 1) + else { - if(has16bitTextureComponents() && hasUnsignedTextureComponent(0)) - { - c00.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0u) + MulHigh(As<UShort4>(c10.x), f0u); - c01.x = As<UShort4>(c01.x) - MulHigh(As<UShort4>(c01.x), f0u) + MulHigh(As<UShort4>(c11.x), f0u); - c.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v); - } - else - { - if(hasUnsignedTextureComponent(0)) - { - c00.x = MulHigh(As<UShort4>(c00.x), f1u1v); - c10.x = 
MulHigh(As<UShort4>(c10.x), f0u1v); - c01.x = MulHigh(As<UShort4>(c01.x), f1u0v); - c11.x = MulHigh(As<UShort4>(c11.x), f0u0v); - } - else - { - c00.x = MulHigh(c00.x, f1u1vs); - c10.x = MulHigh(c10.x, f0u1vs); - c01.x = MulHigh(c01.x, f1u0vs); - c11.x = MulHigh(c11.x, f0u0vs); - } - - c.x = (c00.x + c10.x) + (c01.x + c11.x); - if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions - } + // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_FULL + y = y / Float4(twoPowLumaBits - 1.0f); + u = (u - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f); + v = (v - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f); } - if(componentCount >= 2) - { - if(has16bitTextureComponents() && hasUnsignedTextureComponent(1)) - { - c00.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0u) + MulHigh(As<UShort4>(c10.y), f0u); - c01.y = As<UShort4>(c01.y) - MulHigh(As<UShort4>(c01.y), f0u) + MulHigh(As<UShort4>(c11.y), f0u); - c.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v); - } - else - { - if(hasUnsignedTextureComponent(1)) - { - c00.y = MulHigh(As<UShort4>(c00.y), f1u1v); - c10.y = MulHigh(As<UShort4>(c10.y), f0u1v); - c01.y = MulHigh(As<UShort4>(c01.y), f1u0v); - c11.y = MulHigh(As<UShort4>(c11.y), f0u0v); - } - else - { - c00.y = MulHigh(c00.y, f1u1vs); - c10.y = MulHigh(c10.y, f0u1vs); - c01.y = MulHigh(c01.y, f1u0vs); - c11.y = MulHigh(c11.y, f0u0vs); - } + // Now, `y` is in [0, 1] and `u` and `v` are in [-0.5, 0.5]. 
- c.y = (c00.y + c10.y) + (c01.y + c11.y); - if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions - } + if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY) + { + c.x = Short4(v * static_cast<float>(0x7FFF)); + c.y = Short4(y * static_cast<float>(0x7FFF)); + c.z = Short4(u * static_cast<float>(0x7FFF)); } - - if(componentCount >= 3) + else { - if(has16bitTextureComponents() && hasUnsignedTextureComponent(2)) - { - c00.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0u) + MulHigh(As<UShort4>(c10.z), f0u); - c01.z = As<UShort4>(c01.z) - MulHigh(As<UShort4>(c01.z), f0u) + MulHigh(As<UShort4>(c11.z), f0u); - c.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v); - } - else - { - if(hasUnsignedTextureComponent(2)) - { - c00.z = MulHigh(As<UShort4>(c00.z), f1u1v); - c10.z = MulHigh(As<UShort4>(c10.z), f0u1v); - c01.z = MulHigh(As<UShort4>(c01.z), f1u0v); - c11.z = MulHigh(As<UShort4>(c11.z), f0u0v); - } - else - { - c00.z = MulHigh(c00.z, f1u1vs); - c10.z = MulHigh(c10.z, f0u1vs); - c01.z = MulHigh(c01.z, f1u0vs); - c11.z = MulHigh(c11.z, f0u0vs); - } + // Generic YCbCr to RGB transformation: + // R = Y + 2 * (1 - Kr) * Cr + // G = Y - 2 * Kb * (1 - Kb) / Kg * Cb - 2 * Kr * (1 - Kr) / Kg * Cr + // B = Y + 2 * (1 - Kb) * Cb - c.z = (c00.z + c10.z) + (c01.z + c11.z); - if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions - } - } + float Kb = 0.114f; + float Kr = 0.299f; - if(componentCount >= 4) - { - if(has16bitTextureComponents() && hasUnsignedTextureComponent(3)) + switch(state.ycbcrModel) { - c00.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0u) + MulHigh(As<UShort4>(c10.w), f0u); - c01.w = As<UShort4>(c01.w) - MulHigh(As<UShort4>(c01.w), f0u) + MulHigh(As<UShort4>(c11.w), f0u); - c.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v); + case 
VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709: + Kb = 0.0722f; + Kr = 0.2126f; + break; + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601: + Kb = 0.114f; + Kr = 0.299f; + break; + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020: + Kb = 0.0593f; + Kr = 0.2627f; + break; + default: + UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel)); } - else - { - if(hasUnsignedTextureComponent(3)) - { - c00.w = MulHigh(As<UShort4>(c00.w), f1u1v); - c10.w = MulHigh(As<UShort4>(c10.w), f0u1v); - c01.w = MulHigh(As<UShort4>(c01.w), f1u0v); - c11.w = MulHigh(As<UShort4>(c11.w), f0u0v); - } - else - { - c00.w = MulHigh(c00.w, f1u1vs); - c10.w = MulHigh(c10.w, f0u1vs); - c01.w = MulHigh(c01.w, f1u0vs); - c11.w = MulHigh(c11.w, f0u0vs); - } - c.w = (c00.w + c10.w) + (c01.w + c11.w); - if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions - } + const float Kg = 1.0f - Kr - Kb; + + const float Rr = 2 * (1 - Kr); + const float Gb = -2 * Kb * (1 - Kb) / Kg; + const float Gr = -2 * Kr * (1 - Kr) / Kg; + const float Bb = 2 * (1 - Kb); + + Float4 r = y + Float4(Rr) * v; + Float4 g = y + Float4(Gb) * u + Float4(Gr) * v; + Float4 b = y + Float4(Bb) * u; + + c.x = Short4(r * static_cast<float>(0x7FFF)); + c.y = Short4(g * static_cast<float>(0x7FFF)); + c.z = Short4(b * static_cast<float>(0x7FFF)); } } - else // Gather + } + else // !isYcbcrFormat() + { + if(state.textureFilter == FILTER_POINT) { - VkComponentSwizzle swizzle = gatherSwizzle(); - switch(swizzle) + c = sampleTexel(uuuu, vvvv, wwww, layerIndex, sample, mipmap, buffer); + } + else + { + Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod); + Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod); + Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod); + Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, 
vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod); + + Vector4s c00 = sampleTexel(uuuu0, vvvv0, wwww, layerIndex, sample, mipmap, buffer); + Vector4s c10 = sampleTexel(uuuu1, vvvv0, wwww, layerIndex, sample, mipmap, buffer); + Vector4s c01 = sampleTexel(uuuu0, vvvv1, wwww, layerIndex, sample, mipmap, buffer); + Vector4s c11 = sampleTexel(uuuu1, vvvv1, wwww, layerIndex, sample, mipmap, buffer); + + if(!gather) // Blend { - case VK_COMPONENT_SWIZZLE_ZERO: - case VK_COMPONENT_SWIZZLE_ONE: - // Handled at the final component swizzle. - break; - default: - c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R]; - c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R]; - c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R]; - c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R]; - break; + bilinearInterpolate(c, uuuu0, vvvv0, c00, c01, c10, c11, mipmap); + } + else + { + VkComponentSwizzle swizzle = gatherSwizzle(); + switch(swizzle) + { + case VK_COMPONENT_SWIZZLE_ZERO: + case VK_COMPONENT_SWIZZLE_ONE: + // Handled at the final component swizzle. 
+ break; + default: + c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R]; + c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R]; + c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R]; + c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R]; + break; + } } } } @@ -692,9 +918,9 @@ Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, F applyOffset(u_, v_, w_, offset, mipmap); - Short4 uuuu = address(u_, state.addressingModeU, mipmap); - Short4 vvvv = address(v_, state.addressingModeV, mipmap); - Short4 wwww = address(w_, state.addressingModeW, mipmap); + Short4 uuuu = address(u_, state.addressingModeU); + Short4 vvvv = address(v_, state.addressingModeV); + Short4 wwww = address(w_, state.addressingModeW); if(state.textureFilter == FILTER_POINT) { @@ -1753,226 +1979,112 @@ Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer) return c; } -Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, const Short4 &layerIndex, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer) +void SamplerCore::sampleLumaTexel(Vector4f &output, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, const Short4 &layerIndex, const Int4 &sample, Pointer<Byte> &lumaMipmap, Pointer<Byte> lumaBuffer) { - Vector4s c; + ASSERT(isYcbcrFormat()); UInt index[4]; - computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, mipmap); + computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, lumaMipmap); - if(isYcbcrFormat()) - { - // Generates 15-bit output. + // Luminance (either 8-bit or 10-bit in bottom bits). + UShort4 Y; - // Pointers to the planes of YCbCr images are stored in consecutive mipmap levels. - Pointer<Byte> bufferY = buffer; // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); - Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmap + 1 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); // U/V for 2-plane interleaved formats. 
- Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmap + 2 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); - - // Luminance (either 8-bit or 10-bit in bottom bits). - UShort4 Y; + switch(state.textureFormat) + { + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: { - switch(state.textureFormat) - { - case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: - case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: - { - Y = Insert(Y, UShort(bufferY[index[0]]), 0); - Y = Insert(Y, UShort(bufferY[index[1]]), 1); - Y = Insert(Y, UShort(bufferY[index[2]]), 2); - Y = Insert(Y, UShort(bufferY[index[3]]), 3); - } - break; - case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: - { - Y = Insert(Y, Pointer<UShort>(bufferY)[index[0]], 0); - Y = Insert(Y, Pointer<UShort>(bufferY)[index[1]], 1); - Y = Insert(Y, Pointer<UShort>(bufferY)[index[2]], 2); - Y = Insert(Y, Pointer<UShort>(bufferY)[index[3]], 3); - // Top 10 bits of each 16 bits: - Y = (Y & UShort4(0xFFC0u)) >> 6; - } - break; - default: - UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat); - break; - } + Y = Insert(Y, UShort(lumaBuffer[index[0]]), 0); + Y = Insert(Y, UShort(lumaBuffer[index[1]]), 1); + Y = Insert(Y, UShort(lumaBuffer[index[2]]), 2); + Y = Insert(Y, UShort(lumaBuffer[index[3]]), 3); } - - // Chroma (either 8-bit or 10-bit in bottom bits). 
- UShort4 Cb, Cr; + break; + case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: { - computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, mipmap + sizeof(Mipmap)); - UShort4 U, V; + Y = Insert(Y, Pointer<UShort>(lumaBuffer)[index[0]], 0); + Y = Insert(Y, Pointer<UShort>(lumaBuffer)[index[1]], 1); + Y = Insert(Y, Pointer<UShort>(lumaBuffer)[index[2]], 2); + Y = Insert(Y, Pointer<UShort>(lumaBuffer)[index[3]], 3); + // Top 10 bits of each 16 bits: + Y = (Y & UShort4(0xFFC0u)) >> 6; + } + break; + default: + UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat); + break; + } - switch(state.textureFormat) - { - case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: - { - U = Insert(U, UShort(bufferU[index[0]]), 0); - U = Insert(U, UShort(bufferU[index[1]]), 1); - U = Insert(U, UShort(bufferU[index[2]]), 2); - U = Insert(U, UShort(bufferU[index[3]]), 3); - - V = Insert(V, UShort(bufferV[index[0]]), 0); - V = Insert(V, UShort(bufferV[index[1]]), 1); - V = Insert(V, UShort(bufferV[index[2]]), 2); - V = Insert(V, UShort(bufferV[index[3]]), 3); - } - break; - case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: - { - UShort4 UV; - UV = Insert(UV, Pointer<UShort>(bufferU)[index[0]], 0); - UV = Insert(UV, Pointer<UShort>(bufferU)[index[1]], 1); - UV = Insert(UV, Pointer<UShort>(bufferU)[index[2]], 2); - UV = Insert(UV, Pointer<UShort>(bufferU)[index[3]], 3); - - U = (UV & UShort4(0x00FFu)); - V = (UV & UShort4(0xFF00u)) >> 8; - } - break; - case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: - { - UInt4 UV; - UV = Insert(UV, Pointer<UInt>(bufferU)[index[0]], 0); - UV = Insert(UV, Pointer<UInt>(bufferU)[index[1]], 1); - UV = Insert(UV, Pointer<UInt>(bufferU)[index[2]], 2); - UV = Insert(UV, Pointer<UInt>(bufferU)[index[3]], 3); - // Top 10 bits of first 16-bits: - U = UShort4((UV & UInt4(0x0000FFC0u)) >> 6); - // Top 10 bits of second 16-bits: - V = UShort4((UV & UInt4(0xFFC00000u)) >> 22); - } - break; - default: - UNSUPPORTED("state.textureFormat %d", 
(int)state.textureFormat); - break; - } + output.y = Float4(Y); +} - if(!state.swappedChroma) - { - Cb = U; - Cr = V; - } - else - { - Cb = V; - Cr = U; - } - } +void SamplerCore::sampleChromaTexel(Vector4f &output, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, const Short4 &layerIndex, const Int4 &sample, Pointer<Byte> &mipmapU, Pointer<Byte> bufferU, Pointer<Byte> &mipmapV, Pointer<Byte> bufferV) +{ + ASSERT(isYcbcrFormat()); - uint8_t lumaBits = 8; - uint8_t chromaBits = 8; - switch(state.textureFormat) + UInt index[4]; + + // Chroma (either 8-bit or 10-bit in bottom bits). + UShort4 U, V; + computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, mipmapU); + + switch(state.textureFormat) + { + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: { - case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: - case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: - lumaBits = 8; - chromaBits = 8; - break; - case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: - lumaBits = 10; - chromaBits = 10; - break; - default: - UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat); - break; + U = Insert(U, UShort(bufferU[index[0]]), 0); + U = Insert(U, UShort(bufferU[index[1]]), 1); + U = Insert(U, UShort(bufferU[index[2]]), 2); + U = Insert(U, UShort(bufferU[index[3]]), 3); + + V = Insert(V, UShort(bufferV[index[0]]), 0); + V = Insert(V, UShort(bufferV[index[1]]), 1); + V = Insert(V, UShort(bufferV[index[2]]), 2); + V = Insert(V, UShort(bufferV[index[3]]), 3); } - - if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + break; + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: { - // Scale to the output 15-bit. 
- c.x = Cr << (15 - chromaBits); - c.y = Y << (15 - lumaBits); - c.z = Cb << (15 - chromaBits); + UShort4 UV; + UV = Insert(UV, Pointer<UShort>(bufferU)[index[0]], 0); + UV = Insert(UV, Pointer<UShort>(bufferU)[index[1]], 1); + UV = Insert(UV, Pointer<UShort>(bufferU)[index[2]], 2); + UV = Insert(UV, Pointer<UShort>(bufferU)[index[3]], 3); + + U = (UV & UShort4(0x00FFu)); + V = (UV & UShort4(0xFF00u)) >> 8; } - else + break; + case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: { - const float twoPowLumaBits = static_cast<float>(0x1u << lumaBits); - const float twoPowLumaBitsMinus8 = static_cast<float>(0x1u << (lumaBits - 8)); - const float twoPowChromaBits = static_cast<float>(0x1u << chromaBits); - const float twoPowChromaBitsMinus1 = static_cast<float>(0x1u << (chromaBits - 1)); - const float twoPowChromaBitsMinus8 = static_cast<float>(0x1u << (chromaBits - 8)); - - Float4 y = Float4(Y); - Float4 u = Float4(Cb); - Float4 v = Float4(Cr); - - if(state.studioSwing) - { - // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_NARROW - y = ((y / Float4(twoPowLumaBitsMinus8)) - Float4(16.0f)) / Float4(219.0f); - u = ((u / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f); - v = ((v / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f); - } - else - { - // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_FULL - y = y / Float4(twoPowLumaBits - 1.0f); - u = (u - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f); - v = (v - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f); - } - - // Now, `y` is in [0, 1] and `u` and `v` are in [-0.5, 0.5]. 
- - if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY) - { - c.x = Short4(v * static_cast<float>(0x7FFF)); - c.y = Short4(y * static_cast<float>(0x7FFF)); - c.z = Short4(u * static_cast<float>(0x7FFF)); - } - else - { - // Generic YCbCr to RGB transformation: - // R = Y + 2 * (1 - Kr) * Cr - // G = Y - 2 * Kb * (1 - Kb) / Kg * Cb - 2 * Kr * (1 - Kr) / Kg * Cr - // B = Y + 2 * (1 - Kb) * Cb - - float Kb = 0.114f; - float Kr = 0.299f; - - switch(state.ycbcrModel) - { - case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709: - Kb = 0.0722f; - Kr = 0.2126f; - break; - case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601: - Kb = 0.114f; - Kr = 0.299f; - break; - case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020: - Kb = 0.0593f; - Kr = 0.2627f; - break; - default: - UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel)); - } - - const float Kg = 1.0f - Kr - Kb; + UInt4 UV; + UV = Insert(UV, Pointer<UInt>(bufferU)[index[0]], 0); + UV = Insert(UV, Pointer<UInt>(bufferU)[index[1]], 1); + UV = Insert(UV, Pointer<UInt>(bufferU)[index[2]], 2); + UV = Insert(UV, Pointer<UInt>(bufferU)[index[3]], 3); + // Top 10 bits of first 16-bits: + U = UShort4((UV & UInt4(0x0000FFC0u)) >> 6); + // Top 10 bits of second 16-bits: + V = UShort4((UV & UInt4(0xFFC00000u)) >> 22); + } + break; + default: + UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat); + break; + } - const float Rr = 2 * (1 - Kr); - const float Gb = -2 * Kb * (1 - Kb) / Kg; - const float Gr = -2 * Kr * (1 - Kr) / Kg; - const float Bb = 2 * (1 - Kb); + output.x = Float4(V); + output.z = Float4(U); +} - Float4 r = y + Float4(Rr) * v; - Float4 g = y + Float4(Gb) * u + Float4(Gr) * v; - Float4 b = y + Float4(Bb) * u; +Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, const Short4 &layerIndex, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer) +{ + ASSERT(!isYcbcrFormat()); - c.x = Short4(r * static_cast<float>(0x7FFF)); - c.y = Short4(g * static_cast<float>(0x7FFF)); 
- c.z = Short4(b * static_cast<float>(0x7FFF)); - } - } - } - else - { - return sampleTexel(index, buffer); - } + UInt index[4]; + computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, mipmap); - return c; + return sampleTexel(index, buffer); } Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, const Float4 &dRef, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer) @@ -2281,7 +2393,7 @@ Int4 SamplerCore::computeFilterOffset(Float &lod) return Int4(~0); } -Short4 SamplerCore::address(const Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap) +Short4 SamplerCore::address(const Float4 &uw, AddressingMode addressingMode) { if(addressingMode == ADDRESSING_UNUSED) { diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp index 2074a897a..bcd468225 100644 --- a/src/Pipeline/SamplerCore.hpp +++ b/src/Pipeline/SamplerCore.hpp @@ -86,12 +86,16 @@ private: void applyOffset(Float4 &u, Float4 &v, Float4 &w, Vector4i &offset, Pointer<Byte> mipmap); void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, const Short4 &cubeArrayLayer, const Int4 &sample, const Pointer<Byte> &mipmap); void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, const Int4 &sample, Int4 valid, const Pointer<Byte> &mipmap); + void bilinearInterpolateFloat(Vector4f &output, const Short4 &uuuu0, const Short4 &vvvv0, Vector4f &c00, Vector4f &c01, Vector4f &c10, Vector4f &c11, const Pointer<Byte> &mipmap, bool interpolateComponent0, bool interpolateComponent1, bool interpolateComponent2, bool interpolateComponent3); + void bilinearInterpolate(Vector4s &output, const Short4 &uuuu0, const Short4 &vvvv0, Vector4s &c00, Vector4s &c01, Vector4s &c10, Vector4s &c11, const Pointer<Byte> &mipmap); + void sampleLumaTexel(Vector4f& output, Short4 &u, Short4 &v, Short4 &w, const Short4 &cubeArrayLayer, const Int4 &sample, Pointer<Byte> &lumaMipmap, Pointer<Byte> lumaBuffer); + void sampleChromaTexel(Vector4f& output, Short4 
&u, Short4 &v, Short4 &w, const Short4 &cubeArrayLayer, const Int4 &sample, Pointer<Byte> &mipmapU, Pointer<Byte> bufferU, Pointer<Byte> &mipmapV, Pointer<Byte> bufferV); Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &w, const Short4 &cubeArrayLayer, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer); Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer); Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &w, const Float4 &dRef, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer); Vector4f replaceBorderTexel(const Vector4f &c, Int4 valid); Pointer<Byte> selectMipmap(const Pointer<Byte> &texture, const Float &lod, bool secondLOD); - Short4 address(const Float4 &uvw, AddressingMode addressingMode, Pointer<Byte> &mipmap); + Short4 address(const Float4 &uvw, AddressingMode addressingMode); Short4 computeLayerIndex16(const Float4 &a, Pointer<Byte> &mipmap); void address(const Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Int4 &filter, int whd, AddressingMode addressingMode); Int4 computeLayerIndex(const Float4 &a, Pointer<Byte> &mipmap); diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp index 4c674050e..7d4dd6811 100644 --- a/src/Pipeline/SpirvShaderSampling.cpp +++ b/src/Pipeline/SpirvShaderSampling.cpp @@ -75,6 +75,9 @@ SpirvEmitter::ImageSampler *SpirvEmitter::getImageSampler(const vk::Device *devi samplerState.ycbcrModel = vkSamplerState->ycbcrModel; samplerState.studioSwing = vkSamplerState->studioSwing; samplerState.swappedChroma = vkSamplerState->swappedChroma; + samplerState.chromaFilter = vkSamplerState->chromaFilter == VK_FILTER_LINEAR ? 
FILTER_LINEAR : FILTER_POINT; + samplerState.chromaXOffset = vkSamplerState->chromaXOffset; + samplerState.chromaYOffset = vkSamplerState->chromaYOffset; samplerState.mipLodBias = vkSamplerState->mipLodBias; samplerState.maxAnisotropy = vkSamplerState->maxAnisotropy; diff --git a/src/Reactor/Print.hpp b/src/Reactor/Print.hpp index 6d2b4d1a3..8a4afd6ac 100644 --- a/src/Reactor/Print.hpp +++ b/src/Reactor/Print.hpp @@ -499,6 +499,24 @@ static_assert(3 == RR_COUNT_ARGUMENTS(a, b, c), "RR_COUNT_ARGUMENTS broken"); # define RR_WATCH_FMT_12(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12) \ RR_WATCH_FMT_11(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11) \ "\n " #_12 ": {11}" +# define RR_WATCH_FMT_13(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13) \ + RR_WATCH_FMT_12(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12) \ + "\n " #_13 ": {12}" +# define RR_WATCH_FMT_14(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14) \ + RR_WATCH_FMT_13(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13) \ + "\n " #_14 ": {13}" +# define RR_WATCH_FMT_15(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15) \ + RR_WATCH_FMT_14(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14) \ + "\n " #_15 ": {14}" +# define RR_WATCH_FMT_16(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16) \ + RR_WATCH_FMT_15(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15) \ + "\n " #_16 ": {15}" +# define RR_WATCH_FMT_17(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17) \ + RR_WATCH_FMT_16(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16) \ + "\n " #_17 ": {16}" +# define RR_WATCH_FMT_18(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18) \ + RR_WATCH_FMT_17(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17) \ + "\n " #_18 ": {17}" // RR_WATCH() is a helper that prints the name and value of all the supplied // arguments. 
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp index f5f189e7a..3c5a1200e 100644 --- a/src/Reactor/Reactor.hpp +++ b/src/Reactor/Reactor.hpp @@ -2053,6 +2053,9 @@ RValue<Float4> Trunc(RValue<Float4> x); RValue<Float4> Frac(RValue<Float4> x); RValue<Float4> Floor(RValue<Float4> x); RValue<Float4> Ceil(RValue<Float4> x); +inline RValue<Float4> Mix(RValue<Float4> x, RValue<Float4> y, RValue<Float4> frac) { + return (x * (Float4(1.0f) - frac)) + (y * frac); +} // Trigonometric functions RValue<Float4> Sin(RValue<Float4> x); diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp index 3b447db1c..152f021a7 100644 --- a/src/Vulkan/VkPhysicalDevice.cpp +++ b/src/Vulkan/VkPhysicalDevice.cpp @@ -2131,6 +2131,7 @@ void PhysicalDevice::GetFormatProperties(Format format, VkFormatProperties3 *pFo pFormatProperties->optimalTilingFeatures |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT | diff --git a/src/Vulkan/VkSampler.cpp b/src/Vulkan/VkSampler.cpp index 0458f0adf..d007cf1fe 100644 --- a/src/Vulkan/VkSampler.cpp +++ b/src/Vulkan/VkSampler.cpp @@ -44,6 +44,9 @@ SamplerState::SamplerState(const VkSamplerCreateInfo *pCreateInfo, const vk::Sam ycbcrModel = ycbcrConversion->ycbcrModel; studioSwing = (ycbcrConversion->ycbcrRange == VK_SAMPLER_YCBCR_RANGE_ITU_NARROW); swappedChroma = (ycbcrConversion->components.r != VK_COMPONENT_SWIZZLE_R); + chromaFilter = ycbcrConversion->chromaFilter; + chromaXOffset = ycbcrConversion->xChromaOffset; + chromaYOffset = ycbcrConversion->yChromaOffset; } } diff --git a/src/Vulkan/VkSampler.hpp b/src/Vulkan/VkSampler.hpp index 4a627c18b..f20240a7d 100644 --- a/src/Vulkan/VkSampler.hpp +++ b/src/Vulkan/VkSampler.hpp @@ -56,6 +56,9 @@ struct SamplerState : sw::Memset<SamplerState> 
const bool highPrecisionFiltering = false; bool studioSwing = false; // Narrow range bool swappedChroma = false; // Cb/Cr components in reverse order + VkFilter chromaFilter = VK_FILTER_NEAREST; + VkChromaLocation chromaXOffset = VK_CHROMA_LOCATION_COSITED_EVEN; + VkChromaLocation chromaYOffset = VK_CHROMA_LOCATION_COSITED_EVEN; }; class Sampler : public Object<Sampler, VkSampler>, public SamplerState |