aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Macnak <natsu@google.com>2024-02-21 13:10:41 -0800
committerswiftshader-scoped@luci-project-accounts.iam.gserviceaccount.com <swiftshader-scoped@luci-project-accounts.iam.gserviceaccount.com>2024-02-26 18:50:19 +0000
commitbbe6452b420c5ddc4b0fd421b0a3ce271262f4ca (patch)
treed47db43706c69888a2bda00eb991c3c1efe0d6b7
parent0f69b790c7a491e103802870b2f670c5936b9930 (diff)
downloadswiftshader-bbe6452b420c5ddc4b0fd421b0a3ce271262f4ca.tar.gz
Support ycbcr conversion linear filtering
Updates SS to perform separate sampling and filtering for luma and chroma to avoid using the wrong offsets for interpolation. Bug: b/324625557 Test: android.media.decoder.cts.DecodeAccuracyTest Test: dEQP-VK.* Change-Id: I017586a19f24ccfab18fba457be0942d31ec9bf8 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/73128 Tested-by: Jason Macnak <natsu@google.com> Commit-Queue: Jason Macnak <natsu@google.com> Reviewed-by: Chris Forbes <chrisforbes@google.com> Kokoro-Result: kokoro <noreply+kokoro@google.com> Presubmit-Ready: Jason Macnak <natsu@google.com>
-rw-r--r--src/Device/Sampler.hpp3
-rw-r--r--src/Pipeline/SamplerCore.cpp840
-rw-r--r--src/Pipeline/SamplerCore.hpp6
-rw-r--r--src/Pipeline/SpirvShaderSampling.cpp3
-rw-r--r--src/Reactor/Print.hpp18
-rw-r--r--src/Reactor/Reactor.hpp3
-rw-r--r--src/Vulkan/VkPhysicalDevice.cpp1
-rw-r--r--src/Vulkan/VkSampler.cpp3
-rw-r--r--src/Vulkan/VkSampler.hpp3
9 files changed, 515 insertions, 365 deletions
diff --git a/src/Device/Sampler.hpp b/src/Device/Sampler.hpp
index 7402b5c87..95973471b 100644
--- a/src/Device/Sampler.hpp
+++ b/src/Device/Sampler.hpp
@@ -105,6 +105,9 @@ struct Sampler
VkSamplerYcbcrModelConversion ycbcrModel;
bool studioSwing; // Narrow range
bool swappedChroma; // Cb/Cr components in reverse order
+ FilterType chromaFilter;
+ VkChromaLocation chromaXOffset;
+ VkChromaLocation chromaYOffset;
float mipLodBias = 0.0f;
float maxAnisotropy = 0.0f;
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index ab55c036a..855ac22b1 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -277,7 +277,7 @@ Float4 SamplerCore::applySwizzle(const Vector4f &c, VkComponentSwizzle swizzle,
Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod)
{
- Short4 offset = *Pointer<Short4>(mipmap + halfOffset);
+ Short4 offset = *Pointer<UShort4>(mipmap + halfOffset);
if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
{
@@ -477,11 +477,198 @@ Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, F
}
}
+void SamplerCore::bilinearInterpolateFloat(Vector4f &output, const Short4 &uuuu0, const Short4 &vvvv0, Vector4f &c00, Vector4f &c01, Vector4f &c10, Vector4f &c11, const Pointer<Byte> &mipmap, bool interpolateComponent0, bool interpolateComponent1, bool interpolateComponent2, bool interpolateComponent3)
+{
+ int componentCount = textureComponentCount();
+
+ Float4 unnormalizedUUUU0 = (Float4(uuuu0) / Float4(1 << 16)) * Float4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, width)));
+ Float4 unnormalizedVVVV0 = (Float4(vvvv0) / Float4(1 << 16)) * Float4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, height)));
+
+ Float4 frac0u = Frac(unnormalizedUUUU0);
+ Float4 frac0v = Frac(unnormalizedVVVV0);
+
+ if(interpolateComponent0 && componentCount >= 1)
+ {
+ c00.x = Mix(c00.x, c10.x, frac0u);
+ c01.x = Mix(c01.x, c11.x, frac0u);
+ output.x = Mix(c00.x, c01.x, frac0v);
+ }
+ if(interpolateComponent1 && componentCount >= 2)
+ {
+ c00.y = Mix(c00.y, c10.y, frac0u);
+ c01.y = Mix(c01.y, c11.y, frac0u);
+ output.y = Mix(c00.y, c01.y, frac0v);
+ }
+ if(interpolateComponent2 && componentCount >= 3)
+ {
+ c00.z = Mix(c00.z, c10.z, frac0u);
+ c01.z = Mix(c01.z, c11.z, frac0u);
+ output.z = Mix(c00.z, c01.z, frac0v);
+ }
+ if(interpolateComponent3 && componentCount >= 4)
+ {
+ c00.w = Mix(c00.w, c10.w, frac0u);
+ c01.w = Mix(c01.w, c11.w, frac0u);
+ output.w = Mix(c00.w, c01.w, frac0v);
+ }
+}
+
+void SamplerCore::bilinearInterpolate(Vector4s &output, const Short4 &uuuu0, const Short4 &vvvv0, Vector4s &c00, Vector4s &c01, Vector4s &c10, Vector4s &c11, const Pointer<Byte> &mipmap)
+{
+ int componentCount = textureComponentCount();
+
+ // Fractions
+ UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, width)));
+ UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, height)));
+
+ UShort4 f1u = ~f0u;
+ UShort4 f1v = ~f0v;
+
+ UShort4 f0u0v = MulHigh(f0u, f0v);
+ UShort4 f1u0v = MulHigh(f1u, f0v);
+ UShort4 f0u1v = MulHigh(f0u, f1v);
+ UShort4 f1u1v = MulHigh(f1u, f1v);
+
+ // Signed fractions
+ Short4 f1u1vs;
+ Short4 f0u1vs;
+ Short4 f1u0vs;
+ Short4 f0u0vs;
+
+ if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
+ {
+ f1u1vs = f1u1v >> 1;
+ f0u1vs = f0u1v >> 1;
+ f1u0vs = f1u0v >> 1;
+ f0u0vs = f0u0v >> 1;
+ }
+
+ // Bilinear interpolation
+ if(componentCount >= 1)
+ {
+ if(has16bitTextureComponents() && hasUnsignedTextureComponent(0))
+ {
+ c00.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0u) + MulHigh(As<UShort4>(c10.x), f0u);
+ c01.x = As<UShort4>(c01.x) - MulHigh(As<UShort4>(c01.x), f0u) + MulHigh(As<UShort4>(c11.x), f0u);
+ output.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v);
+ }
+ else
+ {
+ if(hasUnsignedTextureComponent(0))
+ {
+ c00.x = MulHigh(As<UShort4>(c00.x), f1u1v);
+ c10.x = MulHigh(As<UShort4>(c10.x), f0u1v);
+ c01.x = MulHigh(As<UShort4>(c01.x), f1u0v);
+ c11.x = MulHigh(As<UShort4>(c11.x), f0u0v);
+ }
+ else
+ {
+ c00.x = MulHigh(c00.x, f1u1vs);
+ c10.x = MulHigh(c10.x, f0u1vs);
+ c01.x = MulHigh(c01.x, f1u0vs);
+ c11.x = MulHigh(c11.x, f0u0vs);
+ }
+
+ output.x = (c00.x + c10.x) + (c01.x + c11.x);
+ if(!hasUnsignedTextureComponent(0)) output.x = AddSat(output.x, output.x); // Correct for signed fractions
+ }
+ }
+
+ if(componentCount >= 2)
+ {
+ if(has16bitTextureComponents() && hasUnsignedTextureComponent(1))
+ {
+ c00.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0u) + MulHigh(As<UShort4>(c10.y), f0u);
+ c01.y = As<UShort4>(c01.y) - MulHigh(As<UShort4>(c01.y), f0u) + MulHigh(As<UShort4>(c11.y), f0u);
+ output.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v);
+ }
+ else
+ {
+ if(hasUnsignedTextureComponent(1))
+ {
+ c00.y = MulHigh(As<UShort4>(c00.y), f1u1v);
+ c10.y = MulHigh(As<UShort4>(c10.y), f0u1v);
+ c01.y = MulHigh(As<UShort4>(c01.y), f1u0v);
+ c11.y = MulHigh(As<UShort4>(c11.y), f0u0v);
+ }
+ else
+ {
+ c00.y = MulHigh(c00.y, f1u1vs);
+ c10.y = MulHigh(c10.y, f0u1vs);
+ c01.y = MulHigh(c01.y, f1u0vs);
+ c11.y = MulHigh(c11.y, f0u0vs);
+ }
+
+ output.y = (c00.y + c10.y) + (c01.y + c11.y);
+ if(!hasUnsignedTextureComponent(1)) output.y = AddSat(output.y, output.y); // Correct for signed fractions
+ }
+ }
+
+ if(componentCount >= 3)
+ {
+ if(has16bitTextureComponents() && hasUnsignedTextureComponent(2))
+ {
+ c00.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0u) + MulHigh(As<UShort4>(c10.z), f0u);
+ c01.z = As<UShort4>(c01.z) - MulHigh(As<UShort4>(c01.z), f0u) + MulHigh(As<UShort4>(c11.z), f0u);
+ output.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v);
+ }
+ else
+ {
+ if(hasUnsignedTextureComponent(2))
+ {
+ c00.z = MulHigh(As<UShort4>(c00.z), f1u1v);
+ c10.z = MulHigh(As<UShort4>(c10.z), f0u1v);
+ c01.z = MulHigh(As<UShort4>(c01.z), f1u0v);
+ c11.z = MulHigh(As<UShort4>(c11.z), f0u0v);
+ }
+ else
+ {
+ c00.z = MulHigh(c00.z, f1u1vs);
+ c10.z = MulHigh(c10.z, f0u1vs);
+ c01.z = MulHigh(c01.z, f1u0vs);
+ c11.z = MulHigh(c11.z, f0u0vs);
+ }
+
+ output.z = (c00.z + c10.z) + (c01.z + c11.z);
+ if(!hasUnsignedTextureComponent(2)) output.z = AddSat(output.z, output.z); // Correct for signed fractions
+ }
+ }
+
+ if(componentCount >= 4)
+ {
+ if(has16bitTextureComponents() && hasUnsignedTextureComponent(3))
+ {
+ c00.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0u) + MulHigh(As<UShort4>(c10.w), f0u);
+ c01.w = As<UShort4>(c01.w) - MulHigh(As<UShort4>(c01.w), f0u) + MulHigh(As<UShort4>(c11.w), f0u);
+ output.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v);
+ }
+ else
+ {
+ if(hasUnsignedTextureComponent(3))
+ {
+ c00.w = MulHigh(As<UShort4>(c00.w), f1u1v);
+ c10.w = MulHigh(As<UShort4>(c10.w), f0u1v);
+ c01.w = MulHigh(As<UShort4>(c01.w), f1u0v);
+ c11.w = MulHigh(As<UShort4>(c11.w), f0u0v);
+ }
+ else
+ {
+ c00.w = MulHigh(c00.w, f1u1vs);
+ c10.w = MulHigh(c10.w, f0u1vs);
+ c01.w = MulHigh(c01.w, f1u0vs);
+ c11.w = MulHigh(c11.w, f0u0vs);
+ }
+
+ output.w = (c00.w + c10.w) + (c01.w + c11.w);
+ if(!hasUnsignedTextureComponent(3)) output.w = AddSat(output.w, output.w); // Correct for signed fractions
+ }
+ }
+}
+
Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Vector4i &offset, const Int4 &sample, Float &lod, bool secondLOD)
{
Vector4s c;
- int componentCount = textureComponentCount();
bool gather = (state.textureFilter == FILTER_GATHER);
Pointer<Byte> mipmap = selectMipmap(texture, lod, secondLOD);
@@ -489,191 +676,230 @@ Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v,
applyOffset(u, v, w, offset, mipmap);
- Short4 uuuu = address(u, state.addressingModeU, mipmap);
- Short4 vvvv = address(v, state.addressingModeV, mipmap);
- Short4 wwww = address(w, state.addressingModeW, mipmap);
+ Short4 uuuu = address(u, state.addressingModeU);
+ Short4 vvvv = address(v, state.addressingModeV);
+ Short4 wwww = address(w, state.addressingModeW);
Short4 layerIndex = computeLayerIndex16(a, mipmap);
- if(state.textureFilter == FILTER_POINT)
- {
- c = sampleTexel(uuuu, vvvv, wwww, layerIndex, sample, mipmap, buffer);
- }
- else
+ if(isYcbcrFormat())
{
- Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
- Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
- Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
- Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
+ uint8_t lumaBits = 8;
+ uint8_t chromaBits = 8;
+ switch(state.textureFormat)
+ {
+ case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+ case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+ lumaBits = 8;
+ chromaBits = 8;
+ break;
+ case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+ lumaBits = 10;
+ chromaBits = 10;
+ break;
+ default:
+ UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
+ break;
+ }
- Vector4s c00 = sampleTexel(uuuu0, vvvv0, wwww, layerIndex, sample, mipmap, buffer);
- Vector4s c10 = sampleTexel(uuuu1, vvvv0, wwww, layerIndex, sample, mipmap, buffer);
- Vector4s c01 = sampleTexel(uuuu0, vvvv1, wwww, layerIndex, sample, mipmap, buffer);
- Vector4s c11 = sampleTexel(uuuu1, vvvv1, wwww, layerIndex, sample, mipmap, buffer);
+ // TODO: investigate apparent precision losses in dEQP-VK.ycbcr when sampling and interpolating with Short4.
- if(!gather) // Blend
+ // Unnormalized YUV values in [0, 255] for 8-bit formats, [0, 1023] for 10-bit formats.
+ Vector4f yuv;
+ Vector4f yuv00;
+ Vector4f yuv10;
+ Vector4f yuv01;
+ Vector4f yuv11;
+
+ if(state.textureFilter == FILTER_POINT)
{
- // Fractions
- UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, width)));
- UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<UInt4>(mipmap + OFFSET(Mipmap, height)));
+ sampleLumaTexel(yuv, uuuu, vvvv, wwww, layerIndex, sample, mipmap, buffer);
+ }
+ else
+ {
+ Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
+ Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
+ Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
+ Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
+
+ sampleLumaTexel(yuv00, uuuu0, vvvv0, wwww, layerIndex, sample, mipmap, buffer);
+ sampleLumaTexel(yuv01, uuuu0, vvvv1, wwww, layerIndex, sample, mipmap, buffer);
+ sampleLumaTexel(yuv10, uuuu1, vvvv0, wwww, layerIndex, sample, mipmap, buffer);
+ sampleLumaTexel(yuv11, uuuu1, vvvv1, wwww, layerIndex, sample, mipmap, buffer);
- UShort4 f1u = ~f0u;
- UShort4 f1v = ~f0v;
+ bilinearInterpolateFloat(yuv, uuuu0, vvvv0, yuv00, yuv01, yuv10, yuv11, mipmap, false, true, false, false);
+ }
+
+ // Pointers to the planes of YCbCr images are stored in consecutive mipmap levels.
+ Pointer<Byte> mipmapU = Pointer<Byte>(mipmap + 1 * sizeof(Mipmap));
+ Pointer<Byte> mipmapV = Pointer<Byte>(mipmap + 2 * sizeof(Mipmap));
+ Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmapU + OFFSET(Mipmap, buffer)); // U/V for 2-plane interleaved formats.
+ Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmapV + OFFSET(Mipmap, buffer));
+
+ // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#textures-implict-reconstruction
+ // but using normalized coordinates.
+ Float4 chromaU = u;
+ Float4 chromaV = v;
+ if(state.chromaXOffset == VK_CHROMA_LOCATION_COSITED_EVEN)
+ {
+ chromaU += (Float4(0.25f) / Float4(*Pointer<UInt4>(mipmapU + OFFSET(Mipmap, width))));
+ }
+ if(state.chromaYOffset == VK_CHROMA_LOCATION_COSITED_EVEN)
+ {
+ chromaV += (Float4(0.25f) / Float4(*Pointer<UInt4>(mipmapU + OFFSET(Mipmap, height))));
+ }
- UShort4 f0u0v = MulHigh(f0u, f0v);
- UShort4 f1u0v = MulHigh(f1u, f0v);
- UShort4 f0u1v = MulHigh(f0u, f1v);
- UShort4 f1u1v = MulHigh(f1u, f1v);
+ Short4 chromaUUUU = address(chromaU, state.addressingModeU);
+ Short4 chromaVVVV = address(chromaV, state.addressingModeV);
+
+ if(state.chromaFilter == FILTER_POINT)
+ {
+ sampleChromaTexel(yuv, chromaUUUU, chromaVVVV, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV);
+ }
+ else
+ {
+ Short4 chromaUUUU0 = offsetSample(chromaUUUU, mipmapU, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
+ Short4 chromaVVVV0 = offsetSample(chromaVVVV, mipmapU, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
+ Short4 chromaUUUU1 = offsetSample(chromaUUUU, mipmapU, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
+ Short4 chromaVVVV1 = offsetSample(chromaVVVV, mipmapU, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
+
+ sampleChromaTexel(yuv00, chromaUUUU0, chromaVVVV0, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV);
+ sampleChromaTexel(yuv01, chromaUUUU0, chromaVVVV1, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV);
+ sampleChromaTexel(yuv10, chromaUUUU1, chromaVVVV0, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV);
+ sampleChromaTexel(yuv11, chromaUUUU1, chromaVVVV1, wwww, layerIndex, sample, mipmapU, bufferU, mipmapV, bufferV);
+
+ bilinearInterpolateFloat(yuv, chromaUUUU0, chromaVVVV0, yuv00, yuv01, yuv10, yuv11, mipmapU, true, false, true, false);
+ }
+
+ if(state.swappedChroma)
+ {
+ std::swap(yuv.x, yuv.z);
+ }
+
+ if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
+ {
+ // Scale to the output 15-bit.
+ c.x = UShort4(yuv.x) << (15 - chromaBits);
+ c.y = UShort4(yuv.y) << (15 - lumaBits);
+ c.z = UShort4(yuv.z) << (15 - chromaBits);
+ }
+ else
+ {
+ const float twoPowLumaBits = static_cast<float>(0x1u << lumaBits);
+ const float twoPowLumaBitsMinus8 = static_cast<float>(0x1u << (lumaBits - 8));
+ const float twoPowChromaBits = static_cast<float>(0x1u << chromaBits);
+ const float twoPowChromaBitsMinus1 = static_cast<float>(0x1u << (chromaBits - 1));
+ const float twoPowChromaBitsMinus8 = static_cast<float>(0x1u << (chromaBits - 8));
- // Signed fractions
- Short4 f1u1vs;
- Short4 f0u1vs;
- Short4 f1u0vs;
- Short4 f0u0vs;
+ Float4 y = Float4(yuv.y);
+ Float4 u = Float4(yuv.z);
+ Float4 v = Float4(yuv.x);
- if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
+ if(state.studioSwing)
{
- f1u1vs = f1u1v >> 1;
- f0u1vs = f0u1v >> 1;
- f1u0vs = f1u0v >> 1;
- f0u0vs = f0u0v >> 1;
+ // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_NARROW
+ y = ((y / Float4(twoPowLumaBitsMinus8)) - Float4(16.0f)) / Float4(219.0f);
+ u = ((u / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f);
+ v = ((v / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f);
}
-
- // Bilinear interpolation
- if(componentCount >= 1)
+ else
{
- if(has16bitTextureComponents() && hasUnsignedTextureComponent(0))
- {
- c00.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0u) + MulHigh(As<UShort4>(c10.x), f0u);
- c01.x = As<UShort4>(c01.x) - MulHigh(As<UShort4>(c01.x), f0u) + MulHigh(As<UShort4>(c11.x), f0u);
- c.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v);
- }
- else
- {
- if(hasUnsignedTextureComponent(0))
- {
- c00.x = MulHigh(As<UShort4>(c00.x), f1u1v);
- c10.x = MulHigh(As<UShort4>(c10.x), f0u1v);
- c01.x = MulHigh(As<UShort4>(c01.x), f1u0v);
- c11.x = MulHigh(As<UShort4>(c11.x), f0u0v);
- }
- else
- {
- c00.x = MulHigh(c00.x, f1u1vs);
- c10.x = MulHigh(c10.x, f0u1vs);
- c01.x = MulHigh(c01.x, f1u0vs);
- c11.x = MulHigh(c11.x, f0u0vs);
- }
-
- c.x = (c00.x + c10.x) + (c01.x + c11.x);
- if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions
- }
+ // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_FULL
+ y = y / Float4(twoPowLumaBits - 1.0f);
+ u = (u - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f);
+ v = (v - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f);
}
- if(componentCount >= 2)
- {
- if(has16bitTextureComponents() && hasUnsignedTextureComponent(1))
- {
- c00.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0u) + MulHigh(As<UShort4>(c10.y), f0u);
- c01.y = As<UShort4>(c01.y) - MulHigh(As<UShort4>(c01.y), f0u) + MulHigh(As<UShort4>(c11.y), f0u);
- c.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v);
- }
- else
- {
- if(hasUnsignedTextureComponent(1))
- {
- c00.y = MulHigh(As<UShort4>(c00.y), f1u1v);
- c10.y = MulHigh(As<UShort4>(c10.y), f0u1v);
- c01.y = MulHigh(As<UShort4>(c01.y), f1u0v);
- c11.y = MulHigh(As<UShort4>(c11.y), f0u0v);
- }
- else
- {
- c00.y = MulHigh(c00.y, f1u1vs);
- c10.y = MulHigh(c10.y, f0u1vs);
- c01.y = MulHigh(c01.y, f1u0vs);
- c11.y = MulHigh(c11.y, f0u0vs);
- }
+ // Now, `y` is in [0, 1] and `u` and `v` are in [-0.5, 0.5].
- c.y = (c00.y + c10.y) + (c01.y + c11.y);
- if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions
- }
+ if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
+ {
+ c.x = Short4(v * static_cast<float>(0x7FFF));
+ c.y = Short4(y * static_cast<float>(0x7FFF));
+ c.z = Short4(u * static_cast<float>(0x7FFF));
}
-
- if(componentCount >= 3)
+ else
{
- if(has16bitTextureComponents() && hasUnsignedTextureComponent(2))
- {
- c00.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0u) + MulHigh(As<UShort4>(c10.z), f0u);
- c01.z = As<UShort4>(c01.z) - MulHigh(As<UShort4>(c01.z), f0u) + MulHigh(As<UShort4>(c11.z), f0u);
- c.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v);
- }
- else
- {
- if(hasUnsignedTextureComponent(2))
- {
- c00.z = MulHigh(As<UShort4>(c00.z), f1u1v);
- c10.z = MulHigh(As<UShort4>(c10.z), f0u1v);
- c01.z = MulHigh(As<UShort4>(c01.z), f1u0v);
- c11.z = MulHigh(As<UShort4>(c11.z), f0u0v);
- }
- else
- {
- c00.z = MulHigh(c00.z, f1u1vs);
- c10.z = MulHigh(c10.z, f0u1vs);
- c01.z = MulHigh(c01.z, f1u0vs);
- c11.z = MulHigh(c11.z, f0u0vs);
- }
+ // Generic YCbCr to RGB transformation:
+ // R = Y + 2 * (1 - Kr) * Cr
+ // G = Y - 2 * Kb * (1 - Kb) / Kg * Cb - 2 * Kr * (1 - Kr) / Kg * Cr
+ // B = Y + 2 * (1 - Kb) * Cb
- c.z = (c00.z + c10.z) + (c01.z + c11.z);
- if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions
- }
- }
+ float Kb = 0.114f;
+ float Kr = 0.299f;
- if(componentCount >= 4)
- {
- if(has16bitTextureComponents() && hasUnsignedTextureComponent(3))
+ switch(state.ycbcrModel)
{
- c00.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0u) + MulHigh(As<UShort4>(c10.w), f0u);
- c01.w = As<UShort4>(c01.w) - MulHigh(As<UShort4>(c01.w), f0u) + MulHigh(As<UShort4>(c11.w), f0u);
- c.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v);
+ case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
+ Kb = 0.0722f;
+ Kr = 0.2126f;
+ break;
+ case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
+ Kb = 0.114f;
+ Kr = 0.299f;
+ break;
+ case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
+ Kb = 0.0593f;
+ Kr = 0.2627f;
+ break;
+ default:
+ UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel));
}
- else
- {
- if(hasUnsignedTextureComponent(3))
- {
- c00.w = MulHigh(As<UShort4>(c00.w), f1u1v);
- c10.w = MulHigh(As<UShort4>(c10.w), f0u1v);
- c01.w = MulHigh(As<UShort4>(c01.w), f1u0v);
- c11.w = MulHigh(As<UShort4>(c11.w), f0u0v);
- }
- else
- {
- c00.w = MulHigh(c00.w, f1u1vs);
- c10.w = MulHigh(c10.w, f0u1vs);
- c01.w = MulHigh(c01.w, f1u0vs);
- c11.w = MulHigh(c11.w, f0u0vs);
- }
- c.w = (c00.w + c10.w) + (c01.w + c11.w);
- if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions
- }
+ const float Kg = 1.0f - Kr - Kb;
+
+ const float Rr = 2 * (1 - Kr);
+ const float Gb = -2 * Kb * (1 - Kb) / Kg;
+ const float Gr = -2 * Kr * (1 - Kr) / Kg;
+ const float Bb = 2 * (1 - Kb);
+
+ Float4 r = y + Float4(Rr) * v;
+ Float4 g = y + Float4(Gb) * u + Float4(Gr) * v;
+ Float4 b = y + Float4(Bb) * u;
+
+ c.x = Short4(r * static_cast<float>(0x7FFF));
+ c.y = Short4(g * static_cast<float>(0x7FFF));
+ c.z = Short4(b * static_cast<float>(0x7FFF));
}
}
- else // Gather
+ }
+ else // !isYcbcrFormat()
+ {
+ if(state.textureFilter == FILTER_POINT)
{
- VkComponentSwizzle swizzle = gatherSwizzle();
- switch(swizzle)
+ c = sampleTexel(uuuu, vvvv, wwww, layerIndex, sample, mipmap, buffer);
+ }
+ else
+ {
+ Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
+ Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
+ Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
+ Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
+
+ Vector4s c00 = sampleTexel(uuuu0, vvvv0, wwww, layerIndex, sample, mipmap, buffer);
+ Vector4s c10 = sampleTexel(uuuu1, vvvv0, wwww, layerIndex, sample, mipmap, buffer);
+ Vector4s c01 = sampleTexel(uuuu0, vvvv1, wwww, layerIndex, sample, mipmap, buffer);
+ Vector4s c11 = sampleTexel(uuuu1, vvvv1, wwww, layerIndex, sample, mipmap, buffer);
+
+ if(!gather) // Blend
{
- case VK_COMPONENT_SWIZZLE_ZERO:
- case VK_COMPONENT_SWIZZLE_ONE:
- // Handled at the final component swizzle.
- break;
- default:
- c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
- c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
- c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
- c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
- break;
+ bilinearInterpolate(c, uuuu0, vvvv0, c00, c01, c10, c11, mipmap);
+ }
+ else
+ {
+ VkComponentSwizzle swizzle = gatherSwizzle();
+ switch(swizzle)
+ {
+ case VK_COMPONENT_SWIZZLE_ZERO:
+ case VK_COMPONENT_SWIZZLE_ONE:
+ // Handled at the final component swizzle.
+ break;
+ default:
+ c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
+ c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
+ c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
+ c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
+ break;
+ }
}
}
}
@@ -692,9 +918,9 @@ Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, F
applyOffset(u_, v_, w_, offset, mipmap);
- Short4 uuuu = address(u_, state.addressingModeU, mipmap);
- Short4 vvvv = address(v_, state.addressingModeV, mipmap);
- Short4 wwww = address(w_, state.addressingModeW, mipmap);
+ Short4 uuuu = address(u_, state.addressingModeU);
+ Short4 vvvv = address(v_, state.addressingModeV);
+ Short4 wwww = address(w_, state.addressingModeW);
if(state.textureFilter == FILTER_POINT)
{
@@ -1753,226 +1979,112 @@ Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer)
return c;
}
-Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, const Short4 &layerIndex, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer)
+void SamplerCore::sampleLumaTexel(Vector4f &output, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, const Short4 &layerIndex, const Int4 &sample, Pointer<Byte> &lumaMipmap, Pointer<Byte> lumaBuffer)
{
- Vector4s c;
+ ASSERT(isYcbcrFormat());
UInt index[4];
- computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, mipmap);
+ computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, lumaMipmap);
- if(isYcbcrFormat())
- {
- // Generates 15-bit output.
+ // Luminance (either 8-bit or 10-bit in bottom bits).
+ UShort4 Y;
- // Pointers to the planes of YCbCr images are stored in consecutive mipmap levels.
- Pointer<Byte> bufferY = buffer; // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
- Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmap + 1 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); // U/V for 2-plane interleaved formats.
- Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmap + 2 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
-
- // Luminance (either 8-bit or 10-bit in bottom bits).
- UShort4 Y;
+ switch(state.textureFormat)
+ {
+ case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+ case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
{
- switch(state.textureFormat)
- {
- case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
- case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
- {
- Y = Insert(Y, UShort(bufferY[index[0]]), 0);
- Y = Insert(Y, UShort(bufferY[index[1]]), 1);
- Y = Insert(Y, UShort(bufferY[index[2]]), 2);
- Y = Insert(Y, UShort(bufferY[index[3]]), 3);
- }
- break;
- case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
- {
- Y = Insert(Y, Pointer<UShort>(bufferY)[index[0]], 0);
- Y = Insert(Y, Pointer<UShort>(bufferY)[index[1]], 1);
- Y = Insert(Y, Pointer<UShort>(bufferY)[index[2]], 2);
- Y = Insert(Y, Pointer<UShort>(bufferY)[index[3]], 3);
- // Top 10 bits of each 16 bits:
- Y = (Y & UShort4(0xFFC0u)) >> 6;
- }
- break;
- default:
- UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
- break;
- }
+ Y = Insert(Y, UShort(lumaBuffer[index[0]]), 0);
+ Y = Insert(Y, UShort(lumaBuffer[index[1]]), 1);
+ Y = Insert(Y, UShort(lumaBuffer[index[2]]), 2);
+ Y = Insert(Y, UShort(lumaBuffer[index[3]]), 3);
}
-
- // Chroma (either 8-bit or 10-bit in bottom bits).
- UShort4 Cb, Cr;
+ break;
+ case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
{
- computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, mipmap + sizeof(Mipmap));
- UShort4 U, V;
+ Y = Insert(Y, Pointer<UShort>(lumaBuffer)[index[0]], 0);
+ Y = Insert(Y, Pointer<UShort>(lumaBuffer)[index[1]], 1);
+ Y = Insert(Y, Pointer<UShort>(lumaBuffer)[index[2]], 2);
+ Y = Insert(Y, Pointer<UShort>(lumaBuffer)[index[3]], 3);
+ // Top 10 bits of each 16 bits:
+ Y = (Y & UShort4(0xFFC0u)) >> 6;
+ }
+ break;
+ default:
+ UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
+ break;
+ }
- switch(state.textureFormat)
- {
- case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
- {
- U = Insert(U, UShort(bufferU[index[0]]), 0);
- U = Insert(U, UShort(bufferU[index[1]]), 1);
- U = Insert(U, UShort(bufferU[index[2]]), 2);
- U = Insert(U, UShort(bufferU[index[3]]), 3);
-
- V = Insert(V, UShort(bufferV[index[0]]), 0);
- V = Insert(V, UShort(bufferV[index[1]]), 1);
- V = Insert(V, UShort(bufferV[index[2]]), 2);
- V = Insert(V, UShort(bufferV[index[3]]), 3);
- }
- break;
- case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
- {
- UShort4 UV;
- UV = Insert(UV, Pointer<UShort>(bufferU)[index[0]], 0);
- UV = Insert(UV, Pointer<UShort>(bufferU)[index[1]], 1);
- UV = Insert(UV, Pointer<UShort>(bufferU)[index[2]], 2);
- UV = Insert(UV, Pointer<UShort>(bufferU)[index[3]], 3);
-
- U = (UV & UShort4(0x00FFu));
- V = (UV & UShort4(0xFF00u)) >> 8;
- }
- break;
- case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
- {
- UInt4 UV;
- UV = Insert(UV, Pointer<UInt>(bufferU)[index[0]], 0);
- UV = Insert(UV, Pointer<UInt>(bufferU)[index[1]], 1);
- UV = Insert(UV, Pointer<UInt>(bufferU)[index[2]], 2);
- UV = Insert(UV, Pointer<UInt>(bufferU)[index[3]], 3);
- // Top 10 bits of first 16-bits:
- U = UShort4((UV & UInt4(0x0000FFC0u)) >> 6);
- // Top 10 bits of second 16-bits:
- V = UShort4((UV & UInt4(0xFFC00000u)) >> 22);
- }
- break;
- default:
- UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
- break;
- }
+ output.y = Float4(Y);
+}
- if(!state.swappedChroma)
- {
- Cb = U;
- Cr = V;
- }
- else
- {
- Cb = V;
- Cr = U;
- }
- }
+void SamplerCore::sampleChromaTexel(Vector4f &output, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, const Short4 &layerIndex, const Int4 &sample, Pointer<Byte> &mipmapU, Pointer<Byte> bufferU, Pointer<Byte> &mipmapV, Pointer<Byte> bufferV)
+{
+ ASSERT(isYcbcrFormat());
- uint8_t lumaBits = 8;
- uint8_t chromaBits = 8;
- switch(state.textureFormat)
+ UInt index[4];
+
+ // Chroma (either 8-bit or 10-bit in bottom bits).
+ UShort4 U, V;
+ computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, mipmapU);
+
+ switch(state.textureFormat)
+ {
+ case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
{
- case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
- case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
- lumaBits = 8;
- chromaBits = 8;
- break;
- case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
- lumaBits = 10;
- chromaBits = 10;
- break;
- default:
- UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
- break;
+ U = Insert(U, UShort(bufferU[index[0]]), 0);
+ U = Insert(U, UShort(bufferU[index[1]]), 1);
+ U = Insert(U, UShort(bufferU[index[2]]), 2);
+ U = Insert(U, UShort(bufferU[index[3]]), 3);
+
+ V = Insert(V, UShort(bufferV[index[0]]), 0);
+ V = Insert(V, UShort(bufferV[index[1]]), 1);
+ V = Insert(V, UShort(bufferV[index[2]]), 2);
+ V = Insert(V, UShort(bufferV[index[3]]), 3);
}
-
- if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
+ break;
+ case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
{
- // Scale to the output 15-bit.
- c.x = Cr << (15 - chromaBits);
- c.y = Y << (15 - lumaBits);
- c.z = Cb << (15 - chromaBits);
+ UShort4 UV;
+ UV = Insert(UV, Pointer<UShort>(bufferU)[index[0]], 0);
+ UV = Insert(UV, Pointer<UShort>(bufferU)[index[1]], 1);
+ UV = Insert(UV, Pointer<UShort>(bufferU)[index[2]], 2);
+ UV = Insert(UV, Pointer<UShort>(bufferU)[index[3]], 3);
+
+ U = (UV & UShort4(0x00FFu));
+ V = (UV & UShort4(0xFF00u)) >> 8;
}
- else
+ break;
+ case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
{
- const float twoPowLumaBits = static_cast<float>(0x1u << lumaBits);
- const float twoPowLumaBitsMinus8 = static_cast<float>(0x1u << (lumaBits - 8));
- const float twoPowChromaBits = static_cast<float>(0x1u << chromaBits);
- const float twoPowChromaBitsMinus1 = static_cast<float>(0x1u << (chromaBits - 1));
- const float twoPowChromaBitsMinus8 = static_cast<float>(0x1u << (chromaBits - 8));
-
- Float4 y = Float4(Y);
- Float4 u = Float4(Cb);
- Float4 v = Float4(Cr);
-
- if(state.studioSwing)
- {
- // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_NARROW
- y = ((y / Float4(twoPowLumaBitsMinus8)) - Float4(16.0f)) / Float4(219.0f);
- u = ((u / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f);
- v = ((v / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f);
- }
- else
- {
- // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_FULL
- y = y / Float4(twoPowLumaBits - 1.0f);
- u = (u - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f);
- v = (v - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f);
- }
-
- // Now, `y` is in [0, 1] and `u` and `v` are in [-0.5, 0.5].
-
- if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
- {
- c.x = Short4(v * static_cast<float>(0x7FFF));
- c.y = Short4(y * static_cast<float>(0x7FFF));
- c.z = Short4(u * static_cast<float>(0x7FFF));
- }
- else
- {
- // Generic YCbCr to RGB transformation:
- // R = Y + 2 * (1 - Kr) * Cr
- // G = Y - 2 * Kb * (1 - Kb) / Kg * Cb - 2 * Kr * (1 - Kr) / Kg * Cr
- // B = Y + 2 * (1 - Kb) * Cb
-
- float Kb = 0.114f;
- float Kr = 0.299f;
-
- switch(state.ycbcrModel)
- {
- case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
- Kb = 0.0722f;
- Kr = 0.2126f;
- break;
- case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
- Kb = 0.114f;
- Kr = 0.299f;
- break;
- case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
- Kb = 0.0593f;
- Kr = 0.2627f;
- break;
- default:
- UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel));
- }
-
- const float Kg = 1.0f - Kr - Kb;
+ UInt4 UV;
+ UV = Insert(UV, Pointer<UInt>(bufferU)[index[0]], 0);
+ UV = Insert(UV, Pointer<UInt>(bufferU)[index[1]], 1);
+ UV = Insert(UV, Pointer<UInt>(bufferU)[index[2]], 2);
+ UV = Insert(UV, Pointer<UInt>(bufferU)[index[3]], 3);
+ // Top 10 bits of first 16-bits:
+ U = UShort4((UV & UInt4(0x0000FFC0u)) >> 6);
+ // Top 10 bits of second 16-bits:
+ V = UShort4((UV & UInt4(0xFFC00000u)) >> 22);
+ }
+ break;
+ default:
+ UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
+ break;
+ }
- const float Rr = 2 * (1 - Kr);
- const float Gb = -2 * Kb * (1 - Kb) / Kg;
- const float Gr = -2 * Kr * (1 - Kr) / Kg;
- const float Bb = 2 * (1 - Kb);
+ output.x = Float4(V);
+ output.z = Float4(U);
+}
- Float4 r = y + Float4(Rr) * v;
- Float4 g = y + Float4(Gb) * u + Float4(Gr) * v;
- Float4 b = y + Float4(Bb) * u;
+Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, const Short4 &layerIndex, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer)
+{
+ ASSERT(!isYcbcrFormat());
- c.x = Short4(r * static_cast<float>(0x7FFF));
- c.y = Short4(g * static_cast<float>(0x7FFF));
- c.z = Short4(b * static_cast<float>(0x7FFF));
- }
- }
- }
- else
- {
- return sampleTexel(index, buffer);
- }
+ UInt index[4];
+ computeIndices(index, uuuu, vvvv, wwww, layerIndex, sample, mipmap);
- return c;
+ return sampleTexel(index, buffer);
}
Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, const Float4 &dRef, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer)
@@ -2281,7 +2393,7 @@ Int4 SamplerCore::computeFilterOffset(Float &lod)
return Int4(~0);
}
-Short4 SamplerCore::address(const Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap)
+Short4 SamplerCore::address(const Float4 &uw, AddressingMode addressingMode)
{
if(addressingMode == ADDRESSING_UNUSED)
{
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp
index 2074a897a..bcd468225 100644
--- a/src/Pipeline/SamplerCore.hpp
+++ b/src/Pipeline/SamplerCore.hpp
@@ -86,12 +86,16 @@ private:
void applyOffset(Float4 &u, Float4 &v, Float4 &w, Vector4i &offset, Pointer<Byte> mipmap);
void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, const Short4 &cubeArrayLayer, const Int4 &sample, const Pointer<Byte> &mipmap);
void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, const Int4 &sample, Int4 valid, const Pointer<Byte> &mipmap);
+ void bilinearInterpolateFloat(Vector4f &output, const Short4 &uuuu0, const Short4 &vvvv0, Vector4f &c00, Vector4f &c01, Vector4f &c10, Vector4f &c11, const Pointer<Byte> &mipmap, bool interpolateComponent0, bool interpolateComponent1, bool interpolateComponent2, bool interpolateComponent3);
+ void bilinearInterpolate(Vector4s &output, const Short4 &uuuu0, const Short4 &vvvv0, Vector4s &c00, Vector4s &c01, Vector4s &c10, Vector4s &c11, const Pointer<Byte> &mipmap);
+ void sampleLumaTexel(Vector4f& output, Short4 &u, Short4 &v, Short4 &w, const Short4 &cubeArrayLayer, const Int4 &sample, Pointer<Byte> &lumaMipmap, Pointer<Byte> lumaBuffer);
+ void sampleChromaTexel(Vector4f& output, Short4 &u, Short4 &v, Short4 &w, const Short4 &cubeArrayLayer, const Int4 &sample, Pointer<Byte> &mipmapU, Pointer<Byte> bufferU, Pointer<Byte> &mipmapV, Pointer<Byte> bufferV);
Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &w, const Short4 &cubeArrayLayer, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer);
Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer);
Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &w, const Float4 &dRef, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer);
Vector4f replaceBorderTexel(const Vector4f &c, Int4 valid);
Pointer<Byte> selectMipmap(const Pointer<Byte> &texture, const Float &lod, bool secondLOD);
- Short4 address(const Float4 &uvw, AddressingMode addressingMode, Pointer<Byte> &mipmap);
+ Short4 address(const Float4 &uvw, AddressingMode addressingMode);
Short4 computeLayerIndex16(const Float4 &a, Pointer<Byte> &mipmap);
void address(const Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Int4 &filter, int whd, AddressingMode addressingMode);
Int4 computeLayerIndex(const Float4 &a, Pointer<Byte> &mipmap);
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index 4c674050e..7d4dd6811 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -75,6 +75,9 @@ SpirvEmitter::ImageSampler *SpirvEmitter::getImageSampler(const vk::Device *devi
samplerState.ycbcrModel = vkSamplerState->ycbcrModel;
samplerState.studioSwing = vkSamplerState->studioSwing;
samplerState.swappedChroma = vkSamplerState->swappedChroma;
+ samplerState.chromaFilter = vkSamplerState->chromaFilter == VK_FILTER_LINEAR ? FILTER_LINEAR : FILTER_POINT;
+ samplerState.chromaXOffset = vkSamplerState->chromaXOffset;
+ samplerState.chromaYOffset = vkSamplerState->chromaYOffset;
samplerState.mipLodBias = vkSamplerState->mipLodBias;
samplerState.maxAnisotropy = vkSamplerState->maxAnisotropy;
diff --git a/src/Reactor/Print.hpp b/src/Reactor/Print.hpp
index 6d2b4d1a3..8a4afd6ac 100644
--- a/src/Reactor/Print.hpp
+++ b/src/Reactor/Print.hpp
@@ -499,6 +499,24 @@ static_assert(3 == RR_COUNT_ARGUMENTS(a, b, c), "RR_COUNT_ARGUMENTS broken");
# define RR_WATCH_FMT_12(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12) \
RR_WATCH_FMT_11(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11) \
"\n " #_12 ": {11}"
+# define RR_WATCH_FMT_13(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13) \
+ RR_WATCH_FMT_12(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12) \
+ "\n " #_13 ": {12}"
+# define RR_WATCH_FMT_14(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14) \
+ RR_WATCH_FMT_13(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13) \
+ "\n " #_14 ": {13}"
+# define RR_WATCH_FMT_15(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15) \
+ RR_WATCH_FMT_14(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14) \
+ "\n " #_15 ": {14}"
+# define RR_WATCH_FMT_16(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16) \
+ RR_WATCH_FMT_15(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15) \
+ "\n " #_16 ": {15}"
+# define RR_WATCH_FMT_17(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17) \
+ RR_WATCH_FMT_16(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16) \
+ "\n " #_17 ": {16}"
+# define RR_WATCH_FMT_18(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18) \
+ RR_WATCH_FMT_17(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17) \
+ "\n " #_18 ": {17}"
// RR_WATCH() is a helper that prints the name and value of all the supplied
// arguments.
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index f5f189e7a..3c5a1200e 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2053,6 +2053,9 @@ RValue<Float4> Trunc(RValue<Float4> x);
RValue<Float4> Frac(RValue<Float4> x);
RValue<Float4> Floor(RValue<Float4> x);
RValue<Float4> Ceil(RValue<Float4> x);
+inline RValue<Float4> Mix(RValue<Float4> x, RValue<Float4> y, RValue<Float4> frac) {
+ return (x * (Float4(1.0f) - frac)) + (y * frac);
+}
// Trigonometric functions
RValue<Float4> Sin(RValue<Float4> x);
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 3b447db1c..152f021a7 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -2131,6 +2131,7 @@ void PhysicalDevice::GetFormatProperties(Format format, VkFormatProperties3 *pFo
pFormatProperties->optimalTilingFeatures |=
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT |
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT |
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
diff --git a/src/Vulkan/VkSampler.cpp b/src/Vulkan/VkSampler.cpp
index 0458f0adf..d007cf1fe 100644
--- a/src/Vulkan/VkSampler.cpp
+++ b/src/Vulkan/VkSampler.cpp
@@ -44,6 +44,9 @@ SamplerState::SamplerState(const VkSamplerCreateInfo *pCreateInfo, const vk::Sam
ycbcrModel = ycbcrConversion->ycbcrModel;
studioSwing = (ycbcrConversion->ycbcrRange == VK_SAMPLER_YCBCR_RANGE_ITU_NARROW);
swappedChroma = (ycbcrConversion->components.r != VK_COMPONENT_SWIZZLE_R);
+ chromaFilter = ycbcrConversion->chromaFilter;
+ chromaXOffset = ycbcrConversion->xChromaOffset;
+ chromaYOffset = ycbcrConversion->yChromaOffset;
}
}
diff --git a/src/Vulkan/VkSampler.hpp b/src/Vulkan/VkSampler.hpp
index 4a627c18b..f20240a7d 100644
--- a/src/Vulkan/VkSampler.hpp
+++ b/src/Vulkan/VkSampler.hpp
@@ -56,6 +56,9 @@ struct SamplerState : sw::Memset<SamplerState>
const bool highPrecisionFiltering = false;
bool studioSwing = false; // Narrow range
bool swappedChroma = false; // Cb/Cr components in reverse order
+ VkFilter chromaFilter = VK_FILTER_NEAREST;
+ VkChromaLocation chromaXOffset = VK_CHROMA_LOCATION_COSITED_EVEN;
+ VkChromaLocation chromaYOffset = VK_CHROMA_LOCATION_COSITED_EVEN;
};
class Sampler : public Object<Sampler, VkSampler>, public SamplerState