From 3f2e61dea6c447ed7b98483fa0ce9ccc3cb5cc3c Mon Sep 17 00:00:00 2001 From: Jamie Madill Date: Fri, 5 Sep 2014 10:38:05 -0400 Subject: Enable MRT pixel shader rewriting. Writing to all 8 pixel outputs was causing performance problems on Intel and AMD. Enabling Geoff's work to rewrite our pixel shaders solves the regression. This patch also includes a workaround to the nVidia driver bug where it would ignore NULL RT values in OMSetRenderTargets, by compacting the RT list to skip NULL values. BUG=angle:705 BUG=365078 Change-Id: Ia68af6f0ccd5f10c484d6f76297a0bec694948f0 Reviewed-on: https://chromium-review.googlesource.com/214852 Tested-by: Jamie Madill Reviewed-by: Geoff Lang --- src/libGLESv2/Framebuffer.cpp | 2 + src/libGLESv2/ProgramBinary.cpp | 30 +- src/libGLESv2/ProgramBinary.h | 8 +- src/libGLESv2/renderer/d3d/DynamicHLSL.cpp | 41 ++- .../renderer/d3d/d3d11/renderer11_utils.cpp | 9 +- tests/angle_tests/DrawBuffersTest.cpp | 329 +++++++++++++++++++++ 6 files changed, 396 insertions(+), 23 deletions(-) create mode 100644 tests/angle_tests/DrawBuffersTest.cpp diff --git a/src/libGLESv2/Framebuffer.cpp b/src/libGLESv2/Framebuffer.cpp index b9c4a71c..6247b5a0 100644 --- a/src/libGLESv2/Framebuffer.cpp +++ b/src/libGLESv2/Framebuffer.cpp @@ -649,10 +649,12 @@ ColorbufferInfo Framebuffer::getColorbuffersForRender() const ASSERT(drawBufferState == GL_BACK || drawBufferState == (GL_COLOR_ATTACHMENT0_EXT + colorAttachment)); colorbuffersForRender.push_back(colorbuffer); } +#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_DISABLED) else { colorbuffersForRender.push_back(NULL); } +#endif } return colorbuffersForRender; diff --git a/src/libGLESv2/ProgramBinary.cpp b/src/libGLESv2/ProgramBinary.cpp index 652b7d63..97fc4aeb 100644 --- a/src/libGLESv2/ProgramBinary.cpp +++ b/src/libGLESv2/ProgramBinary.cpp @@ -82,7 +82,7 @@ unsigned int ParseAndStripArrayIndex(std::string* name) return subscript; } -void GetInputLayoutFromShader(const std::vector &shaderAttributes, VertexFormat inputLayout[MAX_VERTEX_ATTRIBS]) +void GetDefaultInputLayoutFromShader(const std::vector &shaderAttributes, VertexFormat inputLayout[MAX_VERTEX_ATTRIBS]) { size_t layoutIndex = 0; for (size_t attributeIndex = 0; attributeIndex < shaderAttributes.size(); attributeIndex++) @@ -108,6 +108,24 @@ void GetInputLayoutFromShader(const std::vector &shaderAttributes } } +std::vector GetDefaultOutputLayoutFromShader(const std::vector &shaderOutputVars) +{ +#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_ENABLED) + std::vector defaultPixelOutput(1); +#else + std::vector defaultPixelOutput(IMPLEMENTATION_MAX_DRAW_BUFFERS); +#endif + for (size_t i = 0; i < defaultPixelOutput.size(); i++) + { + defaultPixelOutput[i] = GL_NONE; + } + + ASSERT(!shaderOutputVars.empty()); + defaultPixelOutput[0] = GL_COLOR_ATTACHMENT0 + shaderOutputVars[0].outputIndex; + + return defaultPixelOutput; +} + bool IsRowMajorLayout(const sh::InterfaceBlockField &var) { return var.isRowMajorLayout; @@ -261,7 +279,9 @@ rx::ShaderExecutable *ProgramBinary::getPixelExecutableForOutputLayout(const std { for (size_t executableIndex = 0; executableIndex < mPixelExecutables.size(); executableIndex++) { +#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_ENABLED) if (mPixelExecutables[executableIndex]->matchesSignature(outputSignature)) +#endif { return mPixelExecutables[executableIndex]->shaderExecutable(); } @@ -1701,14 +1721,10 @@ bool ProgramBinary::link(InfoLog &infoLog, const AttributeBindings &attributeBin if (success) { VertexFormat defaultInputLayout[MAX_VERTEX_ATTRIBS]; - GetInputLayoutFromShader(vertexShader->getActiveAttributes(), defaultInputLayout); + GetDefaultInputLayoutFromShader(vertexShader->getActiveAttributes(), defaultInputLayout); rx::ShaderExecutable *defaultVertexExecutable = getVertexExecutableForInputLayout(defaultInputLayout); - std::vector defaultPixelOutput(IMPLEMENTATION_MAX_DRAW_BUFFERS); - for (size_t i = 0; i < defaultPixelOutput.size(); i++) - { - defaultPixelOutput[i] = (i == 0) ? GL_COLOR_ATTACHMENT0 : GL_NONE; - } + std::vector defaultPixelOutput = GetDefaultOutputLayoutFromShader(mPixelShaderKey); rx::ShaderExecutable *defaultPixelExecutable = getPixelExecutableForOutputLayout(defaultPixelOutput); if (usesGeometryShader()) diff --git a/src/libGLESv2/ProgramBinary.h b/src/libGLESv2/ProgramBinary.h index ee6a645f..76baaec2 100644 --- a/src/libGLESv2/ProgramBinary.h +++ b/src/libGLESv2/ProgramBinary.h @@ -24,6 +24,11 @@ #include #include +// TODO(jmadill): place this in workarounds library +#define ANGLE_WORKAROUND_ENABLED 1 +#define ANGLE_WORKAROUND_DISABLED 2 +#define ANGLE_MRT_PERF_WORKAROUND ANGLE_WORKAROUND_ENABLED + namespace sh { class HLSLBlockEncoder; @@ -271,8 +276,7 @@ class ProgramBinary : public RefCountObject PixelExecutable(const std::vector &outputSignature, rx::ShaderExecutable *shaderExecutable); ~PixelExecutable(); - // FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers - bool matchesSignature(const std::vector &signature) const { return true; /* mOutputSignature == signature; */ } + bool matchesSignature(const std::vector &signature) const { return mOutputSignature == signature; } const std::vector &outputSignature() const { return mOutputSignature; } rx::ShaderExecutable *shaderExecutable() const { return mShaderExecutable; } diff --git a/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp b/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp index 6aa0d137..cd2b1a85 100644 --- a/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp +++ b/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp @@ -22,7 +22,7 @@ META_ASSERT(GL_INVALID_INDEX == UINT_MAX); using namespace gl; -namespace gl_d3d +namespace { std::string HLSLComponentTypeString(GLenum componentType) @@ -70,6 +70,21 @@ std::string HLSLTypeString(GLenum type) return HLSLComponentTypeString(gl::VariableComponentType(type), gl::VariableComponentCount(type)); } +const rx::PixelShaderOuputVariable &GetOutputAtLocation(const std::vector &outputVariables, + unsigned int location) +{ + for (size_t variableIndex = 0; variableIndex < outputVariables.size(); ++variableIndex) + { + if (outputVariables[variableIndex].outputIndex == location) + { + return outputVariables[variableIndex]; + } + } + + UNREACHABLE(); + return outputVariables[0]; +} + } namespace rx @@ -328,7 +343,7 @@ std::string DynamicHLSL::generateVaryingHLSL(const ShaderD3D *shader) const { GLenum componentType = VariableComponentType(transposedType); int columnCount = VariableColumnCount(transposedType); - typeString = gl_d3d::HLSLComponentTypeString(componentType, columnCount); + typeString = HLSLComponentTypeString(componentType, columnCount); } varyingHLSL += typeString + " v" + n + " : " + varyingSemantic + n + ";\n"; } @@ -361,12 +376,12 @@ std::string DynamicHLSL::generateVertexShaderForInputLayout(const std::string &s if (IsMatrixType(shaderAttribute.type)) { // Matrix types are always transposed - structHLSL += " " + gl_d3d::HLSLMatrixTypeString(TransposeMatrixType(shaderAttribute.type)); + structHLSL += " " + HLSLMatrixTypeString(TransposeMatrixType(shaderAttribute.type)); } else { GLenum componentType = mRenderer->getVertexComponentType(vertexFormat); - structHLSL += " " + gl_d3d::HLSLComponentTypeString(componentType, VariableComponentCount(shaderAttribute.type)); + structHLSL += " " + HLSLComponentTypeString(componentType, VariableComponentCount(shaderAttribute.type)); } structHLSL += " " + decorateVariable(shaderAttribute.name) + " : TEXCOORD" + Str(semanticIndex) + ";\n"; @@ -421,17 +436,19 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(const std::string std::string declarationHLSL; std::string copyHLSL; - for (size_t i = 0; i < outputVariables.size(); i++) + + for (size_t layoutIndex = 0; layoutIndex < outputLayout.size(); ++layoutIndex) { - const PixelShaderOuputVariable& outputVariable = outputVariables[i]; - ASSERT(outputLayout.size() > outputVariable.outputIndex); + GLenum binding = outputLayout[layoutIndex]; - // FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers - bool outputIndexEnabled = true; // outputLayout[outputVariable.outputIndex] != GL_NONE - if (outputIndexEnabled) + if (binding != GL_NONE) { - declarationHLSL += " " + gl_d3d::HLSLTypeString(outputVariable.type) + " " + outputVariable.name + - " : " + targetSemantic + Str(outputVariable.outputIndex) + ";\n"; + unsigned int location = (binding - GL_COLOR_ATTACHMENT0); + + const PixelShaderOuputVariable &outputVariable = GetOutputAtLocation(outputVariables, location); + + declarationHLSL += " " + HLSLTypeString(outputVariable.type) + " " + outputVariable.name + + " : " + targetSemantic + Str(layoutIndex) + ";\n"; copyHLSL += " output." + outputVariable.name + " = " + outputVariable.source + ";\n"; } diff --git a/src/libGLESv2/renderer/d3d/d3d11/renderer11_utils.cpp b/src/libGLESv2/renderer/d3d/d3d11/renderer11_utils.cpp index c021e2ab..bb2e5362 100644 --- a/src/libGLESv2/renderer/d3d/d3d11/renderer11_utils.cpp +++ b/src/libGLESv2/renderer/d3d/d3d11/renderer11_utils.cpp @@ -9,6 +9,7 @@ #include "libGLESv2/renderer/d3d/d3d11/renderer11_utils.h" #include "libGLESv2/renderer/d3d/d3d11/formatutils11.h" +#include "libGLESv2/ProgramBinary.h" #include "common/debug.h" @@ -392,9 +393,13 @@ static size_t GetMaximumSimultaneousRenderTargets(D3D_FEATURE_LEVEL featureLevel case D3D_FEATURE_LEVEL_11_1: case D3D_FEATURE_LEVEL_11_0: return D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; - // FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers case D3D_FEATURE_LEVEL_10_1: - case D3D_FEATURE_LEVEL_10_0: return 1; /* D3D10_SIMULTANEOUS_RENDER_TARGET_COUNT; */ + case D3D_FEATURE_LEVEL_10_0: +#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_ENABLED) + return D3D10_SIMULTANEOUS_RENDER_TARGET_COUNT; +#else + return 1; +#endif case D3D_FEATURE_LEVEL_9_3: return D3D_FL9_3_SIMULTANEOUS_RENDER_TARGET_COUNT; case D3D_FEATURE_LEVEL_9_2: diff --git a/tests/angle_tests/DrawBuffersTest.cpp b/tests/angle_tests/DrawBuffersTest.cpp new file mode 100644 index 00000000..fdf2bdbb --- /dev/null +++ b/tests/angle_tests/DrawBuffersTest.cpp @@ -0,0 +1,329 @@ +#include "ANGLETest.h" + +class DrawBuffersTest : public ANGLETest +{ + protected: + DrawBuffersTest(int clientVersion) + { + setWindowWidth(128); + setWindowHeight(128); + setConfigRedBits(8); + setConfigGreenBits(8); + setConfigBlueBits(8); + setConfigAlphaBits(8); + setConfigDepthBits(24); + setClientVersion(clientVersion); + } + + virtual void SetUp() + { + ANGLETest::SetUp(); + + glGenFramebuffers(1, &mFBO); + glBindFramebuffer(GL_FRAMEBUFFER, mFBO); + + glGenTextures(4, mTextures); + + for (size_t texIndex = 0; texIndex < ArraySize(mTextures); texIndex++) + { + glBindTexture(GL_TEXTURE_2D, mTextures[texIndex]); + glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, getWindowWidth(), getWindowHeight()); + } + + GLfloat data[] = + { + -1.0f, 1.0f, + -1.0f, -2.0f, + 2.0f, 1.0f + }; + + glGenBuffers(1, &mBuffer); + glBindBuffer(GL_ARRAY_BUFFER, mBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * 6, data, GL_STATIC_DRAW); + + GLint maxDrawBuffers; + glGetIntegerv(GL_MAX_DRAW_BUFFERS, &maxDrawBuffers); + ASSERT_EQ(maxDrawBuffers, 8); + + ASSERT_GL_NO_ERROR(); + } + + virtual void TearDown() + { + glDeleteFramebuffers(1, &mFBO); + glDeleteTextures(4, mTextures); + glDeleteBuffers(1, &mBuffer); + + ANGLETest::TearDown(); + } + + void setupMRTProgramESSL3(bool bufferEnabled[8], GLuint *programOut) + { + const std::string vertexShaderSource = + "#version 300 es\n" + "in vec4 position;\n" + "void main() {\n" + " gl_Position = position;\n" + "}\n"; + + std::stringstream strstr; + + strstr << "#version 300 es\n" + "precision highp float;\n"; + + for (unsigned int index = 0; index < 8; index++) + { + if (bufferEnabled[index]) + { + strstr << "layout(location = " << index << ") " + "out vec4 value" << index << ";\n"; + } + } + + strstr << "void main()\n" + "{\n"; + + for (unsigned int index = 0; index < 8; index++) + { + if (bufferEnabled[index]) + { + unsigned int r = (index + 1) & 1; + unsigned int g = (index + 1) & 2; + unsigned int b = (index + 1) & 4; + + strstr << " value" << index << " = vec4(" + << r << ".0, " << g << ".0, " + << b << ".0, 1.0);\n"; + } + } + + strstr << "}\n"; + + *programOut = CompileProgram(vertexShaderSource, strstr.str()); + if (*programOut == 0) + { + FAIL() << "shader compilation failed."; + } + + glUseProgram(*programOut); + + GLint location = glGetAttribLocation(*programOut, "position"); + ASSERT_NE(location, -1); + glBindBuffer(GL_ARRAY_BUFFER, mBuffer); + glVertexAttribPointer(location, 2, GL_FLOAT, GL_FALSE, 8, NULL); + glEnableVertexAttribArray(location); + } + + void setupMRTProgramESSL1(bool bufferEnabled[8], GLuint *programOut) + { + const std::string vertexShaderSource = + "attribute vec4 position;\n" + "void main() {\n" + " gl_Position = position;\n" + "}\n"; + + std::stringstream strstr; + + strstr << "#extension GL_EXT_draw_buffers : enable\n" + "precision highp float;\n" + "void main()\n" + "{\n"; + + for (unsigned int index = 0; index < 8; index++) + { + if (bufferEnabled[index]) + { + unsigned int r = (index + 1) & 1; + unsigned int g = (index + 1) & 2; + unsigned int b = (index + 1) & 4; + + strstr << " gl_FragData[" << index << "] = vec4(" + << r << ".0, " << g << ".0, " + << b << ".0, 1.0);\n"; + } + } + + strstr << "}\n"; + + *programOut = CompileProgram(vertexShaderSource, strstr.str()); + if (*programOut == 0) + { + FAIL() << "shader compilation failed."; + } + + glUseProgram(*programOut); + + GLint location = glGetAttribLocation(*programOut, "position"); + ASSERT_NE(location, -1); + glBindBuffer(GL_ARRAY_BUFFER, mBuffer); + glVertexAttribPointer(location, 2, GL_FLOAT, GL_FALSE, 8, NULL); + glEnableVertexAttribArray(location); + } + + void setupMRTProgram(bool bufferEnabled[8], GLuint *programOut) + { + if (getClientVersion() == 3) + { + setupMRTProgramESSL3(bufferEnabled, programOut); + } + else + { + ASSERT_EQ(getClientVersion(), 2); + setupMRTProgramESSL1(bufferEnabled, programOut); + } + } + + void verifyAttachment(unsigned int index, GLuint textureName) + { + for (unsigned int colorAttachment = 0; colorAttachment < 8; colorAttachment++) + { + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + colorAttachment, GL_TEXTURE_2D, 0, 0); + } + + glBindTexture(GL_TEXTURE_2D, textureName); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, textureName, 0); + + unsigned int r = (((index + 1) & 1) > 0) ? 255 : 0; + unsigned int g = (((index + 1) & 2) > 0) ? 255 : 0; + unsigned int b = (((index + 1) & 4) > 0) ? 255 : 0; + + EXPECT_PIXEL_EQ(getWindowWidth() / 2, getWindowHeight() / 2, r, g, b, 255); + } + + void gapsTest() + { + glBindTexture(GL_TEXTURE_2D, mTextures[0]); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, mTextures[0], 0); + + bool flags[8] = { false, true }; + + GLuint program; + setupMRTProgram(flags, &program); + + const GLenum bufs[] = + { + GL_NONE, + GL_COLOR_ATTACHMENT1 + }; + glUseProgram(program); + glDrawBuffersEXT(2, bufs); + glDrawArrays(GL_TRIANGLES, 0, 3); + + verifyAttachment(1, mTextures[0]); + + glDeleteProgram(program); + } + + void firstAndLastTest() + { + glBindTexture(GL_TEXTURE_2D, mTextures[0]); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mTextures[0], 0); + + glBindTexture(GL_TEXTURE_2D, mTextures[1]); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT3, GL_TEXTURE_2D, mTextures[1], 0); + + bool flags[8] = { true, false, false, true }; + + GLuint program; + setupMRTProgram(flags, &program); + + const GLenum bufs[] = + { + GL_COLOR_ATTACHMENT0, + GL_NONE, + GL_NONE, + GL_COLOR_ATTACHMENT3 + }; + + glUseProgram(program); + glDrawBuffersEXT(4, bufs); + glDrawArrays(GL_TRIANGLES, 0, 3); + + verifyAttachment(0, mTextures[0]); + verifyAttachment(3, mTextures[1]); + + EXPECT_GL_NO_ERROR(); + + glDeleteProgram(program); + } + + void firstHalfNULLTest() + { + bool flags[8] = { false }; + GLenum bufs[8] = { GL_NONE }; + + for (unsigned int texIndex = 0; texIndex < 4; texIndex++) + { + glBindTexture(GL_TEXTURE_2D, mTextures[texIndex]); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT4 + texIndex, GL_TEXTURE_2D, mTextures[texIndex], 0); + flags[texIndex + 4] = true; + bufs[texIndex + 4] = GL_COLOR_ATTACHMENT4 + texIndex; + } + + GLuint program; + setupMRTProgram(flags, &program); + + glUseProgram(program); + glDrawBuffersEXT(8, bufs); + glDrawArrays(GL_TRIANGLES, 0, 3); + + for (unsigned int texIndex = 0; texIndex < 4; texIndex++) + { + verifyAttachment(texIndex + 4, mTextures[texIndex]); + } + + EXPECT_GL_NO_ERROR(); + + glDeleteProgram(program); + } + + GLuint mFBO; + GLuint mTextures[4]; + GLuint mBuffer; +}; + +class DrawBuffersTestESSL3 : public DrawBuffersTest +{ + protected: + DrawBuffersTestESSL3() + : DrawBuffersTest(3) + {} +}; + +class DrawBuffersTestESSL1 : public DrawBuffersTest +{ + protected: + DrawBuffersTestESSL1() + : DrawBuffersTest(2) + {} +}; + +TEST_F(DrawBuffersTestESSL3, Gaps) +{ + gapsTest(); +} + +TEST_F(DrawBuffersTestESSL1, Gaps) +{ + gapsTest(); +} + +TEST_F(DrawBuffersTestESSL3, FirstAndLast) +{ + firstAndLastTest(); +} + +TEST_F(DrawBuffersTestESSL1, FirstAndLast) +{ + firstAndLastTest(); +} + +TEST_F(DrawBuffersTestESSL3, FirstHalfNULL) +{ + firstHalfNULLTest(); +} + +TEST_F(DrawBuffersTestESSL1, FirstHalfNULL) +{ + firstHalfNULLTest(); +} -- cgit v1.2.3