Merge pull request #2969 from Sonicadvance1/qualcomm_hack_removal

Remove all of our workarounds for Qualcomm devices we don't support anymore
This commit is contained in:
Markus Wick 2015-09-05 10:10:19 +02:00
commit 0f3263ac63
9 changed files with 38 additions and 314 deletions

View File

@ -41,24 +41,11 @@ OpenGLPostProcessing::OpenGLPostProcessing()
: m_initialized(false) : m_initialized(false)
{ {
CreateHeader(); CreateHeader();
m_attribute_workaround = DriverDetails::HasBug(DriverDetails::BUG_BROKENATTRIBUTELESS);
if (m_attribute_workaround)
{
glGenBuffers(1, &m_attribute_vbo);
glGenVertexArrays(1, &m_attribute_vao);
}
} }
OpenGLPostProcessing::~OpenGLPostProcessing() OpenGLPostProcessing::~OpenGLPostProcessing()
{ {
m_shader.Destroy(); m_shader.Destroy();
if (m_attribute_workaround)
{
glDeleteBuffers(1, &m_attribute_vbo);
glDeleteVertexArrays(1, &m_attribute_vao);
}
} }
void OpenGLPostProcessing::BlitFromTexture(TargetRectangle src, TargetRectangle dst, void OpenGLPostProcessing::BlitFromTexture(TargetRectangle src, TargetRectangle dst,
@ -70,9 +57,6 @@ void OpenGLPostProcessing::BlitFromTexture(TargetRectangle src, TargetRectangle
glViewport(dst.left, dst.bottom, dst.GetWidth(), dst.GetHeight()); glViewport(dst.left, dst.bottom, dst.GetWidth(), dst.GetHeight());
if (m_attribute_workaround)
glBindVertexArray(m_attribute_vao);
else
OpenGL_BindAttributelessVAO(); OpenGL_BindAttributelessVAO();
m_shader.Bind(); m_shader.Bind();
@ -174,9 +158,6 @@ void OpenGLPostProcessing::ApplyShader()
const char* vertex_shader = s_vertex_shader; const char* vertex_shader = s_vertex_shader;
if (m_attribute_workaround)
vertex_shader = s_vertex_workaround_shader;
// and compile it // and compile it
if (!ProgramShaderCache::CompileShader(m_shader, vertex_shader, code.c_str())) if (!ProgramShaderCache::CompileShader(m_shader, vertex_shader, code.c_str()))
{ {
@ -192,23 +173,6 @@ void OpenGLPostProcessing::ApplyShader()
m_uniform_src_rect = glGetUniformLocation(m_shader.glprogid, "src_rect"); m_uniform_src_rect = glGetUniformLocation(m_shader.glprogid, "src_rect");
m_uniform_layer = glGetUniformLocation(m_shader.glprogid, "layer"); m_uniform_layer = glGetUniformLocation(m_shader.glprogid, "layer");
if (m_attribute_workaround)
{
GLfloat vertices[] = {
-1.f, -1.f, 0.f, 0.f,
1.f, -1.f, 1.f, 0.f,
-1.f, 1.f, 0.f, 1.f,
1.f, 1.f, 1.f, 1.f,
};
glBindBuffer(GL_ARRAY_BUFFER, m_attribute_vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
glBindVertexArray(m_attribute_vao);
glEnableVertexAttribArray(SHADER_POSITION_ATTRIB);
glVertexAttribPointer(SHADER_POSITION_ATTRIB, 4, GL_FLOAT, 0, 0, nullptr);
}
for (const auto& it : m_config.GetOptions()) for (const auto& it : m_config.GetOptions())
{ {
std::string glsl_name = "option_" + it.first; std::string glsl_name = "option_" + it.first;

View File

@ -35,11 +35,6 @@ private:
GLuint m_uniform_layer; GLuint m_uniform_layer;
std::string m_glsl_header; std::string m_glsl_header;
// These are only used when working around Qualcomm's broken attributeless rendering
GLuint m_attribute_vao;
GLuint m_attribute_vbo;
bool m_attribute_workaround = false;
std::unordered_map<std::string, GLuint> m_uniform_bindings; std::unordered_map<std::string, GLuint> m_uniform_bindings;
void CreateHeader(); void CreateHeader();

View File

@ -583,10 +583,6 @@ void ProgramShaderCache::CreateHeader()
"#define frac fract\n" "#define frac fract\n"
"#define lerp mix\n" "#define lerp mix\n"
// Terrible hacks, look at DriverDetails.h
"%s\n" // replace textureSize as constant
"%s\n" // wipe out all centroid usages
, GetGLSLVersionString().c_str() , GetGLSLVersionString().c_str()
, v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : "" , v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
, !is_glsles && g_ActiveConfig.backend_info.bSupportsEarlyZ ? "#extension GL_ARB_shader_image_load_store : enable" : "" , !is_glsles && g_ActiveConfig.backend_info.bSupportsEarlyZ ? "#extension GL_ARB_shader_image_load_store : enable" : ""
@ -606,9 +602,6 @@ void ProgramShaderCache::CreateHeader()
, is_glsles ? "precision highp int;" : "" , is_glsles ? "precision highp int;" : ""
, is_glsles ? "precision highp sampler2DArray;" : "" , is_glsles ? "precision highp sampler2DArray;" : ""
, (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ? "precision highp usamplerBuffer;" : "" , (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ? "precision highp usamplerBuffer;" : ""
, DriverDetails::HasBug(DriverDetails::BUG_BROKENTEXTURESIZE) ? "#define textureSize(x, y) ivec2(1, 1)" : ""
, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "#define centroid" : ""
); );
} }

View File

@ -1705,8 +1705,6 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co
} }
// --------------------------------------------------------------------- // ---------------------------------------------------------------------
if (!DriverDetails::HasBug(DriverDetails::BUG_BROKENSWAP))
{
glEnable(GL_BLEND); glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
@ -1719,16 +1717,13 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co
// Do our OSD callbacks // Do our OSD callbacks
OSD::DoCallbacks(OSD::OSD_ONFRAME); OSD::DoCallbacks(OSD::OSD_ONFRAME);
OSD::DrawMessages(); OSD::DrawMessages();
}
// Copy the rendered frame to the real window // Copy the rendered frame to the real window
GLInterface->Swap(); GLInterface->Swap();
// Clear framebuffer // Clear framebuffer
if (!DriverDetails::HasBug(DriverDetails::BUG_BROKENSWAP))
{
glClearColor(0, 0, 0, 0); glClearColor(0, 0, 0, 0);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
}
if (s_vsync != g_ActiveConfig.IsVSync()) if (s_vsync != g_ActiveConfig.IsVSync())
{ {

View File

@ -42,16 +42,8 @@ namespace DriverDetails
// This is a list of all known bugs for each vendor // This is a list of all known bugs for each vendor
// We use this to check if the device and driver have an issue // We use this to check if the device and driver have an issue
static BugInfo m_known_bugs[] = { static BugInfo m_known_bugs[] = {
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_NODYNUBOACCESS, 14.0, 94.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENCENTROID, 14.0, 46.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENINFOLOG, -1.0, 46.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_ANNIHILATEDUBOS, 41.0, 46.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENSWAP, -1.0, 46.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true}, {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENTEXTURESIZE, -1.0, 65.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENATTRIBUTELESS, -1.0, 94.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENNEGATEDBOOLEAN,-1.0, -1.0, true}, {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENNEGATEDBOOLEAN,-1.0, -1.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENIVECSHIFTS, -1.0, 46.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENGLES31, -1.0, -1.0, true}, {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENGLES31, -1.0, -1.0, true},
{OS_ALL, VENDOR_ARM, DRIVER_ARM, -1, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true}, {OS_ALL, VENDOR_ARM, DRIVER_ARM, -1, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true},
{OS_ALL, VENDOR_ARM, DRIVER_ARM, -1, BUG_BROKENVSYNC, -1.0, -1.0, true}, {OS_ALL, VENDOR_ARM, DRIVER_ARM, -1, BUG_BROKENVSYNC, -1.0, -1.0, true},

View File

@ -58,32 +58,6 @@ namespace DriverDetails
// This'll ensure we know exactly what the issue is. // This'll ensure we know exactly what the issue is.
enum Bug enum Bug
{ {
// Bug: No Dynamic UBO array object access
// Affected Devices: Qualcomm/Adreno
// Started Version: 14
// Ended Version: 95
// Accessing UBO array members dynamically causes the Adreno shader compiler to crash
// Errors out with "Internal Error"
// With v53 video drivers, dynamic member access "works." It works to the extent that it doesn't crash.
// With v95 drivers everything works as it should.
BUG_NODYNUBOACCESS = 0,
// Bug: Centroid is broken in shaders
// Affected devices: Qualcomm/Adreno
// Started Version: 14
// Ended Version: 53
// Centroid in/out, used in the shaders, is used for multisample buffers to get the texel correctly
// When MSAA is disabled, it acts like a regular in/out
// Tends to cause the driver to render full white or black
BUG_BROKENCENTROID,
// Bug: INFO_LOG_LENGTH broken
// Affected devices: Qualcomm/Adreno
// Started Version: ? (Noticed on v14)
// Ended Version: 53
// When compiling a shader, it is important that when it fails,
// you first get the length of the information log prior to grabbing it.
// This allows you to allocate an array to store all of the log
// Adreno devices /always/ return 0 when querying GL_INFO_LOG_LENGTH
// They also max out at 1024 bytes(1023 characters + null terminator) for the log
BUG_BROKENINFOLOG, BUG_BROKENINFOLOG,
// Bug: UBO buffer offset broken // Bug: UBO buffer offset broken
// Affected devices: all mesa drivers // Affected devices: all mesa drivers
@ -104,22 +78,6 @@ namespace DriverDetails
// Please see issue #6105 on Google Code. Let's hope buffer storage solves this issue. // Please see issue #6105 on Google Code. Let's hope buffer storage solves this issue.
// TODO: Detect broken drivers. // TODO: Detect broken drivers.
BUG_BROKENPINNEDMEMORY, BUG_BROKENPINNEDMEMORY,
// Bug: Entirely broken UBOs
// Affected devices: Qualcomm/Adreno
// Started Version: ? (Noticed on v45)
// Ended Version: 53
// Uniform buffers are entirely broken on Qualcomm drivers with v45
// Trying to use the uniform buffers causes a malloc to fail inside the driver
// To be safe, blanket drivers from v41 - v45
BUG_ANNIHILATEDUBOS,
// Bug : Can't draw on screen text and clear correctly.
// Affected devices: Qualcomm/Adreno
// Started Version: ?
// Ended Version: 53
// Current code for drawing on screen text and clearing the framebuffer doesn't work on Adreno
// Drawing on screen text causes the whole screen to swizzle in a terrible fashion
// Clearing the framebuffer causes one to never see a frame.
BUG_BROKENSWAP,
// Bug: glBufferSubData/glMapBufferRange stalls + OOM // Bug: glBufferSubData/glMapBufferRange stalls + OOM
// Affected devices: Adreno a3xx/Mali-t6xx // Affected devices: Adreno a3xx/Mali-t6xx
// Started Version: -1 // Started Version: -1
@ -128,12 +86,6 @@ namespace DriverDetails
// The driver stalls in each instance no matter what you do // The driver stalls in each instance no matter what you do
// Apparently Mali and Adreno share code in this regard since it was written by the same person. // Apparently Mali and Adreno share code in this regard since it was written by the same person.
BUG_BROKENBUFFERSTREAM, BUG_BROKENBUFFERSTREAM,
// Bug: GLSL ES 3.0 textureSize causes abort
// Affected devices: Adreno a3xx
// Started Version: -1 (Noticed in v53)
// Ended Version: 66
// If a shader includes a textureSize function call then the shader compiler will call abort()
BUG_BROKENTEXTURESIZE,
// Bug: ARB_buffer_storage doesn't work with ARRAY_BUFFER type streams // Bug: ARB_buffer_storage doesn't work with ARRAY_BUFFER type streams
// Affected devices: GeForce 4xx+ // Affected devices: GeForce 4xx+
// Started Version: -1 // Started Version: -1
@ -169,14 +121,6 @@ namespace DriverDetails
// It works for all the buffer types we use except GL_ELEMENT_ARRAY_BUFFER. // It works for all the buffer types we use except GL_ELEMENT_ARRAY_BUFFER.
// Causes complete blackscreen issues. // Causes complete blackscreen issues.
BUG_INTELBROKENBUFFERSTORAGE, BUG_INTELBROKENBUFFERSTORAGE,
// Bug: Qualcomm has broken attributeless rendering
// Affected devices: Adreno
// Started Version: -1
// Ended Version: v66 (07-09-2014 dev version), v95 shipping
// Qualcomm has had attributeless rendering broken forever
// This was fixed in a v66 development version, the first shipping driver version with the release was v95.
// To be safe, make v95 the minimum version to work around this issue
BUG_BROKENATTRIBUTELESS,
// Bug: Qualcomm has broken boolean negation // Bug: Qualcomm has broken boolean negation
// Affected devices: Adreno // Affected devices: Adreno
// Started Version: -1 // Started Version: -1
@ -202,39 +146,6 @@ namespace DriverDetails
// if (cond == false) // if (cond == false)
BUG_BROKENNEGATEDBOOLEAN, BUG_BROKENNEGATEDBOOLEAN,
// Bug: Qualcomm has broken ivec to scalar and ivec to ivec bitshifts
// Affected devices: Adreno
// Started Version: -1
// Ended Version: 46 (TODO: Test more devices, the real end is currently unknown)
// Qualcomm has broken integer vector to integer bitshifts, and integer vector to integer vector bitshifts
// A compilation error is generated when trying to compile the shaders.
//
// For example:
// Broken on Qualcomm:
// ivec4 ab = ivec4(1,1,1,1);
// ab <<= 2;
//
// Working on Qualcomm:
// ivec4 ab = ivec4(1,1,1,1);
// ab.x <<= 2;
// ab.y <<= 2;
// ab.z <<= 2;
// ab.w <<= 2;
//
// Broken on Qualcomm:
// ivec4 ab = ivec4(1,1,1,1);
// ivec4 cd = ivec4(1,2,3,4);
// ab <<= cd;
//
// Working on Qualcomm:
// ivec4 ab = ivec4(1,1,1,1);
// ivec4 cd = ivec4(1,2,3,4);
// ab.x <<= cd.x;
// ab.y <<= cd.y;
// ab.z <<= cd.z;
// ab.w <<= cd.w;
BUG_BROKENIVECSHIFTS,
// Bug: glCopyImageSubData doesn't work on i965 // Bug: glCopyImageSubData doesn't work on i965
// Started Version: -1 // Started Version: -1
// Ended Version: 10.6.4 // Ended Version: 10.6.4

View File

@ -253,9 +253,6 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
} }
} }
object.Write("lacc = clamp(lacc, 0, 255);\n"); object.Write("lacc = clamp(lacc, 0, 255);\n");
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
object.Write("%s%d = float4(irshift((mat * (lacc + irshift(lacc, 7))), 8)) / 255.0;\n", dest, j);
else
object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
object.Write("}\n"); object.Write("}\n");
} }

View File

@ -215,30 +215,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
"int3 itrunc(float3 x) { return int3(trunc(x)); }\n" "int3 itrunc(float3 x) { return int3(trunc(x)); }\n"
"int4 itrunc(float4 x) { return int4(trunc(x)); }\n\n"); "int4 itrunc(float4 x) { return int4(trunc(x)); }\n\n");
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
// Add functions to do shifts on scalars and ivecs.
// These functions all have the same name to enable them to be used no matter what code is generated.
// For example: tev color op code uses .rgb as a swizzle, but alpha code only uses .a.
out.Write("int ilshift(int a, int b) { return a << b; }\n"
"int irshift(int a, int b) { return a >> b; }\n"
"int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n"
"int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n"
"int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n"
"int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n"
"int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n"
"int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n"
"int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n"
"int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n"
"int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n"
"int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n"
"int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n"
"int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n");
}
if (ApiType == API_OPENGL) if (ApiType == API_OPENGL)
{ {
// Declare samplers // Declare samplers
@ -518,10 +494,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
if (texcoord < numTexgen) if (texcoord < numTexgen)
{ {
out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2); out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
out.Write("\ttempcoord = irshift(fixpoint_uv%d, " I_INDTEXSCALE"[%d].%s);\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
else
out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy"); out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
} }
else else
@ -748,65 +720,34 @@ static inline void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, AP
int mtxidx = 2*(bpmem.tevind[n].mid-1); int mtxidx = 2*(bpmem.tevind[n].mid-1);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
out.Write("\tint2 indtevtrans%d = irshift(int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)), 3);\n", n, mtxidx, n, mtxidx+1, n);
// TODO: should use a shader uid branch for this for better performance
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
else
{
out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n); out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n);
// TODO: should use a shader uid branch for this for better performance // TODO: should use a shader uid branch for this for better performance
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
} }
}
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
{ // s matrix { // s matrix
_assert_(bpmem.tevind[n].mid >= 5); _assert_(bpmem.tevind[n].mid >= 5);
int mtxidx = 2*(bpmem.tevind[n].mid-5); int mtxidx = 2*(bpmem.tevind[n].mid-5);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
out.Write("\tint2 indtevtrans%d = irshift(int2(fixpoint_uv%d * iindtevcrd%d.xx), 8);\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
else
{
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
} }
}
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
{ // t matrix { // t matrix
_assert_(bpmem.tevind[n].mid >= 9); _assert_(bpmem.tevind[n].mid >= 9);
int mtxidx = 2*(bpmem.tevind[n].mid-9); int mtxidx = 2*(bpmem.tevind[n].mid-9);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
out.Write("\tint2 indtevtrans%d = irshift(int2(fixpoint_uv%d * iindtevcrd%d.yy), 8);\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
else
{
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
} }
}
else else
{ {
out.Write("\tint2 indtevtrans%d = int2(0, 0);\n", n); out.Write("\tint2 indtevtrans%d = int2(0, 0);\n", n);
@ -844,9 +785,6 @@ static inline void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, AP
out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans%d;\n", n);
// Emulate s24 overflows // Emulate s24 overflows
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
out.Write("\ttevcoord.xy = irshift(ilshift(tevcoord.xy, 8), 8);\n");
else
out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
} }
@ -1054,30 +992,6 @@ static inline void WriteTevRegular(T& out, const char* components, int bias, int
// - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255 // - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255
// - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy // - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy
// - a rounding bias is added before dividing by 256 // - a rounding bias is added before dividing by 256
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
// Haxx - cleaner code by not having irshift and ilshift in the emitted code by omitting them if not used.
const char* leftShift = tevScaleTableLeft[shift];
const char* rightShift = tevScaleTableRight[shift];
if (rightShift[0])
out.Write("irshift(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
else
out.Write("((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
out.Write(" %s ", tevOpTable[op]);
if (leftShift[0])
out.Write("irshift((ilshift((ilshift(tevin_a.%s, 8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+irshift(tevin_c.%s, 7))), %s)%s), 8)",
components, components, components, components, components,
leftShift+4, tevLerpBias[2*op+(shift!=3)]);
else
out.Write("irshift(((ilshift(tevin_a.%s, 8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+irshift(tevin_c.%s, 7)))%s), 8)",
components, components, components, components, components, tevLerpBias[2*op+(shift!=3)]);
if (rightShift[0])
out.Write(", %s)", rightShift+4);
}
else
{
out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]); out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
out.Write(" %s ", tevOpTable[op]); out.Write(" %s ", tevOpTable[op]);
out.Write("(((((tevin_a.%s<<8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)", out.Write("(((((tevin_a.%s<<8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)",
@ -1085,7 +999,6 @@ static inline void WriteTevRegular(T& out, const char* components, int bias, int
tevScaleTableLeft[shift], tevLerpBias[2*op+(shift!=3)]); tevScaleTableLeft[shift], tevLerpBias[2*op+(shift!=3)]);
out.Write(")%s", tevScaleTableRight[shift]); out.Write(")%s", tevScaleTableRight[shift]);
} }
}
template<class T> template<class T>
static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
@ -1247,9 +1160,6 @@ static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data)
} }
out.Write("\tint ifog = iround(fog * 256.0);\n"); out.Write("\tint ifog = iround(fog * 256.0);\n");
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
out.Write("\tprev.rgb = irshift((prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog), 8);\n");
else
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n"); out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n");
} }

View File

@ -32,29 +32,6 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
_assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens); _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
_assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans); _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
// Add functions to do shifts on scalars and ivecs.
// This is included in the vertex shader for lighting shader generation.
out.Write("int ilshift(int a, int b) { return a << b; }\n"
"int irshift(int a, int b) { return a >> b; }\n"
"int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n"
"int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n"
"int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n"
"int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n"
"int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n"
"int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n"
"int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n"
"int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n"
"int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n"
"int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n"
"int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n"
"int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n");
}
out.Write("%s", s_lighting_struct); out.Write("%s", s_lighting_struct);
// uniforms // uniforms
@ -155,15 +132,6 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
// transforms // transforms
if (components & VB_HAS_POSMTXIDX) if (components & VB_HAS_POSMTXIDX)
{
if (is_writing_shadercode && (DriverDetails::HasBug(DriverDetails::BUG_NODYNUBOACCESS) && !DriverDetails::HasBug(DriverDetails::BUG_ANNIHILATEDUBOS)))
{
// This'll cause issues, but it can't be helped
out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[0], rawpos), dot(" I_TRANSFORMMATRICES"[1], rawpos), dot(" I_TRANSFORMMATRICES"[2], rawpos), 1);\n");
if (components & VB_HAS_NRMALL)
out.Write("float3 N0 = " I_NORMALMATRICES"[0].xyz, N1 = " I_NORMALMATRICES"[1].xyz, N2 = " I_NORMALMATRICES"[2].xyz;\n");
}
else
{ {
out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n"); out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");
@ -172,7 +140,6 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n"); out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n"); out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
} }
}
if (components & VB_HAS_NRM0) if (components & VB_HAS_NRM0)
out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");