From 0622979d3b1761cfec6108ddb4b77d67518986f3 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 3 Sep 2017 16:32:37 +1000 Subject: [PATCH] ShaderGen: Support writing integer colors when logic op is enabled This is required for D3D to support logic op. --- Source/Core/VideoCommon/BPMemory.cpp | 13 +++++++ Source/Core/VideoCommon/BPMemory.h | 2 + Source/Core/VideoCommon/PixelShaderGen.cpp | 26 ++++++++++--- Source/Core/VideoCommon/PixelShaderGen.h | 3 +- Source/Core/VideoCommon/UberShaderPixel.cpp | 42 ++++++++++++++------- Source/Core/VideoCommon/UberShaderPixel.h | 1 + 6 files changed, 68 insertions(+), 19 deletions(-) diff --git a/Source/Core/VideoCommon/BPMemory.cpp b/Source/Core/VideoCommon/BPMemory.cpp index 26891ba941..8c0a2ec43a 100644 --- a/Source/Core/VideoCommon/BPMemory.cpp +++ b/Source/Core/VideoCommon/BPMemory.cpp @@ -10,6 +10,19 @@ // STATE_TO_SAVE BPMemory bpmem; +bool BlendMode::UseLogicOp() const +{ + // Logicop bit has lowest priority. + if (subtract || blendenable || !logicopenable) + return false; + + // Fast path for Kirby's Return to Dreamland, they use it with dstAlpha. 
+ if (logicmode == BlendMode::NOOP) + return false; + + return true; +} + float FogParam0::GetA() const { // scale mantissa from 11 to 23 bits diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index e2730c1472..fce322c4fd 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -648,6 +648,8 @@ union BlendMode BitField<12, 4, LogicOp> logicmode; u32 hex; + + bool UseLogicOp() const; }; union FogParam0 diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index d503b21176..5e561456ab 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -175,6 +175,7 @@ PixelShaderUid GetPixelShaderUid() uid_data->rgba6_format = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor; uid_data->dither = bpmem.blendmode.dither && uid_data->rgba6_format; + uid_data->uint_output = bpmem.blendmode.UseLogicOp(); u32 numStages = uid_data->genMode_numtevstages + 1; @@ -434,7 +435,7 @@ static void SampleTexture(ShaderCode& out, const char* texcoords, const char* te static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, APIType ApiType, bool per_pixel_depth, bool use_dual_source); static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data); -static void WriteColor(ShaderCode& out, const pixel_shader_uid_data* uid_data, +static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data, bool use_dual_source); ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host_config, @@ -568,8 +569,12 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host else // D3D { out.Write("void main(\n"); - out.Write(" out float4 ocol0 : SV_Target0,\n" - " out float4 ocol1 : SV_Target1,\n%s" + if (uid_data->uint_output) + out.Write(" out uint4 ocol0 : SV_Target,\n"); + else + out.Write(" 
out float4 ocol0 : SV_Target0,\n" + " out float4 ocol1 : SV_Target1,\n"); + out.Write("%s" " in float4 rawpos : SV_Position,\n", uid_data->per_pixel_depth ? " out float depth : SV_Depth,\n" : ""); @@ -778,7 +783,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host WriteFog(out, uid_data); // Write the color and alpha values to the framebuffer - WriteColor(out, uid_data, use_dual_source); + WriteColor(out, ApiType, uid_data, use_dual_source); if (uid_data->bounding_box) { @@ -1302,8 +1307,19 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"); } -static void WriteColor(ShaderCode& out, const pixel_shader_uid_data* uid_data, bool use_dual_source) +static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data, + bool use_dual_source) { + // D3D requires that the shader outputs be uint when writing to a uint render target for logic op. 
+ if (api_type == APIType::D3D && uid_data->uint_output) + { + if (uid_data->rgba6_format) + out.Write("\tocol0 = uint4(prev & 0xFC);\n"); + else + out.Write("\tocol0 = uint4(prev);\n"); + return; + } + if (uid_data->rgba6_format) out.Write("\tocol0.rgb = float3(prev.rgb >> 2) / 63.0;\n"); else diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index ee422bee8d..253dc59e22 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -43,7 +43,8 @@ struct pixel_shader_uid_data u32 numColorChans : 2; u32 rgba6_format : 1; u32 dither : 1; - u32 pad : 16; + u32 uint_output : 1; + u32 pad : 15; u32 texMtxInfo_n_projection : 8; // 8x1 bit u32 tevindref_bi0 : 3; diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 9b4d8d2251..8f6521e890 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -25,6 +25,7 @@ PixelShaderUid GetPixelShaderUid() (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !uid_data->early_depth) || (bpmem.zmode.testenable && bpmem.genMode.zfreeze); + uid_data->uint_output = bpmem.blendmode.UseLogicOp(); return out; } @@ -1164,18 +1165,29 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, " }\n" "\n"); - // TODO: Do we still want to support two pass alpha blending? 
- out.Write(" if (bpmem_rgba6_format)\n" - " ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;\n" - " else\n" - " ocol0.rgb = float3(TevResult.rgb) / 255.0;\n" - "\n" - " if (bpmem_dstalpha != 0u)\n"); - out.Write(" ocol0.a = float(%s >> 2) / 63.0;\n", - BitfieldExtract("bpmem_dstalpha", ConstantAlpha().alpha).c_str()); - out.Write(" else\n" - " ocol0.a = float(TevResult.a >> 2) / 63.0;\n" - " \n"); + // D3D requires that the shader outputs be uint when writing to a uint render target for logic op. + if (ApiType == APIType::D3D && uid_data->uint_output) + { + out.Write(" if (bpmem_rgba6_format)\n" + " ocol0 = uint4(TevResult & 0xFC);\n" + " else\n" + " ocol0 = uint4(TevResult);\n" + "\n"); + } + else + { + out.Write(" if (bpmem_rgba6_format)\n" + " ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;\n" + " else\n" + " ocol0.rgb = float3(TevResult.rgb) / 255.0;\n" + "\n" + " if (bpmem_dstalpha != 0u)\n"); + out.Write(" ocol0.a = float(%s >> 2) / 63.0;\n", + BitfieldExtract("bpmem_dstalpha", ConstantAlpha().alpha).c_str()); + out.Write(" else\n" + " ocol0.a = float(TevResult.a >> 2) / 63.0;\n" + " \n"); + } if (use_dual_source) { @@ -1260,7 +1272,11 @@ void EnumeratePixelShaderUids(const std::function<void(const PixelShaderUid&)>& continue; puid->per_pixel_depth = per_pixel_depth != 0; - callback(uid); + for (u32 uint_output = 0; uint_output < 2; uint_output++) + { + puid->uint_output = uint_output; + callback(uid); + } } } } diff --git a/Source/Core/VideoCommon/UberShaderPixel.h b/Source/Core/VideoCommon/UberShaderPixel.h index d7dc8109e8..3a5a8f8435 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.h +++ b/Source/Core/VideoCommon/UberShaderPixel.h @@ -15,6 +15,7 @@ struct pixel_ubershader_uid_data u32 num_texgens : 4; u32 early_depth : 1; u32 per_pixel_depth : 1; + u32 uint_output : 1; u32 NumValues() const { return sizeof(pixel_ubershader_uid_data); } };