[GPU] Scale gradients by SSAA factor
This commit is contained in:
parent
e6fa0ad139
commit
c7fbe0e6d5
|
@ -17,6 +17,7 @@
|
||||||
#include "xenia/base/math.h"
|
#include "xenia/base/math.h"
|
||||||
#include "xenia/base/string.h"
|
#include "xenia/base/string.h"
|
||||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||||
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
@ -630,21 +631,44 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
if (instr.opcode == FetchOpcode::kGetTextureGradients) {
|
if (instr.opcode == FetchOpcode::kGetTextureGradients) {
|
||||||
// Handle before doing anything that actually needs the texture.
|
// Handle before doing anything that actually needs the texture.
|
||||||
bool grad_operand_temp_pushed = false;
|
bool grad_operand_temp_pushed = false;
|
||||||
DxbcSrc grad_operand =
|
DxbcSrc grad_operand = LoadOperand(
|
||||||
LoadOperand(instr.operands[0], 0b0011, grad_operand_temp_pushed);
|
instr.operands[0],
|
||||||
if (used_result_components & 0b0101) {
|
((used_result_nonzero_components & 0b0011) ? 0b0001 : 0) |
|
||||||
DxbcOpDerivRTXFine(
|
((used_result_nonzero_components & 0b1100) ? 0b0010 : 0),
|
||||||
DxbcDest::R(system_temp_result_, used_result_components & 0b0101),
|
grad_operand_temp_pushed);
|
||||||
grad_operand.SwizzleSwizzled(0b010000));
|
if (used_result_nonzero_components & 0b0101) {
|
||||||
|
DxbcOpDerivRTXCoarse(DxbcDest::R(system_temp_result_,
|
||||||
|
used_result_nonzero_components & 0b0101),
|
||||||
|
grad_operand.SwizzleSwizzled(0b010000));
|
||||||
}
|
}
|
||||||
if (used_result_components & 0b1010) {
|
if (used_result_nonzero_components & 0b1010) {
|
||||||
DxbcOpDerivRTYFine(
|
DxbcOpDerivRTYCoarse(DxbcDest::R(system_temp_result_,
|
||||||
DxbcDest::R(system_temp_result_, used_result_components & 0b1010),
|
used_result_nonzero_components & 0b1010),
|
||||||
grad_operand.SwizzleSwizzled(0b01000000));
|
grad_operand.SwizzleSwizzled(0b01000000));
|
||||||
}
|
}
|
||||||
if (grad_operand_temp_pushed) {
|
if (grad_operand_temp_pushed) {
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
}
|
}
|
||||||
|
if (!edram_rov_used_ && cvars::ssaa_scale_gradients) {
|
||||||
|
// Scale the gradients to guest pixels with SSAA - 2x along each axis whose
// sample count log2 is nonzero, 1x otherwise.
|
||||||
|
uint32_t ssaa_scale_temp = PushSystemTemp();
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||||
|
DxbcOpMovC(DxbcDest::R(ssaa_scale_temp,
|
||||||
|
(used_result_nonzero_components & 0b0011) |
|
||||||
|
(used_result_nonzero_components >> 2)),
|
||||||
|
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||||
|
uint32_t(CbufferRegister::kSystemConstants),
|
||||||
|
kSysConst_SampleCountLog2_Vec,
|
||||||
|
kSysConst_SampleCountLog2_Comp |
|
||||||
|
((kSysConst_SampleCountLog2_Comp + 1) << 2)),
|
||||||
|
DxbcSrc::LF(2.0f), DxbcSrc::LF(1.0f));
|
||||||
|
DxbcOpMul(
|
||||||
|
DxbcDest::R(system_temp_result_, used_result_nonzero_components),
|
||||||
|
DxbcSrc::R(system_temp_result_),
|
||||||
|
DxbcSrc::R(ssaa_scale_temp, 0b01000100));
|
||||||
|
// Release ssaa_scale_temp.
|
||||||
|
PopSystemTemp();
|
||||||
|
}
|
||||||
StoreResult(instr.result, DxbcSrc::R(system_temp_result_));
|
StoreResult(instr.result, DxbcSrc::R(system_temp_result_));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1387,12 +1411,27 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
DxbcOpExp(lod_dest, lod_src);
|
DxbcOpExp(lod_dest, lod_src);
|
||||||
// FIXME(Triang3l): Gradient exponent adjustment is currently not done
|
// FIXME(Triang3l): Gradient exponent adjustment is currently not done
|
||||||
// in getCompTexLOD, so don't do it here too.
|
// in getCompTexLOD, so don't do it here too.
|
||||||
|
bool ssaa_scale_gradients =
|
||||||
|
!instr.attributes.use_register_gradients && !edram_rov_used_ &&
|
||||||
|
cvars::ssaa_scale_gradients;
|
||||||
#if 0
|
#if 0
|
||||||
// Extract gradient exponent biases from the fetch constant and merge
|
// Extract gradient exponent biases from the fetch constant and merge
|
||||||
// them with the LOD bias.
|
// them with the LOD bias.
|
||||||
DxbcOpIBFE(DxbcDest::R(grad_h_lod_temp, 0b0011), DxbcSrc::LU(5),
|
DxbcOpIBFE(DxbcDest::R(grad_h_lod_temp, 0b0011), DxbcSrc::LU(5),
|
||||||
DxbcSrc::LU(22, 27, 0, 0),
|
DxbcSrc::LU(22, 27, 0, 0),
|
||||||
RequestTextureFetchConstantWord(tfetch_index, 4));
|
RequestTextureFetchConstantWord(tfetch_index, 4));
|
||||||
|
if (ssaa_scale_gradients) {
|
||||||
|
// Adjust the gradient scales to include the SSAA scale.
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||||
|
DxbcOpIAdd(DxbcDest::R(grad_h_lod_temp, 0b0011),
|
||||||
|
DxbcSrc::R(grad_h_lod_temp),
|
||||||
|
DxbcSrc::CB(
|
||||||
|
cbuffer_index_system_constants_,
|
||||||
|
uint32_t(CbufferRegister::kSystemConstants),
|
||||||
|
kSysConst_SampleCountLog2_Vec,
|
||||||
|
kSysConst_SampleCountLog2_Comp |
|
||||||
|
((kSysConst_SampleCountLog2_Comp + 1) << 2)));
|
||||||
|
}
|
||||||
DxbcOpIMAd(DxbcDest::R(grad_h_lod_temp, 0b0011),
|
DxbcOpIMAd(DxbcDest::R(grad_h_lod_temp, 0b0011),
|
||||||
DxbcSrc::R(grad_h_lod_temp), DxbcSrc::LI(int32_t(1) << 23),
|
DxbcSrc::R(grad_h_lod_temp), DxbcSrc::LI(int32_t(1) << 23),
|
||||||
DxbcSrc::LF(1.0f));
|
DxbcSrc::LF(1.0f));
|
||||||
|
@ -1400,6 +1439,32 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kYYYY));
|
DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kYYYY));
|
||||||
DxbcOpMul(lod_dest, lod_src,
|
DxbcOpMul(lod_dest, lod_src,
|
||||||
DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kXXXX));
|
DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kXXXX));
|
||||||
|
#else
|
||||||
|
if (ssaa_scale_gradients) {
|
||||||
|
// Adjust the gradient scales in each direction to include the SSAA
|
||||||
|
// scale - for ddy scale, grad_v_temp.w, not grad_h_lod_temp.w, must
|
||||||
|
// be used.
|
||||||
|
// ddy.
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||||
|
DxbcOpMovC(DxbcDest::R(grad_v_temp, 0b1000),
|
||||||
|
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||||
|
uint32_t(CbufferRegister::kSystemConstants),
|
||||||
|
kSysConst_SampleCountLog2_Vec)
|
||||||
|
.Select(kSysConst_SampleCountLog2_Comp + 1),
|
||||||
|
DxbcSrc::LF(2.0f), DxbcSrc::LF(1.0f));
|
||||||
|
DxbcOpMul(DxbcDest::R(grad_v_temp, 0b1000), lod_src,
|
||||||
|
DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW));
|
||||||
|
// ddx (after ddy handling, because the ddy code uses lod_src, and
|
||||||
|
// it's being overwritten now).
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||||
|
DxbcOpIf(true,
|
||||||
|
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||||
|
uint32_t(CbufferRegister::kSystemConstants),
|
||||||
|
kSysConst_SampleCountLog2_Vec)
|
||||||
|
.Select(kSysConst_SampleCountLog2_Comp));
|
||||||
|
DxbcOpMul(lod_dest, lod_src, DxbcSrc::LF(2.0f));
|
||||||
|
DxbcOpEndIf();
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
// Obtain the gradients and apply biases to them.
|
// Obtain the gradients and apply biases to them.
|
||||||
if (instr.attributes.use_register_gradients) {
|
if (instr.attributes.use_register_gradients) {
|
||||||
|
@ -1458,8 +1523,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
DxbcSrc::R(grad_v_temp),
|
DxbcSrc::R(grad_v_temp),
|
||||||
DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW));
|
DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW));
|
||||||
#else
|
#else
|
||||||
DxbcOpMul(DxbcDest::R(grad_v_temp, grad_mask),
|
// With SSAA gradient scaling, the scale is separate in each
|
||||||
DxbcSrc::R(grad_v_temp), lod_src);
|
// direction (the ddy one is in grad_v_temp.w).
|
||||||
|
DxbcOpMul(
|
||||||
|
DxbcDest::R(grad_v_temp, grad_mask), DxbcSrc::R(grad_v_temp),
|
||||||
|
ssaa_scale_gradients ? DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW)
|
||||||
|
: lod_src);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if (instr.dimension == xenos::FetchOpDimension::k1D) {
|
if (instr.dimension == xenos::FetchOpDimension::k1D) {
|
||||||
|
|
|
@ -40,6 +40,14 @@ DEFINE_bool(
|
||||||
"be fully covered when MSAA is used with fullscreen passes.",
|
"be fully covered when MSAA is used with fullscreen passes.",
|
||||||
"GPU");
|
"GPU");
|
||||||
|
|
||||||
|
DEFINE_bool(
|
||||||
|
ssaa_scale_gradients, true,
|
||||||
|
"When using SSAA instead of native MSAA, adjust texture coordinate "
|
||||||
|
"derivatives used for mipmap selection, and getGradients results, to guest "
|
||||||
|
"pixels as if true MSAA rather than SSAA was used.\n"
|
||||||
|
"Reduces bandwidth usage of texture fetching.",
|
||||||
|
"GPU");
|
||||||
|
|
||||||
DEFINE_string(
|
DEFINE_string(
|
||||||
depth_float24_conversion, "",
|
depth_float24_conversion, "",
|
||||||
"Method for converting 32-bit Z values to 20e4 floating point when using "
|
"Method for converting 32-bit Z values to 20e4 floating point when using "
|
||||||
|
|
|
@ -22,6 +22,8 @@ DECLARE_bool(gpu_allow_invalid_fetch_constants);
|
||||||
|
|
||||||
DECLARE_bool(half_pixel_offset);
|
DECLARE_bool(half_pixel_offset);
|
||||||
|
|
||||||
|
DECLARE_bool(ssaa_scale_gradients);
|
||||||
|
|
||||||
DECLARE_string(depth_float24_conversion);
|
DECLARE_string(depth_float24_conversion);
|
||||||
|
|
||||||
DECLARE_int32(query_occlusion_fake_sample_count);
|
DECLARE_int32(query_occlusion_fake_sample_count);
|
||||||
|
|
|
@ -551,6 +551,12 @@ enum class FetchOpcode : uint32_t {
|
||||||
kGetTextureComputedLod = 17,
|
kGetTextureComputedLod = 17,
|
||||||
|
|
||||||
// Source is 2-component. XZ = ddx(source.xy), YW = ddy(source.xy).
|
// Source is 2-component. XZ = ddx(source.xy), YW = ddy(source.xy).
|
||||||
|
// TODO(Triang3l): Verify whether it's coarse or fine (on Adreno 200, for
|
||||||
|
// instance). This is using the texture unit, where the LOD is computed for
|
||||||
|
// the whole quad (according to the Direct3D 11.3 specification), so likely
|
||||||
|
// coarse; ddx / ddy from the Shader Model 4 era is also compiled by FXC to
|
||||||
|
// deriv_rtx/rty_coarse when targeting Shader Model 5, and on TeraScale,
|
||||||
|
// coarse / fine selection only appeared on Direct3D 11 GPUs.
|
||||||
kGetTextureGradients = 18,
|
kGetTextureGradients = 18,
|
||||||
|
|
||||||
// Gets the weights used in a bilinear fetch.
|
// Gets the weights used in a bilinear fetch.
|
||||||
|
|
Loading…
Reference in New Issue