[GPU] Scale gradients by SSAA factor
This commit is contained in:
parent
e6fa0ad139
commit
c7fbe0e6d5
|
@ -17,6 +17,7 @@
|
|||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/string.h"
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -630,21 +631,44 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
if (instr.opcode == FetchOpcode::kGetTextureGradients) {
|
||||
// Handle before doing anything that actually needs the texture.
|
||||
bool grad_operand_temp_pushed = false;
|
||||
DxbcSrc grad_operand =
|
||||
LoadOperand(instr.operands[0], 0b0011, grad_operand_temp_pushed);
|
||||
if (used_result_components & 0b0101) {
|
||||
DxbcOpDerivRTXFine(
|
||||
DxbcDest::R(system_temp_result_, used_result_components & 0b0101),
|
||||
grad_operand.SwizzleSwizzled(0b010000));
|
||||
DxbcSrc grad_operand = LoadOperand(
|
||||
instr.operands[0],
|
||||
((used_result_nonzero_components & 0b0011) ? 0b0001 : 0) |
|
||||
((used_result_nonzero_components & 0b1100) ? 0b0010 : 0),
|
||||
grad_operand_temp_pushed);
|
||||
if (used_result_nonzero_components & 0b0101) {
|
||||
DxbcOpDerivRTXCoarse(DxbcDest::R(system_temp_result_,
|
||||
used_result_nonzero_components & 0b0101),
|
||||
grad_operand.SwizzleSwizzled(0b010000));
|
||||
}
|
||||
if (used_result_components & 0b1010) {
|
||||
DxbcOpDerivRTYFine(
|
||||
DxbcDest::R(system_temp_result_, used_result_components & 0b1010),
|
||||
grad_operand.SwizzleSwizzled(0b01000000));
|
||||
if (used_result_nonzero_components & 0b1010) {
|
||||
DxbcOpDerivRTYCoarse(DxbcDest::R(system_temp_result_,
|
||||
used_result_nonzero_components & 0b1010),
|
||||
grad_operand.SwizzleSwizzled(0b01000000));
|
||||
}
|
||||
if (grad_operand_temp_pushed) {
|
||||
PopSystemTemp();
|
||||
}
|
||||
if (!edram_rov_used_ && cvars::ssaa_scale_gradients) {
|
||||
// Scale the gradients to guest pixels with SSAA.
|
||||
uint32_t ssaa_scale_temp = PushSystemTemp();
|
||||
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||
DxbcOpMovC(DxbcDest::R(ssaa_scale_temp,
|
||||
(used_result_nonzero_components & 0b0011) |
|
||||
(used_result_nonzero_components >> 2)),
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_SampleCountLog2_Vec,
|
||||
kSysConst_SampleCountLog2_Comp |
|
||||
((kSysConst_SampleCountLog2_Comp + 1) << 2)),
|
||||
DxbcSrc::LF(2.0f), DxbcSrc::LF(1.0f));
|
||||
DxbcOpMul(
|
||||
DxbcDest::R(system_temp_result_, used_result_nonzero_components),
|
||||
DxbcSrc::R(system_temp_result_),
|
||||
DxbcSrc::R(ssaa_scale_temp, 0b01000100));
|
||||
// Release ssaa_scale_temp.
|
||||
PopSystemTemp();
|
||||
}
|
||||
StoreResult(instr.result, DxbcSrc::R(system_temp_result_));
|
||||
return;
|
||||
}
|
||||
|
@ -1387,12 +1411,27 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
DxbcOpExp(lod_dest, lod_src);
|
||||
// FIXME(Triang3l): Gradient exponent adjustment is currently not done
|
||||
// in getCompTexLOD, so don't do it here either.
|
||||
bool ssaa_scale_gradients =
|
||||
!instr.attributes.use_register_gradients && !edram_rov_used_ &&
|
||||
cvars::ssaa_scale_gradients;
|
||||
#if 0
|
||||
// Extract gradient exponent biases from the fetch constant and merge
|
||||
// them with the LOD bias.
|
||||
DxbcOpIBFE(DxbcDest::R(grad_h_lod_temp, 0b0011), DxbcSrc::LU(5),
|
||||
DxbcSrc::LU(22, 27, 0, 0),
|
||||
RequestTextureFetchConstantWord(tfetch_index, 4));
|
||||
if (ssaa_scale_gradients) {
|
||||
// Adjust the gradient scales to include the SSAA scale.
|
||||
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||
DxbcOpIAdd(DxbcDest::R(grad_h_lod_temp, 0b0011),
|
||||
DxbcSrc::R(grad_h_lod_temp),
|
||||
DxbcSrc::CB(
|
||||
cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_SampleCountLog2_Vec,
|
||||
kSysConst_SampleCountLog2_Comp |
|
||||
((kSysConst_SampleCountLog2_Comp + 1) << 2)));
|
||||
}
|
||||
DxbcOpIMAd(DxbcDest::R(grad_h_lod_temp, 0b0011),
|
||||
DxbcSrc::R(grad_h_lod_temp), DxbcSrc::LI(int32_t(1) << 23),
|
||||
DxbcSrc::LF(1.0f));
|
||||
|
@ -1400,6 +1439,32 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kYYYY));
|
||||
DxbcOpMul(lod_dest, lod_src,
|
||||
DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kXXXX));
|
||||
#else
|
||||
if (ssaa_scale_gradients) {
|
||||
// Adjust the gradient scales in each direction to include the SSAA
|
||||
// scale - for ddy scale, grad_v_temp.w, not grad_h_lod_temp.w, must
|
||||
// be used.
|
||||
// ddy.
|
||||
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||
DxbcOpMovC(DxbcDest::R(grad_v_temp, 0b1000),
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_SampleCountLog2_Vec)
|
||||
.Select(kSysConst_SampleCountLog2_Comp + 1),
|
||||
DxbcSrc::LF(2.0f), DxbcSrc::LF(1.0f));
|
||||
DxbcOpMul(DxbcDest::R(grad_v_temp, 0b1000), lod_src,
|
||||
DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW));
|
||||
// ddx (after ddy handling, because the ddy code uses lod_src, and
|
||||
// it's being overwritten now).
|
||||
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||
DxbcOpIf(true,
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_SampleCountLog2_Vec)
|
||||
.Select(kSysConst_SampleCountLog2_Comp));
|
||||
DxbcOpMul(lod_dest, lod_src, DxbcSrc::LF(2.0f));
|
||||
DxbcOpEndIf();
|
||||
}
|
||||
#endif
|
||||
// Obtain the gradients and apply biases to them.
|
||||
if (instr.attributes.use_register_gradients) {
|
||||
|
@ -1458,8 +1523,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
DxbcSrc::R(grad_v_temp),
|
||||
DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW));
|
||||
#else
|
||||
DxbcOpMul(DxbcDest::R(grad_v_temp, grad_mask),
|
||||
DxbcSrc::R(grad_v_temp), lod_src);
|
||||
// With SSAA gradient scaling, the scale is separate in each
|
||||
// direction.
|
||||
DxbcOpMul(
|
||||
DxbcDest::R(grad_v_temp, grad_mask), DxbcSrc::R(grad_v_temp),
|
||||
ssaa_scale_gradients ? DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW)
|
||||
: lod_src);
|
||||
#endif
|
||||
}
|
||||
if (instr.dimension == xenos::FetchOpDimension::k1D) {
|
||||
|
|
|
@ -40,6 +40,14 @@ DEFINE_bool(
|
|||
"be fully covered when MSAA is used with fullscreen passes.",
|
||||
"GPU");
|
||||
|
||||
// Boolean configuration variable `ssaa_scale_gradients`, enabled by default.
// It is read as cvars::ssaa_scale_gradients by the DXBC shader translator's
// texture fetch handling, which only applies the scaling when
// edram_rov_used_ is false.
// NOTE(review): the trailing "GPU" argument is presumably the config
// category - confirm against the DEFINE_bool macro definition.
DEFINE_bool(
|
||||
ssaa_scale_gradients, true,
|
||||
"When using SSAA instead of native MSAA, adjust texture coordinate "
|
||||
"derivatives used for mipmap selection, and getGradients results, to guest "
|
||||
"pixels as if true MSAA rather than SSAA was used.\n"
|
||||
"Reduces bandwidth usage of texture fetching.",
|
||||
"GPU");
|
||||
|
||||
DEFINE_string(
|
||||
depth_float24_conversion, "",
|
||||
"Method for converting 32-bit Z values to 20e4 floating point when using "
|
||||
|
|
|
@ -22,6 +22,8 @@ DECLARE_bool(gpu_allow_invalid_fetch_constants);
|
|||
|
||||
DECLARE_bool(half_pixel_offset);
|
||||
|
||||
DECLARE_bool(ssaa_scale_gradients);
|
||||
|
||||
DECLARE_string(depth_float24_conversion);
|
||||
|
||||
DECLARE_int32(query_occlusion_fake_sample_count);
|
||||
|
|
|
@ -551,6 +551,12 @@ enum class FetchOpcode : uint32_t {
|
|||
kGetTextureComputedLod = 17,
|
||||
|
||||
// Source is 2-component. XZ = ddx(source.xy), YW = ddy(source.xy).
|
||||
// TODO(Triang3l): Verify whether it's coarse or fine (on Adreno 200, for
|
||||
// instance). This is using the texture unit, where the LOD is computed for
|
||||
// the whole quad (according to the Direct3D 11.3 specification), so likely
|
||||
// coarse; ddx / ddy from the Shader Model 4 era is also compiled by FXC to
|
||||
// deriv_rtx/rty_coarse when targeting Shader Model 5, and on TeraScale,
|
||||
// coarse / fine selection only appeared on Direct3D 11 GPUs.
|
||||
kGetTextureGradients = 18,
|
||||
|
||||
// Gets the weights used in a bilinear fetch.
|
||||
|
|
Loading…
Reference in New Issue