[D3D12] DXBC: Fix two uninitialized register usages in ROV code, better comments

This commit is contained in:
Triang3l 2020-02-13 08:31:01 +03:00
parent cc47704a6b
commit b59ae30ec3
3 changed files with 24 additions and 9 deletions

View File

@ -1333,14 +1333,16 @@ void DxbcShaderTranslator::StartTranslation() {
system_temp_position_ = PushSystemTemp(0b1111);
} else if (IsDxbcPixelShader()) {
if (edram_rov_used_) {
// Will be initialized unconditionally.
system_temp_rov_params_ = PushSystemTemp();
// If the shader doesn't write to depth, StartPixelShader will load the
// depth/stencil for early test, so no need to initialize. If it does,
// initialize it to something consistent - depth must be written on every
// shader execution path (at least in PC ps_3_0 and later shader models)
// and to make compilation easier.
// If the shader doesn't write to oDepth, each component will be written
// to if depth/stencil is enabled and the respective sample is covered -
// so need to initialize now because the first writes will be conditional.
// If the shader writes to oDepth, this is oDepth of the shader, written
// by the guest code, so initialize because assumptions can't be made
// about the integrity of the guest code.
system_temp_rov_depth_stencil_ =
PushSystemTemp(writes_depth() ? 0b0001 : 0);
PushSystemTemp(writes_depth() ? 0b0001 : 0b1111);
}
for (uint32_t i = 0; i < 4; ++i) {
if (writes_color_target(i)) {
@ -1377,7 +1379,10 @@ void DxbcShaderTranslator::StartTranslation() {
}
}
// Allocate system temporary variables for the translated code.
// Allocate system temporary variables for the translated code. Since access
// depends on the guest code (thus no guarantees), initialize everything
// now (except for pv, it's an internal temporary variable, not accessible
// by the guest).
system_temp_pv_ = PushSystemTemp();
system_temp_ps_pc_p0_a0_ = PushSystemTemp(0b1111);
system_temp_aL_ = PushSystemTemp(0b1111);

View File

@ -49,6 +49,10 @@ namespace gpu {
// optimization possibilities as this may result in false dependencies. Always
// mov l(0, 0, 0, 0) to such components before potential branching -
// PushSystemTemp accepts a zero mask for this purpose.
//
// Clamping must be done first to the lower bound (using max), then to the upper
// bound (using min), to match the saturate modifier behavior, which results in
// 0 for NaN.
class DxbcShaderTranslator : public ShaderTranslator {
public:
DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used);
@ -1626,7 +1630,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
uint32_t temp2_component);
// Packs a float32x4 color value to 32bpp or a 64bpp in color_temp to
// packed_temp.packed_temp_components, using 2 temporary VGPR. color_temp and
// packed_temp may be the same if packed_temp_components is 0.
// packed_temp may be the same if packed_temp_components is 0. If the format
// is 32bpp, will still the high part to break register dependency.
void ROV_PackPreClampedColor(uint32_t rt_index, uint32_t color_temp,
uint32_t packed_temp,
uint32_t packed_temp_components, uint32_t temp1,
@ -2002,7 +2007,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
// W - Base-relative resolution-scaled EDRAM offset for 64bpp color data, in
// dwords.
uint32_t system_temp_rov_params_;
// ROV only - new depth/stencil data. 4 VGPRs.
// ROV only - new depth/stencil data. 4 VGPRs when not writing to oDepth, 1
// VGPR when writing to oDepth.
// When not writing to oDepth: New per-sample depth/stencil values, generated
// during early depth/stencil test (actual writing checks coverage bits).
// When writing to oDepth: X also used to hold the depth written by the

View File

@ -880,6 +880,10 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor(
DxbcDest temp2_dest(DxbcDest::R(temp2, 1 << temp2_component));
DxbcSrc temp2_src(DxbcSrc::R(temp2).Select(temp2_component));
// Break register dependency after 32bpp cases.
DxbcOpMov(DxbcDest::R(packed_temp, 1 << (packed_temp_components + 1)),
DxbcSrc::LU(0));
// Choose the packing based on the render target's format.
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFormatFlags_Index;
DxbcOpSwitch(DxbcSrc::CB(cbuffer_index_system_constants_,