[D3D12] DXBC: Fix two uninitialized register usages in ROV code, better comments
This commit is contained in:
parent
cc47704a6b
commit
b59ae30ec3
|
@ -1333,14 +1333,16 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
system_temp_position_ = PushSystemTemp(0b1111);
|
||||
} else if (IsDxbcPixelShader()) {
|
||||
if (edram_rov_used_) {
|
||||
// Will be initialized unconditionally.
|
||||
system_temp_rov_params_ = PushSystemTemp();
|
||||
// If the shader doesn't write to depth, StartPixelShader will load the
|
||||
// depth/stencil for early test, so no need to initialize. If it does,
|
||||
// initialize it to something consistent - depth must be written on every
|
||||
// shader execution path (at least in PC ps_3_0 and later shader models)
|
||||
// and to make compilation easier.
|
||||
// If the shader doesn't write to oDepth, each component will be written
|
||||
// to if depth/stencil is enabled and the respective sample is covered -
|
||||
// so need to initialize now because the first writes will be conditional.
|
||||
// If the shader writes to oDepth, this is oDepth of the shader, written
|
||||
// by the guest code, so initialize because assumptions can't be made
|
||||
// about the integrity of the guest code.
|
||||
system_temp_rov_depth_stencil_ =
|
||||
PushSystemTemp(writes_depth() ? 0b0001 : 0);
|
||||
PushSystemTemp(writes_depth() ? 0b0001 : 0b1111);
|
||||
}
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (writes_color_target(i)) {
|
||||
|
@ -1377,7 +1379,10 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
}
|
||||
}
|
||||
|
||||
// Allocate system temporary variables for the translated code.
|
||||
// Allocate system temporary variables for the translated code. Since access
|
||||
// depends on the guest code (thus no guarantees), initialize everything
|
||||
// now (except for pv, it's an internal temporary variable, not accessible
|
||||
// by the guest).
|
||||
system_temp_pv_ = PushSystemTemp();
|
||||
system_temp_ps_pc_p0_a0_ = PushSystemTemp(0b1111);
|
||||
system_temp_aL_ = PushSystemTemp(0b1111);
|
||||
|
|
|
@ -49,6 +49,10 @@ namespace gpu {
|
|||
// optimization possibilities as this may result in false dependencies. Always
|
||||
// mov l(0, 0, 0, 0) to such components before potential branching -
|
||||
// PushSystemTemp accepts a zero mask for this purpose.
|
||||
//
|
||||
// Clamping must be done first to the lower bound (using max), then to the upper
|
||||
// bound (using min), to match the saturate modifier behavior, which results in
|
||||
// 0 for NaN.
|
||||
class DxbcShaderTranslator : public ShaderTranslator {
|
||||
public:
|
||||
DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used);
|
||||
|
@ -1626,7 +1630,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
uint32_t temp2_component);
|
||||
// Packs a float32x4 color value to 32bpp or a 64bpp in color_temp to
|
||||
// packed_temp.packed_temp_components, using 2 temporary VGPR. color_temp and
|
||||
// packed_temp may be the same if packed_temp_components is 0.
|
||||
// packed_temp may be the same if packed_temp_components is 0. If the format
|
||||
// is 32bpp, will still the high part to break register dependency.
|
||||
void ROV_PackPreClampedColor(uint32_t rt_index, uint32_t color_temp,
|
||||
uint32_t packed_temp,
|
||||
uint32_t packed_temp_components, uint32_t temp1,
|
||||
|
@ -2002,7 +2007,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// W - Base-relative resolution-scaled EDRAM offset for 64bpp color data, in
|
||||
// dwords.
|
||||
uint32_t system_temp_rov_params_;
|
||||
// ROV only - new depth/stencil data. 4 VGPRs.
|
||||
// ROV only - new depth/stencil data. 4 VGPRs when not writing to oDepth, 1
|
||||
// VGPR when writing to oDepth.
|
||||
// When not writing to oDepth: New per-sample depth/stencil values, generated
|
||||
// during early depth/stencil test (actual writing checks coverage bits).
|
||||
// When writing to oDepth: X also used to hold the depth written by the
|
||||
|
|
|
@ -880,6 +880,10 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor(
|
|||
DxbcDest temp2_dest(DxbcDest::R(temp2, 1 << temp2_component));
|
||||
DxbcSrc temp2_src(DxbcSrc::R(temp2).Select(temp2_component));
|
||||
|
||||
// Break register dependency after 32bpp cases.
|
||||
DxbcOpMov(DxbcDest::R(packed_temp, 1 << (packed_temp_components + 1)),
|
||||
DxbcSrc::LU(0));
|
||||
|
||||
// Choose the packing based on the render target's format.
|
||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFormatFlags_Index;
|
||||
DxbcOpSwitch(DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
|
|
Loading…
Reference in New Issue