[DXBC] ROV: Inline color ROP

This commit is contained in:
Triang3l 2020-08-16 17:10:07 +03:00
parent 9428af52e4
commit 6dc94d9154
3 changed files with 731 additions and 953 deletions

View File

@ -870,20 +870,12 @@ void DxbcShaderTranslator::StartPixelShader() {
void DxbcShaderTranslator::StartTranslation() { void DxbcShaderTranslator::StartTranslation() {
// Allocate labels and registers for subroutines. // Allocate labels and registers for subroutines.
label_rov_depth_stencil_sample_ = UINT32_MAX; label_rov_depth_stencil_sample_ = UINT32_MAX;
std::memset(label_rov_color_sample_, 0xFF, sizeof(label_rov_color_sample_));
uint32_t label_index = 0; uint32_t label_index = 0;
system_temps_subroutine_count_ = 0; system_temps_subroutine_count_ = 0;
if (IsDxbcPixelShader() && edram_rov_used_) { if (IsDxbcPixelShader() && edram_rov_used_) {
label_rov_depth_stencil_sample_ = label_index++; label_rov_depth_stencil_sample_ = label_index++;
system_temps_subroutine_count_ = system_temps_subroutine_count_ =
std::max((uint32_t)2, system_temps_subroutine_count_); std::max((uint32_t)2, system_temps_subroutine_count_);
for (uint32_t i = 0; i < xe::countof(label_rov_color_sample_); ++i) {
if (writes_color_target(i)) {
label_rov_color_sample_[i] = label_index++;
system_temps_subroutine_count_ =
std::max((uint32_t)4, system_temps_subroutine_count_);
}
}
} }
system_temps_subroutine_ = PushSystemTemp(0, system_temps_subroutine_count_); system_temps_subroutine_ = PushSystemTemp(0, system_temps_subroutine_count_);
@ -1213,11 +1205,6 @@ void DxbcShaderTranslator::CompleteShaderCode() {
if (label_rov_depth_stencil_sample_ != UINT32_MAX) { if (label_rov_depth_stencil_sample_ != UINT32_MAX) {
CompleteShaderCode_ROV_DepthStencilSampleSubroutine(); CompleteShaderCode_ROV_DepthStencilSampleSubroutine();
} }
for (uint32_t i = 0; i < 4; ++i) {
if (label_rov_color_sample_[i] != UINT32_MAX) {
CompleteShaderCode_ROV_ColorSampleSubroutine(i);
}
}
if (IsDxbcVertexOrDomainShader()) { if (IsDxbcVertexOrDomainShader()) {
// Release system_temp_position_ and // Release system_temp_position_ and

View File

@ -2217,7 +2217,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Packs a float32x4 color value to 32bpp or a 64bpp in color_temp to // Packs a float32x4 color value to 32bpp or a 64bpp in color_temp to
// packed_temp.packed_temp_components, using 2 temporary VGPR. color_temp and // packed_temp.packed_temp_components, using 2 temporary VGPR. color_temp and
// packed_temp may be the same if packed_temp_components is 0. If the format // packed_temp may be the same if packed_temp_components is 0. If the format
// is 32bpp, will still the high part to break register dependency. // is 32bpp, will still write the high part to break register dependency.
void ROV_PackPreClampedColor(uint32_t rt_index, uint32_t color_temp, void ROV_PackPreClampedColor(uint32_t rt_index, uint32_t color_temp,
uint32_t packed_temp, uint32_t packed_temp,
uint32_t packed_temp_components, uint32_t temp1, uint32_t packed_temp_components, uint32_t temp1,
@ -2280,25 +2280,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
// - system_temps_subroutine_[0].zw. // - system_temps_subroutine_[0].zw.
// - system_temps_subroutine_[1].xy. // - system_temps_subroutine_[1].xy.
void CompleteShaderCode_ROV_DepthStencilSampleSubroutine(); void CompleteShaderCode_ROV_DepthStencilSampleSubroutine();
// Writes a function that does loading, blending, write masking and storing
// for one color sample of the specified render target.
// Input:
// - system_temps_subroutine_[0].xy:
// - If not blending, packed source color (will be masked by the function).
// - If blending, used as a temporary.
// - system_temp_rov_params_.zw - color sample 32bpp and 64bpp EDRAM
// addresses.
// - system_temps_color_[rt_index]:
// - If blending (blend control is 0x00010001), source color clamped to the
// render target's representable range if it's fixed-point, unclamped
// source color if it's floating-point, not modified.
// - If not blending, ignored.
// Local temps:
// - system_temps_subroutine_[0].zw.
// - system_temps_subroutine_[1].xyzw.
// - system_temps_subroutine_[2].xyz.
// - system_temps_subroutine_[3].xyz.
void CompleteShaderCode_ROV_ColorSampleSubroutine(uint32_t rt_index);
void CompleteShaderCode(); void CompleteShaderCode();
// Writes the original instruction disassembly in the output DXBC if enabled, // Writes the original instruction disassembly in the output DXBC if enabled,
@ -2527,7 +2508,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Subroutine labels. D3D10_SB_OPCODE_LABEL is not counted as an instruction // Subroutine labels. D3D10_SB_OPCODE_LABEL is not counted as an instruction
// in STAT. // in STAT.
uint32_t label_rov_depth_stencil_sample_; uint32_t label_rov_depth_stencil_sample_;
uint32_t label_rov_color_sample_[4];
// Number of currently allocated Xenia internal r# registers. // Number of currently allocated Xenia internal r# registers.
uint32_t system_temp_count_current_; uint32_t system_temp_count_current_;

File diff suppressed because it is too large.