[DXBC] Inline depth/stencil test

This commit is contained in:
Triang3l 2020-08-27 23:46:41 +03:00
parent fd14a68420
commit a1d33615b0
3 changed files with 605 additions and 732 deletions

View File

@ -868,17 +868,6 @@ void DxbcShaderTranslator::StartPixelShader() {
}
void DxbcShaderTranslator::StartTranslation() {
// Allocate labels and registers for subroutines.
label_rov_depth_stencil_sample_ = UINT32_MAX;
uint32_t label_index = 0;
system_temps_subroutine_count_ = 0;
if (IsDxbcPixelShader() && edram_rov_used_) {
label_rov_depth_stencil_sample_ = label_index++;
system_temps_subroutine_count_ =
std::max((uint32_t)2, system_temps_subroutine_count_);
}
system_temps_subroutine_ = PushSystemTemp(0, system_temps_subroutine_count_);
// Allocate global system temporary registers that may also be used in the
// epilogue.
if (IsDxbcVertexOrDomainShader()) {
@ -1198,14 +1187,6 @@ void DxbcShaderTranslator::CompleteShaderCode() {
// Return from `main`.
DxbcOpRet();
// Write subroutines - can only do this immediately after `ret`. They still
// need the global system temps, and can't allocate their own temps (since
// they may be called from anywhere and don't know anything about the caller's
// register allocation).
if (label_rov_depth_stencil_sample_ != UINT32_MAX) {
CompleteShaderCode_ROV_DepthStencilSampleSubroutine();
}
if (IsDxbcVertexOrDomainShader()) {
// Release system_temp_position_ and
// system_temp_point_size_edge_flag_kill_vertex_.
@ -1226,9 +1207,6 @@ void DxbcShaderTranslator::CompleteShaderCode() {
PopSystemTemp();
}
}
// Release system_temps_subroutine_.
PopSystemTemp(system_temps_subroutine_count_);
}
std::vector<uint8_t> DxbcShaderTranslator::CompleteTranslation() {

View File

@ -2265,21 +2265,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
void CompletePixelShader_WriteToROV();
void CompletePixelShader();
// Writes a function that does early (or both early and late, when not
// separating) depth/stencil testing for one sample (ROV only).
// Input:
// - system_temps_subroutine_[0].x - depth converted to 24 bits in bits 0:23.
// - system_temp_rov_params_.y - depth sample EDRAM address.
// Output:
// - system_temps_subroutine_[0].x - resulting packed depth/stencil.
// - system_temps_subroutine_[0].y - test result: bit 0 is set if the test
// FAILED (so coverage can be updated with XOR), and if depth/stencil is
// early, bit 4 is also set if the pixel shader still needs to be run to check
// for kills/alphatest/AtoC before writing the new stencil.
// Local temps:
// - system_temps_subroutine_[0].zw.
// - system_temps_subroutine_[1].xy.
void CompleteShaderCode_ROV_DepthStencilSampleSubroutine();
void CompleteShaderCode();
// Writes the original instruction disassembly in the output DXBC if enabled,
@ -2505,22 +2490,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Whether the faceness has been used in the pixel shader.
bool in_front_face_used_;
// Subroutine labels. D3D10_SB_OPCODE_LABEL is not counted as an instruction
// in STAT.
uint32_t label_rov_depth_stencil_sample_;
// Number of currently allocated Xenia internal r# registers.
uint32_t system_temp_count_current_;
// Total maximum number of temporary registers ever used during this
// translation (for the declaration).
uint32_t system_temp_count_max_;
// Registers for the needed count of non-main-subroutine-local variables.
// This includes arguments.
uint32_t system_temps_subroutine_;
// Number of registers allocated for subroutines other than main.
uint32_t system_temps_subroutine_count_;
// Position in vertex shaders (because viewport and W transformations can be
// applied at the end of the shader).
uint32_t system_temp_position_;

File diff suppressed because it is too large Load Diff