[GPU] Refactor: Register structs in D3D12CommandProcessor and some other places

This commit is contained in:
Triang3l 2019-10-19 23:30:53 +03:00
parent 3481f739a1
commit f83269cf8c
17 changed files with 716 additions and 498 deletions

View File

@ -20,7 +20,6 @@
#include "xenia/base/ring_buffer.h" #include "xenia/base/ring_buffer.h"
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/graphics_system.h" #include "xenia/gpu/graphics_system.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/sampler_info.h" #include "xenia/gpu/sampler_info.h"
#include "xenia/gpu/texture_info.h" #include "xenia/gpu/texture_info.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"

View File

@ -1142,8 +1142,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES #endif // FINE_GRAINED_DRAW_SCOPES
auto enable_mode = static_cast<xenos::ModeControl>( xenos::ModeControl enable_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
if (enable_mode == xenos::ModeControl::kIgnore) { if (enable_mode == xenos::ModeControl::kIgnore) {
// Ignored. // Ignored.
return true; return true;
@ -1153,7 +1152,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
return IssueCopy(); return IssueCopy();
} }
if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) { if (regs.Get<reg::RB_SURFACE_INFO>().surface_pitch == 0) {
// Doesn't actually draw. // Doesn't actually draw.
// TODO(Triang3l): Do something so memexport still works in this case maybe? // TODO(Triang3l): Do something so memexport still works in this case maybe?
// Unlikely that zero would even really be legal though. // Unlikely that zero would even really be legal though.
@ -1164,7 +1163,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
bool tessellated; bool tessellated;
if (uint32_t(primitive_type) >= if (uint32_t(primitive_type) >=
uint32_t(PrimitiveType::kExplicitMajorModeForceStart)) { uint32_t(PrimitiveType::kExplicitMajorModeForceStart)) {
tessellated = (regs[XE_GPU_REG_VGT_OUTPUT_PATH_CNTL].u32 & 0x3) == 0x1; tessellated = regs.Get<reg::VGT_OUTPUT_PATH_CNTL>().path_select ==
xenos::VGTOutputPath::kTessellationEnable;
} else { } else {
tessellated = false; tessellated = false;
} }
@ -1202,8 +1202,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
bool memexport_used = memexport_used_vertex || memexport_used_pixel; bool memexport_used = memexport_used_vertex || memexport_used_pixel;
bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type); bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type);
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
if (!memexport_used_vertex && primitive_two_faced && if (!memexport_used_vertex && primitive_two_faced &&
(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3) { pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back) {
// Both sides are culled - can't be expressed in the pipeline state. // Both sides are culled - can't be expressed in the pipeline state.
return true; return true;
} }
@ -1223,9 +1224,10 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// tessellation factors (as floats) instead of control point indices. // tessellation factors (as floats) instead of control point indices.
bool adaptive_tessellation; bool adaptive_tessellation;
if (tessellated) { if (tessellated) {
TessellationMode tessellation_mode = xenos::TessellationMode tessellation_mode =
TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3); regs.Get<reg::VGT_HOS_CNTL>().tess_mode;
adaptive_tessellation = tessellation_mode == TessellationMode::kAdaptive; adaptive_tessellation =
tessellation_mode == xenos::TessellationMode::kAdaptive;
if (adaptive_tessellation && if (adaptive_tessellation &&
(!indexed || index_buffer_info->format != IndexFormat::kInt32)) { (!indexed || index_buffer_info->format != IndexFormat::kInt32)) {
return false; return false;
@ -1235,7 +1237,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// passed to vertex shader registers, especially if patches are drawn with // passed to vertex shader registers, especially if patches are drawn with
// an index buffer. // an index buffer.
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
if (tessellation_mode != TessellationMode::kAdaptive) { if (tessellation_mode != xenos::TessellationMode::kAdaptive) {
XELOGE( XELOGE(
"Tessellation mode %u is not implemented yet, only adaptive is " "Tessellation mode %u is not implemented yet, only adaptive is "
"partially available now - report the game to Xenia developers!", "partially available now - report the game to Xenia developers!",
@ -1309,20 +1311,16 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
vertex_shader->GetUsedTextureMask(), vertex_shader->GetUsedTextureMask(),
pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0); pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
// Check if early depth/stencil can be enabled explicitly by RB_DEPTHCONTROL // Check if early depth/stencil can be enabled.
// or implicitly when alpha test and alpha to coverage are disabled. bool early_z;
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; if (pixel_shader) {
uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32; auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
bool early_z = false; early_z = pixel_shader->implicit_early_z_allowed() &&
if (pixel_shader == nullptr) { (!rb_colorcontrol.alpha_test_enable ||
rb_colorcontrol.alpha_func == CompareFunction::kAlways) &&
!rb_colorcontrol.alpha_to_mask_enable;
} else {
early_z = true; early_z = true;
} else if (!pixel_shader->writes_depth()) {
if (rb_depthcontrol & 0x8) {
early_z = true;
} else if (pixel_shader->implicit_early_z_allowed()) {
early_z = (!(rb_colorcontrol & 0x8) || (rb_colorcontrol & 0x7) == 0x7) &&
!(rb_colorcontrol & 0x10);
}
} }
// Create the pipeline if needed and bind it. // Create the pipeline if needed and bind it.
@ -1366,22 +1364,19 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
(1ull << (vfetch_index & 63))) { (1ull << (vfetch_index & 63))) {
continue; continue;
} }
uint32_t vfetch_constant_index = const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2; XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
if ((regs[vfetch_constant_index].u32 & 0x3) != 3) { if (vfetch_constant.type != 3) {
XELOGW("Vertex fetch type is not 3 (fetch constant %u is %.8X %.8X)!", XELOGW("Vertex fetch type is not 3 (fetch constant %u is %.8X %.8X)!",
vfetch_index, regs[vfetch_constant_index].u32, vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
regs[vfetch_constant_index + 1].u32);
return false; return false;
} }
if (!shared_memory_->RequestRange( if (!shared_memory_->RequestRange(vfetch_constant.address << 2,
regs[vfetch_constant_index].u32 & 0x1FFFFFFC, vfetch_constant.size << 2)) {
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC)) {
XELOGE( XELOGE(
"Failed to request vertex buffer at 0x%.8X (size %u) in the shared " "Failed to request vertex buffer at 0x%.8X (size %u) in the shared "
"memory", "memory",
regs[vfetch_constant_index].u32 & 0x1FFFFFFC, vfetch_constant.address << 2, vfetch_constant.size << 2);
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
return false; return false;
} }
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
@ -1400,31 +1395,29 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
const std::vector<uint32_t>& memexport_stream_constants_vertex = const std::vector<uint32_t>& memexport_stream_constants_vertex =
vertex_shader->memexport_stream_constants(); vertex_shader->memexport_stream_constants();
for (uint32_t constant_index : memexport_stream_constants_vertex) { for (uint32_t constant_index : memexport_stream_constants_vertex) {
const xenos::xe_gpu_memexport_stream_t* memexport_stream = const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>( XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4);
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4]); if (memexport_stream.index_count == 0) {
if (memexport_stream->index_count == 0) {
continue; continue;
} }
uint32_t memexport_format_size = uint32_t memexport_format_size =
GetSupportedMemExportFormatSize(memexport_stream->format); GetSupportedMemExportFormatSize(memexport_stream.format);
if (memexport_format_size == 0) { if (memexport_format_size == 0) {
XELOGE( XELOGE("Unsupported memexport format %s",
"Unsupported memexport format %s", FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format)))
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format))) ->name);
->name);
return false; return false;
} }
uint32_t memexport_base_address = memexport_stream->base_address;
uint32_t memexport_size_dwords = uint32_t memexport_size_dwords =
memexport_stream->index_count * memexport_format_size; memexport_stream.index_count * memexport_format_size;
// Try to reduce the number of shared memory operations when writing // Try to reduce the number of shared memory operations when writing
// different elements into the same buffer through different exports // different elements into the same buffer through different exports
// (happens in Halo 3). // (happens in Halo 3).
bool memexport_range_reused = false; bool memexport_range_reused = false;
for (uint32_t i = 0; i < memexport_range_count; ++i) { for (uint32_t i = 0; i < memexport_range_count; ++i) {
MemExportRange& memexport_range = memexport_ranges[i]; MemExportRange& memexport_range = memexport_ranges[i];
if (memexport_range.base_address_dwords == memexport_base_address) { if (memexport_range.base_address_dwords ==
memexport_stream.base_address) {
memexport_range.size_dwords = memexport_range.size_dwords =
std::max(memexport_range.size_dwords, memexport_size_dwords); std::max(memexport_range.size_dwords, memexport_size_dwords);
memexport_range_reused = true; memexport_range_reused = true;
@ -1435,7 +1428,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
if (!memexport_range_reused) { if (!memexport_range_reused) {
MemExportRange& memexport_range = MemExportRange& memexport_range =
memexport_ranges[memexport_range_count++]; memexport_ranges[memexport_range_count++];
memexport_range.base_address_dwords = memexport_base_address; memexport_range.base_address_dwords = memexport_stream.base_address;
memexport_range.size_dwords = memexport_size_dwords; memexport_range.size_dwords = memexport_size_dwords;
} }
} }
@ -1444,28 +1437,26 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
const std::vector<uint32_t>& memexport_stream_constants_pixel = const std::vector<uint32_t>& memexport_stream_constants_pixel =
pixel_shader->memexport_stream_constants(); pixel_shader->memexport_stream_constants();
for (uint32_t constant_index : memexport_stream_constants_pixel) { for (uint32_t constant_index : memexport_stream_constants_pixel) {
const xenos::xe_gpu_memexport_stream_t* memexport_stream = const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>( XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4);
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4]); if (memexport_stream.index_count == 0) {
if (memexport_stream->index_count == 0) {
continue; continue;
} }
uint32_t memexport_format_size = uint32_t memexport_format_size =
GetSupportedMemExportFormatSize(memexport_stream->format); GetSupportedMemExportFormatSize(memexport_stream.format);
if (memexport_format_size == 0) { if (memexport_format_size == 0) {
XELOGE( XELOGE("Unsupported memexport format %s",
"Unsupported memexport format %s", FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format)))
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format))) ->name);
->name);
return false; return false;
} }
uint32_t memexport_base_address = memexport_stream->base_address;
uint32_t memexport_size_dwords = uint32_t memexport_size_dwords =
memexport_stream->index_count * memexport_format_size; memexport_stream.index_count * memexport_format_size;
bool memexport_range_reused = false; bool memexport_range_reused = false;
for (uint32_t i = 0; i < memexport_range_count; ++i) { for (uint32_t i = 0; i < memexport_range_count; ++i) {
MemExportRange& memexport_range = memexport_ranges[i]; MemExportRange& memexport_range = memexport_ranges[i];
if (memexport_range.base_address_dwords == memexport_base_address) { if (memexport_range.base_address_dwords ==
memexport_stream.base_address) {
memexport_range.size_dwords = memexport_range.size_dwords =
std::max(memexport_range.size_dwords, memexport_size_dwords); std::max(memexport_range.size_dwords, memexport_size_dwords);
memexport_range_reused = true; memexport_range_reused = true;
@ -1475,7 +1466,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
if (!memexport_range_reused) { if (!memexport_range_reused) {
MemExportRange& memexport_range = MemExportRange& memexport_range =
memexport_ranges[memexport_range_count++]; memexport_ranges[memexport_range_count++];
memexport_range.base_address_dwords = memexport_base_address; memexport_range.base_address_dwords = memexport_stream.base_address;
memexport_range.size_dwords = memexport_size_dwords; memexport_range.size_dwords = memexport_size_dwords;
} }
} }
@ -1850,15 +1841,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
// See r200UpdateWindow: // See r200UpdateWindow:
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
int16_t window_offset_x = pa_sc_window_offset & 0x7FFF;
int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF;
if (window_offset_x & 0x4000) {
window_offset_x |= 0x8000;
}
if (window_offset_y & 0x4000) {
window_offset_y |= 0x8000;
}
// Supersampling replacing multisampling due to difficulties of emulating // Supersampling replacing multisampling due to difficulties of emulating
// EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also // EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also
@ -1868,8 +1851,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
pixel_size_x = 1; pixel_size_x = 1;
pixel_size_y = 1; pixel_size_y = 1;
} else { } else {
MsaaSamples msaa_samples = MsaaSamples msaa_samples = regs.Get<reg::RB_SURFACE_INFO>().msaa_samples;
MsaaSamples((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3);
pixel_size_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1; pixel_size_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1;
pixel_size_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1; pixel_size_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1;
} }
@ -1889,30 +1871,30 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
// box. If it's not, the position is in screen space. Since we can only use // box. If it's not, the position is in screen space. Since we can only use
// the NDC in PC APIs, we use a viewport of the largest possible size, and // the NDC in PC APIs, we use a viewport of the largest possible size, and
// divide the position by it in translated shaders. // divide the position by it in translated shaders.
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
float viewport_scale_x = float viewport_scale_x =
(pa_cl_vte_cntl & (1 << 0)) pa_cl_vte_cntl.vport_x_scale_ena
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32)
: 1280.0f; : 1280.0f;
float viewport_scale_y = float viewport_scale_y =
(pa_cl_vte_cntl & (1 << 2)) pa_cl_vte_cntl.vport_y_scale_ena
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
: 1280.0f; : 1280.0f;
float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
: 1.0f; : 1.0f;
float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1)) float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
: std::abs(viewport_scale_x); : std::abs(viewport_scale_x);
float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: std::abs(viewport_scale_y); : std::abs(viewport_scale_y);
float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5)) float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
: 0.0f; : 0.0f;
if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
viewport_offset_x += float(window_offset_x); viewport_offset_x += float(pa_sc_window_offset.window_x_offset);
viewport_offset_y += float(window_offset_y); viewport_offset_y += float(pa_sc_window_offset.window_y_offset);
} }
D3D12_VIEWPORT viewport; D3D12_VIEWPORT viewport;
viewport.TopLeftX = viewport.TopLeftX =
@ -1941,21 +1923,22 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
} }
// Scissor. // Scissor.
uint32_t pa_sc_window_scissor_tl = auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
uint32_t pa_sc_window_scissor_br =
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
D3D12_RECT scissor; D3D12_RECT scissor;
scissor.left = pa_sc_window_scissor_tl & 0x7FFF; scissor.left = pa_sc_window_scissor_tl.tl_x;
scissor.top = (pa_sc_window_scissor_tl >> 16) & 0x7FFF; scissor.top = pa_sc_window_scissor_tl.tl_y;
scissor.right = pa_sc_window_scissor_br & 0x7FFF; scissor.right = pa_sc_window_scissor_br.br_x;
scissor.bottom = (pa_sc_window_scissor_br >> 16) & 0x7FFF; scissor.bottom = pa_sc_window_scissor_br.br_y;
if (!(pa_sc_window_scissor_tl & (1u << 31))) { if (!pa_sc_window_scissor_tl.window_offset_disable) {
// !WINDOW_OFFSET_DISABLE. scissor.left =
scissor.left = std::max(scissor.left + window_offset_x, LONG(0)); std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0));
scissor.top = std::max(scissor.top + window_offset_y, LONG(0)); scissor.top =
scissor.right = std::max(scissor.right + window_offset_x, LONG(0)); std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0));
scissor.bottom = std::max(scissor.bottom + window_offset_y, LONG(0)); scissor.right =
std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0));
scissor.bottom =
std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0));
} }
scissor.left *= pixel_size_x; scissor.left *= pixel_size_x;
scissor.top *= pixel_size_y; scissor.top *= pixel_size_y;
@ -1992,13 +1975,17 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
// Stencil reference value. Per-face reference not supported by Direct3D 12, // Stencil reference value. Per-face reference not supported by Direct3D 12,
// choose the back face one only if drawing only back faces. // choose the back face one only if drawing only back faces.
uint32_t stencil_ref; uint32_t stencil_ref_mask_reg;
if (primitive_two_faced && (regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & 0x80) && auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 1) { if (primitive_two_faced &&
stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32 & 0xFF; regs.Get<reg::RB_DEPTHCONTROL>().backface_enable &&
pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) {
stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
} else { } else {
stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK].u32 & 0xFF; stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK;
} }
uint32_t stencil_ref =
regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_reg).stencilref;
ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref; ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref;
if (ff_stencil_ref_update_needed_) { if (ff_stencil_ref_update_needed_) {
ff_stencil_ref_ = stencil_ref; ff_stencil_ref_ = stencil_ref;
@ -2019,64 +2006,55 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES #endif // FINE_GRAINED_DRAW_SCOPES
uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
uint32_t pa_su_point_minmax = regs[XE_GPU_REG_PA_SU_POINT_MINMAX].u32; auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
uint32_t pa_su_point_size = regs[XE_GPU_REG_PA_SU_POINT_SIZE].u32; auto pa_su_point_size = regs.Get<reg::PA_SU_POINT_SIZE>();
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32; auto pa_su_vtx_cntl = regs.Get<reg::PA_SU_VTX_CNTL>();
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32; auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
uint32_t rb_stencilrefmask_bf = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32; auto rb_stencilrefmask_bf =
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32; auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; auto sq_context_misc = regs.Get<reg::SQ_CONTEXT_MISC>();
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
// Get the color info register values for each render target, and also put // Get the color info register values for each render target, and also put
// some safety measures for the ROV path - disable fully aliased render // some safety measures for the ROV path - disable fully aliased render
// targets. Also, for ROV, exclude components that don't exist in the format // targets. Also, for ROV, exclude components that don't exist in the format
// from the write mask. // from the write mask.
uint32_t color_infos[4]; reg::RB_COLOR_INFO color_infos[4];
ColorRenderTargetFormat color_formats[4];
float rt_clamp[4][4]; float rt_clamp[4][4];
uint32_t rt_keep_masks[4][2]; uint32_t rt_keep_masks[4][2];
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
uint32_t color_info; static const uint32_t kColorInfoRegs[] = {
switch (i) { XE_GPU_REG_RB_COLOR_INFO,
case 1: XE_GPU_REG_RB_COLOR1_INFO,
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; XE_GPU_REG_RB_COLOR2_INFO,
break; XE_GPU_REG_RB_COLOR3_INFO,
case 2: };
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32; auto color_info = regs.Get<reg::RB_COLOR_INFO>(kColorInfoRegs[i]);
break;
case 3:
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
break;
default:
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
}
color_infos[i] = color_info; color_infos[i] = color_info;
color_formats[i] = ColorRenderTargetFormat((color_info >> 16) & 0xF);
if (IsROVUsedForEDRAM()) { if (IsROVUsedForEDRAM()) {
// Get the mask for keeping previous color's components unmodified, // Get the mask for keeping previous color's components unmodified,
// or two UINT32_MAX if no colors actually existing in the RT are written. // or two UINT32_MAX if no colors actually existing in the RT are written.
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants( DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
color_formats[i], (color_mask >> (i * 4)) & 0b1111, rt_clamp[i][0], color_info.color_format, (color_mask >> (i * 4)) & 0b1111,
rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3], rt_keep_masks[i][0], rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
rt_keep_masks[i][1]); rt_keep_masks[i][0], rt_keep_masks[i][1]);
// Disable the render target if it has the same EDRAM base as another one // Disable the render target if it has the same EDRAM base as another one
// (with a smaller index - assume it's more important). // (with a smaller index - assume it's more important).
if (rt_keep_masks[i][0] == UINT32_MAX && if (rt_keep_masks[i][0] == UINT32_MAX &&
rt_keep_masks[i][1] == UINT32_MAX) { rt_keep_masks[i][1] == UINT32_MAX) {
uint32_t edram_base = color_info & 0xFFF;
for (uint32_t j = 0; j < i; ++j) { for (uint32_t j = 0; j < i; ++j) {
if (edram_base == (color_infos[j] & 0xFFF) && if (color_info.color_base == color_infos[j].color_base &&
(rt_keep_masks[j][0] != UINT32_MAX || (rt_keep_masks[j][0] != UINT32_MAX ||
rt_keep_masks[j][1] != UINT32_MAX)) { rt_keep_masks[j][1] != UINT32_MAX)) {
rt_keep_masks[i][0] = UINT32_MAX; rt_keep_masks[i][0] = UINT32_MAX;
@ -2091,20 +2069,21 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Disable depth and stencil if it aliases a color render target (for // Disable depth and stencil if it aliases a color render target (for
// instance, during the XBLA logo in Banjo-Kazooie, though depth writing is // instance, during the XBLA logo in Banjo-Kazooie, though depth writing is
// already disabled there). // already disabled there).
if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) { bool depth_stencil_enabled =
uint32_t edram_base_depth = rb_depth_info & 0xFFF; rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
if (IsROVUsedForEDRAM() && depth_stencil_enabled) {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
if (edram_base_depth == (color_infos[i] & 0xFFF) && if (rb_depth_info.depth_base == color_infos[i].color_base &&
(rt_keep_masks[i][0] != UINT32_MAX || (rt_keep_masks[i][0] != UINT32_MAX ||
rt_keep_masks[i][1] != UINT32_MAX)) { rt_keep_masks[i][1] != UINT32_MAX)) {
rb_depthcontrol &= ~(uint32_t(0x1 | 0x2)); depth_stencil_enabled = false;
break; break;
} }
} }
} }
// Get viewport Z scale - needed for flags and ROV output. // Get viewport Z scale - needed for flags and ROV output.
float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
: 1.0f; : 1.0f;
@ -2126,18 +2105,18 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// = false: multiply the Z coordinate by 1/W0. // = false: multiply the Z coordinate by 1/W0.
// 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal // 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal
// to get 1/W0. // to get 1/W0.
if (pa_cl_vte_cntl & (1 << 8)) { if (pa_cl_vte_cntl.vtx_xy_fmt) {
flags |= DxbcShaderTranslator::kSysFlag_XYDividedByW; flags |= DxbcShaderTranslator::kSysFlag_XYDividedByW;
} }
if (pa_cl_vte_cntl & (1 << 9)) { if (pa_cl_vte_cntl.vtx_z_fmt) {
flags |= DxbcShaderTranslator::kSysFlag_ZDividedByW; flags |= DxbcShaderTranslator::kSysFlag_ZDividedByW;
} }
if (pa_cl_vte_cntl & (1 << 10)) { if (pa_cl_vte_cntl.vtx_w0_fmt) {
flags |= DxbcShaderTranslator::kSysFlag_WNotReciprocal; flags |= DxbcShaderTranslator::kSysFlag_WNotReciprocal;
} }
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE. // User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
if (!(pa_cl_clip_cntl & (1 << 16))) { if (!pa_cl_clip_cntl.clip_disable) {
flags |= (pa_cl_clip_cntl & 0b111111) flags |= (pa_cl_clip_cntl.value & 0b111111)
<< DxbcShaderTranslator::kSysFlag_UserClipPlane0_Shift; << DxbcShaderTranslator::kSysFlag_UserClipPlane0_Shift;
} }
// Reversed depth. // Reversed depth.
@ -2145,8 +2124,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
flags |= DxbcShaderTranslator::kSysFlag_ReverseZ; flags |= DxbcShaderTranslator::kSysFlag_ReverseZ;
} }
// Alpha test. // Alpha test.
if (rb_colorcontrol & 0x8) { if (rb_colorcontrol.alpha_test_enable) {
flags |= (rb_colorcontrol & 0x7) flags |= uint32_t(rb_colorcontrol.alpha_func.value())
<< DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift;
} else { } else {
flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess | flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess |
@ -2154,25 +2133,25 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater; DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater;
} }
// Alpha to coverage. // Alpha to coverage.
if (rb_colorcontrol & 0x10) { if (rb_colorcontrol.alpha_to_mask_enable) {
flags |= DxbcShaderTranslator::kSysFlag_AlphaToCoverage; flags |= DxbcShaderTranslator::kSysFlag_AlphaToCoverage;
} }
// Gamma writing. // Gamma writing.
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
if (color_formats[i] == ColorRenderTargetFormat::k_8_8_8_8_GAMMA) { if (color_infos[i].color_format ==
ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma << i; flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma << i;
} }
} }
if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) { if (IsROVUsedForEDRAM() && depth_stencil_enabled) {
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil; flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil;
if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) == if (rb_depth_info.depth_format == DepthRenderTargetFormat::kD24FS8) {
DepthRenderTargetFormat::kD24FS8) {
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24; flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24;
} }
if (rb_depthcontrol & 0x2) { if (rb_depthcontrol.z_enable) {
flags |= ((rb_depthcontrol >> 4) & 0x7) flags |= uint32_t(rb_depthcontrol.zfunc.value())
<< DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift; << DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift;
if (rb_depthcontrol & 0x4) { if (rb_depthcontrol.z_write_enable) {
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite; flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite;
} }
} else { } else {
@ -2182,7 +2161,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
DxbcShaderTranslator::kSysFlag_ROVDepthPassIfEqual | DxbcShaderTranslator::kSysFlag_ROVDepthPassIfEqual |
DxbcShaderTranslator::kSysFlag_ROVDepthPassIfGreater; DxbcShaderTranslator::kSysFlag_ROVDepthPassIfGreater;
} }
if (rb_depthcontrol & 0x1) { if (rb_depthcontrol.stencil_enable) {
flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest; flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest;
} }
if (early_z) { if (early_z) {
@ -2223,9 +2202,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
index_endian_and_edge_factors; index_endian_and_edge_factors;
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE. // User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
if (!(pa_cl_clip_cntl & (1 << 16))) { if (!pa_cl_clip_cntl.clip_disable) {
for (uint32_t i = 0; i < 6; ++i) { for (uint32_t i = 0; i < 6; ++i) {
if (!(pa_cl_clip_cntl & (1 << i))) { if (!(pa_cl_clip_cntl.value & (1 << i))) {
continue; continue;
} }
const float* ucp = &regs[XE_GPU_REG_PA_CL_UCP_0_X + i * 4].f32; const float* ucp = &regs[XE_GPU_REG_PA_CL_UCP_0_X + i * 4].f32;
@ -2249,45 +2228,49 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// different register (and if there's such register at all). // different register (and if there's such register at all).
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32;
float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32;
// When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to be
// written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF isn't
// set in this case in draws in games.
bool gl_clip_space_def = bool gl_clip_space_def =
!(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); !pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena;
float ndc_scale_x, ndc_scale_y, ndc_scale_z; float ndc_scale_x, ndc_scale_y, ndc_scale_z;
if (primitive_two_faced && (pa_su_sc_mode_cntl & 0x3) == 0x3) { if (primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
pa_su_sc_mode_cntl.cull_back) {
// Kill all primitives if both faces are culled, but the vertex shader still // Kill all primitives if both faces are culled, but the vertex shader still
// needs to do memexport (not NaN because of comparison for setting the // needs to do memexport (not NaN because of comparison for setting the
// dirty flag). // dirty flag).
ndc_scale_x = ndc_scale_y = ndc_scale_z = 0; ndc_scale_x = ndc_scale_y = ndc_scale_z = 0;
} else { } else {
if (pa_cl_vte_cntl & (1 << 0)) { if (pa_cl_vte_cntl.vport_x_scale_ena) {
ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f; ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f;
} else { } else {
ndc_scale_x = 1.0f / 1280.0f; ndc_scale_x = 1.0f / 1280.0f;
} }
if (pa_cl_vte_cntl & (1 << 2)) { if (pa_cl_vte_cntl.vport_y_scale_ena) {
ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f; ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f;
} else { } else {
ndc_scale_y = -1.0f / 1280.0f; ndc_scale_y = -1.0f / 1280.0f;
} }
ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f;
} }
float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; float ndc_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 0.0f : -1.0f;
float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f; float ndc_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 0.0f : 1.0f;
float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f;
// Like in OpenGL - VPOS giving pixel centers. // Like in OpenGL - VPOS giving pixel centers.
// TODO(Triang3l): Check if ps_param_gen should give center positions in // TODO(Triang3l): Check if ps_param_gen should give center positions in
// OpenGL mode on the Xbox 360. // OpenGL mode on the Xbox 360.
float pixel_half_pixel_offset = 0.5f; float pixel_half_pixel_offset = 0.5f;
if (cvars::d3d12_half_pixel_offset && !(pa_su_vtx_cntl & (1 << 0))) { if (cvars::d3d12_half_pixel_offset && !pa_su_vtx_cntl.pix_center) {
// Signs are hopefully correct here, tested in GTA IV on both clearing // Signs are hopefully correct here, tested in GTA IV on both clearing
// (without a viewport) and drawing things near the edges of the screen. // (without a viewport) and drawing things near the edges of the screen.
if (pa_cl_vte_cntl & (1 << 0)) { if (pa_cl_vte_cntl.vport_x_scale_ena) {
if (viewport_scale_x != 0.0f) { if (viewport_scale_x != 0.0f) {
ndc_offset_x += 0.5f / viewport_scale_x; ndc_offset_x += 0.5f / viewport_scale_x;
} }
} else { } else {
ndc_offset_x += 1.0f / 2560.0f; ndc_offset_x += 1.0f / 2560.0f;
} }
if (pa_cl_vte_cntl & (1 << 2)) { if (pa_cl_vte_cntl.vport_y_scale_ena) {
if (viewport_scale_y != 0.0f) { if (viewport_scale_y != 0.0f) {
ndc_offset_y += 0.5f / viewport_scale_y; ndc_offset_y += 0.5f / viewport_scale_y;
} }
@ -2313,10 +2296,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset; system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset;
// Point size. // Point size.
float point_size_x = float(pa_su_point_size >> 16) * 0.125f; float point_size_x = float(pa_su_point_size.width) * 0.125f;
float point_size_y = float(pa_su_point_size & 0xFFFF) * 0.125f; float point_size_y = float(pa_su_point_size.height) * 0.125f;
float point_size_min = float(pa_su_point_minmax & 0xFFFF) * 0.125f; float point_size_min = float(pa_su_point_minmax.min_size) * 0.125f;
float point_size_max = float(pa_su_point_minmax >> 16) * 0.125f; float point_size_max = float(pa_su_point_minmax.max_size) * 0.125f;
dirty |= system_constants_.point_size[0] != point_size_x; dirty |= system_constants_.point_size[0] != point_size_x;
dirty |= system_constants_.point_size[1] != point_size_y; dirty |= system_constants_.point_size[1] != point_size_y;
dirty |= system_constants_.point_size_min_max[0] != point_size_min; dirty |= system_constants_.point_size_min_max[0] != point_size_min;
@ -2326,13 +2309,13 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
system_constants_.point_size_min_max[0] = point_size_min; system_constants_.point_size_min_max[0] = point_size_min;
system_constants_.point_size_min_max[1] = point_size_max; system_constants_.point_size_min_max[1] = point_size_max;
float point_screen_to_ndc_x, point_screen_to_ndc_y; float point_screen_to_ndc_x, point_screen_to_ndc_y;
if (pa_cl_vte_cntl & (1 << 0)) { if (pa_cl_vte_cntl.vport_x_scale_ena) {
point_screen_to_ndc_x = point_screen_to_ndc_x =
(viewport_scale_x != 0.0f) ? (0.5f / viewport_scale_x) : 0.0f; (viewport_scale_x != 0.0f) ? (0.5f / viewport_scale_x) : 0.0f;
} else { } else {
point_screen_to_ndc_x = 1.0f / 2560.0f; point_screen_to_ndc_x = 1.0f / 2560.0f;
} }
if (pa_cl_vte_cntl & (1 << 2)) { if (pa_cl_vte_cntl.vport_y_scale_ena) {
point_screen_to_ndc_y = point_screen_to_ndc_y =
(viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f; (viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f;
} else { } else {
@ -2345,15 +2328,16 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Pixel position register. // Pixel position register.
uint32_t pixel_pos_reg = uint32_t pixel_pos_reg =
(sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX; sq_program_cntl.param_gen ? sq_context_misc.param_gen_pos : UINT_MAX;
dirty |= system_constants_.pixel_pos_reg != pixel_pos_reg; dirty |= system_constants_.pixel_pos_reg != pixel_pos_reg;
system_constants_.pixel_pos_reg = pixel_pos_reg; system_constants_.pixel_pos_reg = pixel_pos_reg;
// Log2 of sample count, for scaling VPOS with SSAA (without ROV) and for // Log2 of sample count, for scaling VPOS with SSAA (without ROV) and for
// EDRAM address calculation with MSAA (with ROV). // EDRAM address calculation with MSAA (with ROV).
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); uint32_t sample_count_log2_x =
uint32_t sample_count_log2_x = msaa_samples >= MsaaSamples::k4X ? 1 : 0; rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 1 : 0;
uint32_t sample_count_log2_y = msaa_samples >= MsaaSamples::k2X ? 1 : 0; uint32_t sample_count_log2_y =
rb_surface_info.msaa_samples >= MsaaSamples::k2X ? 1 : 0;
dirty |= system_constants_.sample_count_log2[0] != sample_count_log2_x; dirty |= system_constants_.sample_count_log2[0] != sample_count_log2_x;
dirty |= system_constants_.sample_count_log2[1] != sample_count_log2_y; dirty |= system_constants_.sample_count_log2[1] != sample_count_log2_y;
system_constants_.sample_count_log2[0] = sample_count_log2_x; system_constants_.sample_count_log2[0] = sample_count_log2_x;
@ -2365,43 +2349,22 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// EDRAM pitch for ROV writing. // EDRAM pitch for ROV writing.
if (IsROVUsedForEDRAM()) { if (IsROVUsedForEDRAM()) {
uint32_t edram_pitch_tiles = ((std::min(rb_surface_info & 0x3FFFu, 2560u) * uint32_t edram_pitch_tiles =
(msaa_samples >= MsaaSamples::k4X ? 2 : 1)) + ((std::min(rb_surface_info.surface_pitch.value(), 2560u) *
79) / (rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
80; 79) /
80;
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles; dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
system_constants_.edram_pitch_tiles = edram_pitch_tiles; system_constants_.edram_pitch_tiles = edram_pitch_tiles;
} }
// Color exponent bias and output index mapping or ROV render target writing. // Color exponent bias and output index mapping or ROV render target writing.
bool colorcontrol_blend_enable = (rb_colorcontrol & 0x20) == 0;
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
uint32_t color_info = color_infos[i]; reg::RB_COLOR_INFO color_info = color_infos[i];
uint32_t blend_factors_ops;
if (colorcontrol_blend_enable) {
switch (i) {
case 1:
blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32;
break;
case 2:
blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32;
break;
case 3:
blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32;
break;
default:
blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32;
break;
}
blend_factors_ops &= 0x1FFF1FFF;
} else {
blend_factors_ops = 0x00010001;
}
// Exponent bias is in bits 20:25 of RB_COLOR_INFO. // Exponent bias is in bits 20:25 of RB_COLOR_INFO.
int32_t color_exp_bias = int32_t(color_info << 6) >> 26; int32_t color_exp_bias = color_info.color_exp_bias;
ColorRenderTargetFormat color_format = color_formats[i]; if (color_info.color_format == ColorRenderTargetFormat::k_16_16 ||
if (color_format == ColorRenderTargetFormat::k_16_16 || color_info.color_format == ColorRenderTargetFormat::k_16_16_16_16) {
color_format == ColorRenderTargetFormat::k_16_16_16_16) {
// On the Xbox 360, k_16_16_EDRAM and k_16_16_16_16_EDRAM internally have // On the Xbox 360, k_16_16_EDRAM and k_16_16_16_16_EDRAM internally have
// -32...32 range and expect shaders to give -32...32 values, but they're // -32...32 range and expect shaders to give -32...32 values, but they're
// emulated using normalized RG16/RGBA16 when not using the ROV, so the // emulated using normalized RG16/RGBA16 when not using the ROV, so the
@ -2427,7 +2390,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
system_constants_.edram_rt_keep_mask[i][1] = rt_keep_masks[i][1]; system_constants_.edram_rt_keep_mask[i][1] = rt_keep_masks[i][1];
if (rt_keep_masks[i][0] != UINT32_MAX || if (rt_keep_masks[i][0] != UINT32_MAX ||
rt_keep_masks[i][1] != UINT32_MAX) { rt_keep_masks[i][1] != UINT32_MAX) {
uint32_t rt_base_dwords_scaled = (color_info & 0xFFF) * 1280; uint32_t rt_base_dwords_scaled = color_info.color_base * 1280;
if (texture_cache_->IsResolutionScale2X()) { if (texture_cache_->IsResolutionScale2X()) {
rt_base_dwords_scaled <<= 2; rt_base_dwords_scaled <<= 2;
} }
@ -2435,8 +2398,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
rt_base_dwords_scaled; rt_base_dwords_scaled;
system_constants_.edram_rt_base_dwords_scaled[i] = system_constants_.edram_rt_base_dwords_scaled[i] =
rt_base_dwords_scaled; rt_base_dwords_scaled;
uint32_t format_flags = uint32_t format_flags = DxbcShaderTranslator::ROV_AddColorFormatFlags(
DxbcShaderTranslator::ROV_AddColorFormatFlags(color_format); color_info.color_format);
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
system_constants_.edram_rt_format_flags[i] = format_flags; system_constants_.edram_rt_format_flags[i] = format_flags;
// Can't do float comparisons here because NaNs would result in always // Can't do float comparisons here because NaNs would result in always
@ -2445,6 +2408,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
4 * sizeof(float)) != 0; 4 * sizeof(float)) != 0;
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
4 * sizeof(float)); 4 * sizeof(float));
static const uint32_t kBlendControlRegs[] = {
XE_GPU_REG_RB_BLENDCONTROL_0,
XE_GPU_REG_RB_BLENDCONTROL_1,
XE_GPU_REG_RB_BLENDCONTROL_2,
XE_GPU_REG_RB_BLENDCONTROL_3,
};
uint32_t blend_factors_ops =
regs[kBlendControlRegs[i]].u32 & 0x1FFF1FFF;
dirty |= system_constants_.edram_rt_blend_factors_ops[i] != dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
blend_factors_ops; blend_factors_ops;
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
@ -2465,7 +2436,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
resolution_square_scale; resolution_square_scale;
system_constants_.edram_resolution_square_scale = resolution_square_scale; system_constants_.edram_resolution_square_scale = resolution_square_scale;
uint32_t depth_base_dwords = (rb_depth_info & 0xFFF) * 1280; uint32_t depth_base_dwords = rb_depth_info.depth_base * 1280;
dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords; dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords;
system_constants_.edram_depth_base_dwords = depth_base_dwords; system_constants_.edram_depth_base_dwords = depth_base_dwords;
@ -2474,7 +2445,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
float depth_range_scale = std::abs(viewport_scale_z); float depth_range_scale = std::abs(viewport_scale_z);
dirty |= system_constants_.edram_depth_range_scale != depth_range_scale; dirty |= system_constants_.edram_depth_range_scale != depth_range_scale;
system_constants_.edram_depth_range_scale = depth_range_scale; system_constants_.edram_depth_range_scale = depth_range_scale;
float depth_range_offset = (pa_cl_vte_cntl & (1 << 5)) float depth_range_offset = pa_cl_vte_cntl.vport_z_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
: 0.0f; : 0.0f;
if (viewport_scale_z < 0.0f) { if (viewport_scale_z < 0.0f) {
@ -2490,20 +2461,20 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f; float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f;
float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f; float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f;
if (primitive_two_faced) { if (primitive_two_faced) {
if (pa_su_sc_mode_cntl & (1 << 11)) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
poly_offset_front_offset = poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
} }
if (pa_su_sc_mode_cntl & (1 << 12)) { if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
poly_offset_back_scale = poly_offset_back_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
poly_offset_back_offset = poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
} }
} else { } else {
if (pa_su_sc_mode_cntl & (1 << 13)) { if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
poly_offset_front_offset = poly_offset_front_offset =
@ -2533,39 +2504,43 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
poly_offset_back_offset; poly_offset_back_offset;
system_constants_.edram_poly_offset_back_offset = poly_offset_back_offset; system_constants_.edram_poly_offset_back_offset = poly_offset_back_offset;
if (rb_depthcontrol & 0x1) { if (depth_stencil_enabled && rb_depthcontrol.stencil_enable) {
uint32_t stencil_value; dirty |= system_constants_.edram_stencil_front_reference !=
rb_stencilrefmask.stencilref;
stencil_value = rb_stencilrefmask & 0xFF; system_constants_.edram_stencil_front_reference =
dirty |= system_constants_.edram_stencil_front_reference != stencil_value; rb_stencilrefmask.stencilref;
system_constants_.edram_stencil_front_reference = stencil_value; dirty |= system_constants_.edram_stencil_front_read_mask !=
stencil_value = (rb_stencilrefmask >> 8) & 0xFF; rb_stencilrefmask.stencilmask;
dirty |= system_constants_.edram_stencil_front_read_mask != stencil_value; system_constants_.edram_stencil_front_read_mask =
system_constants_.edram_stencil_front_read_mask = stencil_value; rb_stencilrefmask.stencilmask;
stencil_value = (rb_stencilrefmask >> 16) & 0xFF; dirty |= system_constants_.edram_stencil_front_write_mask !=
rb_stencilrefmask.stencilwritemask;
system_constants_.edram_stencil_front_write_mask =
rb_stencilrefmask.stencilwritemask;
uint32_t stencil_func_ops =
(rb_depthcontrol.value >> 8) & ((1 << 12) - 1);
dirty |= dirty |=
system_constants_.edram_stencil_front_write_mask != stencil_value; system_constants_.edram_stencil_front_func_ops != stencil_func_ops;
system_constants_.edram_stencil_front_write_mask = stencil_value; system_constants_.edram_stencil_front_func_ops = stencil_func_ops;
stencil_value = (rb_depthcontrol >> 8) & ((1 << 12) - 1);
dirty |= system_constants_.edram_stencil_front_func_ops != stencil_value;
system_constants_.edram_stencil_front_func_ops = stencil_value;
if (primitive_two_faced && (rb_depthcontrol & 0x80)) { if (primitive_two_faced && rb_depthcontrol.backface_enable) {
stencil_value = rb_stencilrefmask_bf & 0xFF; dirty |= system_constants_.edram_stencil_back_reference !=
dirty |= rb_stencilrefmask_bf.stencilref;
system_constants_.edram_stencil_back_reference != stencil_value; system_constants_.edram_stencil_back_reference =
system_constants_.edram_stencil_back_reference = stencil_value; rb_stencilrefmask_bf.stencilref;
stencil_value = (rb_stencilrefmask_bf >> 8) & 0xFF; dirty |= system_constants_.edram_stencil_back_read_mask !=
dirty |= rb_stencilrefmask_bf.stencilmask;
system_constants_.edram_stencil_back_read_mask != stencil_value; system_constants_.edram_stencil_back_read_mask =
system_constants_.edram_stencil_back_read_mask = stencil_value; rb_stencilrefmask_bf.stencilmask;
stencil_value = (rb_stencilrefmask_bf >> 16) & 0xFF; dirty |= system_constants_.edram_stencil_back_write_mask !=
dirty |= rb_stencilrefmask_bf.stencilwritemask;
system_constants_.edram_stencil_back_write_mask != stencil_value; system_constants_.edram_stencil_back_write_mask =
system_constants_.edram_stencil_back_write_mask = stencil_value; rb_stencilrefmask_bf.stencilwritemask;
stencil_value = (rb_depthcontrol >> 20) & ((1 << 12) - 1); uint32_t stencil_func_ops_bf =
dirty |= system_constants_.edram_stencil_back_func_ops != stencil_value; (rb_depthcontrol.value >> 20) & ((1 << 12) - 1);
system_constants_.edram_stencil_back_func_ops = stencil_value; dirty |= system_constants_.edram_stencil_back_func_ops !=
stencil_func_ops_bf;
system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf;
} else { } else {
dirty |= std::memcmp(system_constants_.edram_stencil_back, dirty |= std::memcmp(system_constants_.edram_stencil_back,
system_constants_.edram_stencil_front, system_constants_.edram_stencil_front,

View File

@ -207,8 +207,17 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader,
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
xenos::xe_gpu_program_cntl_t sq_program_cntl; auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
sq_program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
// Normal vertex shaders only, for now.
assert_true(sq_program_cntl.vs_export_mode ==
xenos::VertexShaderExportMode::kPosition1Vector ||
sq_program_cntl.vs_export_mode ==
xenos::VertexShaderExportMode::kPosition2VectorsSprite ||
sq_program_cntl.vs_export_mode ==
xenos::VertexShaderExportMode::kMultipass);
assert_false(sq_program_cntl.gen_index_vtx);
if (!vertex_shader->is_translated() && if (!vertex_shader->is_translated() &&
!TranslateShader(vertex_shader, sq_program_cntl, tessellated, !TranslateShader(vertex_shader, sq_program_cntl, tessellated,
primitive_type)) { primitive_type)) {
@ -294,8 +303,7 @@ bool PipelineCache::ConfigurePipeline(
} }
bool PipelineCache::TranslateShader(D3D12Shader* shader, bool PipelineCache::TranslateShader(D3D12Shader* shader,
xenos::xe_gpu_program_cntl_t cntl, reg::SQ_PROGRAM_CNTL cntl, bool tessellated,
bool tessellated,
PrimitiveType primitive_type) { PrimitiveType primitive_type) {
// Perform translation. // Perform translation.
// If this fails the shader will be marked as invalid and ignored later. // If this fails the shader will be marked as invalid and ignored later.
@ -385,12 +393,12 @@ bool PipelineCache::GetCurrentStateDescription(
// Primitive topology type, tessellation mode and geometry shader. // Primitive topology type, tessellation mode and geometry shader.
if (tessellated) { if (tessellated) {
switch (TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3)) { switch (regs.Get<reg::VGT_HOS_CNTL>().tess_mode) {
case TessellationMode::kContinuous: case xenos::TessellationMode::kContinuous:
description_out.tessellation_mode = description_out.tessellation_mode =
PipelineTessellationMode::kContinuous; PipelineTessellationMode::kContinuous;
break; break;
case TessellationMode::kAdaptive: case xenos::TessellationMode::kAdaptive:
description_out.tessellation_mode = description_out.tessellation_mode =
cvars::d3d12_tessellation_adaptive cvars::d3d12_tessellation_adaptive
? PipelineTessellationMode::kAdaptive ? PipelineTessellationMode::kAdaptive
@ -559,20 +567,10 @@ bool PipelineCache::GetCurrentStateDescription(
// CLIP_DISABLE // CLIP_DISABLE
description_out.depth_clip = description_out.depth_clip =
(regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32 & (1 << 16)) == 0; (regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32 & (1 << 16)) == 0;
// TODO(DrChat): This seem to differ. Need to examine this.
// https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11
// ZCLIP_NEAR_DISABLE
// description_out.depth_clip = (PA_CL_CLIP_CNTL & (1 << 26)) == 0;
// RASTERIZER_DISABLE
// Disable rendering in command processor if PA_CL_CLIP_CNTL & (1 << 22)?
if (edram_rov_used_) { if (edram_rov_used_) {
description_out.rov_msaa = description_out.rov_msaa =
((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3) != 0; ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3) != 0;
} } else {
if (!edram_rov_used_) {
uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
// Depth/stencil. No stencil, always passing depth test and no depth writing // Depth/stencil. No stencil, always passing depth test and no depth writing
// means depth disabled. // means depth disabled.
if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) { if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) {
@ -711,7 +709,7 @@ bool PipelineCache::GetCurrentStateDescription(
rt.format = RenderTargetCache::GetBaseColorFormat( rt.format = RenderTargetCache::GetBaseColorFormat(
ColorRenderTargetFormat((color_info >> 16) & 0xF)); ColorRenderTargetFormat((color_info >> 16) & 0xF));
rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF; rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF;
if (!(rb_colorcontrol & 0x20) && rt.write_mask) { if (rt.write_mask) {
rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F]; rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F];
rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F]; rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F];
rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7); rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7);

View File

@ -171,7 +171,7 @@ class PipelineCache {
PipelineRenderTarget render_targets[4]; PipelineRenderTarget render_targets[4];
}; };
bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl, bool TranslateShader(D3D12Shader* shader, reg::SQ_PROGRAM_CNTL cntl,
bool tessellated, PrimitiveType primitive_type); bool tessellated, PrimitiveType primitive_type);
bool GetCurrentStateDescription( bool GetCurrentStateDescription(

View File

@ -34,7 +34,6 @@ namespace d3d12 {
constexpr uint32_t SharedMemory::kBufferSizeLog2; constexpr uint32_t SharedMemory::kBufferSizeLog2;
constexpr uint32_t SharedMemory::kBufferSize; constexpr uint32_t SharedMemory::kBufferSize;
constexpr uint32_t SharedMemory::kAddressMask;
constexpr uint32_t SharedMemory::kHeapSizeLog2; constexpr uint32_t SharedMemory::kHeapSizeLog2;
constexpr uint32_t SharedMemory::kHeapSize; constexpr uint32_t SharedMemory::kHeapSize;
constexpr uint32_t SharedMemory::kWatchBucketSizeLog2; constexpr uint32_t SharedMemory::kWatchBucketSizeLog2;
@ -198,10 +197,9 @@ void SharedMemory::UnregisterGlobalWatch(GlobalWatchHandle handle) {
SharedMemory::WatchHandle SharedMemory::WatchMemoryRange( SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
uint32_t start, uint32_t length, WatchCallback callback, uint32_t start, uint32_t length, WatchCallback callback,
void* callback_context, void* callback_data, uint64_t callback_argument) { void* callback_context, void* callback_data, uint64_t callback_argument) {
if (length == 0) { if (length == 0 || start >= kBufferSize) {
return nullptr; return nullptr;
} }
start &= kAddressMask;
length = std::min(length, kBufferSize - start); length = std::min(length, kBufferSize - start);
uint32_t watch_page_first = start >> page_size_log2_; uint32_t watch_page_first = start >> page_size_log2_;
uint32_t watch_page_last = (start + length - 1) >> page_size_log2_; uint32_t watch_page_last = (start + length - 1) >> page_size_log2_;
@ -278,9 +276,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) {
// Some texture is empty, for example - safe to draw in this case. // Some texture is empty, for example - safe to draw in this case.
return true; return true;
} }
start &= kAddressMask; if (start > kBufferSize || (kBufferSize - start) < length) {
if ((kBufferSize - start) < length) {
// Exceeds the physical address space.
return false; return false;
} }
@ -343,9 +339,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
// Some texture is empty, for example - safe to draw in this case. // Some texture is empty, for example - safe to draw in this case.
return true; return true;
} }
start &= kAddressMask; if (start > kBufferSize || (kBufferSize - start) < length) {
if ((kBufferSize - start) < length) {
// Exceeds the physical address space.
return false; return false;
} }
uint32_t last = start + length - 1; uint32_t last = start + length - 1;
@ -433,8 +427,7 @@ void SharedMemory::FireWatches(uint32_t page_first, uint32_t page_last,
} }
void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) { void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) {
start &= kAddressMask; if (length == 0 || start >= kBufferSize) {
if (length == 0) {
return; return;
} }
length = std::min(length, kBufferSize - start); length = std::min(length, kBufferSize - start);

View File

@ -138,7 +138,6 @@ class SharedMemory {
// The 512 MB tiled buffer. // The 512 MB tiled buffer.
static constexpr uint32_t kBufferSizeLog2 = 29; static constexpr uint32_t kBufferSizeLog2 = 29;
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
static constexpr uint32_t kAddressMask = kBufferSize - 1;
ID3D12Resource* buffer_ = nullptr; ID3D12Resource* buffer_ = nullptr;
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0; D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0;
D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST; D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;

View File

@ -416,9 +416,13 @@ void DxbcShaderTranslator::ConvertPWLGamma(
} }
void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() { void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
if (register_count() < 1) {
return;
}
// Vertex index is in an input bound to SV_VertexID, byte swapped according to // Vertex index is in an input bound to SV_VertexID, byte swapped according to
// xe_vertex_index_endian_and_edge_factors system constant and written to GPR // xe_vertex_index_endian_and_edge_factors system constant and written to
// 0 (which is always present because register_count includes +1). // GPR 0.
// xe_vertex_index_endian_and_edge_factors & 0b11 is: // xe_vertex_index_endian_and_edge_factors & 0b11 is:
// - 00 for no swap. // - 00 for no swap.
@ -756,157 +760,161 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// Write the vertex index to GPR 0. // Write the vertex index to GPR 0.
StartVertexShader_LoadVertexIndex(); StartVertexShader_LoadVertexIndex();
} else if (IsDxbcDomainShader()) { } else if (IsDxbcDomainShader()) {
uint32_t temp_register_operand_length =
uses_register_dynamic_addressing() ? 3 : 2;
// Copy the domain location to r0.yz (for quad patches) or r0.xyz (for
// triangle patches), and also set the domain in STAT.
uint32_t domain_location_mask, domain_location_swizzle;
if (patch_primitive_type() == PrimitiveType::kTrianglePatch) {
domain_location_mask = 0b0111;
// ZYX swizzle with r1.y == 0, according to the water shader in
// Banjo-Kazooie: Nuts & Bolts.
domain_location_swizzle = 0b00000110;
stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI;
} else {
// TODO(Triang3l): Support line patches.
assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch);
// According to the ground shader in Viva Pinata, though it's impossible
// (as of December 12th, 2018) to test there since it possibly requires
// memexport for ground control points (the memory region with them is
// filled with zeros).
domain_location_mask = 0b0110;
domain_location_swizzle = 0b00000100;
stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD;
}
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
2 + temp_register_operand_length));
if (uses_register_dynamic_addressing()) {
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, domain_location_mask, 2));
shader_code_.push_back(0);
} else {
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, domain_location_mask, 1));
}
shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT, domain_location_swizzle, 0));
++stat_.instruction_count;
if (uses_register_dynamic_addressing()) {
++stat_.array_instruction_count;
} else {
++stat_.mov_instruction_count;
}
assert_true(register_count() >= 2); assert_true(register_count() >= 2);
if (register_count() != 0) {
uint32_t temp_register_operand_length =
uses_register_dynamic_addressing() ? 3 : 2;
// Copy the primitive index to r0.x (for quad patches) or r1.x (for // Copy the domain location to r0.yz (for quad patches) or r0.xyz (for
// triangle patches) as a float. // triangle patches), and also set the domain in STAT.
// When using indexable temps, copy through a r# because x# are apparently uint32_t domain_location_mask, domain_location_swizzle;
// only accessible via mov. if (patch_primitive_type() == PrimitiveType::kTrianglePatch) {
// TODO(Triang3l): Investigate what should be written for primitives (or domain_location_mask = 0b0111;
// even control points) for non-adaptive tessellation modes (they may // ZYX swizzle with r1.y == 0, according to the water shader in
// possibly have an index buffer). // Banjo-Kazooie: Nuts & Bolts.
// TODO(Triang3l): Support line patches. domain_location_swizzle = 0b00000110;
uint32_t primitive_id_gpr_index = stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI;
patch_primitive_type() == PrimitiveType::kTrianglePatch ? 1 : 0; } else {
// TODO(Triang3l): Support line patches.
if (register_count() > primitive_id_gpr_index) { assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch);
uint32_t primitive_id_temp = uses_register_dynamic_addressing() // According to the ground shader in Viva Pinata, though it's impossible
? PushSystemTemp() // (as of December 12th, 2018) to test there since it possibly requires
: primitive_id_gpr_index; // memexport for ground control points (the memory region with them is
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) | // filled with zeros).
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); domain_location_mask = 0b0110;
shader_code_.push_back( domain_location_swizzle = 0b00000100;
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD;
shader_code_.push_back(primitive_id_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, 0));
++stat_.instruction_count;
++stat_.conversion_instruction_count;
if (uses_register_dynamic_addressing()) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b0001, 2));
shader_code_.push_back(0);
shader_code_.push_back(primitive_id_gpr_index);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(primitive_id_temp);
++stat_.instruction_count;
++stat_.array_instruction_count;
// Release primitive_id_temp.
PopSystemTemp();
} }
}
if (register_count() >= 2) {
// Write the swizzle of the barycentric/UV coordinates to r1.x (for quad
// patches) or r1.y (for triangle patches). It appears that the
// tessellator offloads the reordering of coordinates for edges to game
// shaders.
//
// In Banjo-Kazooie: Nuts & Bolts (triangle patches with per-edge
// factors), the shader multiplies the first control point's position by
// r0.z, the second CP's by r0.y, and the third CP's by r0.x. But before
// doing that it swizzles r0.xyz the following way depending on the value
// in r1.y:
// - ZXY for 1.0.
// - YZX for 2.0.
// - XZY for 4.0.
// - YXZ for 5.0.
// - ZYX for 6.0.
// Possibly, the logic here is that the value itself is the amount of
// rotation of the swizzle to the right, and 1 << 2 is set when the
// swizzle needs to be flipped before rotating.
//
// In Viva Pinata (quad patches with per-edge factors - not possible to
// test however as of December 12th, 2018), if we assume that r0.y is V
// and r0.z is U, the factors each control point value is multiplied by
// are the following:
// - (1-v)*(1-u), v*(1-u), (1-v)*u, v*u for 0.0 (base swizzle).
// - v*(1-u), (1-v)*(1-u), v*u, (1-v)*u for 1.0 (YXWZ).
// - v*u, (1-v)*u, v*(1-u), (1-v)*(1-u) for 2.0 (WZYX).
// - (1-v)*u, v*u, (1-v)*(1-u), v*(1-u) for 3.0 (ZWXY).
// According to the control point order at
// https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_vertex_shader_tessellator.txt
// the first is located at (0,0), the second at (0,1), the third at (1,0)
// and the fourth at (1,1). So, swizzle index 0 appears to be the correct
// one. But, this hasn't been tested yet.
//
// Direct3D 12 appears to be passing the coordinates in a consistent
// order, so we can just use ZYX for triangle patches.
//
// TODO(Triang3l): Support line patches.
uint32_t domain_location_swizzle_mask =
patch_primitive_type() == PrimitiveType::kTrianglePatch ? 0b0010
: 0b0001;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + temp_register_operand_length)); 2 + temp_register_operand_length));
if (uses_register_dynamic_addressing()) { if (uses_register_dynamic_addressing()) {
shader_code_.push_back( shader_code_.push_back(EncodeVectorMaskedOperand(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, domain_location_mask, 2));
domain_location_swizzle_mask, 2));
shader_code_.push_back(0); shader_code_.push_back(0);
} else { } else {
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, domain_location_swizzle_mask, 1)); D3D10_SB_OPERAND_TYPE_TEMP, domain_location_mask, 1));
} }
shader_code_.push_back(1);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0); shader_code_.push_back(0);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT,
domain_location_swizzle, 0));
++stat_.instruction_count; ++stat_.instruction_count;
if (uses_register_dynamic_addressing()) { if (uses_register_dynamic_addressing()) {
++stat_.array_instruction_count; ++stat_.array_instruction_count;
} else { } else {
++stat_.mov_instruction_count; ++stat_.mov_instruction_count;
} }
// Copy the primitive index to r0.x (for quad patches) or r1.x (for
// triangle patches) as a float.
// When using indexable temps, copy through a r# because x# are apparently
// only accessible via mov.
// TODO(Triang3l): Investigate what should be written for primitives (or
// even control points) for non-adaptive tessellation modes (they may
// possibly have an index buffer).
// TODO(Triang3l): Support line patches.
uint32_t primitive_id_gpr_index =
patch_primitive_type() == PrimitiveType::kTrianglePatch ? 1 : 0;
if (register_count() > primitive_id_gpr_index) {
uint32_t primitive_id_temp = uses_register_dynamic_addressing()
? PushSystemTemp()
: primitive_id_gpr_index;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(primitive_id_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, 0));
++stat_.instruction_count;
++stat_.conversion_instruction_count;
if (uses_register_dynamic_addressing()) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b0001, 2));
shader_code_.push_back(0);
shader_code_.push_back(primitive_id_gpr_index);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(primitive_id_temp);
++stat_.instruction_count;
++stat_.array_instruction_count;
// Release primitive_id_temp.
PopSystemTemp();
}
}
if (register_count() >= 2) {
// Write the swizzle of the barycentric/UV coordinates to r1.x (for quad
// patches) or r1.y (for triangle patches). It appears that the
// tessellator offloads the reordering of coordinates for edges to game
// shaders.
//
// In Banjo-Kazooie: Nuts & Bolts (triangle patches with per-edge
// factors), the shader multiplies the first control point's position by
// r0.z, the second CP's by r0.y, and the third CP's by r0.x. But before
// doing that it swizzles r0.xyz the following way depending on the
// value in r1.y:
// - ZXY for 1.0.
// - YZX for 2.0.
// - XZY for 4.0.
// - YXZ for 5.0.
// - ZYX for 6.0.
// Possibly, the logic here is that the value itself is the amount of
// rotation of the swizzle to the right, and 1 << 2 is set when the
// swizzle needs to be flipped before rotating.
//
// In Viva Pinata (quad patches with per-edge factors - not possible to
// test however as of December 12th, 2018), if we assume that r0.y is V
// and r0.z is U, the factors each control point value is multiplied by
// are the following:
// - (1-v)*(1-u), v*(1-u), (1-v)*u, v*u for 0.0 (base swizzle).
// - v*(1-u), (1-v)*(1-u), v*u, (1-v)*u for 1.0 (YXWZ).
// - v*u, (1-v)*u, v*(1-u), (1-v)*(1-u) for 2.0 (WZYX).
// - (1-v)*u, v*u, (1-v)*(1-u), v*(1-u) for 3.0 (ZWXY).
// According to the control point order at
// https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_vertex_shader_tessellator.txt
// the first is located at (0,0), the second at (0,1), the third at
// (1,0) and the fourth at (1,1). So, swizzle index 0 appears to be the
// correct one. But, this hasn't been tested yet.
//
// Direct3D 12 appears to be passing the coordinates in a consistent
// order, so we can just use ZYX for triangle patches.
//
// TODO(Triang3l): Support line patches.
uint32_t domain_location_swizzle_mask =
patch_primitive_type() == PrimitiveType::kTrianglePatch ? 0b0010
: 0b0001;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + temp_register_operand_length));
if (uses_register_dynamic_addressing()) {
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP,
domain_location_swizzle_mask, 2));
shader_code_.push_back(0);
} else {
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, domain_location_swizzle_mask, 1));
}
shader_code_.push_back(1);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
if (uses_register_dynamic_addressing()) {
++stat_.array_instruction_count;
} else {
++stat_.mov_instruction_count;
}
}
} }
} }
} }
@ -4796,6 +4804,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
// General-purpose registers if using dynamic indexing (x0). // General-purpose registers if using dynamic indexing (x0).
if (!is_depth_only_pixel_shader_ && uses_register_dynamic_addressing()) { if (!is_depth_only_pixel_shader_ && uses_register_dynamic_addressing()) {
assert_true(register_count() != 0);
shader_object_.push_back( shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));

View File

@ -503,6 +503,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
kVSOutPosition, kVSOutPosition,
kVSOutClipDistance0123, kVSOutClipDistance0123,
kVSOutClipDistance45, kVSOutClipDistance45,
// TODO(Triang3l): Use SV_CullDistance instead for
// PA_CL_CLIP_CNTL::UCP_CULL_ONLY_ENA, but can't have more than 8 clip and
// cull distances in total.
kPSInInterpolators = 0, kPSInInterpolators = 0,
kPSInPointParameters = kPSInInterpolators + kInterpolatorCount, kPSInPointParameters = kPSInInterpolators + kInterpolatorCount,

View File

@ -13,15 +13,11 @@
#include <cstdint> #include <cstdint>
#include <cstdlib> #include <cstdlib>
#include "xenia/gpu/registers.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
// GPU register indices. Each XE_GPU_REGISTER(index, type, name) entry seen
// while the macro below is defined expands to the enumerator
// `XE_GPU_REG_<name> = index,`; the `type` argument is ignored here.
// NOTE(review): register_table.inc is expected to consist solely of
// XE_GPU_REGISTER(...) invocations - its contents are not visible from here.
enum Register {
#define XE_GPU_REGISTER(index, type, name) XE_GPU_REG_##name = index,
#include "xenia/gpu/register_table.inc"
// Undefine immediately so the helper macro does not leak to includers.
#undef XE_GPU_REGISTER
};
struct RegisterInfo { struct RegisterInfo {
enum class Type { enum class Type {
kDword, kDword,
@ -44,8 +40,20 @@ class RegisterFile {
}; };
RegisterValue values[kRegisterCount]; RegisterValue values[kRegisterCount];
RegisterValue& operator[](int reg) { return values[reg]; } RegisterValue& operator[](uint32_t reg) { return values[reg]; }
RegisterValue& operator[](Register reg) { return values[reg]; } RegisterValue& operator[](Register reg) { return values[reg]; }
template <typename T>
T& Get(uint32_t reg) {
return *reinterpret_cast<T*>(&values[reg]);
}
// Same as the index-based accessor, but addressed with a Register enumerator:
// views the storage of register `reg` as T and returns a mutable reference
// into `values`.
// NOTE(review): no size or bounds validation is performed here.
template <typename T>
T& Get(Register reg) {
  return reinterpret_cast<T&>(values[reg]);
}
// Accessor for register structures that carry their own index: T must expose
// a static `register_index`, which selects the entry of `values` that is
// returned viewed as T (by mutable reference).
template <typename T>
T& Get() {
  constexpr uint32_t kIndex = T::register_index;
  return reinterpret_cast<T&>(values[kIndex]);
}
}; };
} // namespace gpu } // namespace gpu

View File

@ -0,0 +1,51 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2019 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/registers.h"
namespace xe {
namespace gpu {
namespace reg {
// Namespace-scope definitions of the `static constexpr uint32_t register_index`
// members that the register unions in registers.h declare (and initialize)
// in-class. One line is needed per register union.
// NOTE(review): presumably these exist so the members can be odr-used (for
// example bound to a const reference) when building as C++11/14. Since C++17
// static constexpr data members are implicitly inline and such redeclarations
// are redundant (and deprecated) - confirm the project's language standard
// before removing them.
constexpr uint32_t COHER_STATUS_HOST::register_index;
constexpr uint32_t WAIT_UNTIL::register_index;
constexpr uint32_t SQ_PROGRAM_CNTL::register_index;
constexpr uint32_t SQ_CONTEXT_MISC::register_index;
constexpr uint32_t VGT_OUTPUT_PATH_CNTL::register_index;
constexpr uint32_t VGT_HOS_CNTL::register_index;
constexpr uint32_t PA_SU_POINT_MINMAX::register_index;
constexpr uint32_t PA_SU_POINT_SIZE::register_index;
constexpr uint32_t PA_SU_SC_MODE_CNTL::register_index;
constexpr uint32_t PA_SU_VTX_CNTL::register_index;
constexpr uint32_t PA_SC_MPASS_PS_CNTL::register_index;
constexpr uint32_t PA_SC_VIZ_QUERY::register_index;
constexpr uint32_t PA_CL_CLIP_CNTL::register_index;
constexpr uint32_t PA_CL_VTE_CNTL::register_index;
constexpr uint32_t PA_SC_WINDOW_OFFSET::register_index;
constexpr uint32_t PA_SC_WINDOW_SCISSOR_TL::register_index;
constexpr uint32_t PA_SC_WINDOW_SCISSOR_BR::register_index;
constexpr uint32_t RB_MODECONTROL::register_index;
constexpr uint32_t RB_SURFACE_INFO::register_index;
constexpr uint32_t RB_COLORCONTROL::register_index;
constexpr uint32_t RB_COLOR_INFO::register_index;
constexpr uint32_t RB_COLOR_MASK::register_index;
constexpr uint32_t RB_DEPTHCONTROL::register_index;
constexpr uint32_t RB_STENCILREFMASK::register_index;
constexpr uint32_t RB_DEPTH_INFO::register_index;
constexpr uint32_t RB_COPY_CONTROL::register_index;
constexpr uint32_t RB_COPY_DEST_INFO::register_index;
constexpr uint32_t RB_COPY_DEST_PITCH::register_index;
} // namespace reg
} // namespace gpu
} // namespace xe

View File

@ -20,15 +20,22 @@
// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h // https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h
namespace xe { namespace xe {
namespace gpu { namespace gpu {
// GPU register indices. Each XE_GPU_REGISTER(index, type, name) entry seen
// while the macro below is defined expands to the enumerator
// `XE_GPU_REG_<name> = index,`; the `type` argument is ignored here.
// The register unions in namespace reg use these enumerators to initialize
// their `register_index` members.
// NOTE(review): register_table.inc is expected to consist solely of
// XE_GPU_REGISTER(...) invocations - its contents are not visible from here.
enum Register {
#define XE_GPU_REGISTER(index, type, name) XE_GPU_REG_##name = index,
#include "xenia/gpu/register_table.inc"
// Undefine immediately so the helper macro does not leak to includers.
#undef XE_GPU_REGISTER
};
namespace reg { namespace reg {
/************************************************** /*******************************************************************************
___ ___ _ _ _____ ___ ___ _ ___ ___ _ _ _____ ___ ___ _
/ __/ _ \| \| |_ _| _ \/ _ \| | / __/ _ \| \| |_ _| _ \/ _ \| |
| (_| (_) | .` | | | | / (_) | |__ | (_| (_) | .` | | | | / (_) | |__
\___\___/|_|\_| |_| |_|_\\___/|____| \___\___/|_|\_| |_| |_|_\\___/|____|
***************************************************/ *******************************************************************************/
union COHER_STATUS_HOST { union COHER_STATUS_HOST {
xe::bf<uint32_t, 0, 8> matching_contexts; xe::bf<uint32_t, 0, 8> matching_contexts;
@ -49,6 +56,7 @@ union COHER_STATUS_HOST {
xe::bf<uint32_t, 31, 1> status; xe::bf<uint32_t, 31, 1> status;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_COHER_STATUS_HOST;
}; };
union WAIT_UNTIL { union WAIT_UNTIL {
@ -69,9 +77,82 @@ union WAIT_UNTIL {
xe::bf<uint32_t, 20, 4> cmdfifo_entries; xe::bf<uint32_t, 20, 4> cmdfifo_entries;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_WAIT_UNTIL;
}; };
/************************************************** /*******************************************************************************
___ ___ ___ _ _ ___ _ _ ___ ___ ___
/ __| __/ _ \| | | | __| \| |/ __| __| _ \
\__ \ _| (_) | |_| | _|| .` | (__| _|| /
|___/___\__\_\\___/|___|_|\_|\___|___|_|_\
*******************************************************************************/
// Bit-field view of the SQ_PROGRAM_CNTL register. All members overlay the same
// storage; `value` is the raw dword.
// NOTE(review): the xe::bf arguments are read as <type, first bit, bit count>
// (the fields below then tile bits 0-31 without gaps) - confirm against the
// xe::bf definition.
union SQ_PROGRAM_CNTL {
// Note from a2xx.xml:
// Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, but
// high bit is set to indicate "0 registers used".
// ShaderTranslator::Translate decodes either field as
// (num_reg & 0x80) ? 0 : (num_reg + 1), i.e. the stored value is the register
// count minus one unless bit 7 is set.
xe::bf<uint32_t, 0, 8> vs_num_reg;
xe::bf<uint32_t, 8, 8> ps_num_reg;
xe::bf<uint32_t, 16, 1> vs_resource;
xe::bf<uint32_t, 17, 1> ps_resource;
xe::bf<uint32_t, 18, 1> param_gen;
xe::bf<uint32_t, 19, 1> gen_index_pix;
xe::bf<uint32_t, 20, 4> vs_export_count;
xe::bf<xenos::VertexShaderExportMode, 24, 3> vs_export_mode;
xe::bf<uint32_t, 27, 4> ps_export_mode;
xe::bf<uint32_t, 31, 1> gen_index_vtx;
// Raw 32-bit register contents.
uint32_t value;
// Index of this register in the register file; lets RegisterFile::Get<T>()
// locate it without an explicit index argument.
static constexpr uint32_t register_index = XE_GPU_REG_SQ_PROGRAM_CNTL;
};
// Bit-field view of the SQ_CONTEXT_MISC register. All members overlay the same
// storage; `value` is the raw dword.
// NOTE(review): the xe::bf arguments are read as <type, first bit, bit count>;
// under that reading bits 4-7 and 19-31 have no named field here.
union SQ_CONTEXT_MISC {
xe::bf<uint32_t, 0, 1> inst_pred_optimize;
xe::bf<uint32_t, 1, 1> sc_output_screen_xy;
xe::bf<xenos::SampleControl, 2, 2> sc_sample_cntl;
xe::bf<uint32_t, 8, 8> param_gen_pos;
xe::bf<uint32_t, 16, 1> perfcounter_ref;
// The misspelling is intentional (kept as in the original register name).
xe::bf<uint32_t, 17, 1> yeild_optimize; // sic
xe::bf<uint32_t, 18, 1> tx_cache_sel;
// Raw 32-bit register contents.
uint32_t value;
// Index of this register in the register file; lets RegisterFile::Get<T>()
// locate it without an explicit index argument.
static constexpr uint32_t register_index = XE_GPU_REG_SQ_CONTEXT_MISC;
};
/*******************************************************************************
__ _____ ___ _____ _____ __
\ \ / / __| _ \_ _| __\ \/ /
\ V /| _|| / | | | _| > <
\_/ |___|_|_\ |_| |___/_/\_\
___ ___ ___ _ _ ___ ___ ___ _ _ _ ___
/ __| _ \/ _ \| | | | _ \ __| _ \ /_\ | \| | \
| (_ | / (_) | |_| | _/ _|| / / _ \| .` | |) |
\___|_|_\\___/ \___/|_| |___|_|_\ /_/ \_\_|\_|___/
_____ ___ ___ ___ ___ _ _ _ _____ ___ ___
|_ _| __/ __/ __| __| | | | /_\_ _/ _ \| _ \
| | | _|\__ \__ \ _|| |__| |__ / _ \| || (_) | /
|_| |___|___/___/___|____|____/_/ \_\_| \___/|_|_\
*******************************************************************************/
// Bit-field view of the VGT_OUTPUT_PATH_CNTL register; only the
// xenos::VGTOutputPath selector is named, the remaining bits are reachable
// through `value` only.
// NOTE(review): xe::bf arguments read as <type, first bit, bit count>, i.e.
// path_select would occupy bits 0-1 - confirm against the xe::bf definition.
union VGT_OUTPUT_PATH_CNTL {
xe::bf<xenos::VGTOutputPath, 0, 2> path_select;
// Raw 32-bit register contents.
uint32_t value;
// Index of this register in the register file; lets RegisterFile::Get<T>()
// locate it without an explicit index argument.
static constexpr uint32_t register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL;
};
// Bit-field view of the VGT_HOS_CNTL register; only the
// xenos::TessellationMode field is named, the remaining bits are reachable
// through `value` only.
// NOTE(review): xe::bf arguments read as <type, first bit, bit count>, i.e.
// tess_mode would occupy bits 0-1 - confirm against the xe::bf definition.
union VGT_HOS_CNTL {
xe::bf<xenos::TessellationMode, 0, 2> tess_mode;
// Raw 32-bit register contents.
uint32_t value;
// Index of this register in the register file; lets RegisterFile::Get<T>()
// locate it without an explicit index argument.
static constexpr uint32_t register_index = XE_GPU_REG_VGT_HOS_CNTL;
};
/*******************************************************************************
___ ___ ___ __ __ ___ _____ _____ _____ ___ ___ ___ __ __ ___ _____ _____ _____
| _ \ _ \_ _| \/ |_ _|_ _|_ _\ \ / / __| | _ \ _ \_ _| \/ |_ _|_ _|_ _\ \ / / __|
| _/ /| || |\/| || | | | | | \ V /| _| | _/ /| || |\/| || | | | | | \ V /| _|
@ -82,7 +163,25 @@ union WAIT_UNTIL {
/ _ \\__ \__ \ _|| |\/| | _ \ |__| _|| / / _ \\__ \__ \ _|| |\/| | _ \ |__| _|| /
/_/ \_\___/___/___|_| |_|___/____|___|_|_\ /_/ \_\___/___/___|_| |_|___/____|___|_|_\
***************************************************/ *******************************************************************************/
// Bit-field view of the PA_SU_POINT_MINMAX register: two 16-bit fields sharing
// one dword, with `value` as the raw contents.
// NOTE(review): xe::bf arguments read as <type, first bit, bit count>, i.e.
// min_size in the low half and max_size in the high half - confirm against the
// xe::bf definition.
union PA_SU_POINT_MINMAX {
// Radius, 12.4 fixed point.
xe::bf<uint32_t, 0, 16> min_size;
xe::bf<uint32_t, 16, 16> max_size;
// Raw 32-bit register contents.
uint32_t value;
// Index of this register in the register file; lets RegisterFile::Get<T>()
// locate it without an explicit index argument.
static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_MINMAX;
};
// Bit-field view of the PA_SU_POINT_SIZE register: two 16-bit fields sharing
// one dword, with `value` as the raw contents.
// NOTE(review): xe::bf arguments read as <type, first bit, bit count>, i.e.
// height in the low half and width in the high half - confirm against the
// xe::bf definition.
union PA_SU_POINT_SIZE {
// 1/2 width or height, 12.4 fixed point.
xe::bf<uint32_t, 0, 16> height;
xe::bf<uint32_t, 16, 16> width;
// Raw 32-bit register contents.
uint32_t value;
// Index of this register in the register file; lets RegisterFile::Get<T>()
// locate it without an explicit index argument.
static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_SIZE;
};
// Setup Unit / Scanline Converter mode cntl // Setup Unit / Scanline Converter mode cntl
union PA_SU_SC_MODE_CNTL { union PA_SU_SC_MODE_CNTL {
@ -110,6 +209,7 @@ union PA_SU_SC_MODE_CNTL {
xe::bf<uint32_t, 26, 1> wait_rb_idle_first_tri_new_state; xe::bf<uint32_t, 26, 1> wait_rb_idle_first_tri_new_state;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL;
}; };
// Setup Unit Vertex Control // Setup Unit Vertex Control
@ -119,6 +219,7 @@ union PA_SU_VTX_CNTL {
xe::bf<uint32_t, 3, 3> quant_mode; xe::bf<uint32_t, 3, 3> quant_mode;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_VTX_CNTL;
}; };
union PA_SC_MPASS_PS_CNTL { union PA_SC_MPASS_PS_CNTL {
@ -126,6 +227,7 @@ union PA_SC_MPASS_PS_CNTL {
xe::bf<uint32_t, 31, 1> mpass_ps_ena; xe::bf<uint32_t, 31, 1> mpass_ps_ena;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL;
}; };
// Scanline converter viz query // Scanline converter viz query
@ -135,11 +237,10 @@ union PA_SC_VIZ_QUERY {
xe::bf<uint32_t, 7, 1> kill_pix_post_early_z; xe::bf<uint32_t, 7, 1> kill_pix_post_early_z;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_VIZ_QUERY;
}; };
// Clipper clip control // Clipper clip control
// TODO(DrChat): This seem to differ. Need to examine this.
// https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11
union PA_CL_CLIP_CNTL { union PA_CL_CLIP_CNTL {
xe::bf<uint32_t, 0, 1> ucp_ena_0; xe::bf<uint32_t, 0, 1> ucp_ena_0;
xe::bf<uint32_t, 1, 1> ucp_ena_1; xe::bf<uint32_t, 1, 1> ucp_ena_1;
@ -160,6 +261,7 @@ union PA_CL_CLIP_CNTL {
xe::bf<uint32_t, 24, 1> w_nan_retain; xe::bf<uint32_t, 24, 1> w_nan_retain;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_CLIP_CNTL;
}; };
// Viewport transform engine control // Viewport transform engine control
@ -177,6 +279,7 @@ union PA_CL_VTE_CNTL {
xe::bf<uint32_t, 11, 1> perfcounter_ref; xe::bf<uint32_t, 11, 1> perfcounter_ref;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_VTE_CNTL;
}; };
union PA_SC_WINDOW_OFFSET { union PA_SC_WINDOW_OFFSET {
@ -184,6 +287,7 @@ union PA_SC_WINDOW_OFFSET {
xe::bf<int32_t, 16, 15> window_y_offset; xe::bf<int32_t, 16, 15> window_y_offset;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET;
}; };
union PA_SC_WINDOW_SCISSOR_TL { union PA_SC_WINDOW_SCISSOR_TL {
@ -192,6 +296,7 @@ union PA_SC_WINDOW_SCISSOR_TL {
xe::bf<uint32_t, 31, 1> window_offset_disable; xe::bf<uint32_t, 31, 1> window_offset_disable;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL;
}; };
union PA_SC_WINDOW_SCISSOR_BR { union PA_SC_WINDOW_SCISSOR_BR {
@ -199,20 +304,22 @@ union PA_SC_WINDOW_SCISSOR_BR {
xe::bf<uint32_t, 16, 14> br_y; xe::bf<uint32_t, 16, 14> br_y;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR;
}; };
/************************************************** /*******************************************************************************
___ ___ ___ ___
| _ \ _ ) | _ \ _ )
| / _ \ | / _ \
|_|_\___/ |_|_\___/
***************************************************/ *******************************************************************************/
union RB_MODECONTROL { union RB_MODECONTROL {
xe::bf<xenos::ModeControl, 0, 3> edram_mode; xe::bf<xenos::ModeControl, 0, 3> edram_mode;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_MODECONTROL;
}; };
union RB_SURFACE_INFO { union RB_SURFACE_INFO {
@ -221,27 +328,83 @@ union RB_SURFACE_INFO {
xe::bf<uint32_t, 18, 14> hiz_pitch; xe::bf<uint32_t, 18, 14> hiz_pitch;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_SURFACE_INFO;
}; };
union RB_COLORCONTROL { union RB_COLORCONTROL {
xe::bf<uint32_t, 0, 3> alpha_func; xe::bf<CompareFunction, 0, 3> alpha_func;
xe::bf<uint32_t, 3, 1> alpha_test_enable; xe::bf<uint32_t, 3, 1> alpha_test_enable;
xe::bf<uint32_t, 4, 1> alpha_to_mask_enable; xe::bf<uint32_t, 4, 1> alpha_to_mask_enable;
// Everything in between was added on Adreno, not in game PDBs and never set.
xe::bf<uint32_t, 24, 2> alpha_to_mask_offset0; xe::bf<uint32_t, 24, 2> alpha_to_mask_offset0;
xe::bf<uint32_t, 26, 2> alpha_to_mask_offset1; xe::bf<uint32_t, 26, 2> alpha_to_mask_offset1;
xe::bf<uint32_t, 28, 2> alpha_to_mask_offset2; xe::bf<uint32_t, 28, 2> alpha_to_mask_offset2;
xe::bf<uint32_t, 30, 2> alpha_to_mask_offset3; xe::bf<uint32_t, 30, 2> alpha_to_mask_offset3;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COLORCONTROL;
}; };
union RB_COLOR_INFO { union RB_COLOR_INFO {
xe::bf<uint32_t, 0, 12> color_base; xe::bf<uint32_t, 0, 12> color_base;
xe::bf<ColorRenderTargetFormat, 16, 4> color_format; xe::bf<ColorRenderTargetFormat, 16, 4> color_format;
xe::bf<uint32_t, 20, 6> color_exp_bias; xe::bf<int32_t, 20, 6> color_exp_bias;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_INFO;
// RB_COLOR[1-3]_INFO also use this format.
};
// Bit-field view of the RB_COLOR_MASK register: sixteen single-bit write
// flags, grouped as red/green/blue/alpha for each of the suffixes 0-3, plus
// `value` as the raw dword.
// NOTE(review): the numeric suffix presumably selects the color render target
// (four bits per target); xe::bf arguments are read as
// <type, first bit, bit count> - confirm both.
union RB_COLOR_MASK {
xe::bf<uint32_t, 0, 1> write_red0;
xe::bf<uint32_t, 1, 1> write_green0;
xe::bf<uint32_t, 2, 1> write_blue0;
xe::bf<uint32_t, 3, 1> write_alpha0;
xe::bf<uint32_t, 4, 1> write_red1;
xe::bf<uint32_t, 5, 1> write_green1;
xe::bf<uint32_t, 6, 1> write_blue1;
xe::bf<uint32_t, 7, 1> write_alpha1;
xe::bf<uint32_t, 8, 1> write_red2;
xe::bf<uint32_t, 9, 1> write_green2;
xe::bf<uint32_t, 10, 1> write_blue2;
xe::bf<uint32_t, 11, 1> write_alpha2;
xe::bf<uint32_t, 12, 1> write_red3;
xe::bf<uint32_t, 13, 1> write_green3;
xe::bf<uint32_t, 14, 1> write_blue3;
xe::bf<uint32_t, 15, 1> write_alpha3;
// Raw 32-bit register contents.
uint32_t value;
// Index of this register in the register file; lets RegisterFile::Get<T>()
// locate it without an explicit index argument.
static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_MASK;
};
// Bit-field view of the RB_DEPTHCONTROL register: depth/stencil enables, the
// depth compare function, and one set of stencil function/operations plus a
// second set with the `_bf` suffix. `value` is the raw dword.
// NOTE(review): xe::bf arguments are read as <type, first bit, bit count>;
// the `_bf` fields are presumably the back-face variants gated by
// backface_enable - confirm against the users of this register.
union RB_DEPTHCONTROL {
xe::bf<uint32_t, 0, 1> stencil_enable;
xe::bf<uint32_t, 1, 1> z_enable;
xe::bf<uint32_t, 2, 1> z_write_enable;
// Bit 3 is left unnamed here:
// EARLY_Z_ENABLE was added on Adreno.
xe::bf<CompareFunction, 4, 3> zfunc;
xe::bf<uint32_t, 7, 1> backface_enable;
xe::bf<CompareFunction, 8, 3> stencilfunc;
xe::bf<StencilOp, 11, 3> stencilfail;
xe::bf<StencilOp, 14, 3> stencilzpass;
xe::bf<StencilOp, 17, 3> stencilzfail;
xe::bf<CompareFunction, 20, 3> stencilfunc_bf;
xe::bf<StencilOp, 23, 3> stencilfail_bf;
xe::bf<StencilOp, 26, 3> stencilzpass_bf;
xe::bf<StencilOp, 29, 3> stencilzfail_bf;
// Raw 32-bit register contents.
uint32_t value;
// Index of this register in the register file; lets RegisterFile::Get<T>()
// locate it without an explicit index argument.
static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTHCONTROL;
};
union RB_STENCILREFMASK {
xe::bf<uint32_t, 0, 8> stencilref;
xe::bf<uint32_t, 8, 8> stencilmask;
xe::bf<uint32_t, 16, 8> stencilwritemask;
uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_STENCILREFMASK;
// RB_STENCILREFMASK_BF also uses this format.
}; };
union RB_DEPTH_INFO { union RB_DEPTH_INFO {
@ -249,6 +412,7 @@ union RB_DEPTH_INFO {
xe::bf<DepthRenderTargetFormat, 16, 1> depth_format; xe::bf<DepthRenderTargetFormat, 16, 1> depth_format;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTH_INFO;
}; };
union RB_COPY_CONTROL { union RB_COPY_CONTROL {
@ -260,6 +424,7 @@ union RB_COPY_CONTROL {
xe::bf<xenos::CopyCommand, 20, 2> copy_command; xe::bf<xenos::CopyCommand, 20, 2> copy_command;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_CONTROL;
}; };
union RB_COPY_DEST_INFO { union RB_COPY_DEST_INFO {
@ -268,10 +433,11 @@ union RB_COPY_DEST_INFO {
xe::bf<uint32_t, 4, 3> copy_dest_slice; xe::bf<uint32_t, 4, 3> copy_dest_slice;
xe::bf<ColorFormat, 7, 6> copy_dest_format; xe::bf<ColorFormat, 7, 6> copy_dest_format;
xe::bf<uint32_t, 13, 3> copy_dest_number; xe::bf<uint32_t, 13, 3> copy_dest_number;
xe::bf<uint32_t, 16, 6> copy_dest_exp_bias; xe::bf<int32_t, 16, 6> copy_dest_exp_bias;
xe::bf<uint32_t, 24, 1> copy_dest_swap; xe::bf<uint32_t, 24, 1> copy_dest_swap;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_INFO;
}; };
union RB_COPY_DEST_PITCH { union RB_COPY_DEST_PITCH {
@ -279,9 +445,11 @@ union RB_COPY_DEST_PITCH {
xe::bf<uint32_t, 16, 14> copy_dest_height; xe::bf<uint32_t, 16, 14> copy_dest_height;
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_PITCH;
}; };
} // namespace reg } // namespace reg
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -108,10 +108,12 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) {
} }
bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type, bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type,
xenos::xe_gpu_program_cntl_t cntl) { reg::SQ_PROGRAM_CNTL cntl) {
Reset(); Reset();
register_count_ = shader->type() == ShaderType::kVertex ? cntl.vs_regs + 1 uint32_t cntl_num_reg = shader->type() == ShaderType::kVertex
: cntl.ps_regs + 1; ? cntl.vs_num_reg.value()
: cntl.ps_num_reg.value();
register_count_ = (cntl_num_reg & 0x80) ? 0 : (cntl_num_reg + 1);
return TranslateInternal(shader, patch_type); return TranslateInternal(shader, patch_type);
} }

View File

@ -17,6 +17,7 @@
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/string_buffer.h" #include "xenia/base/string_buffer.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/shader.h" #include "xenia/gpu/shader.h"
#include "xenia/gpu/ucode.h" #include "xenia/gpu/ucode.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
@ -33,7 +34,7 @@ class ShaderTranslator {
bool GatherAllBindingInformation(Shader* shader); bool GatherAllBindingInformation(Shader* shader);
bool Translate(Shader* shader, PrimitiveType patch_type, bool Translate(Shader* shader, PrimitiveType patch_type,
xenos::xe_gpu_program_cntl_t cntl); reg::SQ_PROGRAM_CNTL cntl);
bool Translate(Shader* shader, PrimitiveType patch_type); bool Translate(Shader* shader, PrimitiveType patch_type);
protected: protected:
@ -232,7 +233,7 @@ class ShaderTranslator {
PrimitiveType patch_primitive_type_; PrimitiveType patch_primitive_type_;
const uint32_t* ucode_dwords_; const uint32_t* ucode_dwords_;
size_t ucode_dword_count_; size_t ucode_dword_count_;
xenos::xe_gpu_program_cntl_t program_cntl_; reg::SQ_PROGRAM_CNTL program_cntl_;
uint32_t register_count_; uint32_t register_count_;
// Accumulated translation errors. // Accumulated translation errors.

View File

@ -93,6 +93,7 @@ void SpirvShaderTranslator::StartTranslation() {
b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main", b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main",
{}, {}, &function_block); {}, {}, &function_block);
assert_not_zero(register_count());
registers_type_ = b.makeArrayType(vec4_float_type_, registers_type_ = b.makeArrayType(vec4_float_type_,
b.makeUintConstant(register_count()), 0); b.makeUintConstant(register_count()), 0);
registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction, registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction,

View File

@ -364,7 +364,7 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state,
} }
bool PipelineCache::TranslateShader(VulkanShader* shader, bool PipelineCache::TranslateShader(VulkanShader* shader,
xenos::xe_gpu_program_cntl_t cntl) { reg::SQ_PROGRAM_CNTL cntl) {
// Perform translation. // Perform translation.
// If this fails the shader will be marked as invalid and ignored later. // If this fails the shader will be marked as invalid and ignored later.
if (!shader_translator_->Translate(shader, PrimitiveType::kNone, cntl)) { if (!shader_translator_->Translate(shader, PrimitiveType::kNone, cntl)) {
@ -808,8 +808,8 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
} }
bool push_constants_dirty = full_update || viewport_state_dirty; bool push_constants_dirty = full_update || viewport_state_dirty;
push_constants_dirty |= push_constants_dirty |= SetShadowRegister(&regs.sq_program_cntl.value,
SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); XE_GPU_REG_SQ_PROGRAM_CNTL);
push_constants_dirty |= push_constants_dirty |=
SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
push_constants_dirty |= push_constants_dirty |=
@ -827,25 +827,14 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
push_constants_dirty |= push_constants_dirty |=
SetShadowRegister(&regs.pa_su_point_size, XE_GPU_REG_PA_SU_POINT_SIZE); SetShadowRegister(&regs.pa_su_point_size, XE_GPU_REG_PA_SU_POINT_SIZE);
if (push_constants_dirty) { if (push_constants_dirty) {
xenos::xe_gpu_program_cntl_t program_cntl;
program_cntl.dword_0 = regs.sq_program_cntl;
// Normal vertex shaders only, for now. // Normal vertex shaders only, for now.
// TODO(benvanik): transform feedback/memexport. assert_true(regs.sq_program_cntl.vs_export_mode ==
// https://github.com/freedreno/freedreno/blob/master/includes/a2xx.xml.h xenos::VertexShaderExportMode::kPosition1Vector ||
// Draw calls skipped if they have unsupported export modes. regs.sq_program_cntl.vs_export_mode ==
// 0 = positionOnly xenos::VertexShaderExportMode::kPosition2VectorsSprite ||
// 1 = unused regs.sq_program_cntl.vs_export_mode ==
// 2 = sprite xenos::VertexShaderExportMode::kMultipass);
// 3 = edge assert_false(regs.sq_program_cntl.gen_index_vtx);
// 4 = kill
// 5 = spriteKill
// 6 = edgeKill
// 7 = multipass
assert_true(program_cntl.vs_export_mode == 0 ||
program_cntl.vs_export_mode == 2 ||
program_cntl.vs_export_mode == 7);
assert_false(program_cntl.gen_index_vtx);
SpirvPushConstants push_constants = {}; SpirvPushConstants push_constants = {};
@ -909,7 +898,8 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
// Whether to populate a register in the pixel shader with frag coord. // Whether to populate a register in the pixel shader with frag coord.
int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF; int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF;
push_constants.ps_param_gen = program_cntl.param_gen ? ps_param_gen : -1; push_constants.ps_param_gen =
regs.sq_program_cntl.param_gen ? ps_param_gen : -1;
vkCmdPushConstants(command_buffer, pipeline_layout_, vkCmdPushConstants(command_buffer, pipeline_layout_,
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_VERTEX_BIT |
@ -1061,7 +1051,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
bool dirty = false; bool dirty = false;
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl, dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL); XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); dirty |= SetShadowRegister(&regs.sq_program_cntl.value,
XE_GPU_REG_SQ_PROGRAM_CNTL);
dirty |= regs.vertex_shader != vertex_shader; dirty |= regs.vertex_shader != vertex_shader;
dirty |= regs.pixel_shader != pixel_shader; dirty |= regs.pixel_shader != pixel_shader;
dirty |= regs.primitive_type != primitive_type; dirty |= regs.primitive_type != primitive_type;
@ -1073,17 +1064,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
return UpdateStatus::kCompatible; return UpdateStatus::kCompatible;
} }
xenos::xe_gpu_program_cntl_t sq_program_cntl;
sq_program_cntl.dword_0 = regs.sq_program_cntl;
if (!vertex_shader->is_translated() && if (!vertex_shader->is_translated() &&
!TranslateShader(vertex_shader, sq_program_cntl)) { !TranslateShader(vertex_shader, regs.sq_program_cntl)) {
XELOGE("Failed to translate the vertex shader!"); XELOGE("Failed to translate the vertex shader!");
return UpdateStatus::kError; return UpdateStatus::kError;
} }
if (pixel_shader && !pixel_shader->is_translated() && if (pixel_shader && !pixel_shader->is_translated() &&
!TranslateShader(pixel_shader, sq_program_cntl)) { !TranslateShader(pixel_shader, regs.sq_program_cntl)) {
XELOGE("Failed to translate the pixel shader!"); XELOGE("Failed to translate the pixel shader!");
return UpdateStatus::kError; return UpdateStatus::kError;
} }
@ -1513,7 +1501,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() {
auto& state_info = update_color_blend_state_info_; auto& state_info = update_color_blend_state_info_;
bool dirty = false; bool dirty = false;
dirty |= SetShadowRegister(&regs.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL);
dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
dirty |= dirty |=
SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
@ -1568,7 +1555,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() {
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
uint32_t blend_control = regs.rb_blendcontrol[i]; uint32_t blend_control = regs.rb_blendcontrol[i];
auto& attachment_state = attachment_states[i]; auto& attachment_state = attachment_states[i];
attachment_state.blendEnable = !(regs.rb_colorcontrol & 0x20); attachment_state.blendEnable = (blend_control & 0x1FFF1FFF) != 0x00010001;
// A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND
attachment_state.srcColorBlendFactor = attachment_state.srcColorBlendFactor =
kBlendFactorMap[(blend_control & 0x0000001F) >> 0]; kBlendFactorMap[(blend_control & 0x0000001F) >> 0];

View File

@ -79,7 +79,7 @@ class PipelineCache {
// state. // state.
VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key); VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key);
bool TranslateShader(VulkanShader* shader, xenos::xe_gpu_program_cntl_t cntl); bool TranslateShader(VulkanShader* shader, reg::SQ_PROGRAM_CNTL cntl);
void DumpShaderDisasmAMD(VkPipeline pipeline); void DumpShaderDisasmAMD(VkPipeline pipeline);
void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info); void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info);
@ -170,7 +170,7 @@ class PipelineCache {
struct UpdateShaderStagesRegisters { struct UpdateShaderStagesRegisters {
PrimitiveType primitive_type; PrimitiveType primitive_type;
uint32_t pa_su_sc_mode_cntl; uint32_t pa_su_sc_mode_cntl;
uint32_t sq_program_cntl; reg::SQ_PROGRAM_CNTL sq_program_cntl;
VulkanShader* vertex_shader; VulkanShader* vertex_shader;
VulkanShader* pixel_shader; VulkanShader* pixel_shader;
@ -256,7 +256,6 @@ class PipelineCache {
VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_; VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_;
struct UpdateColorBlendStateRegisters { struct UpdateColorBlendStateRegisters {
uint32_t rb_colorcontrol;
uint32_t rb_color_mask; uint32_t rb_color_mask;
uint32_t rb_blendcontrol[4]; uint32_t rb_blendcontrol[4];
uint32_t rb_modecontrol; uint32_t rb_modecontrol;
@ -290,7 +289,7 @@ class PipelineCache {
float rb_blend_rgba[4]; float rb_blend_rgba[4];
uint32_t rb_stencilrefmask; uint32_t rb_stencilrefmask;
uint32_t sq_program_cntl; reg::SQ_PROGRAM_CNTL sq_program_cntl;
uint32_t sq_context_misc; uint32_t sq_context_misc;
uint32_t rb_colorcontrol; uint32_t rb_colorcontrol;
uint32_t rb_color_info; uint32_t rb_color_info;

View File

@ -80,12 +80,6 @@ inline bool IsPrimitiveTwoFaced(bool tessellated, PrimitiveType type) {
return false; return false;
} }
// Tessellation modes, stored as 32-bit unsigned values.
// NOTE(review): presumably mirrors a hardware register field encoding -
// confirm against the register definition before changing any value.
enum class TessellationMode : uint32_t {
kDiscrete = 0,
kContinuous = 1,
kAdaptive = 2,
};
enum class Dimension : uint32_t { enum class Dimension : uint32_t {
k1D = 0, k1D = 0,
k2D = 1, k2D = 1,
@ -334,6 +328,28 @@ inline int GetVertexFormatSizeInWords(VertexFormat format) {
} }
} }
// Comparison functions, encoded in 3 bits (values 0 through 7).
// The binary literals show the structure of the encoding: bit 0 is set in the
// "less" variants, bit 1 in the "equal" variants and bit 2 in the "greater"
// variants, so combined conditions are the bitwise OR of the simple ones
// (kLessEqual = kLess | kEqual, kNotEqual = kLess | kGreater, and so on).
// NOTE(review): presumably matches the hardware depth/stencil/alpha test
// encoding - confirm against the register definitions.
enum class CompareFunction : uint32_t {
// No condition bits set.
kNever = 0b000,
kLess = 0b001,
kEqual = 0b010,
kLessEqual = 0b011,
kGreater = 0b100,
kNotEqual = 0b101,
kGreaterEqual = 0b110,
// All condition bits set.
kAlways = 0b111,
};
// Stencil operations, numbered contiguously from 0 through 7 (fits in 3 bits).
// Both increment and decrement have a "Clamp" and a "Wrap" variant.
// NOTE(review): presumably the values are the hardware register encoding -
// confirm before reordering or renumbering.
enum class StencilOp : uint32_t {
kKeep = 0,
kZero = 1,
kReplace = 2,
kIncrementClamp = 3,
kDecrementClamp = 4,
kInvert = 5,
kIncrementWrap = 6,
kDecrementWrap = 7,
};
// adreno_rb_blend_factor // adreno_rb_blend_factor
enum class BlendFactor : uint32_t { enum class BlendFactor : uint32_t {
kZero = 0, kZero = 0,
@ -375,6 +391,35 @@ typedef enum {
XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF,
} XE_GPU_INVALIDATE_MASK; } XE_GPU_INVALIDATE_MASK;
// a2xx_sq_ps_vtx_mode
// Vertex shader export modes, with values in the 0-7 range (3 bits).
// Value 1 deliberately has no enumerator here; every other value from 0
// through 7 is named.
enum class VertexShaderExportMode : uint32_t {
kPosition1Vector = 0,
// 1 is not defined.
kPosition2VectorsSprite = 2,
kPosition2VectorsEdge = 3,
kPosition2VectorsKill = 4,
kPosition2VectorsSpriteKill = 5,
kPosition2VectorsEdgeKill = 6,
kMultipass = 7,
};
// Sample control modes, stored as 32-bit unsigned values (0 through 2).
// NOTE(review): presumably selects whether centroids, centers, or both are
// sampled, as a hardware register field - confirm against the register
// definition.
enum class SampleControl : uint32_t {
kCentroidsOnly = 0,
kCentersOnly = 1,
kCentroidsAndCenters = 2,
};
// VGT output path selection, stored as 32-bit unsigned values (0 through 2).
// NOTE(review): presumably a hardware register field encoding - confirm
// against the register definition before changing any value.
enum class VGTOutputPath : uint32_t {
kVertexReuse = 0,
kTessellationEnable = 1,
kPassthru = 2,
};
// Tessellation modes, stored as 32-bit unsigned values (0 through 2).
// NOTE(review): presumably mirrors a hardware register field encoding -
// confirm against the register definition before changing any value.
enum class TessellationMode : uint32_t {
kDiscrete = 0,
kContinuous = 1,
kAdaptive = 2,
};
enum class ModeControl : uint32_t { enum class ModeControl : uint32_t {
kIgnore = 0, kIgnore = 0,
kColorDepth = 4, kColorDepth = 4,
@ -471,26 +516,6 @@ inline uint32_t GpuToCpu(uint32_t p) { return p; }
inline uint32_t CpuToGpu(uint32_t p) { return p & 0x1FFFFFFF; } inline uint32_t CpuToGpu(uint32_t p) { return p & 0x1FFFFFFF; }
// XE_GPU_REG_SQ_PROGRAM_CNTL
// Union giving two views of the same 32-bit register value: named bit-fields
// and the raw dword (dword_0). The bit-field widths add up to exactly 32.
// NOTE(review): the bit position of each field depends on the compiler's
// bit-field allocation order - confirm it matches the hardware layout on
// every supported toolchain.
typedef union {
XEPACKEDSTRUCTANONYMOUS({
uint32_t vs_regs : 6;
// Meaning unknown (2 bits).
uint32_t unk_0 : 2;
uint32_t ps_regs : 6;
// Meaning unknown (2 bits).
uint32_t unk_1 : 2;
uint32_t vs_resource : 1;
uint32_t ps_resource : 1;
uint32_t param_gen : 1;
uint32_t gen_index_pix : 1;
uint32_t vs_export_count : 4;
uint32_t vs_export_mode : 3;
uint32_t ps_export_depth : 1;
uint32_t ps_export_count : 3;
uint32_t gen_index_vtx : 1;
});
// Raw 32-bit register value overlaying the bit-fields above.
XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0; });
} xe_gpu_program_cntl_t;
// XE_GPU_REG_SHADER_CONSTANT_FETCH_* // XE_GPU_REG_SHADER_CONSTANT_FETCH_*
XEPACKEDUNION(xe_gpu_vertex_fetch_t, { XEPACKEDUNION(xe_gpu_vertex_fetch_t, {
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({