[GPU] Remove register reinterpret_casts + WAIT_REG_MEM volatility

Hopefully prevents some potential #1971-like situations.

WAIT_REG_MEM's implementation also allowed the compiler to load the polled
value only once, which caused an infinite loop with the other changes in
this commit (even in debug builds), so the value is now accessed as
volatile. It would possibly be even better to replace this with an atomic
load/store (acquire/release?) some day, at least for the registers that are
actually seen participating in those waits.

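Also fixes the endianness being handled only on the first wait iteration in
WAIT_REG_MEM: the low two bits of the poll address, which encode the
endianness, were cleared inside the loop, so every later iteration read them
as zero.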
This commit is contained in:
Triang3l 2024-05-12 17:17:30 +03:00
parent f0ad4f4587
commit 376bad5056
19 changed files with 336 additions and 318 deletions

View File

@ -18,6 +18,7 @@
#include "xenia/base/byte_stream.h" #include "xenia/base/byte_stream.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/base/ring_buffer.h" #include "xenia/base/ring_buffer.h"
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
@ -334,7 +335,8 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
return; return;
} }
regs.values[index].u32 = value; // Volatile for the WAIT_REG_MEM loop.
const_cast<volatile uint32_t&>(regs.values[index]) = value;
if (!regs.GetRegisterInfo(index)) { if (!regs.GetRegisterInfo(index)) {
XELOGW("GPU: Write to unknown register ({:04X} = {:08X})", index, value); XELOGW("GPU: Write to unknown register ({:04X} = {:08X})", index, value);
} }
@ -342,19 +344,20 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
// Scratch register writeback. // Scratch register writeback.
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) { if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0; uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) { if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK]) {
// Enabled - write to address. // Enabled - write to address.
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32; uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR];
uint32_t mem_addr = scratch_addr + (scratch_reg * 4); uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value); xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value);
} }
} else { } else {
switch (index) { switch (index) {
// If this is a COHER register, set the dirty flag. // If this is a COHER register, set the dirty flag.
// This will block the command processor the next time it WAIT_MEM_REGs // This will block the command processor the next time it WAIT_REG_MEMs
// and allow us to synchronize the memory. // and allow us to synchronize the memory.
case XE_GPU_REG_COHER_STATUS_HOST: { case XE_GPU_REG_COHER_STATUS_HOST: {
regs.values[index].u32 |= UINT32_C(0x80000000); const_cast<volatile uint32_t&>(regs.values[index]) |=
UINT32_C(0x80000000);
} break; } break;
case XE_GPU_REG_DC_LUT_RW_INDEX: { case XE_GPU_REG_DC_LUT_RW_INDEX: {
@ -365,12 +368,12 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
case XE_GPU_REG_DC_LUT_SEQ_COLOR: { case XE_GPU_REG_DC_LUT_SEQ_COLOR: {
// Should be in the 256-entry table writing mode. // Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>(); auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write // DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write
// enable mask is blue, green, red. // enable mask is blue, green, red.
bool write_gamma_ramp_component = bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) { if (write_gamma_ramp_component) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry = reg::DC_LUT_30_COLOR& gamma_ramp_entry =
@ -401,14 +404,14 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
case XE_GPU_REG_DC_LUT_PWL_DATA: { case XE_GPU_REG_DC_LUT_PWL_DATA: {
// Should be in the PWL writing mode. // Should be in the PWL writing mode.
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>(); auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// Bit 7 of the index is ignored for PWL. // Bit 7 of the index is ignored for PWL.
uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F; uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F;
// DC_LUT_PWL_DATA is likely in the red, green, blue order because // DC_LUT_PWL_DATA is likely in the red, green, blue order because
// DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red. // DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red.
bool write_gamma_ramp_component = bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) { if (write_gamma_ramp_component) {
reg::DC_LUT_PWL_DATA& gamma_ramp_entry = reg::DC_LUT_PWL_DATA& gamma_ramp_entry =
@ -436,10 +439,10 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
case XE_GPU_REG_DC_LUT_30_COLOR: { case XE_GPU_REG_DC_LUT_30_COLOR: {
// Should be in the 256-entry table writing mode. // Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>(); auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
uint32_t gamma_ramp_write_enable_mask = uint32_t gamma_ramp_write_enable_mask =
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111; regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111;
if (gamma_ramp_write_enable_mask) { if (gamma_ramp_write_enable_mask) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry = reg::DC_LUT_30_COLOR& gamma_ramp_entry =
gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index]; gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index];
@ -479,10 +482,12 @@ void CommandProcessor::MakeCoherent() {
// https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf // https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf
// https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454
RegisterFile* regs = register_file_; // Volatile because this may be called from the WAIT_REG_MEM loop.
auto& status_host = regs->Get<reg::COHER_STATUS_HOST>(); volatile uint32_t* regs_volatile = register_file_->values;
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; auto status_host = xe::memory::Reinterpret<reg::COHER_STATUS_HOST>(
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; uint32_t(regs_volatile[XE_GPU_REG_COHER_STATUS_HOST]));
uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST];
uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST];
if (!status_host.status) { if (!status_host.status) {
return; return;
@ -502,7 +507,7 @@ void CommandProcessor::MakeCoherent() {
base_host + size_host, size_host, action); base_host + size_host, size_host, action);
// Mark coherent. // Mark coherent.
status_host.status = 0; regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0;
} }
void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }
@ -940,28 +945,33 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingBuffer* reader,
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
// wait until a register or memory location is a specific value // wait until a register or memory location is a specific value
uint32_t wait_info = reader->ReadAndSwap<uint32_t>(); uint32_t wait_info = reader->ReadAndSwap<uint32_t>();
uint32_t poll_reg_addr = reader->ReadAndSwap<uint32_t>(); uint32_t poll_reg_addr = reader->ReadAndSwap<uint32_t>();
uint32_t ref = reader->ReadAndSwap<uint32_t>(); uint32_t ref = reader->ReadAndSwap<uint32_t>();
uint32_t mask = reader->ReadAndSwap<uint32_t>(); uint32_t mask = reader->ReadAndSwap<uint32_t>();
uint32_t wait = reader->ReadAndSwap<uint32_t>(); uint32_t wait = reader->ReadAndSwap<uint32_t>();
bool is_memory = (wait_info & 0x10) != 0;
assert_true(is_memory || poll_reg_addr < RegisterFile::kRegisterCount);
const volatile uint32_t& value_ref =
is_memory ? *reinterpret_cast<uint32_t*>(memory_->TranslatePhysical(
poll_reg_addr & ~uint32_t(0x3)))
: register_file_->values[poll_reg_addr];
bool matched = false; bool matched = false;
do { do {
uint32_t value; uint32_t value = value_ref;
if (wait_info & 0x10) { if (is_memory) {
// Memory. trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr & ~uint32_t(0x3)),
auto endianness = static_cast<xenos::Endian>(poll_reg_addr & 0x3); sizeof(uint32_t));
poll_reg_addr &= ~0x3; value = xenos::GpuSwap(value,
value = xe::load<uint32_t>(memory_->TranslatePhysical(poll_reg_addr)); static_cast<xenos::Endian>(poll_reg_addr & 0x3));
value = GpuSwap(value, endianness);
trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4);
} else { } else {
// Register.
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
value = register_file_->values[poll_reg_addr].u32;
if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) { if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
MakeCoherent(); MakeCoherent();
value = register_file_->values[poll_reg_addr].u32; value = value_ref;
} }
} }
switch (wait_info & 0x7) { switch (wait_info & 0x7) {
@ -1024,17 +1034,17 @@ bool CommandProcessor::ExecutePacketType3_REG_RMW(RingBuffer* reader,
uint32_t rmw_info = reader->ReadAndSwap<uint32_t>(); uint32_t rmw_info = reader->ReadAndSwap<uint32_t>();
uint32_t and_mask = reader->ReadAndSwap<uint32_t>(); uint32_t and_mask = reader->ReadAndSwap<uint32_t>();
uint32_t or_mask = reader->ReadAndSwap<uint32_t>(); uint32_t or_mask = reader->ReadAndSwap<uint32_t>();
uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32; uint32_t value = register_file_->values[rmw_info & 0x1FFF];
if ((rmw_info >> 31) & 0x1) { if ((rmw_info >> 31) & 0x1) {
// & reg // & reg
value &= register_file_->values[and_mask & 0x1FFF].u32; value &= register_file_->values[and_mask & 0x1FFF];
} else { } else {
// & imm // & imm
value &= and_mask; value &= and_mask;
} }
if ((rmw_info >> 30) & 0x1) { if ((rmw_info >> 30) & 0x1) {
// | reg // | reg
value |= register_file_->values[or_mask & 0x1FFF].u32; value |= register_file_->values[or_mask & 0x1FFF];
} else { } else {
// | imm // | imm
value |= or_mask; value |= or_mask;
@ -1055,7 +1065,7 @@ bool CommandProcessor::ExecutePacketType3_REG_TO_MEM(RingBuffer* reader,
uint32_t reg_val; uint32_t reg_val;
assert_true(reg_addr < RegisterFile::kRegisterCount); assert_true(reg_addr < RegisterFile::kRegisterCount);
reg_val = register_file_->values[reg_addr].u32; reg_val = register_file_->values[reg_addr];
auto endianness = static_cast<xenos::Endian>(mem_addr & 0x3); auto endianness = static_cast<xenos::Endian>(mem_addr & 0x3);
mem_addr &= ~0x3; mem_addr &= ~0x3;
@ -1105,7 +1115,7 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingBuffer* reader,
} else { } else {
// Register. // Register.
assert_true(poll_reg_addr < RegisterFile::kRegisterCount); assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
value = register_file_->values[poll_reg_addr].u32; value = register_file_->values[poll_reg_addr];
} }
bool matched = false; bool matched = false;
switch (wait_info & 0x7) { switch (wait_info & 0x7) {
@ -1240,7 +1250,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader,
if (fake_sample_count >= 0) { if (fake_sample_count >= 0) {
auto* pSampleCounts = auto* pSampleCounts =
memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>( memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>(
register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32); register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]);
// 0xFFFFFEED is written to these two locations by D3D only on D3DISSUE_END // 0xFFFFFEED is written to these two locations by D3D only on D3DISSUE_END
// and used to detect a finished query. // and used to detect a finished query.
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished && bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
@ -1599,10 +1609,10 @@ bool CommandProcessor::ExecutePacketType3_VIZ_QUERY(RingBuffer* reader,
// The scan converter writes the internal result back to the register here. // The scan converter writes the internal result back to the register here.
// We just fake it and say it was visible in case it is read back. // We just fake it and say it was visible in case it is read back.
if (id < 32) { if (id < 32) {
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0].u32 |= register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0] |= uint32_t(1)
uint32_t(1) << id; << id;
} else { } else {
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1].u32 |= register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1] |=
uint32_t(1) << (id - 32); uint32_t(1) << (id - 32);
} }
} }
@ -1614,9 +1624,8 @@ void CommandProcessor::InitializeTrace() {
// Write the initial register values, to be loaded directly into the // Write the initial register values, to be loaded directly into the
// RegisterFile since all registers, including those that may have side // RegisterFile since all registers, including those that may have side
// effects on setting, will be saved. // effects on setting, will be saved.
trace_writer_.WriteRegisters( trace_writer_.WriteRegisters(0, register_file_->values,
0, reinterpret_cast<const uint32_t*>(register_file_->values), RegisterFile::kRegisterCount, false);
RegisterFile::kRegisterCount, false);
trace_writer_.WriteGammaRamp(gamma_ramp_256_entry_table(), trace_writer_.WriteGammaRamp(gamma_ramp_256_entry_table(),
gamma_ramp_pwl_rgb(), gamma_ramp_rw_component_); gamma_ramp_pwl_rgb(), gamma_ramp_rw_component_);

View File

@ -17,6 +17,7 @@
#include "xenia/base/cvar.h" #include "xenia/base/cvar.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h" #include "xenia/gpu/d3d12/d3d12_graphics_system.h"
@ -2306,8 +2307,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) { while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) {
vfetch_bits_remaining &= ~(uint32_t(1) << j); vfetch_bits_remaining &= ~(uint32_t(1) << j);
uint32_t vfetch_index = i * 32 + j; uint32_t vfetch_index = i * 32 + j;
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>( xenos::xe_gpu_vertex_fetch_t vfetch_constant =
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); regs.GetVertexFetch(vfetch_index);
switch (vfetch_constant.type) { switch (vfetch_constant.type) {
case xenos::FetchConstantType::kVertex: case xenos::FetchConstantType::kVertex:
break; break;
@ -3050,10 +3051,10 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
// Blend factor. // Blend factor.
float blend_factor[] = { float blend_factor[] = {
regs[XE_GPU_REG_RB_BLEND_RED].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs[XE_GPU_REG_RB_BLEND_GREEN].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs[XE_GPU_REG_RB_BLEND_BLUE].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
}; };
// std::memcmp instead of != so in case of NaN, every draw won't be // std::memcmp instead of != so in case of NaN, every draw won't be
// invalidating it. // invalidating it.
@ -3100,7 +3101,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>(); auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>(); auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>(); auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>(); auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>(); auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>(); auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
@ -3241,9 +3242,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Tessellation factor range, plus 1.0 according to the images in // Tessellation factor range, plus 1.0 according to the images in
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
float tessellation_factor_min = float tessellation_factor_min =
regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f; regs.Get<float>(XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL) + 1.0f;
float tessellation_factor_max = float tessellation_factor_max =
regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f; regs.Get<float>(XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL) + 1.0f;
dirty |= system_constants_.tessellation_factor_range_min != dirty |= system_constants_.tessellation_factor_range_min !=
tessellation_factor_min; tessellation_factor_min;
system_constants_.tessellation_factor_range_min = tessellation_factor_min; system_constants_.tessellation_factor_range_min = tessellation_factor_min;
@ -3280,12 +3281,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
while (xe::bit_scan_forward(user_clip_planes_remaining, while (xe::bit_scan_forward(user_clip_planes_remaining,
&user_clip_plane_index)) { &user_clip_plane_index)) {
user_clip_planes_remaining &= ~(UINT32_C(1) << user_clip_plane_index); user_clip_planes_remaining &= ~(UINT32_C(1) << user_clip_plane_index);
const float* user_clip_plane = const void* user_clip_plane_regs =
&regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4].f32; &regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4];
if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane, if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane_regs,
4 * sizeof(float))) { 4 * sizeof(float))) {
dirty = true; dirty = true;
std::memcpy(user_clip_plane_write_ptr, user_clip_plane, std::memcpy(user_clip_plane_write_ptr, user_clip_plane_regs,
4 * sizeof(float)); 4 * sizeof(float));
} }
user_clip_plane_write_ptr += 4; user_clip_plane_write_ptr += 4;
@ -3423,9 +3424,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
color_exp_bias -= 5; color_exp_bias -= 5;
} }
} }
float color_exp_bias_scale; auto color_exp_bias_scale = xe::memory::Reinterpret<float>(
*reinterpret_cast<int32_t*>(&color_exp_bias_scale) = int32_t(0x3F800000 + (color_exp_bias << 23)));
0x3F800000 + (color_exp_bias << 23);
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale; dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
system_constants_.color_exp_bias[i] = color_exp_bias_scale; system_constants_.color_exp_bias[i] = color_exp_bias_scale;
if (edram_rov_used) { if (edram_rov_used) {
@ -3454,7 +3454,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
4 * sizeof(float)); 4 * sizeof(float));
uint32_t blend_factors_ops = uint32_t blend_factors_ops =
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
dirty |= system_constants_.edram_rt_blend_factors_ops[i] != dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
blend_factors_ops; blend_factors_ops;
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
@ -3477,22 +3477,22 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
if (primitive_polygonal) { if (primitive_polygonal) {
if (pa_su_sc_mode_cntl.poly_offset_front_enable) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset = poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
} }
if (pa_su_sc_mode_cntl.poly_offset_back_enable) { if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
poly_offset_back_scale = poly_offset_back_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
poly_offset_back_offset = poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
} }
} else { } else {
if (pa_su_sc_mode_cntl.poly_offset_para_enable) { if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset = poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset; poly_offset_back_offset = poly_offset_front_offset;
} }
@ -3567,21 +3567,21 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
} }
dirty |= system_constants_.edram_blend_constant[0] != dirty |= system_constants_.edram_blend_constant[0] !=
regs[XE_GPU_REG_RB_BLEND_RED].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
system_constants_.edram_blend_constant[0] = system_constants_.edram_blend_constant[0] =
regs[XE_GPU_REG_RB_BLEND_RED].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
dirty |= system_constants_.edram_blend_constant[1] != dirty |= system_constants_.edram_blend_constant[1] !=
regs[XE_GPU_REG_RB_BLEND_GREEN].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
system_constants_.edram_blend_constant[1] = system_constants_.edram_blend_constant[1] =
regs[XE_GPU_REG_RB_BLEND_GREEN].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
dirty |= system_constants_.edram_blend_constant[2] != dirty |= system_constants_.edram_blend_constant[2] !=
regs[XE_GPU_REG_RB_BLEND_BLUE].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
system_constants_.edram_blend_constant[2] = system_constants_.edram_blend_constant[2] =
regs[XE_GPU_REG_RB_BLEND_BLUE].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
dirty |= system_constants_.edram_blend_constant[3] != dirty |= system_constants_.edram_blend_constant[3] !=
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
system_constants_.edram_blend_constant[3] = system_constants_.edram_blend_constant[3] =
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
} }
cbuffer_binding_system_.up_to_date &= !dirty; cbuffer_binding_system_.up_to_date &= !dirty;
@ -3638,10 +3638,10 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
// These are the constant base addresses/ranges for shaders. // These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now cause nothing seems to differ on the Xbox // We have these hardcoded right now cause nothing seems to differ on the Xbox
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
// Check if the float constant layout is still the same and get the counts. // Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& float_constant_map_vertex = const Shader::ConstantRegisterMap& float_constant_map_vertex =
vertex_shader->constant_register_map(); vertex_shader->constant_register_map();
@ -3715,8 +3715,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index); float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(float_constants, std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)] (float_constant_index << 2)],
.f32,
4 * sizeof(float)); 4 * sizeof(float));
float_constants += 4 * sizeof(float); float_constants += 4 * sizeof(float);
} }
@ -3746,8 +3745,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index); float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(float_constants, std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)] (float_constant_index << 2)],
.f32,
4 * sizeof(float)); 4 * sizeof(float));
float_constants += 4 * sizeof(float); float_constants += 4 * sizeof(float);
} }
@ -3767,7 +3765,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
return false; return false;
} }
std::memcpy(bool_loop_constants, std::memcpy(bool_loop_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
kBoolLoopConstantsSize); kBoolLoopConstantsSize);
cbuffer_binding_bool_loop_.up_to_date = true; cbuffer_binding_bool_loop_.up_to_date = true;
current_graphics_root_up_to_date_ &= current_graphics_root_up_to_date_ &=
@ -3782,8 +3780,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
if (fetch_constants == nullptr) { if (fetch_constants == nullptr) {
return false; return false;
} }
std::memcpy(fetch_constants, std::memcpy(fetch_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0],
&regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
kFetchConstantsSize); kFetchConstantsSize);
cbuffer_binding_fetch_.up_to_date = true; cbuffer_binding_fetch_.up_to_date = true;
current_graphics_root_up_to_date_ &= current_graphics_root_up_to_date_ &=

View File

@ -960,8 +960,8 @@ uint32_t D3D12TextureCache::GetActiveTextureBindlessSRVIndex(
D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters( D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters(
const D3D12Shader::SamplerBinding& binding) const { const D3D12Shader::SamplerBinding& binding) const {
const auto& regs = register_file(); const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch =
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); regs.GetTextureFetch(binding.fetch_constant);
SamplerParameters parameters; SamplerParameters parameters;
@ -1441,8 +1441,7 @@ ID3D12Resource* D3D12TextureCache::RequestSwapTexture(
D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out, D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out,
xenos::TextureFormat& format_out) { xenos::TextureFormat& format_out) {
const auto& regs = register_file(); const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
TextureKey key; TextureKey key;
BindingInfoFromFetchConstant(fetch, key, nullptr); BindingInfoFromFetchConstant(fetch, key, nullptr);
if (!key.is_valid || key.base_page == 0 || if (!key.is_valid || key.base_page == 0 ||

View File

@ -15,6 +15,7 @@
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/cvar.h" #include "xenia/base/cvar.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/gpu/registers.h" #include "xenia/gpu/registers.h"
#include "xenia/gpu/ucode.h" #include "xenia/gpu/ucode.h"
@ -67,7 +68,7 @@ void DrawExtentEstimator::PositionYExportSink::Export(
point_size_ = value[0]; point_size_ = value[0];
} }
if (value_mask & 0b0100) { if (value_mask & 0b0100) {
vertex_kill_ = *reinterpret_cast<const uint32_t*>(&value[2]); vertex_kill_ = xe::memory::Reinterpret<uint32_t>(value[2]);
} }
} }
} }
@ -110,7 +111,7 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
xenos::Endian index_endian = vgt_dma_size.swap_mode; xenos::Endian index_endian = vgt_dma_size.swap_mode;
if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) { if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) {
xenos::IndexFormat index_format = vgt_draw_initiator.index_size; xenos::IndexFormat index_format = vgt_draw_initiator.index_size;
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32; uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE];
uint32_t index_buffer_read_count = uint32_t index_buffer_read_count =
std::min(uint32_t(vgt_draw_initiator.num_indices), std::min(uint32_t(vgt_draw_initiator.num_indices),
uint32_t(vgt_dma_size.num_words)); uint32_t(vgt_dma_size.num_words));
@ -145,21 +146,22 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>(); auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f; : 1.0f;
float viewport_y_offset = pa_cl_vte_cntl.vport_y_offset_ena float viewport_y_offset =
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 pa_cl_vte_cntl.vport_y_offset_ena
: 0.0f; ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f;
int32_t point_vertex_min_diameter_float = 0; int32_t point_vertex_min_diameter_float = 0;
int32_t point_vertex_max_diameter_float = 0; int32_t point_vertex_max_diameter_float = 0;
float point_constant_radius_y = 0.0f; float point_constant_radius_y = 0.0f;
if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) { if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) {
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>(); auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
*reinterpret_cast<float*>(&point_vertex_min_diameter_float) = point_vertex_min_diameter_float = xe::memory::Reinterpret<int32_t>(
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f); float(pa_su_point_minmax.min_size) * (2.0f / 16.0f));
*reinterpret_cast<float*>(&point_vertex_max_diameter_float) = point_vertex_max_diameter_float = xe::memory::Reinterpret<int32_t>(
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f); float(pa_su_point_minmax.max_size) * (2.0f / 16.0f));
point_constant_radius_y = point_constant_radius_y =
float(regs.Get<reg::PA_SU_POINT_SIZE>().height) * (1.0f / 16.0f); float(regs.Get<reg::PA_SU_POINT_SIZE>().height) * (1.0f / 16.0f);
} }
@ -224,12 +226,13 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
// Vertex-specified diameter. Clamped effectively as a signed integer in // Vertex-specified diameter. Clamped effectively as a signed integer in
// the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN // the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN
// to the maximum. // to the maximum.
point_radius_y = position_y_export_sink.point_size().value(); point_radius_y =
*reinterpret_cast<int32_t*>(&point_radius_y) = std::min( 0.5f *
point_vertex_max_diameter_float, xe::memory::Reinterpret<float>(std::min(
std::max(point_vertex_min_diameter_float, point_vertex_max_diameter_float,
*reinterpret_cast<const int32_t*>(&point_radius_y))); std::max(point_vertex_min_diameter_float,
point_radius_y *= 0.5f; xe::memory::Reinterpret<int32_t>(
position_y_export_sink.point_size().value()))));
} else { } else {
// Constant radius. // Constant radius.
point_radius_y = point_constant_radius_y; point_radius_y = point_constant_radius_y;
@ -331,11 +334,12 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
} }
// Then apply the floating-point viewport offset. // Then apply the floating-point viewport offset.
if (pa_cl_vte_cntl.vport_y_offset_ena) { if (pa_cl_vte_cntl.vport_y_offset_ena) {
viewport_bottom += regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; viewport_bottom += regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET);
} }
viewport_bottom += pa_cl_vte_cntl.vport_y_scale_ena viewport_bottom +=
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) pa_cl_vte_cntl.vport_y_scale_ena
: 1.0f; ? std::abs(regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE))
: 1.0f;
// Using floor, or, rather, truncation (because maxing with zero anyway) // Using floor, or, rather, truncation (because maxing with zero anyway)
// similar to how viewport scissoring behaves on real AMD, Intel and Nvidia // similar to how viewport scissoring behaves on real AMD, Intel and Nvidia
// GPUs on Direct3D 12 (but not WARP), also like in // GPUs on Direct3D 12 (but not WARP), also like in

View File

@ -11,7 +11,6 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <cstring>
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/cvar.h" #include "xenia/base/cvar.h"
@ -100,20 +99,20 @@ void GetPreferredFacePolygonOffset(const RegisterFile& regs,
// ones that are rendered (except for shadow volumes). // ones that are rendered (except for shadow volumes).
if (pa_su_sc_mode_cntl.poly_offset_front_enable && if (pa_su_sc_mode_cntl.poly_offset_front_enable &&
!pa_su_sc_mode_cntl.cull_front) { !pa_su_sc_mode_cntl.cull_front) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
} }
if (pa_su_sc_mode_cntl.poly_offset_back_enable && if (pa_su_sc_mode_cntl.poly_offset_back_enable &&
!pa_su_sc_mode_cntl.cull_back && !scale && !offset) { !pa_su_sc_mode_cntl.cull_back && !scale && !offset) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
} }
} else { } else {
// Non-triangle primitives use the front offset, but it's toggled via // Non-triangle primitives use the front offset, but it's toggled via
// poly_offset_para_enable. // poly_offset_para_enable.
if (pa_su_sc_mode_cntl.poly_offset_para_enable) { if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
} }
} }
scale_out = scale; scale_out = scale;
@ -148,7 +147,7 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader,
} }
// Check if a color target is actually written. // Check if a color target is actually written.
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
uint32_t rts_remaining = shader.writes_color_targets(); uint32_t rts_remaining = shader.writes_color_targets();
uint32_t rt_index; uint32_t rt_index;
while (xe::bit_scan_forward(rts_remaining, &rt_index)) { while (xe::bit_scan_forward(rts_remaining, &rt_index)) {
@ -311,24 +310,26 @@ void GetHostViewportInfo(const RegisterFile& regs,
// Obtain the original viewport values in a normalized way. // Obtain the original viewport values in a normalized way.
float scale_xy[] = { float scale_xy[] = {
pa_cl_vte_cntl.vport_x_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 pa_cl_vte_cntl.vport_x_scale_ena
: 1.0f, ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE)
pa_cl_vte_cntl.vport_y_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1.0f,
: 1.0f, pa_cl_vte_cntl.vport_y_scale_ena
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f,
}; };
float scale_z = pa_cl_vte_cntl.vport_z_scale_ena float scale_z = pa_cl_vte_cntl.vport_z_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE)
: 1.0f; : 1.0f;
float offset_base_xy[] = { float offset_base_xy[] = {
pa_cl_vte_cntl.vport_x_offset_ena pa_cl_vte_cntl.vport_x_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET)
: 0.0f, : 0.0f,
pa_cl_vte_cntl.vport_y_offset_ena pa_cl_vte_cntl.vport_y_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f, : 0.0f,
}; };
float offset_z = pa_cl_vte_cntl.vport_z_offset_ena float offset_z = pa_cl_vte_cntl.vport_z_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET)
: 0.0f; : 0.0f;
// Calculate all the integer.0 or integer.5 offsetting exactly at full // Calculate all the integer.0 or integer.5 offsetting exactly at full
// precision, separately so it can be used in other integer calculations // precision, separately so it can be used in other integer calculations
@ -615,7 +616,7 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs,
return 0; return 0;
} }
uint32_t normalized_color_mask = 0; uint32_t normalized_color_mask = 0;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
// Exclude the render targets not statically written to by the pixel shader. // Exclude the render targets not statically written to by the pixel shader.
// If the shader doesn't write to a render target, it shouldn't be written // If the shader doesn't write to a render target, it shouldn't be written
@ -661,9 +662,8 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
? regs.Get<reg::SQ_VS_CONST>().base ? regs.Get<reg::SQ_VS_CONST>().base
: regs.Get<reg::SQ_PS_CONST>().base; : regs.Get<reg::SQ_PS_CONST>().base;
for (uint32_t constant_index : shader.memexport_stream_constants()) { for (uint32_t constant_index : shader.memexport_stream_constants()) {
const auto& stream = regs.Get<xenos::xe_gpu_memexport_stream_t>( xenos::xe_gpu_memexport_stream_t stream =
XE_GPU_REG_SHADER_CONSTANT_000_X + regs.GetMemExportStream(float_constants_base + constant_index);
(float_constants_base + constant_index) * 4);
if (!stream.index_count) { if (!stream.index_count) {
continue; continue;
} }
@ -705,7 +705,7 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
} }
// Add a new range if haven't expanded an existing one. // Add a new range if haven't expanded an existing one.
if (!range_reused) { if (!range_reused) {
ranges_out.emplace_back(stream.base_address, stream_size_bytes); ranges_out.emplace_back(uint32_t(stream.base_address), stream_size_bytes);
} }
} }
} }
@ -824,8 +824,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
// Get the extent of pixels covered by the resolve rectangle, according to the // Get the extent of pixels covered by the resolve rectangle, according to the
// top-left rasterization rule. // top-left rasterization rule.
// D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU. // D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU.
auto fetch = regs.Get<xenos::xe_gpu_vertex_fetch_t>( xenos::xe_gpu_vertex_fetch_t fetch = regs.GetVertexFetch(0);
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) { if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) {
XELOGE("Unsupported resolve vertex buffer format"); XELOGE("Unsupported resolve vertex buffer format");
assert_always(); assert_always();
@ -994,7 +993,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
} }
// Calculate the destination memory extent. // Calculate the destination memory extent.
uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
uint32_t copy_dest_base_adjusted = rb_copy_dest_base; uint32_t copy_dest_base_adjusted = rb_copy_dest_base;
uint32_t copy_dest_extent_start, copy_dest_extent_end; uint32_t copy_dest_extent_start, copy_dest_extent_end;
auto rb_copy_dest_pitch = regs.Get<reg::RB_COPY_DEST_PITCH>(); auto rb_copy_dest_pitch = regs.Get<reg::RB_COPY_DEST_PITCH>();
@ -1164,9 +1163,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
info_out.copy_dest_info.copy_dest_swap = false; info_out.copy_dest_info.copy_dest_swap = false;
} }
info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR];
info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR];
info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO];
XELOGD( XELOGD(
"Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially " "Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially "

View File

@ -17,6 +17,7 @@
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/memory.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -1102,10 +1103,10 @@ struct Src : OperandAddress {
} }
static Src LI(int32_t x) { return LI(x, x, x, x); } static Src LI(int32_t x) { return LI(x, x, x, x); }
static Src LF(float x, float y, float z, float w) { static Src LF(float x, float y, float z, float w) {
return LU(*reinterpret_cast<const uint32_t*>(&x), return LU(xe::memory::Reinterpret<uint32_t>(x),
*reinterpret_cast<const uint32_t*>(&y), xe::memory::Reinterpret<uint32_t>(y),
*reinterpret_cast<const uint32_t*>(&z), xe::memory::Reinterpret<uint32_t>(z),
*reinterpret_cast<const uint32_t*>(&w)); xe::memory::Reinterpret<uint32_t>(w));
} }
static Src LF(float x) { return LF(x, x, x, x); } static Src LF(float x) { return LF(x, x, x, x); }
static Src LP(const uint32_t* xyzw) { static Src LP(const uint32_t* xyzw) {
@ -1222,12 +1223,10 @@ struct Src : OperandAddress {
bool negate) { bool negate) {
if (is_integer) { if (is_integer) {
if (absolute) { if (absolute) {
*reinterpret_cast<int32_t*>(&value) = value = uint32_t(std::abs(int32_t(value)));
std::abs(*reinterpret_cast<const int32_t*>(&value));
} }
if (negate) { if (negate) {
*reinterpret_cast<int32_t*>(&value) = value = uint32_t(-int32_t(value));
-*reinterpret_cast<const int32_t*>(&value);
} }
} else { } else {
if (absolute) { if (absolute) {

View File

@ -201,7 +201,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) {
} }
assert_true(r < RegisterFile::kRegisterCount); assert_true(r < RegisterFile::kRegisterCount);
return register_file_.values[r].u32; return register_file_.values[r];
} }
void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
@ -219,7 +219,7 @@ void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
} }
assert_true(r < RegisterFile::kRegisterCount); assert_true(r < RegisterFile::kRegisterCount);
register_file_.values[r].u32 = value; register_file_.values[r] = value;
} }
void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) { void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) {

View File

@ -42,7 +42,7 @@ struct PacketAction {
union { union {
struct { struct {
uint32_t index; uint32_t index;
RegisterFile::RegisterValue value; uint32_t value;
} register_write; } register_write;
struct { struct {
uint64_t value; uint64_t value;
@ -56,7 +56,7 @@ struct PacketAction {
PacketAction action; PacketAction action;
action.type = Type::kRegisterWrite; action.type = Type::kRegisterWrite;
action.register_write.index = index; action.register_write.index = index;
action.register_write.value.u32 = value; action.register_write.value = value;
return action; return action;
} }

View File

@ -498,8 +498,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
uint32_t index_size_log2 = uint32_t index_size_log2 =
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
// The base should already be aligned, but aligning here too for safety. // The base should already be aligned, but aligning here too for safety.
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & guest_index_base =
~uint32_t((1 << index_size_log2) - 1); regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
guest_index_buffer_needed_bytes = guest_draw_vertex_count guest_index_buffer_needed_bytes = guest_draw_vertex_count
<< index_size_log2; << index_size_log2;
if (guest_index_base > SharedMemory::kBufferSize || if (guest_index_base > SharedMemory::kBufferSize ||
@ -652,8 +652,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
uint32_t index_size_log2 = uint32_t index_size_log2 =
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
// The base should already be aligned, but aligning here too for safety. // The base should already be aligned, but aligning here too for safety.
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & guest_index_base =
~uint32_t((1 << index_size_log2) - 1); regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
guest_index_buffer_needed_bytes = guest_draw_vertex_count guest_index_buffer_needed_bytes = guest_draw_vertex_count
<< index_size_log2; << index_size_log2;
if (guest_index_base > SharedMemory::kBufferSize || if (guest_index_base > SharedMemory::kBufferSize ||

View File

@ -12,8 +12,12 @@
#include <cstdint> #include <cstdint>
#include <cstdlib> #include <cstdlib>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/memory.h"
#include "xenia/gpu/registers.h" #include "xenia/gpu/registers.h"
#include "xenia/gpu/xenos.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -34,39 +38,53 @@ class RegisterFile {
static const RegisterInfo* GetRegisterInfo(uint32_t index); static const RegisterInfo* GetRegisterInfo(uint32_t index);
static constexpr size_t kRegisterCount = 0x5003; static constexpr size_t kRegisterCount = 0x5003;
union RegisterValue { uint32_t values[kRegisterCount];
uint32_t u32;
float f32; const uint32_t& operator[](uint32_t reg) const { return values[reg]; }
}; uint32_t& operator[](uint32_t reg) { return values[reg]; }
RegisterValue values[kRegisterCount];
const RegisterValue& operator[](uint32_t reg) const { return values[reg]; }
RegisterValue& operator[](uint32_t reg) { return values[reg]; }
const RegisterValue& operator[](Register reg) const { return values[reg]; }
RegisterValue& operator[](Register reg) { return values[reg]; }
template <typename T> template <typename T>
const T& Get(uint32_t reg) const { T Get(uint32_t reg) const {
return *reinterpret_cast<const T*>(&values[reg]); return xe::memory::Reinterpret<T>(values[reg]);
} }
template <typename T> template <typename T>
T& Get(uint32_t reg) { T Get(Register reg) const {
return *reinterpret_cast<T*>(&values[reg]); return Get<T>(static_cast<uint32_t>(reg));
} }
template <typename T> template <typename T>
const T& Get(Register reg) const { T Get() const {
return *reinterpret_cast<const T*>(&values[reg]); return Get<T>(T::register_index);
} }
template <typename T>
T& Get(Register reg) { xenos::xe_gpu_vertex_fetch_t GetVertexFetch(uint32_t index) const {
return *reinterpret_cast<T*>(&values[reg]); assert_true(index < 96);
xenos::xe_gpu_vertex_fetch_t fetch;
std::memcpy(&fetch,
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(sizeof(fetch) / sizeof(uint32_t)) * index],
sizeof(fetch));
return fetch;
} }
template <typename T>
const T& Get() const { xenos::xe_gpu_texture_fetch_t GetTextureFetch(uint32_t index) const {
return *reinterpret_cast<const T*>(&values[T::register_index]); assert_true(index < 32);
xenos::xe_gpu_texture_fetch_t fetch;
std::memcpy(&fetch,
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(sizeof(fetch) / sizeof(uint32_t)) * index],
sizeof(fetch));
return fetch;
} }
template <typename T>
T& Get() { xenos::xe_gpu_memexport_stream_t GetMemExportStream(
return *reinterpret_cast<T*>(&values[T::register_index]); uint32_t float_constant_index) const {
assert_true(float_constant_index < 512);
xenos::xe_gpu_memexport_stream_t stream;
std::memcpy(
&stream,
&values[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * float_constant_index],
sizeof(stream));
return stream;
} }
}; };

View File

@ -28,10 +28,7 @@ void ShaderInterpreter::Execute() {
state_.Reset(); state_.Reset();
const uint32_t* bool_constants = const uint32_t* bool_constants =
&register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32; &register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031];
const xenos::LoopConstant* loop_constants =
reinterpret_cast<const xenos::LoopConstant*>(
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32);
bool exec_ended = false; bool exec_ended = false;
uint32_t cf_index_next = 1; uint32_t cf_index_next = 1;
@ -140,8 +137,8 @@ void ShaderInterpreter::Execute() {
cf_index_next = cf_loop_start.address(); cf_index_next = cf_loop_start.address();
continue; continue;
} }
xenos::LoopConstant loop_constant = auto loop_constant = register_file_.Get<xenos::LoopConstant>(
loop_constants[cf_loop_start.loop_id()]; XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + cf_loop_start.loop_id());
state_.loop_constants[state_.loop_stack_depth] = loop_constant; state_.loop_constants[state_.loop_stack_depth] = loop_constant;
uint32_t& loop_iterator_ref = uint32_t& loop_iterator_ref =
state_.loop_iterators[state_.loop_stack_depth]; state_.loop_iterators[state_.loop_stack_depth];
@ -170,8 +167,11 @@ void ShaderInterpreter::Execute() {
&cf_instr); &cf_instr);
xenos::LoopConstant loop_constant = xenos::LoopConstant loop_constant =
state_.loop_constants[state_.loop_stack_depth - 1]; state_.loop_constants[state_.loop_stack_depth - 1];
assert_true(loop_constant.value == assert_zero(
loop_constants[cf_loop_end.loop_id()].value); std::memcmp(&loop_constant,
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 +
cf_loop_end.loop_id()],
sizeof(loop_constant)));
uint32_t loop_iterator = uint32_t loop_iterator =
++state_.loop_iterators[state_.loop_stack_depth - 1]; ++state_.loop_iterators[state_.loop_stack_depth - 1];
if (loop_iterator < loop_constant.count && if (loop_iterator < loop_constant.count &&
@ -257,28 +257,31 @@ void ShaderInterpreter::Execute() {
} }
} }
const float* ShaderInterpreter::GetFloatConstant( const std::array<float, 4> ShaderInterpreter::GetFloatConstant(
uint32_t address, bool is_relative, bool relative_address_is_a0) const { uint32_t address, bool is_relative, bool relative_address_is_a0) const {
static const float zero[4] = {};
int32_t index = int32_t(address); int32_t index = int32_t(address);
if (is_relative) { if (is_relative) {
index += relative_address_is_a0 ? state_.address_register index += relative_address_is_a0 ? state_.address_register
: state_.GetLoopAddress(); : state_.GetLoopAddress();
} }
if (index < 0) { if (index < 0) {
return zero; return std::array<float, 4>();
} }
auto base_and_size_minus_1 = register_file_.Get<reg::SQ_VS_CONST>( auto base_and_size_minus_1 = register_file_.Get<reg::SQ_VS_CONST>(
shader_type_ == xenos::ShaderType::kVertex ? XE_GPU_REG_SQ_VS_CONST shader_type_ == xenos::ShaderType::kVertex ? XE_GPU_REG_SQ_VS_CONST
: XE_GPU_REG_SQ_PS_CONST); : XE_GPU_REG_SQ_PS_CONST);
if (uint32_t(index) > base_and_size_minus_1.size) { if (uint32_t(index) > base_and_size_minus_1.size) {
return zero; return std::array<float, 4>();
} }
index += base_and_size_minus_1.base; index += base_and_size_minus_1.base;
if (index >= 512) { if (index >= 512) {
return zero; return std::array<float, 4>();
} }
return &register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index].f32; std::array<float, 4> value;
std::memcpy(value.data(),
&register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index],
sizeof(float) * 4);
return value;
} }
void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
@ -297,6 +300,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
const float* vector_src_ptr; const float* vector_src_ptr;
uint32_t vector_src_register = instr.src_reg(1 + i); uint32_t vector_src_register = instr.src_reg(1 + i);
bool vector_src_absolute = false; bool vector_src_absolute = false;
std::array<float, 4> vector_src_float_constant;
if (instr.src_is_temp(1 + i)) { if (instr.src_is_temp(1 + i)) {
vector_src_ptr = GetTempRegister( vector_src_ptr = GetTempRegister(
ucode::AluInstruction::src_temp_reg(vector_src_register), ucode::AluInstruction::src_temp_reg(vector_src_register),
@ -304,9 +308,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
vector_src_register); vector_src_register);
} else { } else {
vector_src_ptr = GetFloatConstant( vector_src_float_constant = GetFloatConstant(
vector_src_register, instr.src_const_is_addressed(1 + i), vector_src_register, instr.src_const_is_addressed(1 + i),
instr.is_const_address_register_relative()); instr.is_const_address_register_relative());
vector_src_ptr = vector_src_float_constant.data();
} }
uint32_t vector_src_absolute_mask = uint32_t vector_src_absolute_mask =
~(uint32_t(vector_src_absolute) << 31); ~(uint32_t(vector_src_absolute) << 31);
@ -618,6 +623,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
// r#/c#.w or r#/c#.wx. // r#/c#.w or r#/c#.wx.
const float* scalar_src_ptr; const float* scalar_src_ptr;
uint32_t scalar_src_register = instr.src_reg(3); uint32_t scalar_src_register = instr.src_reg(3);
std::array<float, 4> scalar_src_float_constant;
if (instr.src_is_temp(3)) { if (instr.src_is_temp(3)) {
scalar_src_ptr = GetTempRegister( scalar_src_ptr = GetTempRegister(
ucode::AluInstruction::src_temp_reg(scalar_src_register), ucode::AluInstruction::src_temp_reg(scalar_src_register),
@ -625,9 +631,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
scalar_src_register); scalar_src_register);
} else { } else {
scalar_src_ptr = GetFloatConstant( scalar_src_float_constant = GetFloatConstant(
scalar_src_register, instr.src_const_is_addressed(3), scalar_src_register, instr.src_const_is_addressed(3),
instr.is_const_address_register_relative()); instr.is_const_address_register_relative());
scalar_src_ptr = scalar_src_float_constant.data();
} }
uint32_t scalar_src_swizzle = instr.src_swizzle(3); uint32_t scalar_src_swizzle = instr.src_swizzle(3);
scalar_operand_component_count = scalar_operand_component_count =
@ -984,10 +991,8 @@ void ShaderInterpreter::ExecuteVertexFetchInstruction(
state_.vfetch_full_last = instr; state_.vfetch_full_last = instr;
} }
xenos::xe_gpu_vertex_fetch_t fetch_constant = xenos::xe_gpu_vertex_fetch_t fetch_constant = register_file_.GetVertexFetch(
*reinterpret_cast<const xenos::xe_gpu_vertex_fetch_t*>( state_.vfetch_full_last.fetch_constant_index());
&register_file_[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
state_.vfetch_full_last.fetch_constant_index()]);
if (!instr.is_mini_fetch()) { if (!instr.is_mini_fetch()) {
// Get the part of the address that depends on vfetch_full data. // Get the part of the address that depends on vfetch_full data.

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_SHADER_INTERPRETER_H_ #define XENIA_GPU_SHADER_INTERPRETER_H_
#include <algorithm> #include <algorithm>
#include <array>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
@ -120,8 +121,8 @@ class ShaderInterpreter {
float* GetTempRegister(uint32_t address, bool is_relative) { float* GetTempRegister(uint32_t address, bool is_relative) {
return temp_registers_[GetTempRegisterIndex(address, is_relative)]; return temp_registers_[GetTempRegisterIndex(address, is_relative)];
} }
const float* GetFloatConstant(uint32_t address, bool is_relative, const std::array<float, 4> GetFloatConstant(
bool relative_address_is_a0) const; uint32_t address, bool is_relative, bool relative_address_is_a0) const;
void ExecuteAluInstruction(ucode::AluInstruction instr); void ExecuteAluInstruction(ucode::AluInstruction instr);
void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle, void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle,

View File

@ -333,8 +333,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
uint32_t index_bit = UINT32_C(1) << index; uint32_t index_bit = UINT32_C(1) << index;
textures_remaining &= ~index_bit; textures_remaining &= ~index_bit;
TextureBinding& binding = texture_bindings_[index]; TextureBinding& binding = texture_bindings_[index];
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(index);
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6);
TextureKey old_key = binding.key; TextureKey old_key = binding.key;
uint8_t old_swizzled_signs = binding.swizzled_signs; uint8_t old_swizzled_signs = binding.swizzled_signs;
BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs); BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs);

View File

@ -19,6 +19,7 @@
#include "xenia/base/filesystem.h" #include "xenia/base/filesystem.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/platform.h" #include "xenia/base/platform.h"
#include "xenia/base/string.h" #include "xenia/base/string.h"
#include "xenia/base/system.h" #include "xenia/base/system.h"
@ -357,9 +358,10 @@ void TraceViewer::DrawPacketDisassemblerUI() {
ImGui::NextColumn(); ImGui::NextColumn();
if (!register_info || if (!register_info ||
register_info->type == RegisterInfo::Type::kDword) { register_info->type == RegisterInfo::Type::kDword) {
ImGui::Text("%.8X", action.register_write.value.u32); ImGui::Text("%.8X", action.register_write.value);
} else { } else {
ImGui::Text("%8f", action.register_write.value.f32); ImGui::Text("%8f", xe::memory::Reinterpret<float>(
action.register_write.value));
} }
ImGui::Columns(1); ImGui::Columns(1);
break; break;
@ -709,10 +711,8 @@ void TraceViewer::DrawTextureInfo(
const Shader::TextureBinding& texture_binding) { const Shader::TextureBinding& texture_binding) {
auto& regs = *graphics_system_->register_file(); auto& regs = *graphics_system_->register_file();
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + xenos::xe_gpu_texture_fetch_t fetch =
texture_binding.fetch_constant * 6; regs.GetTextureFetch(texture_binding.fetch_constant);
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]);
auto& fetch = group->texture_fetch;
if (fetch.type != xenos::FetchConstantType::kTexture && if (fetch.type != xenos::FetchConstantType::kTexture &&
(!cvars::gpu_allow_invalid_fetch_constants || (!cvars::gpu_allow_invalid_fetch_constants ||
fetch.type != xenos::FetchConstantType::kInvalidTexture)) { fetch.type != xenos::FetchConstantType::kInvalidTexture)) {
@ -780,9 +780,9 @@ void TraceViewer::DrawFailedTextureInfo(
void TraceViewer::DrawVertexFetcher(Shader* shader, void TraceViewer::DrawVertexFetcher(Shader* shader,
const Shader::VertexBinding& vertex_binding, const Shader::VertexBinding& vertex_binding,
const xe_gpu_vertex_fetch_t* fetch) { const xe_gpu_vertex_fetch_t& fetch) {
const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2); const uint8_t* addr = memory_->TranslatePhysical(fetch.address << 2);
uint32_t vertex_count = fetch->size / vertex_binding.stride_words; uint32_t vertex_count = fetch.size / vertex_binding.stride_words;
int column_count = 0; int column_count = 0;
for (const auto& attrib : vertex_binding.attributes) { for (const auto& attrib : vertex_binding.attributes) {
switch (attrib.fetch_instr.attributes.data_format) { switch (attrib.fetch_instr.attributes.data_format) {
@ -883,7 +883,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
#define LOADEL(type, wo) \ #define LOADEL(type, wo) \
GpuSwap(xe::load<type>(vstart + \ GpuSwap(xe::load<type>(vstart + \
(attrib.fetch_instr.attributes.offset + wo) * 4), \ (attrib.fetch_instr.attributes.offset + wo) * 4), \
fetch->endian) fetch.endian)
switch (attrib.fetch_instr.attributes.data_format) { switch (attrib.fetch_instr.attributes.data_format) {
case xenos::VertexFormat::k_32: case xenos::VertexFormat::k_32:
ImGui::Text("%.8X", LOADEL(uint32_t, 0)); ImGui::Text("%.8X", LOADEL(uint32_t, 0));
@ -1187,7 +1187,7 @@ void TraceViewer::DrawStateUI() {
} }
auto enable_mode = auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7);
const char* mode_name = "Unknown"; const char* mode_name = "Unknown";
switch (enable_mode) { switch (enable_mode) {
@ -1210,7 +1210,7 @@ void TraceViewer::DrawStateUI() {
break; break;
} }
case ModeControl::kCopy: { case ModeControl::kCopy: {
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(), ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(),
copy_dest_base); copy_dest_base);
break; break;
@ -1221,9 +1221,9 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Viewport State:"); ImGui::BulletText("Viewport State:");
if (true) { if (true) {
ImGui::TreePush((const void*)0); ImGui::TreePush((const void*)0);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
if ((pa_su_sc_mode_cntl >> 16) & 1) { if ((pa_su_sc_mode_cntl >> 16) & 1) {
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET];
int16_t window_offset_x = window_offset & 0x7FFF; int16_t window_offset_x = window_offset & 0x7FFF;
int16_t window_offset_y = (window_offset >> 16) & 0x7FFF; int16_t window_offset_y = (window_offset >> 16) & 0x7FFF;
if (window_offset_x & 0x4000) { if (window_offset_x & 0x4000) {
@ -1237,8 +1237,8 @@ void TraceViewer::DrawStateUI() {
} else { } else {
ImGui::BulletText("Window Offset: disabled"); ImGui::BulletText("Window Offset: disabled");
} }
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL];
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR];
ImGui::BulletText( ImGui::BulletText(
"Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF, "Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF,
(window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF, (window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF,
@ -1246,7 +1246,7 @@ void TraceViewer::DrawStateUI() {
(window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF), (window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF),
((window_scissor_br >> 16) & 0x7FFF) - ((window_scissor_br >> 16) & 0x7FFF) -
((window_scissor_tl >> 16) & 0x7FFF)); ((window_scissor_tl >> 16) & 0x7FFF));
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF; uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF;
uint32_t surface_pitch = surface_info & 0x3FFF; uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = (surface_info >> 16) & 0x3; auto surface_msaa = (surface_info >> 16) & 0x3;
@ -1258,7 +1258,7 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Surface Pitch: %d", surface_pitch); ImGui::BulletText("Surface Pitch: %d", surface_pitch);
ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz); ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz);
ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]); ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]);
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL];
bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0;
bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0; bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0;
bool vport_yscale_enable = (vte_control & (1 << 2)) > 0; bool vport_yscale_enable = (vte_control & (1 << 2)) > 0;
@ -1273,14 +1273,20 @@ void TraceViewer::DrawStateUI() {
} }
ImGui::BulletText( ImGui::BulletText(
"Viewport Offset: %f, %f, %f", "Viewport Offset: %f, %f, %f",
vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0, vport_xoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET)
vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0, : 0.0f,
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0); vport_yoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f,
vport_zoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET)
: 0.0f);
ImGui::BulletText( ImGui::BulletText(
"Viewport Scale: %f, %f, %f", "Viewport Scale: %f, %f, %f",
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1, vport_xscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE)
vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1, : 1.0f,
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1); vport_yscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f,
vport_zscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE)
: 1.0f);
if (!vport_xscale_enable) { if (!vport_xscale_enable) {
ImGui::PopStyleColor(); ImGui::PopStyleColor();
} }
@ -1290,7 +1296,7 @@ void TraceViewer::DrawStateUI() {
((vte_control >> 8) & 0x1) ? "y/w0" : "y", ((vte_control >> 8) & 0x1) ? "y/w0" : "y",
((vte_control >> 9) & 0x1) ? "z/w0" : "z", ((vte_control >> 9) & 0x1) ? "z/w0" : "z",
((vte_control >> 10) & 0x1) ? "w0" : "1/w0"); ((vte_control >> 10) & 0x1) ? "w0" : "1/w0");
uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL];
bool clip_enabled = ((clip_control >> 17) & 0x1) == 0; bool clip_enabled = ((clip_control >> 17) & 0x1) == 0;
bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1; bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1;
ImGui::BulletText("Clip Enabled: %s, DX Clip: %s", ImGui::BulletText("Clip Enabled: %s, DX Clip: %s",
@ -1302,11 +1308,9 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Rasterizer State:"); ImGui::BulletText("Rasterizer State:");
if (true) { if (true) {
ImGui::TreePush((const void*)0); ImGui::TreePush((const void*)0);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
uint32_t pa_sc_screen_scissor_tl = uint32_t pa_sc_screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL];
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32; uint32_t pa_sc_screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR];
uint32_t pa_sc_screen_scissor_br =
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32;
if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) { if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) {
int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF; int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF;
int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF; int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF;
@ -1361,7 +1365,7 @@ void TraceViewer::DrawStateUI() {
} }
ImGui::Columns(1); ImGui::Columns(1);
auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
uint32_t surface_pitch = rb_surface_info & 0x3FFF; uint32_t surface_pitch = rb_surface_info & 0x3FFF;
auto surface_msaa = auto surface_msaa =
static_cast<xenos::MsaaSamples>((rb_surface_info >> 16) & 0x3); static_cast<xenos::MsaaSamples>((rb_surface_info >> 16) & 0x3);
@ -1370,39 +1374,39 @@ void TraceViewer::DrawStateUI() {
if (enable_mode != ModeControl::kDepth) { if (enable_mode != ModeControl::kDepth) {
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32; uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL];
if ((color_control & 0x8) != 0) { if ((color_control & 0x8) != 0) {
ImGui::BulletText("Alpha Test: %s %.2f", ImGui::BulletText("Alpha Test: %s %.2f",
kCompareFuncNames[color_control & 0x7], kCompareFuncNames[color_control & 0x7],
regs[XE_GPU_REG_RB_ALPHA_REF].f32); regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF));
} else { } else {
ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored); ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored);
ImGui::BulletText("Alpha Test: disabled"); ImGui::BulletText("Alpha Test: disabled");
ImGui::PopStyleColor(); ImGui::PopStyleColor();
} }
auto blend_color = ImVec4(regs[XE_GPU_REG_RB_BLEND_RED].f32, auto blend_color = ImVec4(regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs[XE_GPU_REG_RB_BLEND_GREEN].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs[XE_GPU_REG_RB_BLEND_BLUE].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32); regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA));
ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x, ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x,
blend_color.y, blend_color.z, blend_color.w); blend_color.y, blend_color.z, blend_color.w);
ImGui::SameLine(); ImGui::SameLine();
// TODO small_height (was true) parameter was removed // TODO small_height (was true) parameter was removed
ImGui::ColorButton(nullptr, blend_color); ImGui::ColorButton(nullptr, blend_color);
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
uint32_t color_info[4] = { uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR_INFO],
regs[XE_GPU_REG_RB_COLOR1_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO],
regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR2_INFO],
regs[XE_GPU_REG_RB_COLOR3_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO],
}; };
uint32_t rb_blendcontrol[4] = { uint32_t rb_blendcontrol[4] = {
regs[XE_GPU_REG_RB_BLENDCONTROL0].u32, regs[XE_GPU_REG_RB_BLENDCONTROL0],
regs[XE_GPU_REG_RB_BLENDCONTROL1].u32, regs[XE_GPU_REG_RB_BLENDCONTROL1],
regs[XE_GPU_REG_RB_BLENDCONTROL2].u32, regs[XE_GPU_REG_RB_BLENDCONTROL2],
regs[XE_GPU_REG_RB_BLENDCONTROL3].u32, regs[XE_GPU_REG_RB_BLENDCONTROL3],
}; };
ImGui::Columns(2); ImGui::Columns(2);
for (int i = 0; i < xe::countof(color_info); ++i) { for (int i = 0; i < xe::countof(color_info); ++i) {
@ -1511,9 +1515,9 @@ void TraceViewer::DrawStateUI() {
} }
if (ImGui::CollapsingHeader("Depth/Stencil Target")) { if (ImGui::CollapsingHeader("Depth/Stencil Target")) {
auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL];
auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK];
auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO];
bool uses_depth = bool uses_depth =
(rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004); (rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004);
uint32_t stencil_ref = (rb_stencilrefmask & 0xFF); uint32_t stencil_ref = (rb_stencilrefmask & 0xFF);
@ -1697,10 +1701,9 @@ void TraceViewer::DrawStateUI() {
draw_info.index_buffer_size, draw_info.index_buffer_size,
kIndexFormatNames[int(draw_info.index_format)], kIndexFormatNames[int(draw_info.index_format)],
kEndiannessNames[int(draw_info.index_endianness)]); kEndiannessNames[int(draw_info.index_endianness)]);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
if (pa_su_sc_mode_cntl & (1 << 21)) { if (pa_su_sc_mode_cntl & (1 << 21)) {
uint32_t reset_index = uint32_t reset_index = regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX];
regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
if (draw_info.index_format == xenos::IndexFormat::kInt16) { if (draw_info.index_format == xenos::IndexFormat::kInt16) {
ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF); ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF);
} else { } else {
@ -1760,30 +1763,16 @@ void TraceViewer::DrawStateUI() {
auto shader = command_processor->active_vertex_shader(); auto shader = command_processor->active_vertex_shader();
if (shader) { if (shader) {
for (const auto& vertex_binding : shader->vertex_bindings()) { for (const auto& vertex_binding : shader->vertex_bindings()) {
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + xe_gpu_vertex_fetch_t fetch =
(vertex_binding.fetch_constant / 3) * 6; regs.GetVertexFetch(vertex_binding.fetch_constant);
const auto group = assert_true(fetch.endian == xenos::Endian::k8in32);
reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
const xe_gpu_vertex_fetch_t* fetch = nullptr;
switch (vertex_binding.fetch_constant % 3) {
case 0:
fetch = &group->vertex_fetch_0;
break;
case 1:
fetch = &group->vertex_fetch_1;
break;
case 2:
fetch = &group->vertex_fetch_2;
break;
}
assert_true(fetch->endian == xenos::Endian::k8in32);
char tree_root_id[32]; char tree_root_id[32];
sprintf(tree_root_id, "#vertices_root_%d", sprintf(tree_root_id, "#vertices_root_%d",
vertex_binding.fetch_constant); vertex_binding.fetch_constant);
if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s", if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s",
vertex_binding.fetch_constant, fetch->address << 2, vertex_binding.fetch_constant, fetch.address << 2,
fetch->size * 4, fetch.size * 4,
kEndiannessNames[int(fetch->endian)])) { kEndiannessNames[int(fetch.endian)])) {
ImGui::BeginChild("#vertices", ImVec2(0, 300)); ImGui::BeginChild("#vertices", ImVec2(0, 300));
DrawVertexFetcher(shader, vertex_binding, fetch); DrawVertexFetcher(shader, vertex_binding, fetch);
ImGui::EndChild(); ImGui::EndChild();
@ -1831,7 +1820,7 @@ void TraceViewer::DrawStateUI() {
ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6, ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6,
(i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6); (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6);
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32); ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn(); ImGui::NextColumn();
} }
ImGui::Columns(1); ImGui::Columns(1);
@ -1842,8 +1831,9 @@ void TraceViewer::DrawStateUI() {
i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) { i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) {
ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4); ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4);
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::Text("%f, %f, %f, %f", regs[i + 0].f32, regs[i + 1].f32, ImGui::Text("%f, %f, %f, %f", regs.Get<float>(i + 0),
regs[i + 2].f32, regs[i + 3].f32); regs.Get<float>(i + 1), regs.Get<float>(i + 2),
regs.Get<float>(i + 3));
ImGui::NextColumn(); ImGui::NextColumn();
} }
ImGui::Columns(1); ImGui::Columns(1);
@ -1856,7 +1846,7 @@ void TraceViewer::DrawStateUI() {
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32, (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32,
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31); (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31);
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32); ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn(); ImGui::NextColumn();
} }
ImGui::Columns(1); ImGui::Columns(1);
@ -1867,7 +1857,7 @@ void TraceViewer::DrawStateUI() {
i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) { i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) {
ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00); ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00);
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32); ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn(); ImGui::NextColumn();
} }
ImGui::Columns(1); ImGui::Columns(1);

View File

@ -122,7 +122,7 @@ class TraceViewer : public xe::ui::WindowedApp {
void DrawVertexFetcher(Shader* shader, void DrawVertexFetcher(Shader* shader,
const Shader::VertexBinding& vertex_binding, const Shader::VertexBinding& vertex_binding,
const xenos::xe_gpu_vertex_fetch_t* fetch); const xenos::xe_gpu_vertex_fetch_t& fetch);
TraceViewerWindowListener window_listener_; TraceViewerWindowListener window_listener_;

View File

@ -2486,8 +2486,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
(uint64_t(1) << (vfetch_index & 63))) { (uint64_t(1) << (vfetch_index & 63))) {
continue; continue;
} }
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>( xenos::xe_gpu_vertex_fetch_t vfetch_constant =
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); regs.GetVertexFetch(vfetch_index);
switch (vfetch_constant.type) { switch (vfetch_constant.type) {
case xenos::FetchConstantType::kVertex: case xenos::FetchConstantType::kVertex:
break; break;
@ -3285,10 +3285,10 @@ void VulkanCommandProcessor::UpdateDynamicState(
// Blend constants. // Blend constants.
float blend_constants[] = { float blend_constants[] = {
regs[XE_GPU_REG_RB_BLEND_RED].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs[XE_GPU_REG_RB_BLEND_GREEN].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs[XE_GPU_REG_RB_BLEND_BLUE].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
}; };
dynamic_blend_constants_update_needed_ |= dynamic_blend_constants_update_needed_ |=
std::memcmp(dynamic_blend_constants_, blend_constants, std::memcmp(dynamic_blend_constants_, blend_constants,
@ -3434,7 +3434,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
const RegisterFile& regs = *register_file_; const RegisterFile& regs = *register_file_;
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>(); auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>(); auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>(); auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>(); auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>(); auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
@ -3442,7 +3442,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF); regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>(); auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>(); auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); auto vgt_indx_offset = regs.Get<int32_t>(XE_GPU_REG_VGT_INDX_OFFSET);
bool edram_fragment_shader_interlock = bool edram_fragment_shader_interlock =
render_target_cache_->GetPath() == render_target_cache_->GetPath() ==
@ -3755,7 +3755,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
system_constants_.edram_rt_format_flags[i] = format_flags; system_constants_.edram_rt_format_flags[i] = format_flags;
uint32_t blend_factors_ops = uint32_t blend_factors_ops =
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
dirty |= system_constants_.edram_rt_blend_factors_ops[i] != dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
blend_factors_ops; blend_factors_ops;
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
@ -3784,22 +3784,22 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
if (primitive_polygonal) { if (primitive_polygonal) {
if (pa_su_sc_mode_cntl.poly_offset_front_enable) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset = poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
} }
if (pa_su_sc_mode_cntl.poly_offset_back_enable) { if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
poly_offset_back_scale = poly_offset_back_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
poly_offset_back_offset = poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
} }
} else { } else {
if (pa_su_sc_mode_cntl.poly_offset_para_enable) { if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset = poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset; poly_offset_back_offset = poly_offset_front_offset;
} }
@ -3862,21 +3862,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
} }
dirty |= system_constants_.edram_blend_constant[0] != dirty |= system_constants_.edram_blend_constant[0] !=
regs[XE_GPU_REG_RB_BLEND_RED].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
system_constants_.edram_blend_constant[0] = system_constants_.edram_blend_constant[0] =
regs[XE_GPU_REG_RB_BLEND_RED].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
dirty |= system_constants_.edram_blend_constant[1] != dirty |= system_constants_.edram_blend_constant[1] !=
regs[XE_GPU_REG_RB_BLEND_GREEN].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
system_constants_.edram_blend_constant[1] = system_constants_.edram_blend_constant[1] =
regs[XE_GPU_REG_RB_BLEND_GREEN].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
dirty |= system_constants_.edram_blend_constant[2] != dirty |= system_constants_.edram_blend_constant[2] !=
regs[XE_GPU_REG_RB_BLEND_BLUE].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
system_constants_.edram_blend_constant[2] = system_constants_.edram_blend_constant[2] =
regs[XE_GPU_REG_RB_BLEND_BLUE].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
dirty |= system_constants_.edram_blend_constant[3] != dirty |= system_constants_.edram_blend_constant[3] !=
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
system_constants_.edram_blend_constant[3] = system_constants_.edram_blend_constant[3] =
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
} }
if (dirty) { if (dirty) {
@ -3903,10 +3903,10 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
// These are the constant base addresses/ranges for shaders. // These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now cause nothing seems to differ on the Xbox // We have these hardcoded right now cause nothing seems to differ on the Xbox
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
// Check if the float constant layout is still the same and get the counts. // Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& float_constant_map_vertex = const Shader::ConstantRegisterMap& float_constant_map_vertex =
vertex_shader->constant_register_map(); vertex_shader->constant_register_map();
@ -4001,8 +4001,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index); float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping, std::memcpy(mapping,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)] (float_constant_index << 2)],
.f32,
sizeof(float) * 4); sizeof(float) * 4);
mapping += sizeof(float) * 4; mapping += sizeof(float) * 4;
} }
@ -4033,8 +4032,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index); float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping, std::memcpy(mapping,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)] (float_constant_index << 2)],
.f32,
sizeof(float) * 4); sizeof(float) * 4);
mapping += sizeof(float) * 4; mapping += sizeof(float) * 4;
} }
@ -4055,7 +4053,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
return false; return false;
} }
buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize); buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize);
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
kBoolLoopConstantsSize); kBoolLoopConstantsSize);
current_constant_buffers_up_to_date_ |= current_constant_buffers_up_to_date_ |=
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop; UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop;
@ -4073,7 +4071,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
return false; return false;
} }
buffer_info.range = VkDeviceSize(kFetchConstantsSize); buffer_info.range = VkDeviceSize(kFetchConstantsSize);
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0],
kFetchConstantsSize); kFetchConstantsSize);
current_constant_buffers_up_to_date_ |= current_constant_buffers_up_to_date_ |=
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch; UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch;

View File

@ -718,7 +718,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
[common_blend_rt_index]), [common_blend_rt_index]),
(((normalized_color_mask & (((normalized_color_mask &
~(uint32_t(0b1111) << (4 * common_blend_rt_index))) ~(uint32_t(0b1111) << (4 * common_blend_rt_index)))
? regs[XE_GPU_REG_RB_COLOR_MASK].u32 ? regs[XE_GPU_REG_RB_COLOR_MASK]
: normalized_color_mask) >> : normalized_color_mask) >>
(4 * common_blend_rt_index)) & (4 * common_blend_rt_index)) &
0b1111, 0b1111,

View File

@ -612,8 +612,8 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView(
VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters( VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters(
const VulkanShader::SamplerBinding& binding) const { const VulkanShader::SamplerBinding& binding) const {
const auto& regs = register_file(); const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch =
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); regs.GetTextureFetch(binding.fetch_constant);
SamplerParameters parameters; SamplerParameters parameters;
@ -875,8 +875,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture(
uint32_t& width_scaled_out, uint32_t& height_scaled_out, uint32_t& width_scaled_out, uint32_t& height_scaled_out,
xenos::TextureFormat& format_out) { xenos::TextureFormat& format_out) {
const auto& regs = register_file(); const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
TextureKey key; TextureKey key;
BindingInfoFromFetchConstant(fetch, key, nullptr); BindingInfoFromFetchConstant(fetch, key, nullptr);
if (!key.is_valid || key.base_page == 0 || if (!key.is_valid || key.base_page == 0 ||

View File

@ -12,6 +12,7 @@
#include <cmath> #include <cmath>
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/memory.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -118,8 +119,8 @@ float Float7e3To32(uint32_t f10) {
exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
mantissa = (mantissa << mantissa_lzcnt) & 0x7F; mantissa = (mantissa << mantissa_lzcnt) & 0x7F;
} }
uint32_t f32 = ((exponent + 124) << 23) | (mantissa << 3); return xe::memory::Reinterpret<float>(
return *reinterpret_cast<const float*>(&f32); uint32_t(((exponent + 124) << 23) | (mantissa << 3)));
} }
// Based on CFloat24 from d3dref9.dll and the 6e4 code from: // Based on CFloat24 from d3dref9.dll and the 6e4 code from:
@ -131,7 +132,7 @@ uint32_t Float32To20e4(float f32, bool round_to_nearest_even) {
// Positive only, and not -0 or NaN. // Positive only, and not -0 or NaN.
return 0; return 0;
} }
uint32_t f32u32 = *reinterpret_cast<const uint32_t*>(&f32); auto f32u32 = xe::memory::Reinterpret<uint32_t>(f32);
if (f32u32 >= 0x3FFFFFF8) { if (f32u32 >= 0x3FFFFFF8) {
// Saturate. // Saturate.
return 0xFFFFFF; return 0xFFFFFF;
@ -165,8 +166,8 @@ float Float20e4To32(uint32_t f24) {
exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF; mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF;
} }
uint32_t f32 = ((exponent + 112) << 23) | (mantissa << 3); return xe::memory::Reinterpret<float>(
return *reinterpret_cast<const float*>(&f32); uint32_t(((exponent + 112) << 23) | (mantissa << 3)));
} }
const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) { const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) {