[GPU] Remove register reinterpret_casts + WAIT_REG_MEM volatility

Hopefully prevents some potential #1971-like situations.

WAIT_REG_MEM's implementation also allowed the compiler to load the value
only once, which caused an infinite loop with the other changes in this
commit (even in debug builds), so it's now accessed as volatile. It would
possibly be even better to replace it with an atomic (acquire/release?)
load/store some day, at least for the registers actually seen
participating in those waits.

Also fixes the endianness being handled only on the first wait iteration in
WAIT_REG_MEM.
This commit is contained in:
Triang3l 2024-05-12 17:17:30 +03:00
parent f0ad4f4587
commit 376bad5056
19 changed files with 336 additions and 318 deletions

View File

@ -18,6 +18,7 @@
#include "xenia/base/byte_stream.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/base/ring_buffer.h"
#include "xenia/gpu/gpu_flags.h"
@ -334,7 +335,8 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
return;
}
regs.values[index].u32 = value;
// Volatile for the WAIT_REG_MEM loop.
const_cast<volatile uint32_t&>(regs.values[index]) = value;
if (!regs.GetRegisterInfo(index)) {
XELOGW("GPU: Write to unknown register ({:04X} = {:08X})", index, value);
}
@ -342,19 +344,20 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
// Scratch register writeback.
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) {
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK]) {
// Enabled - write to address.
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32;
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR];
uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value);
}
} else {
switch (index) {
// If this is a COHER register, set the dirty flag.
// This will block the command processor the next time it WAIT_MEM_REGs
// This will block the command processor the next time it WAIT_REG_MEMs
// and allow us to synchronize the memory.
case XE_GPU_REG_COHER_STATUS_HOST: {
regs.values[index].u32 |= UINT32_C(0x80000000);
const_cast<volatile uint32_t&>(regs.values[index]) |=
UINT32_C(0x80000000);
} break;
case XE_GPU_REG_DC_LUT_RW_INDEX: {
@ -365,12 +368,12 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
case XE_GPU_REG_DC_LUT_SEQ_COLOR: {
// Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write
// enable mask is blue, green, red.
bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
@ -401,14 +404,14 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
case XE_GPU_REG_DC_LUT_PWL_DATA: {
// Should be in the PWL writing mode.
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// Bit 7 of the index is ignored for PWL.
uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F;
// DC_LUT_PWL_DATA is likely in the red, green, blue order because
// DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red.
bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) {
reg::DC_LUT_PWL_DATA& gamma_ramp_entry =
@ -436,10 +439,10 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
case XE_GPU_REG_DC_LUT_30_COLOR: {
// Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
uint32_t gamma_ramp_write_enable_mask =
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111;
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111;
if (gamma_ramp_write_enable_mask) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index];
@ -479,10 +482,12 @@ void CommandProcessor::MakeCoherent() {
// https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf
// https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454
RegisterFile* regs = register_file_;
auto& status_host = regs->Get<reg::COHER_STATUS_HOST>();
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;
// Volatile because this may be called from the WAIT_REG_MEM loop.
volatile uint32_t* regs_volatile = register_file_->values;
auto status_host = xe::memory::Reinterpret<reg::COHER_STATUS_HOST>(
uint32_t(regs_volatile[XE_GPU_REG_COHER_STATUS_HOST]));
uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST];
uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST];
if (!status_host.status) {
return;
@ -502,7 +507,7 @@ void CommandProcessor::MakeCoherent() {
base_host + size_host, size_host, action);
// Mark coherent.
status_host.status = 0;
regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0;
}
// Calls Flush() on the trace writer.
// NOTE(review): presumably run before the command processor begins a wait,
// going by the name - confirm against the callers.
void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }
@ -940,28 +945,33 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingBuffer* reader,
SCOPE_profile_cpu_f("gpu");
// wait until a register or memory location is a specific value
uint32_t wait_info = reader->ReadAndSwap<uint32_t>();
uint32_t poll_reg_addr = reader->ReadAndSwap<uint32_t>();
uint32_t ref = reader->ReadAndSwap<uint32_t>();
uint32_t mask = reader->ReadAndSwap<uint32_t>();
uint32_t wait = reader->ReadAndSwap<uint32_t>();
bool is_memory = (wait_info & 0x10) != 0;
assert_true(is_memory || poll_reg_addr < RegisterFile::kRegisterCount);
const volatile uint32_t& value_ref =
is_memory ? *reinterpret_cast<uint32_t*>(memory_->TranslatePhysical(
poll_reg_addr & ~uint32_t(0x3)))
: register_file_->values[poll_reg_addr];
bool matched = false;
do {
uint32_t value;
if (wait_info & 0x10) {
// Memory.
auto endianness = static_cast<xenos::Endian>(poll_reg_addr & 0x3);
poll_reg_addr &= ~0x3;
value = xe::load<uint32_t>(memory_->TranslatePhysical(poll_reg_addr));
value = GpuSwap(value, endianness);
trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4);
uint32_t value = value_ref;
if (is_memory) {
trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr & ~uint32_t(0x3)),
sizeof(uint32_t));
value = xenos::GpuSwap(value,
static_cast<xenos::Endian>(poll_reg_addr & 0x3));
} else {
// Register.
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
value = register_file_->values[poll_reg_addr].u32;
if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
MakeCoherent();
value = register_file_->values[poll_reg_addr].u32;
value = value_ref;
}
}
switch (wait_info & 0x7) {
@ -1024,17 +1034,17 @@ bool CommandProcessor::ExecutePacketType3_REG_RMW(RingBuffer* reader,
uint32_t rmw_info = reader->ReadAndSwap<uint32_t>();
uint32_t and_mask = reader->ReadAndSwap<uint32_t>();
uint32_t or_mask = reader->ReadAndSwap<uint32_t>();
uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32;
uint32_t value = register_file_->values[rmw_info & 0x1FFF];
if ((rmw_info >> 31) & 0x1) {
// & reg
value &= register_file_->values[and_mask & 0x1FFF].u32;
value &= register_file_->values[and_mask & 0x1FFF];
} else {
// & imm
value &= and_mask;
}
if ((rmw_info >> 30) & 0x1) {
// | reg
value |= register_file_->values[or_mask & 0x1FFF].u32;
value |= register_file_->values[or_mask & 0x1FFF];
} else {
// | imm
value |= or_mask;
@ -1055,7 +1065,7 @@ bool CommandProcessor::ExecutePacketType3_REG_TO_MEM(RingBuffer* reader,
uint32_t reg_val;
assert_true(reg_addr < RegisterFile::kRegisterCount);
reg_val = register_file_->values[reg_addr].u32;
reg_val = register_file_->values[reg_addr];
auto endianness = static_cast<xenos::Endian>(mem_addr & 0x3);
mem_addr &= ~0x3;
@ -1105,7 +1115,7 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingBuffer* reader,
} else {
// Register.
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
value = register_file_->values[poll_reg_addr].u32;
value = register_file_->values[poll_reg_addr];
}
bool matched = false;
switch (wait_info & 0x7) {
@ -1240,7 +1250,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader,
if (fake_sample_count >= 0) {
auto* pSampleCounts =
memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>(
register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32);
register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]);
// 0xFFFFFEED is written to these two locations by D3D only on D3DISSUE_END
// and used to detect a finished query.
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
@ -1599,10 +1609,10 @@ bool CommandProcessor::ExecutePacketType3_VIZ_QUERY(RingBuffer* reader,
// The scan converter writes the internal result back to the register here.
// We just fake it and say it was visible in case it is read back.
if (id < 32) {
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0].u32 |=
uint32_t(1) << id;
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0] |= uint32_t(1)
<< id;
} else {
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1].u32 |=
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1] |=
uint32_t(1) << (id - 32);
}
}
@ -1614,9 +1624,8 @@ void CommandProcessor::InitializeTrace() {
// Write the initial register values, to be loaded directly into the
// RegisterFile since all registers, including those that may have side
// effects on setting, will be saved.
trace_writer_.WriteRegisters(
0, reinterpret_cast<const uint32_t*>(register_file_->values),
RegisterFile::kRegisterCount, false);
trace_writer_.WriteRegisters(0, register_file_->values,
RegisterFile::kRegisterCount, false);
trace_writer_.WriteGammaRamp(gamma_ramp_256_entry_table(),
gamma_ramp_pwl_rgb(), gamma_ramp_rw_component_);

View File

@ -17,6 +17,7 @@
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
@ -2306,8 +2307,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) {
vfetch_bits_remaining &= ~(uint32_t(1) << j);
uint32_t vfetch_index = i * 32 + j;
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
xenos::xe_gpu_vertex_fetch_t vfetch_constant =
regs.GetVertexFetch(vfetch_index);
switch (vfetch_constant.type) {
case xenos::FetchConstantType::kVertex:
break;
@ -3050,10 +3051,10 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
// Blend factor.
float blend_factor[] = {
regs[XE_GPU_REG_RB_BLEND_RED].f32,
regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
};
// std::memcmp instead of != so in case of NaN, every draw won't be
// invalidating it.
@ -3100,7 +3101,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
@ -3241,9 +3242,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Tessellation factor range, plus 1.0 according to the images in
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
float tessellation_factor_min =
regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f;
regs.Get<float>(XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL) + 1.0f;
float tessellation_factor_max =
regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f;
regs.Get<float>(XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL) + 1.0f;
dirty |= system_constants_.tessellation_factor_range_min !=
tessellation_factor_min;
system_constants_.tessellation_factor_range_min = tessellation_factor_min;
@ -3280,12 +3281,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
while (xe::bit_scan_forward(user_clip_planes_remaining,
&user_clip_plane_index)) {
user_clip_planes_remaining &= ~(UINT32_C(1) << user_clip_plane_index);
const float* user_clip_plane =
&regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4].f32;
if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane,
const void* user_clip_plane_regs =
&regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4];
if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane_regs,
4 * sizeof(float))) {
dirty = true;
std::memcpy(user_clip_plane_write_ptr, user_clip_plane,
std::memcpy(user_clip_plane_write_ptr, user_clip_plane_regs,
4 * sizeof(float));
}
user_clip_plane_write_ptr += 4;
@ -3423,9 +3424,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
color_exp_bias -= 5;
}
}
float color_exp_bias_scale;
*reinterpret_cast<int32_t*>(&color_exp_bias_scale) =
0x3F800000 + (color_exp_bias << 23);
auto color_exp_bias_scale = xe::memory::Reinterpret<float>(
int32_t(0x3F800000 + (color_exp_bias << 23)));
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
system_constants_.color_exp_bias[i] = color_exp_bias_scale;
if (edram_rov_used) {
@ -3454,7 +3454,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
4 * sizeof(float));
uint32_t blend_factors_ops =
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
blend_factors_ops;
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
@ -3477,22 +3477,22 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
if (primitive_polygonal) {
if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
}
if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
poly_offset_back_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
}
} else {
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset;
}
@ -3567,21 +3567,21 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
}
dirty |= system_constants_.edram_blend_constant[0] !=
regs[XE_GPU_REG_RB_BLEND_RED].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
system_constants_.edram_blend_constant[0] =
regs[XE_GPU_REG_RB_BLEND_RED].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
dirty |= system_constants_.edram_blend_constant[1] !=
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
system_constants_.edram_blend_constant[1] =
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
dirty |= system_constants_.edram_blend_constant[2] !=
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
system_constants_.edram_blend_constant[2] =
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
dirty |= system_constants_.edram_blend_constant[3] !=
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
system_constants_.edram_blend_constant[3] =
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
}
cbuffer_binding_system_.up_to_date &= !dirty;
@ -3638,10 +3638,10 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
// These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now because nothing seems to differ on the Xbox
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
// Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& float_constant_map_vertex =
vertex_shader->constant_register_map();
@ -3715,8 +3715,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)]
.f32,
(float_constant_index << 2)],
4 * sizeof(float));
float_constants += 4 * sizeof(float);
}
@ -3746,8 +3745,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)]
.f32,
(float_constant_index << 2)],
4 * sizeof(float));
float_constants += 4 * sizeof(float);
}
@ -3767,7 +3765,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
return false;
}
std::memcpy(bool_loop_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
kBoolLoopConstantsSize);
cbuffer_binding_bool_loop_.up_to_date = true;
current_graphics_root_up_to_date_ &=
@ -3782,8 +3780,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
if (fetch_constants == nullptr) {
return false;
}
std::memcpy(fetch_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
std::memcpy(fetch_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0],
kFetchConstantsSize);
cbuffer_binding_fetch_.up_to_date = true;
current_graphics_root_up_to_date_ &=

View File

@ -960,8 +960,8 @@ uint32_t D3D12TextureCache::GetActiveTextureBindlessSRVIndex(
D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters(
const D3D12Shader::SamplerBinding& binding) const {
const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6);
xenos::xe_gpu_texture_fetch_t fetch =
regs.GetTextureFetch(binding.fetch_constant);
SamplerParameters parameters;
@ -1441,8 +1441,7 @@ ID3D12Resource* D3D12TextureCache::RequestSwapTexture(
D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out,
xenos::TextureFormat& format_out) {
const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
TextureKey key;
BindingInfoFromFetchConstant(fetch, key, nullptr);
if (!key.is_valid || key.base_page == 0 ||

View File

@ -15,6 +15,7 @@
#include "xenia/base/assert.h"
#include "xenia/base/cvar.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/ucode.h"
@ -67,7 +68,7 @@ void DrawExtentEstimator::PositionYExportSink::Export(
point_size_ = value[0];
}
if (value_mask & 0b0100) {
vertex_kill_ = *reinterpret_cast<const uint32_t*>(&value[2]);
vertex_kill_ = xe::memory::Reinterpret<uint32_t>(value[2]);
}
}
}
@ -110,7 +111,7 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
xenos::Endian index_endian = vgt_dma_size.swap_mode;
if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) {
xenos::IndexFormat index_format = vgt_draw_initiator.index_size;
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32;
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE];
uint32_t index_buffer_read_count =
std::min(uint32_t(vgt_draw_initiator.num_indices),
uint32_t(vgt_dma_size.num_words));
@ -145,21 +146,22 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f;
float viewport_y_offset = pa_cl_vte_cntl.vport_y_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: 0.0f;
float viewport_y_offset =
pa_cl_vte_cntl.vport_y_offset_ena
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f;
int32_t point_vertex_min_diameter_float = 0;
int32_t point_vertex_max_diameter_float = 0;
float point_constant_radius_y = 0.0f;
if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) {
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
*reinterpret_cast<float*>(&point_vertex_min_diameter_float) =
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f);
*reinterpret_cast<float*>(&point_vertex_max_diameter_float) =
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f);
point_vertex_min_diameter_float = xe::memory::Reinterpret<int32_t>(
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f));
point_vertex_max_diameter_float = xe::memory::Reinterpret<int32_t>(
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f));
point_constant_radius_y =
float(regs.Get<reg::PA_SU_POINT_SIZE>().height) * (1.0f / 16.0f);
}
@ -224,12 +226,13 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
// Vertex-specified diameter. Clamped effectively as a signed integer in
// the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN
// to the maximum.
point_radius_y = position_y_export_sink.point_size().value();
*reinterpret_cast<int32_t*>(&point_radius_y) = std::min(
point_vertex_max_diameter_float,
std::max(point_vertex_min_diameter_float,
*reinterpret_cast<const int32_t*>(&point_radius_y)));
point_radius_y *= 0.5f;
point_radius_y =
0.5f *
xe::memory::Reinterpret<float>(std::min(
point_vertex_max_diameter_float,
std::max(point_vertex_min_diameter_float,
xe::memory::Reinterpret<int32_t>(
position_y_export_sink.point_size().value()))));
} else {
// Constant radius.
point_radius_y = point_constant_radius_y;
@ -331,11 +334,12 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
}
// Then apply the floating-point viewport offset.
if (pa_cl_vte_cntl.vport_y_offset_ena) {
viewport_bottom += regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32;
viewport_bottom += regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET);
}
viewport_bottom += pa_cl_vte_cntl.vport_y_scale_ena
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
: 1.0f;
viewport_bottom +=
pa_cl_vte_cntl.vport_y_scale_ena
? std::abs(regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE))
: 1.0f;
// Using floor, or, rather, truncation (because maxing with zero anyway)
// similar to how viewport scissoring behaves on real AMD, Intel and Nvidia
// GPUs on Direct3D 12 (but not WARP), also like in

View File

@ -11,7 +11,6 @@
#include <algorithm>
#include <cmath>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/cvar.h"
@ -100,20 +99,20 @@ void GetPreferredFacePolygonOffset(const RegisterFile& regs,
// ones that are rendered (except for shadow volumes).
if (pa_su_sc_mode_cntl.poly_offset_front_enable &&
!pa_su_sc_mode_cntl.cull_front) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
}
if (pa_su_sc_mode_cntl.poly_offset_back_enable &&
!pa_su_sc_mode_cntl.cull_back && !scale && !offset) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
}
} else {
// Non-triangle primitives use the front offset, but it's toggled via
// poly_offset_para_enable.
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
}
}
scale_out = scale;
@ -148,7 +147,7 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader,
}
// Check if a color target is actually written.
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
uint32_t rts_remaining = shader.writes_color_targets();
uint32_t rt_index;
while (xe::bit_scan_forward(rts_remaining, &rt_index)) {
@ -311,24 +310,26 @@ void GetHostViewportInfo(const RegisterFile& regs,
// Obtain the original viewport values in a normalized way.
float scale_xy[] = {
pa_cl_vte_cntl.vport_x_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32
: 1.0f,
pa_cl_vte_cntl.vport_y_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
: 1.0f,
pa_cl_vte_cntl.vport_x_scale_ena
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE)
: 1.0f,
pa_cl_vte_cntl.vport_y_scale_ena
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f,
};
float scale_z = pa_cl_vte_cntl.vport_z_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE)
: 1.0f;
float offset_base_xy[] = {
pa_cl_vte_cntl.vport_x_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET)
: 0.0f,
pa_cl_vte_cntl.vport_y_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f,
};
float offset_z = pa_cl_vte_cntl.vport_z_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET)
: 0.0f;
// Calculate all the integer.0 or integer.5 offsetting exactly at full
// precision, separately so it can be used in other integer calculations
@ -615,7 +616,7 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs,
return 0;
}
uint32_t normalized_color_mask = 0;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
// Exclude the render targets not statically written to by the pixel shader.
// If the shader doesn't write to a render target, it shouldn't be written
@ -661,9 +662,8 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
? regs.Get<reg::SQ_VS_CONST>().base
: regs.Get<reg::SQ_PS_CONST>().base;
for (uint32_t constant_index : shader.memexport_stream_constants()) {
const auto& stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
XE_GPU_REG_SHADER_CONSTANT_000_X +
(float_constants_base + constant_index) * 4);
xenos::xe_gpu_memexport_stream_t stream =
regs.GetMemExportStream(float_constants_base + constant_index);
if (!stream.index_count) {
continue;
}
@ -705,7 +705,7 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
}
// Add a new range if haven't expanded an existing one.
if (!range_reused) {
ranges_out.emplace_back(stream.base_address, stream_size_bytes);
ranges_out.emplace_back(uint32_t(stream.base_address), stream_size_bytes);
}
}
}
@ -824,8 +824,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
// Get the extent of pixels covered by the resolve rectangle, according to the
// top-left rasterization rule.
// D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU.
auto fetch = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
xenos::xe_gpu_vertex_fetch_t fetch = regs.GetVertexFetch(0);
if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) {
XELOGE("Unsupported resolve vertex buffer format");
assert_always();
@ -994,7 +993,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
}
// Calculate the destination memory extent.
uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
uint32_t copy_dest_base_adjusted = rb_copy_dest_base;
uint32_t copy_dest_extent_start, copy_dest_extent_end;
auto rb_copy_dest_pitch = regs.Get<reg::RB_COPY_DEST_PITCH>();
@ -1164,9 +1163,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
info_out.copy_dest_info.copy_dest_swap = false;
}
info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32;
info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR];
info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR];
info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO];
XELOGD(
"Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially "

View File

@ -17,6 +17,7 @@
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
namespace xe {
namespace gpu {
@ -1102,10 +1103,10 @@ struct Src : OperandAddress {
}
// Single-value convenience overload: forwards x as each of the four arguments
// of the four-argument LI.
static Src LI(int32_t x) { return LI(x, x, x, x); }
static Src LF(float x, float y, float z, float w) {
return LU(*reinterpret_cast<const uint32_t*>(&x),
*reinterpret_cast<const uint32_t*>(&y),
*reinterpret_cast<const uint32_t*>(&z),
*reinterpret_cast<const uint32_t*>(&w));
return LU(xe::memory::Reinterpret<uint32_t>(x),
xe::memory::Reinterpret<uint32_t>(y),
xe::memory::Reinterpret<uint32_t>(z),
xe::memory::Reinterpret<uint32_t>(w));
}
// Single-value convenience overload: forwards x as each of the four arguments
// of the four-argument LF.
static Src LF(float x) { return LF(x, x, x, x); }
static Src LP(const uint32_t* xyzw) {
@ -1222,12 +1223,10 @@ struct Src : OperandAddress {
bool negate) {
if (is_integer) {
if (absolute) {
*reinterpret_cast<int32_t*>(&value) =
std::abs(*reinterpret_cast<const int32_t*>(&value));
value = uint32_t(std::abs(int32_t(value)));
}
if (negate) {
*reinterpret_cast<int32_t*>(&value) =
-*reinterpret_cast<const int32_t*>(&value);
value = uint32_t(-int32_t(value));
}
} else {
if (absolute) {

View File

@ -201,7 +201,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) {
}
assert_true(r < RegisterFile::kRegisterCount);
return register_file_.values[r].u32;
return register_file_.values[r];
}
void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
@ -219,7 +219,7 @@ void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
}
assert_true(r < RegisterFile::kRegisterCount);
register_file_.values[r].u32 = value;
register_file_.values[r] = value;
}
void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) {

View File

@ -42,7 +42,7 @@ struct PacketAction {
union {
struct {
uint32_t index;
RegisterFile::RegisterValue value;
uint32_t value;
} register_write;
struct {
uint64_t value;
@ -56,7 +56,7 @@ struct PacketAction {
PacketAction action;
action.type = Type::kRegisterWrite;
action.register_write.index = index;
action.register_write.value.u32 = value;
action.register_write.value = value;
return action;
}

View File

@ -498,8 +498,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
uint32_t index_size_log2 =
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
// The base should already be aligned, but aligning here too for safety.
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 &
~uint32_t((1 << index_size_log2) - 1);
guest_index_base =
regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
guest_index_buffer_needed_bytes = guest_draw_vertex_count
<< index_size_log2;
if (guest_index_base > SharedMemory::kBufferSize ||
@ -652,8 +652,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
uint32_t index_size_log2 =
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
// The base should already be aligned, but aligning here too for safety.
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 &
~uint32_t((1 << index_size_log2) - 1);
guest_index_base =
regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
guest_index_buffer_needed_bytes = guest_draw_vertex_count
<< index_size_log2;
if (guest_index_base > SharedMemory::kBufferSize ||

View File

@ -12,8 +12,12 @@
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/memory.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
@ -34,39 +38,53 @@ class RegisterFile {
static const RegisterInfo* GetRegisterInfo(uint32_t index);
static constexpr size_t kRegisterCount = 0x5003;
union RegisterValue {
uint32_t u32;
float f32;
};
RegisterValue values[kRegisterCount];
uint32_t values[kRegisterCount];
const uint32_t& operator[](uint32_t reg) const { return values[reg]; }
uint32_t& operator[](uint32_t reg) { return values[reg]; }
const RegisterValue& operator[](uint32_t reg) const { return values[reg]; }
RegisterValue& operator[](uint32_t reg) { return values[reg]; }
const RegisterValue& operator[](Register reg) const { return values[reg]; }
RegisterValue& operator[](Register reg) { return values[reg]; }
template <typename T>
const T& Get(uint32_t reg) const {
return *reinterpret_cast<const T*>(&values[reg]);
T Get(uint32_t reg) const {
return xe::memory::Reinterpret<T>(values[reg]);
}
template <typename T>
T& Get(uint32_t reg) {
return *reinterpret_cast<T*>(&values[reg]);
T Get(Register reg) const {
return Get<T>(static_cast<uint32_t>(reg));
}
template <typename T>
const T& Get(Register reg) const {
return *reinterpret_cast<const T*>(&values[reg]);
T Get() const {
return Get<T>(T::register_index);
}
template <typename T>
T& Get(Register reg) {
return *reinterpret_cast<T*>(&values[reg]);
// Returns a by-value copy of the vertex fetch constant number `index`.
//
// The constant is read from the `values` register array, starting at
// XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0, and is copied out with std::memcpy
// into a local object instead of being accessed through a cast pointer into
// the register storage. `index` counts whole vertex fetch constants, not
// registers, and is asserted to be below 96.
xenos::xe_gpu_vertex_fetch_t GetVertexFetch(uint32_t index) const {
assert_true(index < 96);
xenos::xe_gpu_vertex_fetch_t fetch;
// sizeof(fetch) / sizeof(uint32_t) is the number of 32-bit registers one
// vertex fetch constant occupies, so this converts the constant index into a
// register offset from the first fetch constant register.
std::memcpy(&fetch,
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(sizeof(fetch) / sizeof(uint32_t)) * index],
sizeof(fetch));
return fetch;
}
template <typename T>
const T& Get() const {
return *reinterpret_cast<const T*>(&values[T::register_index]);
// Returns a by-value copy of the texture fetch constant number `index`.
//
// Texture fetch constants are read from the same register range as vertex
// fetch constants (starting at XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); only
// the per-constant stride, derived from the size of the texture fetch
// structure, differs. The data is copied out with std::memcpy rather than
// accessed through a cast pointer into the register storage. `index` counts
// whole texture fetch constants and is asserted to be below 32.
xenos::xe_gpu_texture_fetch_t GetTextureFetch(uint32_t index) const {
assert_true(index < 32);
xenos::xe_gpu_texture_fetch_t fetch;
// sizeof(fetch) / sizeof(uint32_t) is the number of 32-bit registers one
// texture fetch constant occupies, so this converts the constant index into
// a register offset from the first fetch constant register.
std::memcpy(&fetch,
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(sizeof(fetch) / sizeof(uint32_t)) * index],
sizeof(fetch));
return fetch;
}
template <typename T>
T& Get() {
return *reinterpret_cast<T*>(&values[T::register_index]);
// Returns a by-value copy of the memory export stream description stored in
// the float constant number `float_constant_index`.
//
// The source is the float constant register range beginning at
// XE_GPU_REG_SHADER_CONSTANT_000_X; sizeof(stream) bytes are copied with
// std::memcpy into a local object rather than accessed through a cast pointer
// into the register storage. `float_constant_index` is asserted to be below
// 512.
xenos::xe_gpu_memexport_stream_t GetMemExportStream(
uint32_t float_constant_index) const {
assert_true(float_constant_index < 512);
xenos::xe_gpu_memexport_stream_t stream;
// Consecutive float constant indices are 4 registers apart, hence the
// multiplication by 4 to get the register offset.
std::memcpy(
&stream,
&values[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * float_constant_index],
sizeof(stream));
return stream;
}
};

View File

@ -28,10 +28,7 @@ void ShaderInterpreter::Execute() {
state_.Reset();
const uint32_t* bool_constants =
&register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32;
const xenos::LoopConstant* loop_constants =
reinterpret_cast<const xenos::LoopConstant*>(
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32);
&register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031];
bool exec_ended = false;
uint32_t cf_index_next = 1;
@ -140,8 +137,8 @@ void ShaderInterpreter::Execute() {
cf_index_next = cf_loop_start.address();
continue;
}
xenos::LoopConstant loop_constant =
loop_constants[cf_loop_start.loop_id()];
auto loop_constant = register_file_.Get<xenos::LoopConstant>(
XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + cf_loop_start.loop_id());
state_.loop_constants[state_.loop_stack_depth] = loop_constant;
uint32_t& loop_iterator_ref =
state_.loop_iterators[state_.loop_stack_depth];
@ -170,8 +167,11 @@ void ShaderInterpreter::Execute() {
&cf_instr);
xenos::LoopConstant loop_constant =
state_.loop_constants[state_.loop_stack_depth - 1];
assert_true(loop_constant.value ==
loop_constants[cf_loop_end.loop_id()].value);
assert_zero(
std::memcmp(&loop_constant,
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 +
cf_loop_end.loop_id()],
sizeof(loop_constant)));
uint32_t loop_iterator =
++state_.loop_iterators[state_.loop_stack_depth - 1];
if (loop_iterator < loop_constant.count &&
@ -257,28 +257,31 @@ void ShaderInterpreter::Execute() {
}
}
const float* ShaderInterpreter::GetFloatConstant(
const std::array<float, 4> ShaderInterpreter::GetFloatConstant(
uint32_t address, bool is_relative, bool relative_address_is_a0) const {
static const float zero[4] = {};
int32_t index = int32_t(address);
if (is_relative) {
index += relative_address_is_a0 ? state_.address_register
: state_.GetLoopAddress();
}
if (index < 0) {
return zero;
return std::array<float, 4>();
}
auto base_and_size_minus_1 = register_file_.Get<reg::SQ_VS_CONST>(
shader_type_ == xenos::ShaderType::kVertex ? XE_GPU_REG_SQ_VS_CONST
: XE_GPU_REG_SQ_PS_CONST);
if (uint32_t(index) > base_and_size_minus_1.size) {
return zero;
return std::array<float, 4>();
}
index += base_and_size_minus_1.base;
if (index >= 512) {
return zero;
return std::array<float, 4>();
}
return &register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index].f32;
std::array<float, 4> value;
std::memcpy(value.data(),
&register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index],
sizeof(float) * 4);
return value;
}
void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
@ -297,6 +300,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
const float* vector_src_ptr;
uint32_t vector_src_register = instr.src_reg(1 + i);
bool vector_src_absolute = false;
std::array<float, 4> vector_src_float_constant;
if (instr.src_is_temp(1 + i)) {
vector_src_ptr = GetTempRegister(
ucode::AluInstruction::src_temp_reg(vector_src_register),
@ -304,9 +308,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
vector_src_register);
} else {
vector_src_ptr = GetFloatConstant(
vector_src_float_constant = GetFloatConstant(
vector_src_register, instr.src_const_is_addressed(1 + i),
instr.is_const_address_register_relative());
vector_src_ptr = vector_src_float_constant.data();
}
uint32_t vector_src_absolute_mask =
~(uint32_t(vector_src_absolute) << 31);
@ -618,6 +623,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
// r#/c#.w or r#/c#.wx.
const float* scalar_src_ptr;
uint32_t scalar_src_register = instr.src_reg(3);
std::array<float, 4> scalar_src_float_constant;
if (instr.src_is_temp(3)) {
scalar_src_ptr = GetTempRegister(
ucode::AluInstruction::src_temp_reg(scalar_src_register),
@ -625,9 +631,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
scalar_src_register);
} else {
scalar_src_ptr = GetFloatConstant(
scalar_src_float_constant = GetFloatConstant(
scalar_src_register, instr.src_const_is_addressed(3),
instr.is_const_address_register_relative());
scalar_src_ptr = scalar_src_float_constant.data();
}
uint32_t scalar_src_swizzle = instr.src_swizzle(3);
scalar_operand_component_count =
@ -984,10 +991,8 @@ void ShaderInterpreter::ExecuteVertexFetchInstruction(
state_.vfetch_full_last = instr;
}
xenos::xe_gpu_vertex_fetch_t fetch_constant =
*reinterpret_cast<const xenos::xe_gpu_vertex_fetch_t*>(
&register_file_[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
state_.vfetch_full_last.fetch_constant_index()]);
xenos::xe_gpu_vertex_fetch_t fetch_constant = register_file_.GetVertexFetch(
state_.vfetch_full_last.fetch_constant_index());
if (!instr.is_mini_fetch()) {
// Get the part of the address that depends on vfetch_full data.

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_SHADER_INTERPRETER_H_
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
@ -120,8 +121,8 @@ class ShaderInterpreter {
float* GetTempRegister(uint32_t address, bool is_relative) {
return temp_registers_[GetTempRegisterIndex(address, is_relative)];
}
const float* GetFloatConstant(uint32_t address, bool is_relative,
bool relative_address_is_a0) const;
const std::array<float, 4> GetFloatConstant(
uint32_t address, bool is_relative, bool relative_address_is_a0) const;
void ExecuteAluInstruction(ucode::AluInstruction instr);
void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle,

View File

@ -333,8 +333,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
uint32_t index_bit = UINT32_C(1) << index;
textures_remaining &= ~index_bit;
TextureBinding& binding = texture_bindings_[index];
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6);
xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(index);
TextureKey old_key = binding.key;
uint8_t old_swizzled_signs = binding.swizzled_signs;
BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs);

View File

@ -19,6 +19,7 @@
#include "xenia/base/filesystem.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/platform.h"
#include "xenia/base/string.h"
#include "xenia/base/system.h"
@ -357,9 +358,10 @@ void TraceViewer::DrawPacketDisassemblerUI() {
ImGui::NextColumn();
if (!register_info ||
register_info->type == RegisterInfo::Type::kDword) {
ImGui::Text("%.8X", action.register_write.value.u32);
ImGui::Text("%.8X", action.register_write.value);
} else {
ImGui::Text("%8f", action.register_write.value.f32);
ImGui::Text("%8f", xe::memory::Reinterpret<float>(
action.register_write.value));
}
ImGui::Columns(1);
break;
@ -709,10 +711,8 @@ void TraceViewer::DrawTextureInfo(
const Shader::TextureBinding& texture_binding) {
auto& regs = *graphics_system_->register_file();
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
texture_binding.fetch_constant * 6;
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]);
auto& fetch = group->texture_fetch;
xenos::xe_gpu_texture_fetch_t fetch =
regs.GetTextureFetch(texture_binding.fetch_constant);
if (fetch.type != xenos::FetchConstantType::kTexture &&
(!cvars::gpu_allow_invalid_fetch_constants ||
fetch.type != xenos::FetchConstantType::kInvalidTexture)) {
@ -780,9 +780,9 @@ void TraceViewer::DrawFailedTextureInfo(
void TraceViewer::DrawVertexFetcher(Shader* shader,
const Shader::VertexBinding& vertex_binding,
const xe_gpu_vertex_fetch_t* fetch) {
const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2);
uint32_t vertex_count = fetch->size / vertex_binding.stride_words;
const xe_gpu_vertex_fetch_t& fetch) {
const uint8_t* addr = memory_->TranslatePhysical(fetch.address << 2);
uint32_t vertex_count = fetch.size / vertex_binding.stride_words;
int column_count = 0;
for (const auto& attrib : vertex_binding.attributes) {
switch (attrib.fetch_instr.attributes.data_format) {
@ -883,7 +883,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
#define LOADEL(type, wo) \
GpuSwap(xe::load<type>(vstart + \
(attrib.fetch_instr.attributes.offset + wo) * 4), \
fetch->endian)
fetch.endian)
switch (attrib.fetch_instr.attributes.data_format) {
case xenos::VertexFormat::k_32:
ImGui::Text("%.8X", LOADEL(uint32_t, 0));
@ -1187,7 +1187,7 @@ void TraceViewer::DrawStateUI() {
}
auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7);
const char* mode_name = "Unknown";
switch (enable_mode) {
@ -1210,7 +1210,7 @@ void TraceViewer::DrawStateUI() {
break;
}
case ModeControl::kCopy: {
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(),
copy_dest_base);
break;
@ -1221,9 +1221,9 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Viewport State:");
if (true) {
ImGui::TreePush((const void*)0);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
if ((pa_su_sc_mode_cntl >> 16) & 1) {
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET];
int16_t window_offset_x = window_offset & 0x7FFF;
int16_t window_offset_y = (window_offset >> 16) & 0x7FFF;
if (window_offset_x & 0x4000) {
@ -1237,8 +1237,8 @@ void TraceViewer::DrawStateUI() {
} else {
ImGui::BulletText("Window Offset: disabled");
}
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL];
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR];
ImGui::BulletText(
"Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF,
(window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF,
@ -1246,7 +1246,7 @@ void TraceViewer::DrawStateUI() {
(window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF),
((window_scissor_br >> 16) & 0x7FFF) -
((window_scissor_tl >> 16) & 0x7FFF));
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF;
uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = (surface_info >> 16) & 0x3;
@ -1258,7 +1258,7 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Surface Pitch: %d", surface_pitch);
ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz);
ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]);
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL];
bool vport_xscale_enable = (vte_control & (1 << 0)) > 0;
bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0;
bool vport_yscale_enable = (vte_control & (1 << 2)) > 0;
@ -1273,14 +1273,20 @@ void TraceViewer::DrawStateUI() {
}
ImGui::BulletText(
"Viewport Offset: %f, %f, %f",
vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0,
vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0,
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0);
vport_xoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET)
: 0.0f,
vport_yoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f,
vport_zoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET)
: 0.0f);
ImGui::BulletText(
"Viewport Scale: %f, %f, %f",
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1,
vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1,
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1);
vport_xscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE)
: 1.0f,
vport_yscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f,
vport_zscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE)
: 1.0f);
if (!vport_xscale_enable) {
ImGui::PopStyleColor();
}
@ -1290,7 +1296,7 @@ void TraceViewer::DrawStateUI() {
((vte_control >> 8) & 0x1) ? "y/w0" : "y",
((vte_control >> 9) & 0x1) ? "z/w0" : "z",
((vte_control >> 10) & 0x1) ? "w0" : "1/w0");
uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL];
bool clip_enabled = ((clip_control >> 17) & 0x1) == 0;
bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1;
ImGui::BulletText("Clip Enabled: %s, DX Clip: %s",
@ -1302,11 +1308,9 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Rasterizer State:");
if (true) {
ImGui::TreePush((const void*)0);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
uint32_t pa_sc_screen_scissor_tl =
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32;
uint32_t pa_sc_screen_scissor_br =
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32;
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
uint32_t pa_sc_screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL];
uint32_t pa_sc_screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR];
if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) {
int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF;
int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF;
@ -1361,7 +1365,7 @@ void TraceViewer::DrawStateUI() {
}
ImGui::Columns(1);
auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
uint32_t surface_pitch = rb_surface_info & 0x3FFF;
auto surface_msaa =
static_cast<xenos::MsaaSamples>((rb_surface_info >> 16) & 0x3);
@ -1370,39 +1374,39 @@ void TraceViewer::DrawStateUI() {
if (enable_mode != ModeControl::kDepth) {
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL];
if ((color_control & 0x8) != 0) {
ImGui::BulletText("Alpha Test: %s %.2f",
kCompareFuncNames[color_control & 0x7],
regs[XE_GPU_REG_RB_ALPHA_REF].f32);
regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF));
} else {
ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored);
ImGui::BulletText("Alpha Test: disabled");
ImGui::PopStyleColor();
}
auto blend_color = ImVec4(regs[XE_GPU_REG_RB_BLEND_RED].f32,
regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32);
auto blend_color = ImVec4(regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA));
ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x,
blend_color.y, blend_color.z, blend_color.w);
ImGui::SameLine();
// TODO small_height (was true) parameter was removed
ImGui::ColorButton(nullptr, blend_color);
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32,
regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
regs[XE_GPU_REG_RB_COLOR_INFO],
regs[XE_GPU_REG_RB_COLOR1_INFO],
regs[XE_GPU_REG_RB_COLOR2_INFO],
regs[XE_GPU_REG_RB_COLOR3_INFO],
};
uint32_t rb_blendcontrol[4] = {
regs[XE_GPU_REG_RB_BLENDCONTROL0].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL1].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL2].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL3].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL0],
regs[XE_GPU_REG_RB_BLENDCONTROL1],
regs[XE_GPU_REG_RB_BLENDCONTROL2],
regs[XE_GPU_REG_RB_BLENDCONTROL3],
};
ImGui::Columns(2);
for (int i = 0; i < xe::countof(color_info); ++i) {
@ -1511,9 +1515,9 @@ void TraceViewer::DrawStateUI() {
}
if (ImGui::CollapsingHeader("Depth/Stencil Target")) {
auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL];
auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK];
auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO];
bool uses_depth =
(rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004);
uint32_t stencil_ref = (rb_stencilrefmask & 0xFF);
@ -1697,10 +1701,9 @@ void TraceViewer::DrawStateUI() {
draw_info.index_buffer_size,
kIndexFormatNames[int(draw_info.index_format)],
kEndiannessNames[int(draw_info.index_endianness)]);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
if (pa_su_sc_mode_cntl & (1 << 21)) {
uint32_t reset_index =
regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
uint32_t reset_index = regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX];
if (draw_info.index_format == xenos::IndexFormat::kInt16) {
ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF);
} else {
@ -1760,30 +1763,16 @@ void TraceViewer::DrawStateUI() {
auto shader = command_processor->active_vertex_shader();
if (shader) {
for (const auto& vertex_binding : shader->vertex_bindings()) {
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(vertex_binding.fetch_constant / 3) * 6;
const auto group =
reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
const xe_gpu_vertex_fetch_t* fetch = nullptr;
switch (vertex_binding.fetch_constant % 3) {
case 0:
fetch = &group->vertex_fetch_0;
break;
case 1:
fetch = &group->vertex_fetch_1;
break;
case 2:
fetch = &group->vertex_fetch_2;
break;
}
assert_true(fetch->endian == xenos::Endian::k8in32);
xe_gpu_vertex_fetch_t fetch =
regs.GetVertexFetch(vertex_binding.fetch_constant);
assert_true(fetch.endian == xenos::Endian::k8in32);
char tree_root_id[32];
sprintf(tree_root_id, "#vertices_root_%d",
vertex_binding.fetch_constant);
if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s",
vertex_binding.fetch_constant, fetch->address << 2,
fetch->size * 4,
kEndiannessNames[int(fetch->endian)])) {
vertex_binding.fetch_constant, fetch.address << 2,
fetch.size * 4,
kEndiannessNames[int(fetch.endian)])) {
ImGui::BeginChild("#vertices", ImVec2(0, 300));
DrawVertexFetcher(shader, vertex_binding, fetch);
ImGui::EndChild();
@ -1831,7 +1820,7 @@ void TraceViewer::DrawStateUI() {
ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6,
(i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6);
ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32);
ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn();
}
ImGui::Columns(1);
@ -1842,8 +1831,9 @@ void TraceViewer::DrawStateUI() {
i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) {
ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4);
ImGui::NextColumn();
ImGui::Text("%f, %f, %f, %f", regs[i + 0].f32, regs[i + 1].f32,
regs[i + 2].f32, regs[i + 3].f32);
ImGui::Text("%f, %f, %f, %f", regs.Get<float>(i + 0),
regs.Get<float>(i + 1), regs.Get<float>(i + 2),
regs.Get<float>(i + 3));
ImGui::NextColumn();
}
ImGui::Columns(1);
@ -1856,7 +1846,7 @@ void TraceViewer::DrawStateUI() {
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32,
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31);
ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32);
ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn();
}
ImGui::Columns(1);
@ -1867,7 +1857,7 @@ void TraceViewer::DrawStateUI() {
i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) {
ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00);
ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32);
ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn();
}
ImGui::Columns(1);

View File

@ -122,7 +122,7 @@ class TraceViewer : public xe::ui::WindowedApp {
void DrawVertexFetcher(Shader* shader,
const Shader::VertexBinding& vertex_binding,
const xenos::xe_gpu_vertex_fetch_t* fetch);
const xenos::xe_gpu_vertex_fetch_t& fetch);
TraceViewerWindowListener window_listener_;

View File

@ -2486,8 +2486,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
(uint64_t(1) << (vfetch_index & 63))) {
continue;
}
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
xenos::xe_gpu_vertex_fetch_t vfetch_constant =
regs.GetVertexFetch(vfetch_index);
switch (vfetch_constant.type) {
case xenos::FetchConstantType::kVertex:
break;
@ -3285,10 +3285,10 @@ void VulkanCommandProcessor::UpdateDynamicState(
// Blend constants.
float blend_constants[] = {
regs[XE_GPU_REG_RB_BLEND_RED].f32,
regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
};
dynamic_blend_constants_update_needed_ |=
std::memcmp(dynamic_blend_constants_, blend_constants,
@ -3434,7 +3434,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
const RegisterFile& regs = *register_file_;
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
@ -3442,7 +3442,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
auto vgt_indx_offset = regs.Get<int32_t>(XE_GPU_REG_VGT_INDX_OFFSET);
bool edram_fragment_shader_interlock =
render_target_cache_->GetPath() ==
@ -3755,7 +3755,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
system_constants_.edram_rt_format_flags[i] = format_flags;
uint32_t blend_factors_ops =
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
blend_factors_ops;
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
@ -3784,22 +3784,22 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
if (primitive_polygonal) {
if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
}
if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
poly_offset_back_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
}
} else {
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset;
}
@ -3862,21 +3862,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
}
dirty |= system_constants_.edram_blend_constant[0] !=
regs[XE_GPU_REG_RB_BLEND_RED].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
system_constants_.edram_blend_constant[0] =
regs[XE_GPU_REG_RB_BLEND_RED].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
dirty |= system_constants_.edram_blend_constant[1] !=
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
system_constants_.edram_blend_constant[1] =
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
dirty |= system_constants_.edram_blend_constant[2] !=
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
system_constants_.edram_blend_constant[2] =
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
dirty |= system_constants_.edram_blend_constant[3] !=
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
system_constants_.edram_blend_constant[3] =
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
}
if (dirty) {
@ -3903,10 +3903,10 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
// These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now cause nothing seems to differ on the Xbox
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
// Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& float_constant_map_vertex =
vertex_shader->constant_register_map();
@ -4001,8 +4001,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)]
.f32,
(float_constant_index << 2)],
sizeof(float) * 4);
mapping += sizeof(float) * 4;
}
@ -4033,8 +4032,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)]
.f32,
(float_constant_index << 2)],
sizeof(float) * 4);
mapping += sizeof(float) * 4;
}
@ -4055,7 +4053,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
return false;
}
buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize);
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
kBoolLoopConstantsSize);
current_constant_buffers_up_to_date_ |=
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop;
@ -4073,7 +4071,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
return false;
}
buffer_info.range = VkDeviceSize(kFetchConstantsSize);
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0],
kFetchConstantsSize);
current_constant_buffers_up_to_date_ |=
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch;

View File

@ -718,7 +718,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
[common_blend_rt_index]),
(((normalized_color_mask &
~(uint32_t(0b1111) << (4 * common_blend_rt_index)))
? regs[XE_GPU_REG_RB_COLOR_MASK].u32
? regs[XE_GPU_REG_RB_COLOR_MASK]
: normalized_color_mask) >>
(4 * common_blend_rt_index)) &
0b1111,

View File

@ -612,8 +612,8 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView(
VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters(
const VulkanShader::SamplerBinding& binding) const {
const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6);
xenos::xe_gpu_texture_fetch_t fetch =
regs.GetTextureFetch(binding.fetch_constant);
SamplerParameters parameters;
@ -875,8 +875,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture(
uint32_t& width_scaled_out, uint32_t& height_scaled_out,
xenos::TextureFormat& format_out) {
const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
TextureKey key;
BindingInfoFromFetchConstant(fetch, key, nullptr);
if (!key.is_valid || key.base_page == 0 ||

View File

@ -12,6 +12,7 @@
#include <cmath>
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
namespace xe {
namespace gpu {
@ -118,8 +119,8 @@ float Float7e3To32(uint32_t f10) {
exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
mantissa = (mantissa << mantissa_lzcnt) & 0x7F;
}
uint32_t f32 = ((exponent + 124) << 23) | (mantissa << 3);
return *reinterpret_cast<const float*>(&f32);
return xe::memory::Reinterpret<float>(
uint32_t(((exponent + 124) << 23) | (mantissa << 3)));
}
// Based on CFloat24 from d3dref9.dll and the 6e4 code from:
@ -131,7 +132,7 @@ uint32_t Float32To20e4(float f32, bool round_to_nearest_even) {
// Positive only, and not -0 or NaN.
return 0;
}
uint32_t f32u32 = *reinterpret_cast<const uint32_t*>(&f32);
auto f32u32 = xe::memory::Reinterpret<uint32_t>(f32);
if (f32u32 >= 0x3FFFFFF8) {
// Saturate.
return 0xFFFFFF;
@ -165,8 +166,8 @@ float Float20e4To32(uint32_t f24) {
exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF;
}
uint32_t f32 = ((exponent + 112) << 23) | (mantissa << 3);
return *reinterpret_cast<const float*>(&f32);
return xe::memory::Reinterpret<float>(
uint32_t(((exponent + 112) << 23) | (mantissa << 3)));
}
const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) {