Merge branch 'master' of https://github.com/xenia-project/xenia into canary_experimental

This commit is contained in:
Gliniak 2024-05-31 22:43:59 +02:00
commit b3f2ab0e96
38 changed files with 3073 additions and 1908 deletions

View File

@ -122,7 +122,7 @@ void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel,
auto in = reinterpret_cast<const float*>(samples[j]); auto in = reinterpret_cast<const float*>(samples[j]);
// Raw samples sometimes aren't within [-1, 1] // Raw samples sometimes aren't within [-1, 1]
float scaled_sample = xe::saturate_signed(in[i]) * scale; float scaled_sample = xe::clamp_float(in[i], -1.0f, 1.0f) * scale;
// Convert the sample and output it in big endian. // Convert the sample and output it in big endian.
auto sample = static_cast<int16_t>(scaled_sample); auto sample = static_cast<int16_t>(scaled_sample);

View File

@ -72,20 +72,22 @@ constexpr T round_up(T value, V multiple, bool force_non_zero = true) {
return (value + multiple - 1) / multiple * multiple; return (value + multiple - 1) / multiple * multiple;
} }
// Using the same conventions as in shading languages, returning 0 for NaN. // For NaN, returns min_value (or, if it's NaN too, max_value).
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument is // If either of the boundaries is zero, and if the value is at that boundary or
// always returned. Also -0 is not < +0, so +0 is also chosen for it. // exceeds it, the result will have the sign of that boundary. If both
// boundaries are zero, which sign is selected among the argument signs is not
// explicitly defined.
template <typename T> template <typename T>
constexpr T saturate_unsigned(T value) { T clamp_float(T value, T min_value, T max_value) {
return std::min(static_cast<T>(1.0f), std::max(static_cast<T>(0.0f), value)); float clamped_to_min = std::isgreater(value, min_value) ? value : min_value;
return std::isless(clamped_to_min, max_value) ? clamped_to_min : max_value;
} }
// This diverges from the GPU NaN rules for signed normalized formats (NaN // Using the same conventions as in shading languages, returning 0 for NaN.
// should be converted to 0, not to -1), but this expectation is not needed most // 0 is always returned as positive.
// of time, and cannot be met for free (unlike for 0...1 clamping).
template <typename T> template <typename T>
constexpr T saturate_signed(T value) { T saturate(T value) {
return std::min(static_cast<T>(1.0f), std::max(static_cast<T>(-1.0f), value)); return clamp_float(value, static_cast<T>(0.0f), static_cast<T>(1.0f));
} }
// Gets the next power of two value that is greater than or equal to the given // Gets the next power of two value that is greater than or equal to the given
@ -365,12 +367,6 @@ inline uint64_t rotate_right(uint64_t v, uint8_t sh) {
} }
#endif // XE_PLATFORM_WIN32 #endif // XE_PLATFORM_WIN32
template <typename T>
T clamp(T value, T min_value, T max_value) {
const T t = value < min_value ? min_value : value;
return t > max_value ? max_value : t;
}
#if XE_ARCH_AMD64 #if XE_ARCH_AMD64
// Utilities for SSE values. // Utilities for SSE values.
template <int N> template <int N>

View File

@ -16,12 +16,37 @@
#include <functional> #include <functional>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <type_traits>
#include "xenia/base/byte_order.h" #include "xenia/base/byte_order.h"
namespace xe { namespace xe {
namespace memory { namespace memory {
// For variable declarations (not return values or `this` pointer).
// Not propagated.
#define XE_RESTRICT_VAR __restrict
// Aliasing-safe bit reinterpretation.
// For more complex cases such as non-trivially-copyable types, write copying
// code respecting the requirements for them externally instead of using these
// functions.
template <typename Dst, typename Src>
void Reinterpret(Dst& XE_RESTRICT_VAR dst, const Src& XE_RESTRICT_VAR src) {
static_assert(sizeof(Dst) == sizeof(Src));
static_assert(std::is_trivially_copyable_v<Dst>);
static_assert(std::is_trivially_copyable_v<Src>);
std::memcpy(&dst, &src, sizeof(Dst));
}
template <typename Dst, typename Src>
Dst Reinterpret(const Src& XE_RESTRICT_VAR src) {
Dst dst;
Reinterpret(dst, src);
return dst;
}
#if XE_PLATFORM_ANDROID #if XE_PLATFORM_ANDROID
void AndroidInitialize(); void AndroidInitialize();
void AndroidShutdown(); void AndroidShutdown();

View File

@ -107,10 +107,11 @@ TEST_CASE("WinSystemClock <-> XSystemClock", "[clock_cast]") {
auto error2 = xsys.time_since_epoch() - wxsys.time_since_epoch(); auto error2 = xsys.time_since_epoch() - wxsys.time_since_epoch();
auto error3 = wsys - wxsys; auto error3 = wsys - wxsys;
REQUIRE(error1 < 10ms); // In AppVeyor, the difference often can be as large as roughly 16ms.
REQUIRE(error1 > -10ms); REQUIRE(error1 < 20ms);
REQUIRE(error2 < 10ms); REQUIRE(error1 > -20ms);
REQUIRE(error2 > -10ms); REQUIRE(error2 < 20ms);
REQUIRE(error2 > -20ms);
REQUIRE(error3 < duration); REQUIRE(error3 < duration);
REQUIRE(error3 > -duration); REQUIRE(error3 > -duration);
} }

View File

@ -182,7 +182,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, top_panes_height)); ImVec2(kSplitterWidth, top_panes_height));
if (ImGui::IsItemActive()) { if (ImGui::IsItemActive()) {
function_pane_width += io.MouseDelta.x; function_pane_width += io.MouseDelta.x;
function_pane_width = xe::clamp(function_pane_width, 30.0f, FLT_MAX); function_pane_width = xe::clamp_float(function_pane_width, 30.0f, FLT_MAX);
} }
ImGui::SameLine(); ImGui::SameLine();
ImGui::BeginChild("##source_pane", ImGui::BeginChild("##source_pane",
@ -194,7 +194,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, top_panes_height)); ImVec2(kSplitterWidth, top_panes_height));
if (ImGui::IsItemActive()) { if (ImGui::IsItemActive()) {
source_pane_width += io.MouseDelta.x; source_pane_width += io.MouseDelta.x;
source_pane_width = xe::clamp(source_pane_width, 30.0f, FLT_MAX); source_pane_width = xe::clamp_float(source_pane_width, 30.0f, FLT_MAX);
} }
ImGui::SameLine(); ImGui::SameLine();
ImGui::BeginChild("##registers_pane", ImGui::BeginChild("##registers_pane",
@ -206,7 +206,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, top_panes_height)); ImVec2(kSplitterWidth, top_panes_height));
if (ImGui::IsItemActive()) { if (ImGui::IsItemActive()) {
registers_pane_width += io.MouseDelta.x; registers_pane_width += io.MouseDelta.x;
registers_pane_width = xe::clamp(registers_pane_width, 30.0f, FLT_MAX); registers_pane_width =
xe::clamp_float(registers_pane_width, 30.0f, FLT_MAX);
} }
ImGui::SameLine(); ImGui::SameLine();
ImGui::BeginChild("##right_pane", ImVec2(0, top_panes_height), true); ImGui::BeginChild("##right_pane", ImVec2(0, top_panes_height), true);
@ -234,7 +235,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImGui::InvisibleButton("##hsplitter0", ImVec2(-1, kSplitterWidth)); ImGui::InvisibleButton("##hsplitter0", ImVec2(-1, kSplitterWidth));
if (ImGui::IsItemActive()) { if (ImGui::IsItemActive()) {
bottom_panes_height -= io.MouseDelta.y; bottom_panes_height -= io.MouseDelta.y;
bottom_panes_height = xe::clamp(bottom_panes_height, 30.0f, FLT_MAX); bottom_panes_height = xe::clamp_float(bottom_panes_height, 30.0f, FLT_MAX);
} }
ImGui::BeginChild("##log_pane", ImVec2(log_pane_width, bottom_panes_height), ImGui::BeginChild("##log_pane", ImVec2(log_pane_width, bottom_panes_height),
true); true);
@ -245,7 +246,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, bottom_panes_height)); ImVec2(kSplitterWidth, bottom_panes_height));
if (ImGui::IsItemActive()) { if (ImGui::IsItemActive()) {
breakpoints_pane_width -= io.MouseDelta.x; breakpoints_pane_width -= io.MouseDelta.x;
breakpoints_pane_width = xe::clamp(breakpoints_pane_width, 30.0f, FLT_MAX); breakpoints_pane_width =
xe::clamp_float(breakpoints_pane_width, 30.0f, FLT_MAX);
} }
ImGui::SameLine(); ImGui::SameLine();
ImGui::BeginChild("##breakpoints_pane", ImVec2(0, 0), true); ImGui::BeginChild("##breakpoints_pane", ImVec2(0, 0), true);

View File

@ -455,9 +455,9 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
// Scratch register writeback. // Scratch register writeback.
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) { if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0; uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) { if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK]) {
// Enabled - write to address. // Enabled - write to address.
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32; uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR];
uint32_t mem_addr = scratch_addr + (scratch_reg * 4); uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value); xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value);
} }
@ -467,7 +467,7 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
// This will block the command processor the next time it WAIT_MEM_REGs // This will block the command processor the next time it WAIT_MEM_REGs
// and allow us to synchronize the memory. // and allow us to synchronize the memory.
case XE_GPU_REG_COHER_STATUS_HOST: { case XE_GPU_REG_COHER_STATUS_HOST: {
regs.values[index].u32 |= UINT32_C(0x80000000); regs.values[index] |= UINT32_C(0x80000000);
} break; } break;
case XE_GPU_REG_DC_LUT_RW_INDEX: { case XE_GPU_REG_DC_LUT_RW_INDEX: {
@ -478,12 +478,12 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
case XE_GPU_REG_DC_LUT_SEQ_COLOR: { case XE_GPU_REG_DC_LUT_SEQ_COLOR: {
// Should be in the 256-entry table writing mode. // Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>(); auto gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write // DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write
// enable mask is blue, green, red. // enable mask is blue, green, red.
bool write_gamma_ramp_component = bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) { if (write_gamma_ramp_component) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry = reg::DC_LUT_30_COLOR& gamma_ramp_entry =
@ -505,7 +505,11 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
} }
if (++gamma_ramp_rw_component_ >= 3) { if (++gamma_ramp_rw_component_ >= 3) {
gamma_ramp_rw_component_ = 0; gamma_ramp_rw_component_ = 0;
++gamma_ramp_rw_index.rw_index; reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index;
++new_gamma_ramp_rw_index.rw_index;
WriteRegister(
XE_GPU_REG_DC_LUT_RW_INDEX,
xe::memory::Reinterpret<uint32_t>(new_gamma_ramp_rw_index));
} }
if (write_gamma_ramp_component) { if (write_gamma_ramp_component) {
OnGammaRamp256EntryTableValueWritten(); OnGammaRamp256EntryTableValueWritten();
@ -514,14 +518,14 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
case XE_GPU_REG_DC_LUT_PWL_DATA: { case XE_GPU_REG_DC_LUT_PWL_DATA: {
// Should be in the PWL writing mode. // Should be in the PWL writing mode.
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>(); auto gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// Bit 7 of the index is ignored for PWL. // Bit 7 of the index is ignored for PWL.
uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F; uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F;
// DC_LUT_PWL_DATA is likely in the red, green, blue order because // DC_LUT_PWL_DATA is likely in the red, green, blue order because
// DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red. // DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red.
bool write_gamma_ramp_component = bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) { if (write_gamma_ramp_component) {
reg::DC_LUT_PWL_DATA& gamma_ramp_entry = reg::DC_LUT_PWL_DATA& gamma_ramp_entry =
@ -534,13 +538,17 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
} }
if (++gamma_ramp_rw_component_ >= 3) { if (++gamma_ramp_rw_component_ >= 3) {
gamma_ramp_rw_component_ = 0; gamma_ramp_rw_component_ = 0;
reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index;
// TODO(Triang3l): Should this increase beyond 7 bits for PWL? // TODO(Triang3l): Should this increase beyond 7 bits for PWL?
// Direct3D 9 explicitly sets rw_index to 0x80 after writing the last // Direct3D 9 explicitly sets rw_index to 0x80 after writing the last
// PWL entry. However, the DC_LUT_RW_INDEX documentation says that for // PWL entry. However, the DC_LUT_RW_INDEX documentation says that for
// PWL, the bit 7 is ignored. // PWL, the bit 7 is ignored.
gamma_ramp_rw_index.rw_index = new_gamma_ramp_rw_index.rw_index =
(gamma_ramp_rw_index.rw_index & ~UINT32_C(0x7F)) | (gamma_ramp_rw_index.rw_index & ~UINT32_C(0x7F)) |
((gamma_ramp_rw_index_pwl + 1) & 0x7F); ((gamma_ramp_rw_index_pwl + 1) & 0x7F);
WriteRegister(
XE_GPU_REG_DC_LUT_RW_INDEX,
xe::memory::Reinterpret<uint32_t>(new_gamma_ramp_rw_index));
} }
if (write_gamma_ramp_component) { if (write_gamma_ramp_component) {
OnGammaRampPWLValueWritten(); OnGammaRampPWLValueWritten();
@ -549,10 +557,10 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
case XE_GPU_REG_DC_LUT_30_COLOR: { case XE_GPU_REG_DC_LUT_30_COLOR: {
// Should be in the 256-entry table writing mode. // Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>(); auto gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
uint32_t gamma_ramp_write_enable_mask = uint32_t gamma_ramp_write_enable_mask =
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111; regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111;
if (gamma_ramp_write_enable_mask) { if (gamma_ramp_write_enable_mask) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry = reg::DC_LUT_30_COLOR& gamma_ramp_entry =
gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index]; gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index];
@ -567,11 +575,16 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
gamma_ramp_entry.color_10_red = gamma_ramp_value.color_10_red; gamma_ramp_entry.color_10_red = gamma_ramp_value.color_10_red;
} }
} }
++gamma_ramp_rw_index.rw_index;
// TODO(Triang3l): Should this reset the component write index? If this // TODO(Triang3l): Should this reset the component write index? If this
// increase is assumed to behave like a full DC_LUT_RW_INDEX write, it // increase is assumed to behave like a full DC_LUT_RW_INDEX write, it
// probably should. // probably should. Currently this also calls WriteRegister for
// DC_LUT_RW_INDEX, which resets gamma_ramp_rw_component_ as well.
gamma_ramp_rw_component_ = 0; gamma_ramp_rw_component_ = 0;
reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index;
++new_gamma_ramp_rw_index.rw_index;
WriteRegister(
XE_GPU_REG_DC_LUT_RW_INDEX,
xe::memory::Reinterpret<uint32_t>(new_gamma_ramp_rw_index));
if (gamma_ramp_write_enable_mask) { if (gamma_ramp_write_enable_mask) {
OnGammaRamp256EntryTableValueWritten(); OnGammaRamp256EntryTableValueWritten();
} }
@ -583,7 +596,7 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
// chrispy: rearrange check order, place set after checks // chrispy: rearrange check order, place set after checks
if (XE_LIKELY(index < RegisterFile::kRegisterCount)) { if (XE_LIKELY(index < RegisterFile::kRegisterCount)) {
register_file_->values[index].u32 = value; register_file_->values[index] = value;
// quick pre-test // quick pre-test
// todo: figure out just how unlikely this is. if very (it ought to be, // todo: figure out just how unlikely this is. if very (it ought to be,
@ -708,10 +721,11 @@ void CommandProcessor::MakeCoherent() {
// https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf // https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf
// https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454
RegisterFile* regs = register_file_; volatile uint32_t* regs_volatile = register_file_->values;
auto& status_host = regs->Get<reg::COHER_STATUS_HOST>(); auto status_host = xe::memory::Reinterpret<reg::COHER_STATUS_HOST>(
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; uint32_t(regs_volatile[XE_GPU_REG_COHER_STATUS_HOST]));
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST];
uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST];
if (!status_host.status) { if (!status_host.status) {
return; return;
@ -731,7 +745,7 @@ void CommandProcessor::MakeCoherent() {
base_host + size_host, size_host, action); base_host + size_host, size_host, action);
// Mark coherent. // Mark coherent.
status_host.status = 0; regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0;
} }
void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }
@ -752,4 +766,4 @@ void CommandProcessor::InitializeTrace() {
#define COMMAND_PROCESSOR CommandProcessor #define COMMAND_PROCESSOR CommandProcessor
#include "pm4_command_processor_implement.h" #include "pm4_command_processor_implement.h"
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -1768,7 +1768,7 @@ void D3D12CommandProcessor::WriteRegisterForceinline(uint32_t index,
__m128i is_above_lower = _mm_cmpgt_epi16(to_rangecheck, lower_bounds); __m128i is_above_lower = _mm_cmpgt_epi16(to_rangecheck, lower_bounds);
__m128i is_below_upper = _mm_cmplt_epi16(to_rangecheck, upper_bounds); __m128i is_below_upper = _mm_cmplt_epi16(to_rangecheck, upper_bounds);
__m128i is_within_range = _mm_and_si128(is_above_lower, is_below_upper); __m128i is_within_range = _mm_and_si128(is_above_lower, is_below_upper);
register_file_->values[index].u32 = value; register_file_->values[index] = value;
uint32_t movmask = static_cast<uint32_t>(_mm_movemask_epi8(is_within_range)); uint32_t movmask = static_cast<uint32_t>(_mm_movemask_epi8(is_within_range));
@ -2047,7 +2047,7 @@ void D3D12CommandProcessor::WritePossiblySpecialRegistersFromMem(
for (uint32_t index = start_index; index < end; ++index, ++base) { for (uint32_t index = start_index; index < end; ++index, ++base) {
uint32_t value = xe::load_and_swap<uint32_t>(base); uint32_t value = xe::load_and_swap<uint32_t>(base);
register_file_->values[index].u32 = value; register_file_->values[index] = value;
unsigned expr = 0; unsigned expr = 0;
@ -2780,8 +2780,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) { while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) {
vfetch_bits_remaining = xe::clear_lowest_bit(vfetch_bits_remaining); vfetch_bits_remaining = xe::clear_lowest_bit(vfetch_bits_remaining);
uint32_t vfetch_index = i * 32 + j; uint32_t vfetch_index = i * 32 + j;
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>( xenos::xe_gpu_vertex_fetch_t vfetch_constant =
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); regs.GetVertexFetch(vfetch_index);
switch (vfetch_constant.type) { switch (vfetch_constant.type) {
case xenos::FetchConstantType::kVertex: case xenos::FetchConstantType::kVertex:
break; break;
@ -3554,10 +3554,10 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
// Blend factor. // Blend factor.
float blend_factor[] = { float blend_factor[] = {
regs[XE_GPU_REG_RB_BLEND_RED].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs[XE_GPU_REG_RB_BLEND_GREEN].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs[XE_GPU_REG_RB_BLEND_BLUE].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
}; };
// std::memcmp instead of != so in case of NaN, every draw won't be // std::memcmp instead of != so in case of NaN, every draw won't be
// invalidating it. // invalidating it.
@ -3599,7 +3599,7 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl(
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>(); auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>(); auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>(); auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>(); auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>(); auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>(); auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
@ -3753,10 +3753,10 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl(
// Tessellation factor range, plus 1.0 according to the images in // Tessellation factor range, plus 1.0 according to the images in
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
float tessellation_factor_min = auto tessellation_factor_min =
regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f; regs.Get<float>(XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL) + 1.0f;
float tessellation_factor_max = auto tessellation_factor_max =
regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f; regs.Get<float>(XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL) + 1.0f;
update_dirty_floatmask(system_constants_.tessellation_factor_range_min, update_dirty_floatmask(system_constants_.tessellation_factor_range_min,
tessellation_factor_min); tessellation_factor_min);
@ -3804,12 +3804,12 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl(
&user_clip_plane_index)) { &user_clip_plane_index)) {
user_clip_planes_remaining = user_clip_planes_remaining =
xe::clear_lowest_bit(user_clip_planes_remaining); xe::clear_lowest_bit(user_clip_planes_remaining);
const float* user_clip_plane = const void* user_clip_plane_regs =
&regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4].f32; &regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4];
if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane, if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane_regs,
4 * sizeof(float))) { 4 * sizeof(float))) {
dirty = true; dirty = true;
std::memcpy(user_clip_plane_write_ptr, user_clip_plane, std::memcpy(user_clip_plane_write_ptr, user_clip_plane_regs,
4 * sizeof(float)); 4 * sizeof(float));
} }
user_clip_plane_write_ptr += 4; user_clip_plane_write_ptr += 4;
@ -3974,9 +3974,8 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl(
color_exp_bias -= 5; color_exp_bias -= 5;
} }
} }
float color_exp_bias_scale; auto color_exp_bias_scale = xe::memory::Reinterpret<float>(
*reinterpret_cast<int32_t*>(&color_exp_bias_scale) = int32_t(0x3F800000 + (color_exp_bias << 23)));
0x3F800000 + (color_exp_bias << 23);
update_dirty_floatmask(system_constants_.color_exp_bias[i], update_dirty_floatmask(system_constants_.color_exp_bias[i],
color_exp_bias_scale); color_exp_bias_scale);
@ -4028,7 +4027,7 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl(
#endif #endif
uint32_t blend_factors_ops = uint32_t blend_factors_ops =
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
update_dirty_uint32_cmp(system_constants_.edram_rt_blend_factors_ops[i], update_dirty_uint32_cmp(system_constants_.edram_rt_blend_factors_ops[i],
blend_factors_ops); blend_factors_ops);
@ -4060,22 +4059,22 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl(
if (primitive_polygonal) { if (primitive_polygonal) {
if (pa_su_sc_mode_cntl.poly_offset_front_enable) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset = poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
} }
if (pa_su_sc_mode_cntl.poly_offset_back_enable) { if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
poly_offset_back_scale = poly_offset_back_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
poly_offset_back_offset = poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
} }
} else { } else {
if (pa_su_sc_mode_cntl.poly_offset_para_enable) { if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset = poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset; poly_offset_back_offset = poly_offset_front_offset;
} }
@ -4153,26 +4152,26 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl(
} }
} }
update_dirty_floatmask(system_constants_.edram_blend_constant[0], update_dirty_floatmask(system_constants_.edram_blend_constant[0],
regs[XE_GPU_REG_RB_BLEND_RED].f32); regs.Get<float>(XE_GPU_REG_RB_BLEND_RED));
system_constants_.edram_blend_constant[0] = system_constants_.edram_blend_constant[0] =
regs[XE_GPU_REG_RB_BLEND_RED].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
update_dirty_floatmask(system_constants_.edram_blend_constant[1], update_dirty_floatmask(system_constants_.edram_blend_constant[1],
regs[XE_GPU_REG_RB_BLEND_GREEN].f32); regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN));
system_constants_.edram_blend_constant[1] = system_constants_.edram_blend_constant[1] =
regs[XE_GPU_REG_RB_BLEND_GREEN].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
update_dirty_floatmask(system_constants_.edram_blend_constant[2], update_dirty_floatmask(system_constants_.edram_blend_constant[2],
regs[XE_GPU_REG_RB_BLEND_BLUE].f32); regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE));
system_constants_.edram_blend_constant[2] = system_constants_.edram_blend_constant[2] =
regs[XE_GPU_REG_RB_BLEND_BLUE].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
update_dirty_floatmask(system_constants_.edram_blend_constant[3], update_dirty_floatmask(system_constants_.edram_blend_constant[3],
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32); regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA));
system_constants_.edram_blend_constant[3] = system_constants_.edram_blend_constant[3] =
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
} }
dirty |= ArchFloatMaskSignbit(dirty_float_mask); dirty |= ArchFloatMaskSignbit(dirty_float_mask);
@ -4266,10 +4265,10 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
// These are the constant base addresses/ranges for shaders. // These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now cause nothing seems to differ on the Xbox // We have these hardcoded right now cause nothing seems to differ on the Xbox
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
// Check if the float constant layout is still the same and get the counts. // Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& float_constant_map_vertex = const Shader::ConstantRegisterMap& float_constant_map_vertex =
vertex_shader->constant_register_map(); vertex_shader->constant_register_map();
@ -4344,8 +4343,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
xe::clear_lowest_bit(float_constant_map_entry); xe::clear_lowest_bit(float_constant_map_entry);
std::memcpy(float_constants, std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)] (float_constant_index << 2)],
.f32,
4 * sizeof(float)); 4 * sizeof(float));
float_constants += 4 * sizeof(float); float_constants += 4 * sizeof(float);
} }
@ -4376,8 +4374,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
xe::clear_lowest_bit(float_constant_map_entry); xe::clear_lowest_bit(float_constant_map_entry);
std::memcpy(float_constants, std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)] (float_constant_index << 2)],
.f32,
4 * sizeof(float)); 4 * sizeof(float));
float_constants += 4 * sizeof(float); float_constants += 4 * sizeof(float);
} }
@ -4397,8 +4394,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
return false; return false;
} }
xe::smallcpy_const<kBoolLoopConstantsSize>( xe::smallcpy_const<kBoolLoopConstantsSize>(
bool_loop_constants, bool_loop_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031]);
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32);
cbuffer_binding_bool_loop_.up_to_date = true; cbuffer_binding_bool_loop_.up_to_date = true;
current_graphics_root_up_to_date_ &= current_graphics_root_up_to_date_ &=
@ -4414,7 +4410,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
return false; return false;
} }
xe::smallcpy_const<kFetchConstantsSize>( xe::smallcpy_const<kFetchConstantsSize>(
fetch_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32); fetch_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]);
cbuffer_binding_fetch_.up_to_date = true; cbuffer_binding_fetch_.up_to_date = true;
current_graphics_root_up_to_date_ &= current_graphics_root_up_to_date_ &=
@ -5152,4 +5148,4 @@ void D3D12CommandProcessor::WriteGammaRampSRV(
#undef COMMAND_PROCESSOR #undef COMMAND_PROCESSOR
} // namespace d3d12 } // namespace d3d12
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -679,8 +679,8 @@ void D3D12TextureCache::PrefetchSamplerParameters(
D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters( D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters(
const D3D12Shader::SamplerBinding& binding) const { const D3D12Shader::SamplerBinding& binding) const {
const auto& regs = register_file(); const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch =
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); regs.GetTextureFetch(binding.fetch_constant);
SamplerParameters parameters; SamplerParameters parameters;
@ -1160,8 +1160,7 @@ ID3D12Resource* D3D12TextureCache::RequestSwapTexture(
D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out, D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out,
xenos::TextureFormat& format_out) { xenos::TextureFormat& format_out) {
const auto& regs = register_file(); const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
TextureKey key; TextureKey key;
BindingInfoFromFetchConstant(fetch, key, nullptr); BindingInfoFromFetchConstant(fetch, key, nullptr);
if (!key.is_valid || key.base_page == 0 || if (!key.is_valid || key.base_page == 0 ||

View File

@ -15,6 +15,7 @@
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/cvar.h" #include "xenia/base/cvar.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/gpu/registers.h" #include "xenia/gpu/registers.h"
#include "xenia/gpu/ucode.h" #include "xenia/gpu/ucode.h"
@ -67,7 +68,7 @@ void DrawExtentEstimator::PositionYExportSink::Export(
point_size_ = value[0]; point_size_ = value[0];
} }
if (value_mask & 0b0100) { if (value_mask & 0b0100) {
vertex_kill_ = *reinterpret_cast<const uint32_t*>(&value[2]); vertex_kill_ = xe::memory::Reinterpret<uint32_t>(value[2]);
} }
} }
} }
@ -110,7 +111,7 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
xenos::Endian index_endian = vgt_dma_size.swap_mode; xenos::Endian index_endian = vgt_dma_size.swap_mode;
if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) { if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) {
xenos::IndexFormat index_format = vgt_draw_initiator.index_size; xenos::IndexFormat index_format = vgt_draw_initiator.index_size;
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32; uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE];
uint32_t index_buffer_read_count = uint32_t index_buffer_read_count =
std::min(uint32_t(vgt_draw_initiator.num_indices), std::min(uint32_t(vgt_draw_initiator.num_indices),
uint32_t(vgt_dma_size.num_words)); uint32_t(vgt_dma_size.num_words));
@ -145,21 +146,22 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>(); auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f; : 1.0f;
float viewport_y_offset = pa_cl_vte_cntl.vport_y_offset_ena float viewport_y_offset =
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 pa_cl_vte_cntl.vport_y_offset_ena
: 0.0f; ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f;
int32_t point_vertex_min_diameter_float = 0; int32_t point_vertex_min_diameter_float = 0;
int32_t point_vertex_max_diameter_float = 0; int32_t point_vertex_max_diameter_float = 0;
float point_constant_radius_y = 0.0f; float point_constant_radius_y = 0.0f;
if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) { if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) {
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>(); auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
*reinterpret_cast<float*>(&point_vertex_min_diameter_float) = point_vertex_min_diameter_float = xe::memory::Reinterpret<int32_t>(
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f); float(pa_su_point_minmax.min_size) * (2.0f / 16.0f));
*reinterpret_cast<float*>(&point_vertex_max_diameter_float) = point_vertex_max_diameter_float = xe::memory::Reinterpret<int32_t>(
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f); float(pa_su_point_minmax.max_size) * (2.0f / 16.0f));
point_constant_radius_y = point_constant_radius_y =
float(regs.Get<reg::PA_SU_POINT_SIZE>().height) * (1.0f / 16.0f); float(regs.Get<reg::PA_SU_POINT_SIZE>().height) * (1.0f / 16.0f);
} }
@ -224,12 +226,13 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
// Vertex-specified diameter. Clamped effectively as a signed integer in // Vertex-specified diameter. Clamped effectively as a signed integer in
// the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN // the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN
// to the maximum. // to the maximum.
point_radius_y = position_y_export_sink.point_size().value(); point_radius_y =
*reinterpret_cast<int32_t*>(&point_radius_y) = std::min( 0.5f *
point_vertex_max_diameter_float, xe::memory::Reinterpret<float>(std::min(
std::max(point_vertex_min_diameter_float, point_vertex_max_diameter_float,
*reinterpret_cast<const int32_t*>(&point_radius_y))); std::max(point_vertex_min_diameter_float,
point_radius_y *= 0.5f; xe::memory::Reinterpret<int32_t>(
position_y_export_sink.point_size().value()))));
} else { } else {
// Constant radius. // Constant radius.
point_radius_y = point_constant_radius_y; point_radius_y = point_constant_radius_y;
@ -329,7 +332,7 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
float window_y_offset_f = float(window_y_offset); float window_y_offset_f = float(window_y_offset);
float yoffset = regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; float yoffset = regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET);
// First calculate all the integer.0 or integer.5 offsetting exactly at full // First calculate all the integer.0 or integer.5 offsetting exactly at full
// precision. // precision.
@ -347,11 +350,10 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
sm3 = yoffset; sm3 = yoffset;
} }
sm4 = pa_cl_vte_cntl.vport_y_scale_ena sm4 = pa_cl_vte_cntl.vport_y_scale_ena
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) ? std::abs(regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE))
: 1.0f; : 1.0f;
viewport_bottom = sm1 + sm2 + sm3 + sm4; viewport_bottom = sm1 + sm2 + sm3 + sm4;
// Using floor, or, rather, truncation (because maxing with zero anyway) // Using floor, or, rather, truncation (because maxing with zero anyway)
// similar to how viewport scissoring behaves on real AMD, Intel and Nvidia // similar to how viewport scissoring behaves on real AMD, Intel and Nvidia
// GPUs on Direct3D 12 (but not WARP), also like in // GPUs on Direct3D 12 (but not WARP), also like in
@ -366,4 +368,4 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
} }
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -9,8 +9,6 @@
#include "xenia/gpu/draw_util.h" #include "xenia/gpu/draw_util.h"
#include <cstring>
#include "xenia/base/cvar.h" #include "xenia/base/cvar.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/math.h" #include "xenia/base/math.h"
@ -93,22 +91,21 @@ void GetPreferredFacePolygonOffset(const RegisterFile& regs,
// ones that are rendered (except for shadow volumes). // ones that are rendered (except for shadow volumes).
if (pa_su_sc_mode_cntl.poly_offset_front_enable && if (pa_su_sc_mode_cntl.poly_offset_front_enable &&
!pa_su_sc_mode_cntl.cull_front) { !pa_su_sc_mode_cntl.cull_front) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
scale = roundToNearestOrderOfMagnitude(scale); scale = roundToNearestOrderOfMagnitude(scale);
} }
if (pa_su_sc_mode_cntl.poly_offset_back_enable && if (pa_su_sc_mode_cntl.poly_offset_back_enable &&
!pa_su_sc_mode_cntl.cull_back && !scale && !offset) { !pa_su_sc_mode_cntl.cull_back && !scale && !offset) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
} }
} else { } else {
// Non-triangle primitives use the front offset, but it's toggled via // Non-triangle primitives use the front offset, but it's toggled via
// poly_offset_para_enable. // poly_offset_para_enable.
if (pa_su_sc_mode_cntl.poly_offset_para_enable) { if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
} }
} }
scale_out = scale; scale_out = scale;
@ -143,7 +140,7 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader,
} }
// Check if a color target is actually written. // Check if a color target is actually written.
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
uint32_t rts_remaining = shader.writes_color_targets(); uint32_t rts_remaining = shader.writes_color_targets();
uint32_t rt_index; uint32_t rt_index;
while (xe::bit_scan_forward(rts_remaining, &rt_index)) { while (xe::bit_scan_forward(rts_remaining, &rt_index)) {
@ -306,7 +303,6 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args,
// Obtain the original viewport values in a normalized way. // Obtain the original viewport values in a normalized way.
float scale_xy[] = { float scale_xy[] = {
pa_cl_vte_cntl.vport_x_scale_ena ? args->PA_CL_VPORT_XSCALE : 1.0f, pa_cl_vte_cntl.vport_x_scale_ena ? args->PA_CL_VPORT_XSCALE : 1.0f,
pa_cl_vte_cntl.vport_y_scale_ena ? args->PA_CL_VPORT_YSCALE : 1.0f, pa_cl_vte_cntl.vport_y_scale_ena ? args->PA_CL_VPORT_YSCALE : 1.0f,
}; };
@ -392,16 +388,11 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args,
float offset_axis = offset_base_xy[i] + offset_add_xy[i]; float offset_axis = offset_base_xy[i] + offset_add_xy[i];
float scale_axis = scale_xy[i]; float scale_axis = scale_xy[i];
float scale_axis_abs = std::abs(scale_xy[i]); float scale_axis_abs = std::abs(scale_xy[i]);
float axis_0 = offset_axis - scale_axis_abs;
float axis_1 = offset_axis + scale_axis_abs;
float axis_max_unscaled_float = float(xy_max_unscaled[i]); float axis_max_unscaled_float = float(xy_max_unscaled[i]);
// max(0.0f, xy) drops NaN and < 0 - max picks the first argument in the uint32_t axis_0_int = uint32_t(xe::clamp_float(
// !(a < b) case (always for NaN), min as float (axis_max_unscaled_float offset_axis - scale_axis_abs, 0.0f, axis_max_unscaled_float));
// is well below 2^24) to safely drop very large values. uint32_t axis_1_int = uint32_t(xe::clamp_float(
uint32_t axis_0_int = offset_axis + scale_axis_abs, 0.0f, axis_max_unscaled_float));
uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_0)));
uint32_t axis_1_int =
uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_1)));
uint32_t axis_extent_int = axis_1_int - axis_0_int; uint32_t axis_extent_int = axis_1_int - axis_0_int;
viewport_info_out.xy_offset[i] = axis_0_int * axis_resolution_scale; viewport_info_out.xy_offset[i] = axis_0_int * axis_resolution_scale;
viewport_info_out.xy_extent[i] = axis_extent_int * axis_resolution_scale; viewport_info_out.xy_extent[i] = axis_extent_int * axis_resolution_scale;
@ -507,8 +498,8 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args,
// extension. But cases when this really matters are yet to be found - // extension. But cases when this really matters are yet to be found -
// trying to fix this will result in more correct depth values, but // trying to fix this will result in more correct depth values, but
// incorrect clipping. // incorrect clipping.
z_min = xe::saturate_unsigned(host_clip_offset_z); z_min = xe::saturate(host_clip_offset_z);
z_max = xe::saturate_unsigned(host_clip_offset_z + host_clip_scale_z); z_max = xe::saturate(host_clip_offset_z + host_clip_scale_z);
// Direct3D 12 doesn't allow reverse depth range - on some drivers it // Direct3D 12 doesn't allow reverse depth range - on some drivers it
// works, on some drivers it doesn't, actually, but it was never // works, on some drivers it doesn't, actually, but it was never
// explicitly allowed by the specification. // explicitly allowed by the specification.
@ -730,7 +721,7 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs,
return 0; return 0;
} }
uint32_t normalized_color_mask = 0; uint32_t normalized_color_mask = 0;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
// Exclude the render targets not statically written to by the pixel shader. // Exclude the render targets not statically written to by the pixel shader.
// If the shader doesn't write to a render target, it shouldn't be written // If the shader doesn't write to a render target, it shouldn't be written
@ -776,10 +767,16 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
? regs.Get<reg::SQ_VS_CONST>().base ? regs.Get<reg::SQ_VS_CONST>().base
: regs.Get<reg::SQ_PS_CONST>().base; : regs.Get<reg::SQ_PS_CONST>().base;
for (uint32_t constant_index : shader.memexport_stream_constants()) { for (uint32_t constant_index : shader.memexport_stream_constants()) {
const auto& stream = regs.Get<xenos::xe_gpu_memexport_stream_t>( xenos::xe_gpu_memexport_stream_t stream =
XE_GPU_REG_SHADER_CONSTANT_000_X + regs.GetMemExportStream(float_constants_base + constant_index);
(float_constants_base + constant_index) * 4); // Safety checks for stream constants potentially not set up if the export
if (!stream.index_count) { // isn't done on the control flow path taken by the shader (not checking the
// Y component because the index is more likely to be constructed
// arbitrarily).
// The hardware validates the upper bits of eA according to the
// IPR2015-00325 sequencer specification.
if (stream.const_0x1 != 0x1 || stream.const_0x4b0 != 0x4B0 ||
stream.const_0x96 != 0x96 || !stream.index_count) {
continue; continue;
} }
const FormatInfo& format_info = const FormatInfo& format_info =
@ -821,7 +818,7 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
} }
// Add a new range if haven't expanded an existing one. // Add a new range if haven't expanded an existing one.
if (!range_reused) { if (!range_reused) {
ranges_out.emplace_back(stream.base_address, stream_size_bytes); ranges_out.emplace_back(uint32_t(stream.base_address), stream_size_bytes);
} }
} }
} }
@ -943,8 +940,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
// Get the extent of pixels covered by the resolve rectangle, according to the // Get the extent of pixels covered by the resolve rectangle, according to the
// top-left rasterization rule. // top-left rasterization rule.
// D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU. // D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU.
auto fetch = regs.Get<xenos::xe_gpu_vertex_fetch_t>( xenos::xe_gpu_vertex_fetch_t fetch = regs.GetVertexFetch(0);
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) { if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) {
XELOGE("Unsupported resolve vertex buffer format"); XELOGE("Unsupported resolve vertex buffer format");
assert_always(); assert_always();
@ -997,10 +993,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
GetScissor(regs, scissor, false); GetScissor(regs, scissor, false);
int32_t scissor_right = int32_t(scissor.offset[0] + scissor.extent[0]); int32_t scissor_right = int32_t(scissor.offset[0] + scissor.extent[0]);
int32_t scissor_bottom = int32_t(scissor.offset[1] + scissor.extent[1]); int32_t scissor_bottom = int32_t(scissor.offset[1] + scissor.extent[1]);
x0 = xe::clamp(x0, int32_t(scissor.offset[0]), scissor_right); x0 = std::clamp(x0, int32_t(scissor.offset[0]), scissor_right);
y0 = xe::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom); y0 = std::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom);
x1 = xe::clamp(x1, int32_t(scissor.offset[0]), scissor_right); x1 = std::clamp(x1, int32_t(scissor.offset[0]), scissor_right);
y1 = xe::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom); y1 = std::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom);
assert_true(x0 <= x1 && y0 <= y1); assert_true(x0 <= x1 && y0 <= y1);
@ -1114,7 +1110,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
} }
// Calculate the destination memory extent. // Calculate the destination memory extent.
uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
uint32_t copy_dest_base_adjusted = rb_copy_dest_base; uint32_t copy_dest_base_adjusted = rb_copy_dest_base;
uint32_t copy_dest_extent_start, copy_dest_extent_end; uint32_t copy_dest_extent_start, copy_dest_extent_end;
auto rb_copy_dest_pitch = regs.Get<reg::RB_COPY_DEST_PITCH>(); auto rb_copy_dest_pitch = regs.Get<reg::RB_COPY_DEST_PITCH>();
@ -1284,9 +1280,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
info_out.copy_dest_info.copy_dest_swap = false; info_out.copy_dest_info.copy_dest_swap = false;
} }
info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR];
info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR];
info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO];
#if 0 #if 0
XELOGD( XELOGD(
"Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially " "Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially "
@ -1377,4 +1374,4 @@ ResolveCopyShaderIndex ResolveInfo::GetCopyShader(
} // namespace draw_util } // namespace draw_util
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -373,12 +373,12 @@ struct GetViewportInfoArgs {
pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>(); pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>(); pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
pa_su_vtx_cntl = regs.Get<reg::PA_SU_VTX_CNTL>(); pa_su_vtx_cntl = regs.Get<reg::PA_SU_VTX_CNTL>();
PA_CL_VPORT_XSCALE = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; PA_CL_VPORT_XSCALE = regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE);
PA_CL_VPORT_YSCALE = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; PA_CL_VPORT_YSCALE = regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE);
PA_CL_VPORT_ZSCALE = regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; PA_CL_VPORT_ZSCALE = regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE);
PA_CL_VPORT_XOFFSET = regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; PA_CL_VPORT_XOFFSET = regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET);
PA_CL_VPORT_YOFFSET = regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; PA_CL_VPORT_YOFFSET = regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET);
PA_CL_VPORT_ZOFFSET = regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; PA_CL_VPORT_ZOFFSET = regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>(); pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
depth_format = regs.Get<reg::RB_DEPTH_INFO>().depth_format; depth_format = regs.Get<reg::RB_DEPTH_INFO>().depth_format;
} }
@ -767,4 +767,4 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe
#endif // XENIA_GPU_DRAW_UTIL_H_ #endif // XENIA_GPU_DRAW_UTIL_H_

View File

@ -17,6 +17,7 @@
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/memory.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -1103,10 +1104,10 @@ struct Src : OperandAddress {
} }
static Src LI(int32_t x) { return LI(x, x, x, x); } static Src LI(int32_t x) { return LI(x, x, x, x); }
static Src LF(float x, float y, float z, float w) { static Src LF(float x, float y, float z, float w) {
return LU(*reinterpret_cast<const uint32_t*>(&x), return LU(xe::memory::Reinterpret<uint32_t>(x),
*reinterpret_cast<const uint32_t*>(&y), xe::memory::Reinterpret<uint32_t>(y),
*reinterpret_cast<const uint32_t*>(&z), xe::memory::Reinterpret<uint32_t>(z),
*reinterpret_cast<const uint32_t*>(&w)); xe::memory::Reinterpret<uint32_t>(w));
} }
static Src LF(float x) { return LF(x, x, x, x); } static Src LF(float x) { return LF(x, x, x, x); }
static Src LP(const uint32_t* xyzw) { static Src LP(const uint32_t* xyzw) {
@ -1223,12 +1224,10 @@ struct Src : OperandAddress {
bool negate) { bool negate) {
if (is_integer) { if (is_integer) {
if (absolute) { if (absolute) {
*reinterpret_cast<int32_t*>(&value) = value = uint32_t(std::abs(int32_t(value)));
std::abs(*reinterpret_cast<const int32_t*>(&value));
} }
if (negate) { if (negate) {
*reinterpret_cast<int32_t*>(&value) = value = uint32_t(-int32_t(value));
-*reinterpret_cast<const int32_t*>(&value);
} }
} else { } else {
if (absolute) { if (absolute) {

View File

@ -258,7 +258,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) {
} }
assert_true(r < RegisterFile::kRegisterCount); assert_true(r < RegisterFile::kRegisterCount);
return register_file()->values[r].u32; return register_file()->values[r];
} }
void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
@ -276,7 +276,7 @@ void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
} }
assert_true(r < RegisterFile::kRegisterCount); assert_true(r < RegisterFile::kRegisterCount);
this->register_file()->values[r].u32 = value; this->register_file()->values[r] = value;
} }
void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) { void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) {
@ -379,4 +379,4 @@ bool GraphicsSystem::Restore(ByteStream* stream) {
} }
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -67,7 +67,7 @@ struct PacketAction {
union { union {
struct { struct {
uint32_t index; uint32_t index;
RegisterFile::RegisterValue value; uint32_t value;
} register_write; } register_write;
struct { struct {
uint64_t value; uint64_t value;
@ -194,7 +194,7 @@ struct PacketAction {
PacketAction action; PacketAction action;
action.type = Type::kRegisterWrite; action.type = Type::kRegisterWrite;
action.register_write.index = index; action.register_write.index = index;
action.register_write.value.u32 = value; action.register_write.value = value;
return action; return action;
} }

View File

@ -706,23 +706,27 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_WAIT_REG_MEM(
uint32_t ref = reader_.ReadAndSwap<uint32_t>(); uint32_t ref = reader_.ReadAndSwap<uint32_t>();
uint32_t mask = reader_.ReadAndSwap<uint32_t>(); uint32_t mask = reader_.ReadAndSwap<uint32_t>();
uint32_t wait = reader_.ReadAndSwap<uint32_t>(); uint32_t wait = reader_.ReadAndSwap<uint32_t>();
bool is_memory = (wait_info & 0x10) != 0;
assert_true(is_memory || poll_reg_addr < RegisterFile::kRegisterCount);
const volatile uint32_t& value_ref =
is_memory ? *reinterpret_cast<uint32_t*>(memory_->TranslatePhysical(
poll_reg_addr & ~uint32_t(0x3)))
: register_file_->values[poll_reg_addr];
bool matched = false; bool matched = false;
do { do {
uint32_t value; uint32_t value = value_ref;
if (wait_info & 0x10) { if (is_memory) {
// Memory. trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr & ~uint32_t(0x3)),
auto endianness = static_cast<xenos::Endian>(poll_reg_addr & 0x3); sizeof(uint32_t));
poll_reg_addr &= ~0x3; value = xenos::GpuSwap(value,
value = xe::load<uint32_t>(memory_->TranslatePhysical(poll_reg_addr)); static_cast<xenos::Endian>(poll_reg_addr & 0x3));
value = GpuSwap(value, endianness);
trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4);
} else { } else {
// Register.
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
value = register_file_->values[poll_reg_addr].u32;
if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) { if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
MakeCoherent(); MakeCoherent();
value = register_file_->values[poll_reg_addr].u32; value = value_ref;
} }
} }
matched = MatchValueAndRef(value & mask, ref, wait_info); matched = MatchValueAndRef(value & mask, ref, wait_info);
@ -758,17 +762,17 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_REG_RMW(uint32_t packet,
uint32_t rmw_info = reader_.ReadAndSwap<uint32_t>(); uint32_t rmw_info = reader_.ReadAndSwap<uint32_t>();
uint32_t and_mask = reader_.ReadAndSwap<uint32_t>(); uint32_t and_mask = reader_.ReadAndSwap<uint32_t>();
uint32_t or_mask = reader_.ReadAndSwap<uint32_t>(); uint32_t or_mask = reader_.ReadAndSwap<uint32_t>();
uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32; uint32_t value = register_file_->values[rmw_info & 0x1FFF];
if ((rmw_info >> 31) & 0x1) { if ((rmw_info >> 31) & 0x1) {
// & reg // & reg
value &= register_file_->values[and_mask & 0x1FFF].u32; value &= register_file_->values[and_mask & 0x1FFF];
} else { } else {
// & imm // & imm
value &= and_mask; value &= and_mask;
} }
if ((rmw_info >> 30) & 0x1) { if ((rmw_info >> 30) & 0x1) {
// | reg // | reg
value |= register_file_->values[or_mask & 0x1FFF].u32; value |= register_file_->values[or_mask & 0x1FFF];
} else { } else {
// | imm // | imm
value |= or_mask; value |= or_mask;
@ -788,7 +792,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_REG_TO_MEM(
uint32_t reg_val; uint32_t reg_val;
assert_true(reg_addr < RegisterFile::kRegisterCount); assert_true(reg_addr < RegisterFile::kRegisterCount);
reg_val = register_file_->values[reg_addr].u32; reg_val = register_file_->values[reg_addr];
auto endianness = static_cast<xenos::Endian>(mem_addr & 0x3); auto endianness = static_cast<xenos::Endian>(mem_addr & 0x3);
mem_addr &= ~0x3; mem_addr &= ~0x3;
@ -836,7 +840,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_COND_WRITE(
} else { } else {
// Register. // Register.
assert_true(poll_reg_addr < RegisterFile::kRegisterCount); assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
value = register_file_->values[poll_reg_addr].u32; value = register_file_->values[poll_reg_addr];
} }
bool matched = MatchValueAndRef(value & mask, ref, wait_info); bool matched = MatchValueAndRef(value & mask, ref, wait_info);
@ -858,7 +862,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_COND_WRITE(
} }
XE_FORCEINLINE XE_FORCEINLINE
void COMMAND_PROCESSOR::WriteEventInitiator(uint32_t value) XE_RESTRICT { void COMMAND_PROCESSOR::WriteEventInitiator(uint32_t value) XE_RESTRICT {
register_file_->values[XE_GPU_REG_VGT_EVENT_INITIATOR].u32 = value; register_file_->values[XE_GPU_REG_VGT_EVENT_INITIATOR] = value;
} }
bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE( bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE(
uint32_t packet, uint32_t count) XE_RESTRICT { uint32_t packet, uint32_t count) XE_RESTRICT {
@ -898,10 +902,8 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_SHD(
data_value = GpuSwap(data_value, endianness); data_value = GpuSwap(data_value, endianness);
uint8_t* write_destination = memory_->TranslatePhysical(address); uint8_t* write_destination = memory_->TranslatePhysical(address);
if (address > 0x1FFFFFFF) { if (address > 0x1FFFFFFF) {
uint32_t writeback_base = uint32_t writeback_base = register_file_->values[XE_GPU_REG_WRITEBACK_BASE];
register_file_->values[XE_GPU_REG_WRITEBACK_BASE].u32; uint32_t writeback_size = register_file_->values[XE_GPU_REG_WRITEBACK_SIZE];
uint32_t writeback_size =
register_file_->values[XE_GPU_REG_WRITEBACK_SIZE].u32;
uint32_t writeback_offset = address - writeback_base; uint32_t writeback_offset = address - writeback_base;
// check whether the guest has written writeback base. if they haven't, skip // check whether the guest has written writeback base. if they haven't, skip
// the offset check // the offset check
@ -967,7 +969,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_ZPD(
if (fake_sample_count >= 0) { if (fake_sample_count >= 0) {
auto* pSampleCounts = auto* pSampleCounts =
memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>( memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>(
register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32); register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]);
// 0xFFFFFEED is written to this two locations by D3D only on D3DISSUE_END // 0xFFFFFEED is written to this two locations by D3D only on D3DISSUE_END
// and used to detect a finished query. // and used to detect a finished query.
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished && bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
@ -1003,7 +1005,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3Draw(
vgt_draw_initiator.value = reader_.ReadAndSwap<uint32_t>(); vgt_draw_initiator.value = reader_.ReadAndSwap<uint32_t>();
--count_remaining; --count_remaining;
register_file_->values[XE_GPU_REG_VGT_DRAW_INITIATOR].u32 = register_file_->values[XE_GPU_REG_VGT_DRAW_INITIATOR] =
vgt_draw_initiator.value; vgt_draw_initiator.value;
bool draw_succeeded = true; bool draw_succeeded = true;
// TODO(Triang3l): Remove IndexBufferInfo and replace handling of all this // TODO(Triang3l): Remove IndexBufferInfo and replace handling of all this
@ -1025,7 +1027,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3Draw(
} }
uint32_t vgt_dma_base = reader_.ReadAndSwap<uint32_t>(); uint32_t vgt_dma_base = reader_.ReadAndSwap<uint32_t>();
--count_remaining; --count_remaining;
register_file_->values[XE_GPU_REG_VGT_DMA_BASE].u32 = vgt_dma_base; register_file_->values[XE_GPU_REG_VGT_DMA_BASE] = vgt_dma_base;
reg::VGT_DMA_SIZE vgt_dma_size; reg::VGT_DMA_SIZE vgt_dma_size;
assert_not_zero(count_remaining); assert_not_zero(count_remaining);
if (!count_remaining) { if (!count_remaining) {
@ -1034,7 +1036,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3Draw(
} }
vgt_dma_size.value = reader_.ReadAndSwap<uint32_t>(); vgt_dma_size.value = reader_.ReadAndSwap<uint32_t>();
--count_remaining; --count_remaining;
register_file_->values[XE_GPU_REG_VGT_DMA_SIZE].u32 = vgt_dma_size.value; register_file_->values[XE_GPU_REG_VGT_DMA_SIZE] = vgt_dma_size.value;
uint32_t index_size_bytes = uint32_t index_size_bytes =
vgt_draw_initiator.index_size == xenos::IndexFormat::kInt16 vgt_draw_initiator.index_size == xenos::IndexFormat::kInt16
@ -1341,10 +1343,10 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_VIZ_QUERY(
// The scan converter writes the internal result back to the register here. // The scan converter writes the internal result back to the register here.
// We just fake it and say it was visible in case it is read back. // We just fake it and say it was visible in case it is read back.
if (id < 32) { if (id < 32) {
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0].u32 |= register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0] |= uint32_t(1)
uint32_t(1) << id; << id;
} else { } else {
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1].u32 |= register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1] |=
uint32_t(1) << (id - 32); uint32_t(1) << (id - 32);
} }
} }
@ -1423,4 +1425,4 @@ void COMMAND_PROCESSOR::ExecutePacket(uint32_t ptr, uint32_t count) {
} }
} while (reader_.read_count()); } while (reader_.read_count());
reader_ = old_reader; reader_ = old_reader;
} }

View File

@ -498,8 +498,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
uint32_t index_size_log2 = uint32_t index_size_log2 =
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
// The base should already be aligned, but aligning here too for safety. // The base should already be aligned, but aligning here too for safety.
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & guest_index_base =
~uint32_t((1 << index_size_log2) - 1); regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
guest_index_buffer_needed_bytes = guest_draw_vertex_count guest_index_buffer_needed_bytes = guest_draw_vertex_count
<< index_size_log2; << index_size_log2;
if (guest_index_base > SharedMemory::kBufferSize || if (guest_index_base > SharedMemory::kBufferSize ||
@ -652,8 +652,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
uint32_t index_size_log2 = uint32_t index_size_log2 =
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
// The base should already be aligned, but aligning here too for safety. // The base should already be aligned, but aligning here too for safety.
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & guest_index_base =
~uint32_t((1 << index_size_log2) - 1); regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
guest_index_buffer_needed_bytes = guest_draw_vertex_count guest_index_buffer_needed_bytes = guest_draw_vertex_count
<< index_size_log2; << index_size_log2;
if (guest_index_base > SharedMemory::kBufferSize || if (guest_index_base > SharedMemory::kBufferSize ||

View File

@ -12,8 +12,12 @@
#include <cstdint> #include <cstdint>
#include <cstdlib> #include <cstdlib>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/memory.h"
#include "xenia/gpu/registers.h" #include "xenia/gpu/registers.h"
#include "xenia/gpu/xenos.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -34,39 +38,53 @@ class RegisterFile {
static const RegisterInfo* GetRegisterInfo(uint32_t index); static const RegisterInfo* GetRegisterInfo(uint32_t index);
static bool IsValidRegister(uint32_t index); static bool IsValidRegister(uint32_t index);
static constexpr size_t kRegisterCount = 0x5003; static constexpr size_t kRegisterCount = 0x5003;
union RegisterValue { uint32_t values[kRegisterCount];
uint32_t u32;
float f32; const uint32_t& operator[](uint32_t reg) const { return values[reg]; }
}; uint32_t& operator[](uint32_t reg) { return values[reg]; }
RegisterValue values[kRegisterCount];
const RegisterValue& operator[](uint32_t reg) const { return values[reg]; }
RegisterValue& operator[](uint32_t reg) { return values[reg]; }
const RegisterValue& operator[](Register reg) const { return values[reg]; }
RegisterValue& operator[](Register reg) { return values[reg]; }
template <typename T> template <typename T>
const T& Get(uint32_t reg) const { T Get(uint32_t reg) const {
return *reinterpret_cast<const T*>(&values[reg]); return xe::memory::Reinterpret<T>(values[reg]);
} }
template <typename T> template <typename T>
T& Get(uint32_t reg) { T Get(Register reg) const {
return *reinterpret_cast<T*>(&values[reg]); return Get<T>(static_cast<uint32_t>(reg));
} }
template <typename T> template <typename T>
const T& Get(Register reg) const { T Get() const {
return *reinterpret_cast<const T*>(&values[reg]); return Get<T>(T::register_index);
} }
template <typename T>
T& Get(Register reg) { xenos::xe_gpu_vertex_fetch_t GetVertexFetch(uint32_t index) const {
return *reinterpret_cast<T*>(&values[reg]); assert_true(index < 96);
xenos::xe_gpu_vertex_fetch_t fetch;
std::memcpy(&fetch,
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(sizeof(fetch) / sizeof(uint32_t)) * index],
sizeof(fetch));
return fetch;
} }
template <typename T>
const T& Get() const { xenos::xe_gpu_texture_fetch_t GetTextureFetch(uint32_t index) const {
return *reinterpret_cast<const T*>(&values[T::register_index]); assert_true(index < 32);
xenos::xe_gpu_texture_fetch_t fetch;
std::memcpy(&fetch,
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(sizeof(fetch) / sizeof(uint32_t)) * index],
sizeof(fetch));
return fetch;
} }
template <typename T>
T& Get() { xenos::xe_gpu_memexport_stream_t GetMemExportStream(
return *reinterpret_cast<T*>(&values[T::register_index]); uint32_t float_constant_index) const {
assert_true(float_constant_index < 512);
xenos::xe_gpu_memexport_stream_t stream;
std::memcpy(
&stream,
&values[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * float_constant_index],
sizeof(stream));
return stream;
} }
}; };

View File

@ -21,10 +21,7 @@ void ShaderInterpreter::Execute() {
state_.Reset(); state_.Reset();
const uint32_t* bool_constants = const uint32_t* bool_constants =
&register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32; &register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031];
const xenos::LoopConstant* loop_constants =
reinterpret_cast<const xenos::LoopConstant*>(
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32);
bool exec_ended = false; bool exec_ended = false;
uint32_t cf_index_next = 1; uint32_t cf_index_next = 1;
@ -133,8 +130,8 @@ void ShaderInterpreter::Execute() {
cf_index_next = cf_loop_start.address(); cf_index_next = cf_loop_start.address();
continue; continue;
} }
xenos::LoopConstant loop_constant = auto loop_constant = register_file_.Get<xenos::LoopConstant>(
loop_constants[cf_loop_start.loop_id()]; XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + cf_loop_start.loop_id());
state_.loop_constants[state_.loop_stack_depth] = loop_constant; state_.loop_constants[state_.loop_stack_depth] = loop_constant;
uint32_t& loop_iterator_ref = uint32_t& loop_iterator_ref =
state_.loop_iterators[state_.loop_stack_depth]; state_.loop_iterators[state_.loop_stack_depth];
@ -163,8 +160,11 @@ void ShaderInterpreter::Execute() {
&cf_instr); &cf_instr);
xenos::LoopConstant loop_constant = xenos::LoopConstant loop_constant =
state_.loop_constants[state_.loop_stack_depth - 1]; state_.loop_constants[state_.loop_stack_depth - 1];
assert_true(loop_constant.value == assert_zero(
loop_constants[cf_loop_end.loop_id()].value); std::memcmp(&loop_constant,
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 +
cf_loop_end.loop_id()],
sizeof(loop_constant)));
uint32_t loop_iterator = uint32_t loop_iterator =
++state_.loop_iterators[state_.loop_stack_depth - 1]; ++state_.loop_iterators[state_.loop_stack_depth - 1];
if (loop_iterator < loop_constant.count && if (loop_iterator < loop_constant.count &&
@ -250,28 +250,31 @@ void ShaderInterpreter::Execute() {
} }
} }
const float* ShaderInterpreter::GetFloatConstant( const std::array<float, 4> ShaderInterpreter::GetFloatConstant(
uint32_t address, bool is_relative, bool relative_address_is_a0) const { uint32_t address, bool is_relative, bool relative_address_is_a0) const {
static const float zero[4] = {};
int32_t index = int32_t(address); int32_t index = int32_t(address);
if (is_relative) { if (is_relative) {
index += relative_address_is_a0 ? state_.address_register index += relative_address_is_a0 ? state_.address_register
: state_.GetLoopAddress(); : state_.GetLoopAddress();
} }
if (index < 0) { if (index < 0) {
return zero; return std::array<float, 4>();
} }
auto base_and_size_minus_1 = register_file_.Get<reg::SQ_VS_CONST>( auto base_and_size_minus_1 = register_file_.Get<reg::SQ_VS_CONST>(
shader_type_ == xenos::ShaderType::kVertex ? XE_GPU_REG_SQ_VS_CONST shader_type_ == xenos::ShaderType::kVertex ? XE_GPU_REG_SQ_VS_CONST
: XE_GPU_REG_SQ_PS_CONST); : XE_GPU_REG_SQ_PS_CONST);
if (uint32_t(index) > base_and_size_minus_1.size) { if (uint32_t(index) > base_and_size_minus_1.size) {
return zero; return std::array<float, 4>();
} }
index += base_and_size_minus_1.base; index += base_and_size_minus_1.base;
if (index >= 512) { if (index >= 512) {
return zero; return std::array<float, 4>();
} }
return &register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index].f32; std::array<float, 4> value;
std::memcpy(value.data(),
&register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index],
sizeof(float) * 4);
return value;
} }
void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
@ -290,6 +293,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
const float* vector_src_ptr; const float* vector_src_ptr;
uint32_t vector_src_register = instr.src_reg(1 + i); uint32_t vector_src_register = instr.src_reg(1 + i);
bool vector_src_absolute = false; bool vector_src_absolute = false;
std::array<float, 4> vector_src_float_constant;
if (instr.src_is_temp(1 + i)) { if (instr.src_is_temp(1 + i)) {
vector_src_ptr = GetTempRegister( vector_src_ptr = GetTempRegister(
ucode::AluInstruction::src_temp_reg(vector_src_register), ucode::AluInstruction::src_temp_reg(vector_src_register),
@ -297,9 +301,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
vector_src_register); vector_src_register);
} else { } else {
vector_src_ptr = GetFloatConstant( vector_src_float_constant = GetFloatConstant(
vector_src_register, instr.src_const_is_addressed(1 + i), vector_src_register, instr.src_const_is_addressed(1 + i),
instr.is_const_address_register_relative()); instr.is_const_address_register_relative());
vector_src_ptr = vector_src_float_constant.data();
} }
uint32_t vector_src_absolute_mask = uint32_t vector_src_absolute_mask =
~(uint32_t(vector_src_absolute) << 31); ~(uint32_t(vector_src_absolute) << 31);
@ -334,16 +339,18 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
} break; } break;
case ucode::AluVectorOpcode::kMax: { case ucode::AluVectorOpcode::kMax: {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] >= vector_operands[1][i] vector_result[i] =
? vector_operands[0][i] std::isgreaterequal(vector_operands[0][i], vector_operands[1][i])
: vector_operands[1][i]; ? vector_operands[0][i]
: vector_operands[1][i];
} }
} break; } break;
case ucode::AluVectorOpcode::kMin: { case ucode::AluVectorOpcode::kMin: {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] < vector_operands[1][i] vector_result[i] =
? vector_operands[0][i] std::isless(vector_operands[0][i], vector_operands[1][i])
: vector_operands[1][i]; ? vector_operands[0][i]
: vector_operands[1][i];
} }
} break; } break;
case ucode::AluVectorOpcode::kSeq: { case ucode::AluVectorOpcode::kSeq: {
@ -354,14 +361,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
} break; } break;
case ucode::AluVectorOpcode::kSgt: { case ucode::AluVectorOpcode::kSgt: {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_result[i] = float(
float(vector_operands[0][i] > vector_operands[1][i]); std::isgreater(vector_operands[0][i], vector_operands[1][i]));
} }
} break; } break;
case ucode::AluVectorOpcode::kSge: { case ucode::AluVectorOpcode::kSge: {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_result[i] = float(std::isgreaterequal(vector_operands[0][i],
float(vector_operands[0][i] >= vector_operands[1][i]); vector_operands[1][i]));
} }
} break; } break;
case ucode::AluVectorOpcode::kSne: { case ucode::AluVectorOpcode::kSne: {
@ -407,14 +414,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
} break; } break;
case ucode::AluVectorOpcode::kCndGe: { case ucode::AluVectorOpcode::kCndGe: {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] >= 0.0f vector_result[i] = std::isgreaterequal(vector_operands[0][i], 0.0f)
? vector_operands[1][i] ? vector_operands[1][i]
: vector_operands[2][i]; : vector_operands[2][i];
} }
} break; } break;
case ucode::AluVectorOpcode::kCndGt: { case ucode::AluVectorOpcode::kCndGt: {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] > 0.0f vector_result[i] = std::isgreater(vector_operands[0][i], 0.0f)
? vector_operands[1][i] ? vector_operands[1][i]
: vector_operands[2][i]; : vector_operands[2][i];
} }
@ -466,32 +473,38 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
float x_abs = std::abs(x), y_abs = std::abs(y), z_abs = std::abs(z); float x_abs = std::abs(x), y_abs = std::abs(y), z_abs = std::abs(z);
// Result is T coordinate, S coordinate, 2 * major axis, face ID. // Result is T coordinate, S coordinate, 2 * major axis, face ID.
if (z_abs >= x_abs && z_abs >= y_abs) { if (z_abs >= x_abs && z_abs >= y_abs) {
bool z_negative = std::isless(z, 0.0f);
vector_result[0] = -y; vector_result[0] = -y;
vector_result[1] = z < 0.0f ? -x : x; vector_result[1] = z_negative ? -x : x;
vector_result[2] = z; vector_result[2] = z;
vector_result[3] = z < 0.0f ? 5.0f : 4.0f; vector_result[3] = z_negative ? 5.0f : 4.0f;
} else if (y_abs >= x_abs) { } else if (y_abs >= x_abs) {
vector_result[0] = y < 0.0f ? -z : z; bool y_negative = std::isless(y, 0.0f);
vector_result[0] = y_negative ? -z : z;
vector_result[1] = x; vector_result[1] = x;
vector_result[2] = y; vector_result[2] = y;
vector_result[3] = y < 0.0f ? 3.0f : 2.0f; vector_result[3] = y_negative ? 3.0f : 2.0f;
} else { } else {
bool x_negative = std::isless(x, 0.0f);
vector_result[0] = -y; vector_result[0] = -y;
vector_result[1] = x < 0.0f ? z : -z; vector_result[1] = x_negative ? z : -z;
vector_result[2] = x; vector_result[2] = x;
vector_result[3] = x < 0.0f ? 1.0f : 0.0f; vector_result[3] = x_negative ? 1.0f : 0.0f;
} }
vector_result[2] *= 2.0f; vector_result[2] *= 2.0f;
} break; } break;
case ucode::AluVectorOpcode::kMax4: { case ucode::AluVectorOpcode::kMax4: {
if (vector_operands[0][0] >= vector_operands[0][1] && if (std::isgreaterequal(vector_operands[0][0], vector_operands[0][1]) &&
vector_operands[0][0] >= vector_operands[0][2] && std::isgreaterequal(vector_operands[0][0], vector_operands[0][2]) &&
vector_operands[0][0] >= vector_operands[0][3]) { std::isgreaterequal(vector_operands[0][0], vector_operands[0][3])) {
vector_result[0] = vector_operands[0][0]; vector_result[0] = vector_operands[0][0];
} else if (vector_operands[0][1] >= vector_operands[0][2] && } else if (std::isgreaterequal(vector_operands[0][1],
vector_operands[0][1] >= vector_operands[0][3]) { vector_operands[0][2]) &&
std::isgreaterequal(vector_operands[0][1],
vector_operands[0][3])) {
vector_result[0] = vector_operands[0][1]; vector_result[0] = vector_operands[0][1];
} else if (vector_operands[0][2] >= vector_operands[0][3]) { } else if (std::isgreaterequal(vector_operands[0][2],
vector_operands[0][3])) {
vector_result[0] = vector_operands[0][2]; vector_result[0] = vector_operands[0][2];
} else { } else {
vector_result[0] = vector_operands[0][3]; vector_result[0] = vector_operands[0][3];
@ -517,21 +530,21 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
replicate_vector_result_x = true; replicate_vector_result_x = true;
} break; } break;
case ucode::AluVectorOpcode::kSetpGtPush: { case ucode::AluVectorOpcode::kSetpGtPush: {
state_.predicate = state_.predicate = vector_operands[0][3] == 0.0f &&
vector_operands[0][3] == 0.0f && vector_operands[1][3] > 0.0f; std::isgreater(vector_operands[1][3], 0.0f);
vector_result[0] = vector_result[0] = (vector_operands[0][0] == 0.0f &&
(vector_operands[0][0] == 0.0f && vector_operands[1][0] > 0.0f) std::isgreater(vector_operands[1][0], 0.0f))
? 0.0f ? 0.0f
: vector_operands[0][0] + 1.0f; : vector_operands[0][0] + 1.0f;
replicate_vector_result_x = true; replicate_vector_result_x = true;
} break; } break;
case ucode::AluVectorOpcode::kSetpGePush: { case ucode::AluVectorOpcode::kSetpGePush: {
state_.predicate = state_.predicate = vector_operands[0][3] == 0.0f &&
vector_operands[0][3] == 0.0f && vector_operands[1][3] >= 0.0f; std::isgreaterequal(vector_operands[1][3], 0.0f);
vector_result[0] = vector_result[0] = (vector_operands[0][0] == 0.0f &&
(vector_operands[0][0] == 0.0f && vector_operands[1][0] >= 0.0f) std::isgreaterequal(vector_operands[1][0], 0.0f))
? 0.0f ? 0.0f
: vector_operands[0][0] + 1.0f; : vector_operands[0][0] + 1.0f;
replicate_vector_result_x = true; replicate_vector_result_x = true;
} break; } break;
// Not implementing pixel kill currently, the interpreter is currently // Not implementing pixel kill currently, the interpreter is currently
@ -545,19 +558,19 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
replicate_vector_result_x = true; replicate_vector_result_x = true;
} break; } break;
case ucode::AluVectorOpcode::kKillGt: { case ucode::AluVectorOpcode::kKillGt: {
vector_result[0] = vector_result[0] = float(
float(vector_operands[0][0] > vector_operands[1][0] || std::isgreater(vector_operands[0][0], vector_operands[1][0]) ||
vector_operands[0][1] > vector_operands[1][1] || std::isgreater(vector_operands[0][1], vector_operands[1][1]) ||
vector_operands[0][2] > vector_operands[1][2] || std::isgreater(vector_operands[0][2], vector_operands[1][2]) ||
vector_operands[0][3] > vector_operands[1][3]); std::isgreater(vector_operands[0][3], vector_operands[1][3]));
replicate_vector_result_x = true; replicate_vector_result_x = true;
} break; } break;
case ucode::AluVectorOpcode::kKillGe: { case ucode::AluVectorOpcode::kKillGe: {
vector_result[0] = vector_result[0] = float(
float(vector_operands[0][0] >= vector_operands[1][0] || std::isgreaterequal(vector_operands[0][0], vector_operands[1][0]) ||
vector_operands[0][1] >= vector_operands[1][1] || std::isgreaterequal(vector_operands[0][1], vector_operands[1][1]) ||
vector_operands[0][2] >= vector_operands[1][2] || std::isgreaterequal(vector_operands[0][2], vector_operands[1][2]) ||
vector_operands[0][3] >= vector_operands[1][3]); std::isgreaterequal(vector_operands[0][3], vector_operands[1][3]));
replicate_vector_result_x = true; replicate_vector_result_x = true;
} break; } break;
case ucode::AluVectorOpcode::kKillNe: { case ucode::AluVectorOpcode::kKillNe: {
@ -578,14 +591,13 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
vector_result[3] = vector_operands[1][3]; vector_result[3] = vector_operands[1][3];
} break; } break;
case ucode::AluVectorOpcode::kMaxA: { case ucode::AluVectorOpcode::kMaxA: {
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument
// (-256.0f) is always the result.
state_.address_register = int32_t(std::floor( state_.address_register = int32_t(std::floor(
std::min(255.0f, std::max(-256.0f, vector_operands[0][3])) + 0.5f)); xe::clamp_float(vector_operands[0][3], -256.0f, 255.0f) + 0.5f));
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] >= vector_operands[1][i] vector_result[i] =
? vector_operands[0][i] std::isgreaterequal(vector_operands[0][i], vector_operands[1][i])
: vector_operands[1][i]; ? vector_operands[0][i]
: vector_operands[1][i];
} }
} break; } break;
default: { default: {
@ -611,6 +623,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
// r#/c#.w or r#/c#.wx. // r#/c#.w or r#/c#.wx.
const float* scalar_src_ptr; const float* scalar_src_ptr;
uint32_t scalar_src_register = instr.src_reg(3); uint32_t scalar_src_register = instr.src_reg(3);
std::array<float, 4> scalar_src_float_constant;
if (instr.src_is_temp(3)) { if (instr.src_is_temp(3)) {
scalar_src_ptr = GetTempRegister( scalar_src_ptr = GetTempRegister(
ucode::AluInstruction::src_temp_reg(scalar_src_register), ucode::AluInstruction::src_temp_reg(scalar_src_register),
@ -618,9 +631,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
scalar_src_register); scalar_src_register);
} else { } else {
scalar_src_ptr = GetFloatConstant( scalar_src_float_constant = GetFloatConstant(
scalar_src_register, instr.src_const_is_addressed(3), scalar_src_register, instr.src_const_is_addressed(3),
instr.is_const_address_register_relative()); instr.is_const_address_register_relative());
scalar_src_ptr = scalar_src_float_constant.data();
} }
uint32_t scalar_src_swizzle = instr.src_swizzle(3); uint32_t scalar_src_swizzle = instr.src_swizzle(3);
scalar_operand_component_count = scalar_operand_component_count =
@ -688,7 +702,8 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
case ucode::AluScalarOpcode::kMulsPrev2: { case ucode::AluScalarOpcode::kMulsPrev2: {
if (state_.previous_scalar == -FLT_MAX || if (state_.previous_scalar == -FLT_MAX ||
!std::isfinite(state_.previous_scalar) || !std::isfinite(state_.previous_scalar) ||
!std::isfinite(scalar_operands[1]) || scalar_operands[1] <= 0.0f) { !std::isfinite(scalar_operands[1]) ||
std::islessequal(scalar_operands[1], 0.0f)) {
state_.previous_scalar = -FLT_MAX; state_.previous_scalar = -FLT_MAX;
} else { } else {
// Direct3D 9 behavior (0 or denormal * anything = +0). // Direct3D 9 behavior (0 or denormal * anything = +0).
@ -699,23 +714,26 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
} }
} break; } break;
case ucode::AluScalarOpcode::kMaxs: { case ucode::AluScalarOpcode::kMaxs: {
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] state_.previous_scalar =
? scalar_operands[0] std::isgreaterequal(scalar_operands[0], scalar_operands[1])
: scalar_operands[1]; ? scalar_operands[0]
: scalar_operands[1];
} break; } break;
case ucode::AluScalarOpcode::kMins: { case ucode::AluScalarOpcode::kMins: {
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] state_.previous_scalar =
? scalar_operands[0] std::isless(scalar_operands[0], scalar_operands[1])
: scalar_operands[1]; ? scalar_operands[0]
: scalar_operands[1];
} break; } break;
case ucode::AluScalarOpcode::kSeqs: { case ucode::AluScalarOpcode::kSeqs: {
state_.previous_scalar = float(scalar_operands[0] == 0.0f); state_.previous_scalar = float(scalar_operands[0] == 0.0f);
} break; } break;
case ucode::AluScalarOpcode::kSgts: { case ucode::AluScalarOpcode::kSgts: {
state_.previous_scalar = float(scalar_operands[0] > 0.0f); state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f));
} break; } break;
case ucode::AluScalarOpcode::kSges: { case ucode::AluScalarOpcode::kSges: {
state_.previous_scalar = float(scalar_operands[0] >= 0.0f); state_.previous_scalar =
float(std::isgreaterequal(scalar_operands[0], 0.0f));
} break; } break;
case ucode::AluScalarOpcode::kSnes: { case ucode::AluScalarOpcode::kSnes: {
state_.previous_scalar = float(scalar_operands[0] != 0.0f); state_.previous_scalar = float(scalar_operands[0] != 0.0f);
@ -781,22 +799,20 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
state_.previous_scalar = 1.0f / std::sqrt(scalar_operands[0]); state_.previous_scalar = 1.0f / std::sqrt(scalar_operands[0]);
} break; } break;
case ucode::AluScalarOpcode::kMaxAs: { case ucode::AluScalarOpcode::kMaxAs: {
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument
// (-256.0f) is always the result.
state_.address_register = int32_t(std::floor( state_.address_register = int32_t(std::floor(
std::min(255.0f, std::max(-256.0f, scalar_operands[0])) + 0.5f)); xe::clamp_float(scalar_operands[0], -256.0f, 255.0f) + 0.5f));
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] state_.previous_scalar =
? scalar_operands[0] std::isgreaterequal(scalar_operands[0], scalar_operands[1])
: scalar_operands[1]; ? scalar_operands[0]
: scalar_operands[1];
} break; } break;
case ucode::AluScalarOpcode::kMaxAsf: { case ucode::AluScalarOpcode::kMaxAsf: {
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument
// (-256.0f) is always the result.
state_.address_register = int32_t( state_.address_register = int32_t(
std::floor(std::min(255.0f, std::max(-256.0f, scalar_operands[0])))); std::floor(xe::clamp_float(scalar_operands[0], -256.0f, 255.0f)));
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] state_.previous_scalar =
? scalar_operands[0] std::isgreaterequal(scalar_operands[0], scalar_operands[1])
: scalar_operands[1]; ? scalar_operands[0]
: scalar_operands[1];
} break; } break;
case ucode::AluScalarOpcode::kSubs: case ucode::AluScalarOpcode::kSubs:
case ucode::AluScalarOpcode::kSubsc0: case ucode::AluScalarOpcode::kSubsc0:
@ -815,11 +831,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
state_.previous_scalar = float(!state_.predicate); state_.previous_scalar = float(!state_.predicate);
} break; } break;
case ucode::AluScalarOpcode::kSetpGt: { case ucode::AluScalarOpcode::kSetpGt: {
state_.predicate = scalar_operands[0] > 0.0f; state_.predicate = std::isgreater(scalar_operands[0], 0.0f);
state_.previous_scalar = float(!state_.predicate); state_.previous_scalar = float(!state_.predicate);
} break; } break;
case ucode::AluScalarOpcode::kSetpGe: { case ucode::AluScalarOpcode::kSetpGe: {
state_.predicate = scalar_operands[0] >= 0.0f; state_.predicate = std::isgreaterequal(scalar_operands[0], 0.0f);
state_.previous_scalar = float(!state_.predicate); state_.previous_scalar = float(!state_.predicate);
} break; } break;
case ucode::AluScalarOpcode::kSetpInv: { case ucode::AluScalarOpcode::kSetpInv: {
@ -831,7 +847,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
} break; } break;
case ucode::AluScalarOpcode::kSetpPop: { case ucode::AluScalarOpcode::kSetpPop: {
float new_counter = scalar_operands[0] - 1.0f; float new_counter = scalar_operands[0] - 1.0f;
state_.predicate = new_counter <= 0.0f; state_.predicate = std::islessequal(new_counter, 0.0f);
state_.previous_scalar = state_.predicate ? 0.0f : new_counter; state_.previous_scalar = state_.predicate ? 0.0f : new_counter;
} break; } break;
case ucode::AluScalarOpcode::kSetpClr: { case ucode::AluScalarOpcode::kSetpClr: {
@ -848,10 +864,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
state_.previous_scalar = float(scalar_operands[0] == 0.0f); state_.previous_scalar = float(scalar_operands[0] == 0.0f);
} break; } break;
case ucode::AluScalarOpcode::kKillsGt: { case ucode::AluScalarOpcode::kKillsGt: {
state_.previous_scalar = float(scalar_operands[0] > 0.0f); state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f));
} break; } break;
case ucode::AluScalarOpcode::kKillsGe: { case ucode::AluScalarOpcode::kKillsGe: {
state_.previous_scalar = float(scalar_operands[0] >= 0.0f); state_.previous_scalar =
float(std::isgreaterequal(scalar_operands[0], 0.0f));
} break; } break;
case ucode::AluScalarOpcode::kKillsNe: { case ucode::AluScalarOpcode::kKillsNe: {
state_.previous_scalar = float(scalar_operands[0] != 0.0f); state_.previous_scalar = float(scalar_operands[0] != 0.0f);
@ -877,11 +894,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
if (instr.vector_clamp()) { if (instr.vector_clamp()) {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = xe::saturate_unsigned(vector_result[i]); vector_result[i] = xe::saturate(vector_result[i]);
} }
} }
float scalar_result = instr.scalar_clamp() float scalar_result = instr.scalar_clamp()
? xe::saturate_unsigned(state_.previous_scalar) ? xe::saturate(state_.previous_scalar)
: state_.previous_scalar; : state_.previous_scalar;
uint32_t scalar_result_write_mask = instr.GetScalarOpResultWriteMask(); uint32_t scalar_result_write_mask = instr.GetScalarOpResultWriteMask();
@ -977,10 +994,8 @@ void ShaderInterpreter::ExecuteVertexFetchInstruction(
state_.vfetch_full_last = instr; state_.vfetch_full_last = instr;
} }
xenos::xe_gpu_vertex_fetch_t fetch_constant = xenos::xe_gpu_vertex_fetch_t fetch_constant = register_file_.GetVertexFetch(
*reinterpret_cast<const xenos::xe_gpu_vertex_fetch_t*>( state_.vfetch_full_last.fetch_constant_index());
&register_file_[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
state_.vfetch_full_last.fetch_constant_index()]);
if (!instr.is_mini_fetch()) { if (!instr.is_mini_fetch()) {
// Get the part of the address that depends on vfetch_full data. // Get the part of the address that depends on vfetch_full data.

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_SHADER_INTERPRETER_H_ #define XENIA_GPU_SHADER_INTERPRETER_H_
#include <algorithm> #include <algorithm>
#include <array>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
@ -117,8 +118,8 @@ class ShaderInterpreter {
float* GetTempRegister(uint32_t address, bool is_relative) { float* GetTempRegister(uint32_t address, bool is_relative) {
return temp_registers_[GetTempRegisterIndex(address, is_relative)]; return temp_registers_[GetTempRegisterIndex(address, is_relative)];
} }
const float* GetFloatConstant(uint32_t address, bool is_relative, const std::array<float, 4> GetFloatConstant(
bool relative_address_is_a0) const; uint32_t address, bool is_relative, bool relative_address_is_a0) const;
void ExecuteAluInstruction(ucode::AluInstruction instr); void ExecuteAluInstruction(ucode::AluInstruction instr);
void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle, void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle,

View File

@ -13,6 +13,8 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "xenia/base/assert.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -101,5 +103,195 @@ spv::Id SpirvBuilder::createTriBuiltinCall(spv::Id result_type,
return result; return result;
} }
SpirvBuilder::IfBuilder::IfBuilder(spv::Id condition, unsigned int control,
SpirvBuilder& builder,
unsigned int thenWeight,
unsigned int elseWeight)
: builder(builder),
condition(condition),
control(control),
thenWeight(thenWeight),
elseWeight(elseWeight),
function(builder.getBuildPoint()->getParent()) {
// Make the blocks, but only put the then-block into the function, the
// else-block and merge-block will be added later, in order, after earlier
// code is emitted.
thenBlock = new spv::Block(builder.getUniqueId(), function);
elseBlock = nullptr;
mergeBlock = new spv::Block(builder.getUniqueId(), function);
// Save the current block, so that we can add in the flow control split when
// makeEndIf is called.
headerBlock = builder.getBuildPoint();
spv::Id headerBlockId = headerBlock->getId();
thenPhiParent = headerBlockId;
elsePhiParent = headerBlockId;
function.addBlock(thenBlock);
builder.setBuildPoint(thenBlock);
}
// Finishes the "then" branch and starts emitting code into a newly created
// "else" block. Pass branchToMerge = false when the "then" path does not fall
// through to the merge block (its terminator was emitted by the caller); in
// that case no branch is emitted and the then phi parent keeps its previous
// value (the header block by default).
void SpirvBuilder::IfBuilder::makeBeginElse(bool branchToMerge) {
#ifndef NDEBUG
  assert_true(currentBranch == Branch::kThen);
#endif
  if (branchToMerge) {
    // Close out the "then" by having it jump to the mergeBlock.
    // Record the actual current block (which may differ from thenBlock if
    // nested control flow was emitted) as the phi parent for the then value.
    thenPhiParent = builder.getBuildPoint()->getId();
    builder.createBranch(mergeBlock);
  }
  // Make the first else block and add it to the function.
  elseBlock = new spv::Block(builder.getUniqueId(), function);
  function.addBlock(elseBlock);
  // Start building the else block.
  builder.setBuildPoint(elseBlock);
#ifndef NDEBUG
  currentBranch = Branch::kElse;
#endif
}
// Closes the construct: terminates the branch currently being built (then or
// else) with a jump to the merge block if branchToMerge is true, emits the
// OpSelectionMerge and the OpBranchConditional (with optional branch weights)
// into the saved header block, appends the merge block to the function, and
// makes it the new build point.
void SpirvBuilder::IfBuilder::makeEndIf(bool branchToMerge) {
#ifndef NDEBUG
  assert_true(currentBranch == Branch::kThen || currentBranch == Branch::kElse);
#endif
  if (branchToMerge) {
    // Jump to the merge block.
    // Record the current block as the phi parent of whichever branch is being
    // closed (else if makeBeginElse was called, then otherwise).
    (elseBlock ? elsePhiParent : thenPhiParent) =
        builder.getBuildPoint()->getId();
    builder.createBranch(mergeBlock);
  }
  // Go back to the headerBlock and make the flow control split.
  builder.setBuildPoint(headerBlock);
  builder.createSelectionMerge(mergeBlock, control);
  {
    // With no else block, the false edge of the conditional branch goes
    // straight to the merge block.
    spv::Block* falseBlock = elseBlock ? elseBlock : mergeBlock;
    std::unique_ptr<spv::Instruction> branch =
        std::make_unique<spv::Instruction>(spv::OpBranchConditional);
    branch->addIdOperand(condition);
    branch->addIdOperand(thenBlock->getId());
    branch->addIdOperand(falseBlock->getId());
    // Branch weights are optional operands - only add them if either is
    // nonzero.
    if (thenWeight || elseWeight) {
      branch->addImmediateOperand(thenWeight);
      branch->addImmediateOperand(elseWeight);
    }
    builder.getBuildPoint()->addInstruction(std::move(branch));
    thenBlock->addPredecessor(builder.getBuildPoint());
    falseBlock->addPredecessor(builder.getBuildPoint());
  }
  // Add the merge block to the function.
  function.addBlock(mergeBlock);
  builder.setBuildPoint(mergeBlock);
#ifndef NDEBUG
  currentBranch = Branch::kMerge;
#endif
}
// Emits an OpPhi in the merge block selecting then_variable when control
// arrived from the then path and else_variable otherwise, using the recorded
// phi parent blocks. Must be called while the build point is the merge block
// (i.e. after makeEndIf). The result type is taken from then_variable.
spv::Id SpirvBuilder::IfBuilder::createMergePhi(spv::Id then_variable,
                                                spv::Id else_variable) const {
  assert_true(builder.getBuildPoint() == mergeBlock);
  return builder.createQuadOp(spv::OpPhi, builder.getTypeId(then_variable),
                              then_variable, getThenPhiParent(), else_variable,
                              getElsePhiParent());
}
// Opens a structured switch on `selector`: remembers the current block as the
// header (where makeEndSwitch will emit OpSelectionMerge/OpSwitch) and
// pre-creates the merge block, which is appended to the function only in
// makeEndSwitch so case blocks come before it. The default phi parent starts
// as the header block (covers case-only usage).
SpirvBuilder::SwitchBuilder::SwitchBuilder(spv::Id selector,
                                           unsigned int selection_control,
                                           SpirvBuilder& builder)
    : builder_(builder),
      selector_(selector),
      selection_control_(selection_control),
      function_(builder.getBuildPoint()->getParent()),
      header_block_(builder.getBuildPoint()),
      default_phi_parent_(builder.getBuildPoint()->getId()) {
  merge_block_ = new spv::Block(builder_.getUniqueId(), function_);
}
// Closes the current case segment (if any) and starts emitting code into a
// newly created default block. May be called at most once per switch; the
// header block is registered as its predecessor since OpSwitch will branch to
// it from there.
void SpirvBuilder::SwitchBuilder::makeBeginDefault() {
  assert_null(default_block_);
  endSegment();
  default_block_ = new spv::Block(builder_.getUniqueId(), function_);
  function_.addBlock(default_block_);
  default_block_->addPredecessor(header_block_);
  builder_.setBuildPoint(default_block_);
  current_branch_ = Branch::kDefault;
}
// Closes the current segment (if any) and starts emitting code into a new
// case block reached when the selector equals `literal`. Additional literals
// may be attached to the same block via addCurrentCaseLiteral.
void SpirvBuilder::SwitchBuilder::makeBeginCase(unsigned int literal) {
  endSegment();
  auto case_block = new spv::Block(builder_.getUniqueId(), function_);
  function_.addBlock(case_block);
  // The (literal, target block) pairs become the OpSwitch operands in
  // makeEndSwitch.
  cases_.emplace_back(literal, case_block->getId());
  case_block->addPredecessor(header_block_);
  builder_.setBuildPoint(case_block);
  current_branch_ = Branch::kCase;
}
// Maps one more selector literal to the case block currently being built
// (multiple literals sharing one block - something makeSwitch doesn't offer).
// Only valid between makeBeginCase and the next segment change.
void SpirvBuilder::SwitchBuilder::addCurrentCaseLiteral(unsigned int literal) {
  assert_true(current_branch_ == Branch::kCase);
  // Reuse the target block ID of the most recently begun case.
  cases_.emplace_back(literal, cases_.back().second);
}
// Closes the construct: ends the last segment, emits OpSelectionMerge and
// OpSwitch into the header block (the merge block serves as the default
// target if makeBeginDefault was never called), appends the merge block to
// the function, and makes it the new build point.
void SpirvBuilder::SwitchBuilder::makeEndSwitch() {
  endSegment();
  builder_.setBuildPoint(header_block_);
  builder_.createSelectionMerge(merge_block_, selection_control_);
  // OpSwitch operand order: selector, default target, then
  // (literal, target) pairs.
  std::unique_ptr<spv::Instruction> switch_instruction =
      std::make_unique<spv::Instruction>(spv::OpSwitch);
  switch_instruction->addIdOperand(selector_);
  if (default_block_) {
    switch_instruction->addIdOperand(default_block_->getId());
  } else {
    // No default segment - fall through directly to the merge block, which
    // thus gains the header block as a predecessor.
    switch_instruction->addIdOperand(merge_block_->getId());
    merge_block_->addPredecessor(header_block_);
  }
  for (const std::pair<unsigned int, spv::Id>& case_pair : cases_) {
    switch_instruction->addImmediateOperand(case_pair.first);
    switch_instruction->addIdOperand(case_pair.second);
  }
  builder_.getBuildPoint()->addInstruction(std::move(switch_instruction));
  function_.addBlock(merge_block_);
  builder_.setBuildPoint(merge_block_);
  current_branch_ = Branch::kMerge;
}
// Internal: finishes the default/case segment currently being built. If the
// segment's current block hasn't been terminated by the caller, a branch to
// the merge block is emitted (and, for the default segment, the current block
// is recorded as the default phi parent). No-op when no segment is open.
void SpirvBuilder::SwitchBuilder::endSegment() {
  assert_true(current_branch_ == Branch::kSelection ||
              current_branch_ == Branch::kDefault ||
              current_branch_ == Branch::kCase);
  if (current_branch_ == Branch::kSelection) {
    // Nothing opened since the constructor or the last endSegment.
    return;
  }
  if (!builder_.getBuildPoint()->isTerminated()) {
    builder_.createBranch(merge_block_);
    if (current_branch_ == Branch::kDefault) {
      // The default path falls through to the merge block - use the actual
      // final block of the segment as the phi parent.
      default_phi_parent_ = builder_.getBuildPoint()->getId();
    }
  }
  current_branch_ = Branch::kSelection;
}
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -10,7 +10,13 @@
#ifndef XENIA_GPU_SPIRV_BUILDER_H_ #ifndef XENIA_GPU_SPIRV_BUILDER_H_
#define XENIA_GPU_SPIRV_BUILDER_H_ #define XENIA_GPU_SPIRV_BUILDER_H_
#include <memory>
#include <optional>
#include <utility>
#include <vector>
#include "third_party/glslang/SPIRV/SpvBuilder.h" #include "third_party/glslang/SPIRV/SpvBuilder.h"
#include "xenia/base/assert.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -42,6 +48,104 @@ class SpirvBuilder : public spv::Builder {
spv::Id createTriBuiltinCall(spv::Id result_type, spv::Id builtins, spv::Id createTriBuiltinCall(spv::Id result_type, spv::Id builtins,
int entry_point, spv::Id operand1, int entry_point, spv::Id operand1,
spv::Id operand2, spv::Id operand3); spv::Id operand2, spv::Id operand3);
// Helper to use for building nested control flow with if-then-else with
// additions over SpvBuilder::If.
// Usage: construct (starts the "then" branch), optionally makeBeginElse(),
// then makeEndIf(); afterwards the build point is the merge block and
// createMergePhi() can select a per-branch value.
class IfBuilder {
 public:
  // Saves the current block as the header and redirects emission into a new
  // "then" block. thenWeight/elseWeight are optional OpBranchConditional
  // branch weights (both 0 = no weights).
  IfBuilder(spv::Id condition, unsigned int control, SpirvBuilder& builder,
            unsigned int thenWeight = 0, unsigned int elseWeight = 0);
  ~IfBuilder() {
#ifndef NDEBUG
    // Enforce that makeEndIf was called before destruction.
    assert_true(currentBranch == Branch::kMerge);
#endif
  }
  // Pass branchToMerge = false when the branch being closed doesn't fall
  // through to the merge block (already terminated by the caller).
  void makeBeginElse(bool branchToMerge = true);
  void makeEndIf(bool branchToMerge = true);
  // If there's no then/else block that branches to the merge block, the phi
  // parent is the header block - this simplifies then-only usage.
  spv::Id getThenPhiParent() const { return thenPhiParent; }
  spv::Id getElsePhiParent() const { return elsePhiParent; }
  // Emits an OpPhi in the merge block (only valid after makeEndIf).
  spv::Id createMergePhi(spv::Id then_variable, spv::Id else_variable) const;

 private:
  // Debug-only state machine tracking which part of the construct is being
  // built, to assert correct call ordering.
  enum class Branch {
    kThen,
    kElse,
    kMerge,
  };

  // Non-copyable - owns in-progress CFG construction state.
  IfBuilder(const IfBuilder& ifBuilder) = delete;
  IfBuilder& operator=(const IfBuilder& ifBuilder) = delete;

  SpirvBuilder& builder;
  spv::Id condition;
  // Selection control mask for OpSelectionMerge.
  unsigned int control;
  unsigned int thenWeight;
  unsigned int elseWeight;
  spv::Function& function;
  // Block where OpSelectionMerge / OpBranchConditional are emitted.
  spv::Block* headerBlock;
  spv::Block* thenBlock;
  // Created lazily by makeBeginElse; nullptr for then-only constructs.
  spv::Block* elseBlock;
  spv::Block* mergeBlock;
  // Last blocks of each path that branched to the merge block (header block
  // if the path never did).
  spv::Id thenPhiParent;
  spv::Id elsePhiParent;
#ifndef NDEBUG
  Branch currentBranch = Branch::kThen;
#endif
};
// Simpler and more flexible (such as multiple cases pointing to the same
// block) compared to makeSwitch.
// Usage: construct, then any sequence of makeBeginCase / makeBeginDefault
// (with addCurrentCaseLiteral to alias more literals onto the current case),
// then makeEndSwitch(); afterwards the build point is the merge block.
class SwitchBuilder {
 public:
  // Saves the current block as the header; OpSelectionMerge / OpSwitch are
  // emitted there by makeEndSwitch.
  SwitchBuilder(spv::Id selector, unsigned int selection_control,
                SpirvBuilder& builder);
  // Enforce that makeEndSwitch was called before destruction.
  ~SwitchBuilder() { assert_true(current_branch_ == Branch::kMerge); }
  void makeBeginDefault();
  void makeBeginCase(unsigned int literal);
  // Maps an additional selector literal to the case block currently being
  // built.
  void addCurrentCaseLiteral(unsigned int literal);
  void makeEndSwitch();
  // If there's no default block that branches to the merge block, the phi
  // parent is the header block - this simplifies case-only usage.
  spv::Id getDefaultPhiParent() const { return default_phi_parent_; }

 private:
  // State machine tracking which part of the construct is being built.
  // kSelection means no case/default segment is currently open.
  enum class Branch {
    kSelection,
    kDefault,
    kCase,
    kMerge,
  };

  // Finishes the currently open case/default segment, branching it to the
  // merge block if it doesn't already end with a terminator.
  void endSegment();

  SpirvBuilder& builder_;
  spv::Id selector_;
  // Selection control mask for OpSelectionMerge.
  unsigned int selection_control_;
  spv::Function& function_;
  spv::Block* header_block_;
  // Created in the constructor, appended to the function in makeEndSwitch.
  spv::Block* merge_block_;
  spv::Block* default_block_ = nullptr;
  // (selector literal, target block ID) pairs - the OpSwitch case operands.
  std::vector<std::pair<unsigned int, spv::Id>> cases_;
  spv::Id default_phi_parent_;
  Branch current_branch_ = Branch::kSelection;
};
}; };
} // namespace gpu } // namespace gpu

View File

@ -30,30 +30,35 @@ namespace gpu {
SpirvShaderTranslator::Features::Features(bool all) SpirvShaderTranslator::Features::Features(bool all)
: spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0), : spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0),
max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)), max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)),
full_draw_index_uint32(all),
vertex_pipeline_stores_and_atomics(all),
fragment_stores_and_atomics(all),
clip_distance(all), clip_distance(all),
cull_distance(all), cull_distance(all),
demote_to_helper_invocation(all),
fragment_shader_sample_interlock(all),
full_draw_index_uint32(all),
image_view_format_swizzle(all), image_view_format_swizzle(all),
signed_zero_inf_nan_preserve_float32(all), signed_zero_inf_nan_preserve_float32(all),
denorm_flush_to_zero_float32(all), denorm_flush_to_zero_float32(all),
rounding_mode_rte_float32(all) {} rounding_mode_rte_float32(all),
fragment_shader_sample_interlock(all),
demote_to_helper_invocation(all) {}
SpirvShaderTranslator::Features::Features( SpirvShaderTranslator::Features::Features(
const ui::vulkan::VulkanProvider::DeviceInfo& device_info) const ui::vulkan::VulkanProvider::DeviceInfo& device_info)
: max_storage_buffer_range(device_info.maxStorageBufferRange), : max_storage_buffer_range(device_info.maxStorageBufferRange),
full_draw_index_uint32(device_info.fullDrawIndexUint32),
vertex_pipeline_stores_and_atomics(
device_info.vertexPipelineStoresAndAtomics),
fragment_stores_and_atomics(device_info.fragmentStoresAndAtomics),
clip_distance(device_info.shaderClipDistance), clip_distance(device_info.shaderClipDistance),
cull_distance(device_info.shaderCullDistance), cull_distance(device_info.shaderCullDistance),
demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation),
fragment_shader_sample_interlock(
device_info.fragmentShaderSampleInterlock),
full_draw_index_uint32(device_info.fullDrawIndexUint32),
image_view_format_swizzle(device_info.imageViewFormatSwizzle), image_view_format_swizzle(device_info.imageViewFormatSwizzle),
signed_zero_inf_nan_preserve_float32( signed_zero_inf_nan_preserve_float32(
device_info.shaderSignedZeroInfNanPreserveFloat32), device_info.shaderSignedZeroInfNanPreserveFloat32),
denorm_flush_to_zero_float32(device_info.shaderDenormFlushToZeroFloat32), denorm_flush_to_zero_float32(device_info.shaderDenormFlushToZeroFloat32),
rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32) { rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32),
fragment_shader_sample_interlock(
device_info.fragmentShaderSampleInterlock),
demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation) {
if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) {
spirv_version = spv::Spv_1_5; spirv_version = spv::Spv_1_5;
} else if (device_info.ext_1_2_VK_KHR_spirv_1_4) { } else if (device_info.ext_1_2_VK_KHR_spirv_1_4) {
@ -117,6 +122,14 @@ void SpirvShaderTranslator::Reset() {
main_interface_.clear(); main_interface_.clear();
var_main_registers_ = spv::NoResult; var_main_registers_ = spv::NoResult;
var_main_memexport_address_ = spv::NoResult;
for (size_t memexport_eM_index = 0;
memexport_eM_index < xe::countof(var_main_memexport_data_);
++memexport_eM_index) {
var_main_memexport_data_[memexport_eM_index] = spv::NoResult;
}
var_main_memexport_data_written_ = spv::NoResult;
main_memexport_allowed_ = spv::NoResult;
var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult; var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult;
var_main_kill_pixel_ = spv::NoResult; var_main_kill_pixel_ = spv::NoResult;
var_main_fsi_color_written_ = spv::NoResult; var_main_fsi_color_written_ = spv::NoResult;
@ -310,6 +323,8 @@ void SpirvShaderTranslator::StartTranslation() {
main_interface_.push_back(uniform_system_constants_); main_interface_.push_back(uniform_system_constants_);
} }
bool memexport_used = IsMemoryExportUsed();
if (!is_depth_only_fragment_shader_) { if (!is_depth_only_fragment_shader_) {
// Common uniform buffer - float constants. // Common uniform buffer - float constants.
uint32_t float_constant_count = uint32_t float_constant_count =
@ -420,9 +435,10 @@ void SpirvShaderTranslator::StartTranslation() {
builder_->addMemberName(type_shared_memory, 0, "shared_memory"); builder_->addMemberName(type_shared_memory, 0, "shared_memory");
builder_->addMemberDecoration(type_shared_memory, 0, builder_->addMemberDecoration(type_shared_memory, 0,
spv::DecorationRestrict); spv::DecorationRestrict);
// TODO(Triang3l): Make writable when memexport is implemented. if (!memexport_used) {
builder_->addMemberDecoration(type_shared_memory, 0, builder_->addMemberDecoration(type_shared_memory, 0,
spv::DecorationNonWritable); spv::DecorationNonWritable);
}
builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset, builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset,
0); 0);
builder_->addDecoration(type_shared_memory, builder_->addDecoration(type_shared_memory,
@ -509,6 +525,24 @@ void SpirvShaderTranslator::StartTranslation() {
builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction, builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction,
type_register_array, "xe_var_registers"); type_register_array, "xe_var_registers");
} }
if (memexport_used) {
var_main_memexport_address_ = builder_->createVariable(
spv::NoPrecision, spv::StorageClassFunction, type_float4_,
"xe_var_memexport_address", const_float4_0_);
uint8_t memexport_eM_remaining = current_shader().memexport_eM_written();
uint32_t memexport_eM_index;
while (
xe::bit_scan_forward(memexport_eM_remaining, &memexport_eM_index)) {
memexport_eM_remaining &= ~(uint8_t(1) << memexport_eM_index);
var_main_memexport_data_[memexport_eM_index] = builder_->createVariable(
spv::NoPrecision, spv::StorageClassFunction, type_float4_,
fmt::format("xe_var_memexport_data_{}", memexport_eM_index).c_str(),
const_float4_0_);
}
var_main_memexport_data_written_ = builder_->createVariable(
spv::NoPrecision, spv::StorageClassFunction, type_uint_,
"xe_var_memexport_data_written", const_uint_0_);
}
} }
// Write the execution model-specific prologue with access to variables in the // Write the execution model-specific prologue with access to variables in the
@ -647,6 +681,10 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
builder_->setBuildPoint(main_loop_merge_); builder_->setBuildPoint(main_loop_merge_);
} }
// Write data for the last memexport.
ExportToMemory(
current_shader().memexport_eM_potentially_written_before_end());
if (is_vertex_shader()) { if (is_vertex_shader()) {
CompleteVertexOrTessEvalShaderInMain(); CompleteVertexOrTessEvalShaderInMain();
} else if (is_pixel_shader()) { } else if (is_pixel_shader()) {
@ -1077,6 +1115,34 @@ void SpirvShaderTranslator::ProcessJumpInstruction(
builder_->createBranch(main_loop_continue_); builder_->createBranch(main_loop_continue_);
} }
// Handles a guest `alloc` instruction. export_eM is a bitmask of eM# export
// registers potentially written before this alloc - if nonzero, their
// contents are flushed to memory here. If this alloc opens a new memory
// export stream (kMemory alloc in a shader that writes eM#), the export
// address register eA is reset so a stale address isn't reused.
void SpirvShaderTranslator::ProcessAllocInstruction(
    const ParsedAllocInstruction& instr, uint8_t export_eM) {
  bool start_memexport = instr.type == ucode::AllocType::kMemory &&
                         current_shader().memexport_eM_written();
  if (export_eM || start_memexport) {
    // Exporting / resetting must happen unconditionally, outside any open
    // exec-level conditional.
    CloseExecConditionals();
  }
  if (export_eM) {
    ExportToMemory(export_eM);
    // Reset which eM# elements have been written.
    builder_->createStore(const_uint_0_, var_main_memexport_data_written_);
    // Break dependencies from the previous memexport.
    uint8_t export_eM_remaining = export_eM;
    uint32_t eM_index;
    // Iterate over the set bits of the mask, clearing each as it's handled.
    while (xe::bit_scan_forward(export_eM_remaining, &eM_index)) {
      export_eM_remaining &= ~(uint8_t(1) << eM_index);
      builder_->createStore(const_float4_0_,
                            var_main_memexport_data_[eM_index]);
    }
  }
  if (start_memexport) {
    // Initialize eA to an invalid address.
    builder_->createStore(const_float4_0_, var_main_memexport_address_);
  }
}
spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant( spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant(
spv::Id scalar, spv::Id vector_type) { spv::Id scalar, spv::Id vector_type) {
bool is_constant = builder_->isConstant(scalar); bool is_constant = builder_->isConstant(scalar);
@ -1205,6 +1271,8 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() {
} }
void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
Modification shader_modification = GetSpirvShaderModification();
// The edge flag isn't used for any purpose by the translator. // The edge flag isn't used for any purpose by the translator.
if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b101) { if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b101) {
id_vector_temp_.clear(); id_vector_temp_.clear();
@ -1244,11 +1312,40 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
} }
} }
Modification shader_modification = GetSpirvShaderModification();
// TODO(Triang3l): For HostVertexShaderType::kRectangeListAsTriangleStrip, // TODO(Triang3l): For HostVertexShaderType::kRectangeListAsTriangleStrip,
// start the vertex loop, and load the index there. // start the vertex loop, and load the index there.
// Check if memory export should be allowed for this host vertex of the guest
// primitive to make sure export is done only once for each guest vertex.
if (IsMemoryExportUsed()) {
spv::Id memexport_allowed_for_host_vertex_of_guest_primitive =
spv::NoResult;
if (shader_modification.vertex.host_vertex_shader_type ==
Shader::HostVertexShaderType::kPointListAsTriangleStrip) {
// Only for one host vertex for the point.
memexport_allowed_for_host_vertex_of_guest_primitive =
builder_->createBinOp(
spv::OpIEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_,
builder_->createUnaryOp(
spv::OpBitcast, type_uint_,
builder_->createLoad(input_vertex_index_,
spv::NoPrecision)),
builder_->makeUintConstant(3)),
const_uint_0_);
}
if (memexport_allowed_for_host_vertex_of_guest_primitive != spv::NoResult) {
main_memexport_allowed_ =
main_memexport_allowed_ != spv::NoResult
? builder_->createBinOp(
spv::OpLogicalAnd, type_bool_, main_memexport_allowed_,
memexport_allowed_for_host_vertex_of_guest_primitive)
: memexport_allowed_for_host_vertex_of_guest_primitive;
}
}
// Load the vertex index or the tessellation parameters. // Load the vertex index or the tessellation parameters.
if (register_count()) { if (register_count()) {
// TODO(Triang3l): Barycentric coordinates and patch index. // TODO(Triang3l): Barycentric coordinates and patch index.
@ -1272,89 +1369,70 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
builder_->makeUintConstant(static_cast<unsigned int>( builder_->makeUintConstant(static_cast<unsigned int>(
kSysFlag_ComputeOrPrimitiveVertexIndexLoad))), kSysFlag_ComputeOrPrimitiveVertexIndexLoad))),
const_uint_0_); const_uint_0_);
spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint(); SpirvBuilder::IfBuilder load_vertex_index_if(
spv::Block& block_load_vertex_index_start = builder_->makeNewBlock(); load_vertex_index, spv::SelectionControlDontFlattenMask, *builder_);
spv::Block& block_load_vertex_index_merge = builder_->makeNewBlock(); spv::Id loaded_vertex_index;
builder_->createSelectionMerge(&block_load_vertex_index_merge,
spv::SelectionControlDontFlattenMask);
builder_->createConditionalBranch(load_vertex_index,
&block_load_vertex_index_start,
&block_load_vertex_index_merge);
builder_->setBuildPoint(&block_load_vertex_index_start);
// Check if the index is 32-bit.
spv::Id vertex_index_is_32bit = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
builder_->makeUintConstant(static_cast<unsigned int>(
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit))),
const_uint_0_);
// Calculate the vertex index address in the shared memory.
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress));
spv::Id vertex_index_address = builder_->createBinOp(
spv::OpIAdd, type_uint_,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_system_constants_,
id_vector_temp_),
spv::NoPrecision),
builder_->createBinOp(
spv::OpShiftLeftLogical, type_uint_, vertex_index,
builder_->createTriOp(spv::OpSelect, type_uint_,
vertex_index_is_32bit, const_uint_2,
builder_->makeUintConstant(1))));
// Load the 32 bits containing the whole vertex index or two 16-bit
// vertex indices.
// TODO(Triang3l): Bounds checking.
spv::Id loaded_vertex_index =
LoadUint32FromSharedMemory(builder_->createUnaryOp(
spv::OpBitcast, type_int_,
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_,
vertex_index_address, const_uint_2)));
// Extract the 16-bit index from the loaded 32 bits if needed.
loaded_vertex_index = builder_->createTriOp(
spv::OpSelect, type_uint_, vertex_index_is_32bit,
loaded_vertex_index,
builder_->createTriOp(
spv::OpBitFieldUExtract, type_uint_, loaded_vertex_index,
builder_->createBinOp(
spv::OpShiftLeftLogical, type_uint_,
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
vertex_index_address, const_uint_2),
builder_->makeUintConstant(4 - 1)),
builder_->makeUintConstant(16)));
// Endian-swap the loaded index.
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantVertexIndexEndian));
loaded_vertex_index = EndianSwap32Uint(
loaded_vertex_index,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_system_constants_,
id_vector_temp_),
spv::NoPrecision));
// Get the actual build point for phi.
spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint();
builder_->createBranch(&block_load_vertex_index_merge);
// Select between the loaded index and the original index from Vulkan.
builder_->setBuildPoint(&block_load_vertex_index_merge);
{ {
std::unique_ptr<spv::Instruction> loaded_vertex_index_phi_op = // Check if the index is 32-bit.
std::make_unique<spv::Instruction>(builder_->getUniqueId(), spv::Id vertex_index_is_32bit = builder_->createBinOp(
type_uint_, spv::OpPhi); spv::OpINotEqual, type_bool_,
loaded_vertex_index_phi_op->addIdOperand(loaded_vertex_index); builder_->createBinOp(
loaded_vertex_index_phi_op->addIdOperand( spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
block_load_vertex_index_end.getId()); builder_->makeUintConstant(static_cast<unsigned int>(
loaded_vertex_index_phi_op->addIdOperand(vertex_index); kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit))),
loaded_vertex_index_phi_op->addIdOperand( const_uint_0_);
block_load_vertex_index_pre.getId()); // Calculate the vertex index address in the shared memory.
vertex_index = loaded_vertex_index_phi_op->getResultId(); id_vector_temp_.clear();
builder_->getBuildPoint()->addInstruction( id_vector_temp_.push_back(
std::move(loaded_vertex_index_phi_op)); builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress));
spv::Id vertex_index_address = builder_->createBinOp(
spv::OpIAdd, type_uint_,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_system_constants_,
id_vector_temp_),
spv::NoPrecision),
builder_->createBinOp(
spv::OpShiftLeftLogical, type_uint_, vertex_index,
builder_->createTriOp(spv::OpSelect, type_uint_,
vertex_index_is_32bit, const_uint_2,
builder_->makeUintConstant(1))));
// Load the 32 bits containing the whole vertex index or two 16-bit
// vertex indices.
// TODO(Triang3l): Bounds checking.
loaded_vertex_index =
LoadUint32FromSharedMemory(builder_->createUnaryOp(
spv::OpBitcast, type_int_,
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_,
vertex_index_address, const_uint_2)));
// Extract the 16-bit index from the loaded 32 bits if needed.
loaded_vertex_index = builder_->createTriOp(
spv::OpSelect, type_uint_, vertex_index_is_32bit,
loaded_vertex_index,
builder_->createTriOp(
spv::OpBitFieldUExtract, type_uint_, loaded_vertex_index,
builder_->createBinOp(
spv::OpShiftLeftLogical, type_uint_,
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
vertex_index_address, const_uint_2),
builder_->makeUintConstant(4 - 1)),
builder_->makeUintConstant(16)));
// Endian-swap the loaded index.
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantVertexIndexEndian));
loaded_vertex_index = EndianSwap32Uint(
loaded_vertex_index,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_system_constants_,
id_vector_temp_),
spv::NoPrecision));
} }
load_vertex_index_if.makeEndIf();
// Select between the loaded index and the original index from Vulkan.
vertex_index = load_vertex_index_if.createMergePhi(loaded_vertex_index,
vertex_index);
} else { } else {
// TODO(Triang3l): Close line loop primitive. // TODO(Triang3l): Close line loop primitive.
// Load the unswapped index as uint for swapping, or for indirect // Load the unswapped index as uint for swapping, or for indirect
@ -1368,53 +1446,35 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
builder_->makeUintConstant( builder_->makeUintConstant(
static_cast<unsigned int>(kSysFlag_VertexIndexLoad))), static_cast<unsigned int>(kSysFlag_VertexIndexLoad))),
const_uint_0_); const_uint_0_);
spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint(); SpirvBuilder::IfBuilder load_vertex_index_if(
spv::Block& block_load_vertex_index_start = builder_->makeNewBlock(); load_vertex_index, spv::SelectionControlDontFlattenMask,
spv::Block& block_load_vertex_index_merge = builder_->makeNewBlock(); *builder_);
builder_->createSelectionMerge(&block_load_vertex_index_merge, spv::Id loaded_vertex_index;
spv::SelectionControlDontFlattenMask);
builder_->createConditionalBranch(load_vertex_index,
&block_load_vertex_index_start,
&block_load_vertex_index_merge);
builder_->setBuildPoint(&block_load_vertex_index_start);
// Load the 32-bit index.
// TODO(Triang3l): Bounds checking.
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress));
spv::Id loaded_vertex_index =
LoadUint32FromSharedMemory(builder_->createUnaryOp(
spv::OpBitcast, type_int_,
builder_->createBinOp(
spv::OpIAdd, type_uint_,
builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_,
builder_->createLoad(
builder_->createAccessChain(
spv::StorageClassUniform,
uniform_system_constants_, id_vector_temp_),
spv::NoPrecision),
builder_->makeUintConstant(2)),
vertex_index)));
// Get the actual build point for phi.
spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint();
builder_->createBranch(&block_load_vertex_index_merge);
// Select between the loaded index and the original index from Vulkan.
builder_->setBuildPoint(&block_load_vertex_index_merge);
{ {
std::unique_ptr<spv::Instruction> loaded_vertex_index_phi_op = // Load the 32-bit index.
std::make_unique<spv::Instruction>(builder_->getUniqueId(), // TODO(Triang3l): Bounds checking.
type_uint_, spv::OpPhi); id_vector_temp_.clear();
loaded_vertex_index_phi_op->addIdOperand(loaded_vertex_index); id_vector_temp_.push_back(builder_->makeIntConstant(
loaded_vertex_index_phi_op->addIdOperand( kSystemConstantVertexIndexLoadAddress));
block_load_vertex_index_end.getId()); loaded_vertex_index =
loaded_vertex_index_phi_op->addIdOperand(vertex_index); LoadUint32FromSharedMemory(builder_->createUnaryOp(
loaded_vertex_index_phi_op->addIdOperand( spv::OpBitcast, type_int_,
block_load_vertex_index_pre.getId()); builder_->createBinOp(
vertex_index = loaded_vertex_index_phi_op->getResultId(); spv::OpIAdd, type_uint_,
builder_->getBuildPoint()->addInstruction( builder_->createBinOp(
std::move(loaded_vertex_index_phi_op)); spv::OpShiftRightLogical, type_uint_,
builder_->createLoad(
builder_->createAccessChain(
spv::StorageClassUniform,
uniform_system_constants_, id_vector_temp_),
spv::NoPrecision),
builder_->makeUintConstant(2)),
vertex_index)));
} }
load_vertex_index_if.makeEndIf();
// Select between the loaded index and the original index from Vulkan.
vertex_index = load_vertex_index_if.createMergePhi(
loaded_vertex_index, vertex_index);
} }
// Endian-swap the index. // Endian-swap the index.
id_vector_temp_.clear(); id_vector_temp_.clear();
@ -1864,6 +1924,13 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
} }
void SpirvShaderTranslator::StartFragmentShaderInMain() { void SpirvShaderTranslator::StartFragmentShaderInMain() {
// TODO(Triang3l): Allow memory export with resolution scaling only for the
// center host pixel, with sample shading (for depth format conversion) only
// for the bottom-right sample (unlike in Direct3D, the sample mask input
// doesn't include covered samples of the primitive that correspond to other
// invocations, so use the sample that's the most friendly to the half-pixel
// offset).
// Set up pixel killing from within the translated shader without affecting // Set up pixel killing from within the translated shader without affecting
// the control flow (unlike with OpKill), similarly to how pixel killing works // the control flow (unlike with OpKill), similarly to how pixel killing works
// on the Xenos, and also keeping a single critical section exit and return // on the Xenos, and also keeping a single critical section exit and return
@ -2497,6 +2564,26 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
var_main_fsi_color_written_); var_main_fsi_color_written_);
} }
} break; } break;
case InstructionStorageTarget::kExportAddress: {
// spv::NoResult if memory export usage is unsupported or invalid.
target_pointer = var_main_memexport_address_;
} break;
case InstructionStorageTarget::kExportData: {
// spv::NoResult if memory export usage is unsupported or invalid.
target_pointer = var_main_memexport_data_[result.storage_index];
if (target_pointer != spv::NoResult) {
// Mark that the eM# has been written to and needs to be exported.
assert_true(var_main_memexport_data_written_ != spv::NoResult);
builder_->createStore(
builder_->createBinOp(
spv::OpBitwiseOr, type_uint_,
builder_->createLoad(var_main_memexport_data_written_,
spv::NoPrecision),
builder_->makeUintConstant(uint32_t(1)
<< result.storage_index)),
var_main_memexport_data_written_);
}
} break;
default: default:
// TODO(Triang3l): All storage targets. // TODO(Triang3l): All storage targets.
break; break;
@ -2808,40 +2895,25 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) {
static_cast<unsigned int>(xenos::Endian::k8in32))); static_cast<unsigned int>(xenos::Endian::k8in32)));
spv::Id is_8in16_or_8in32 = spv::Id is_8in16_or_8in32 =
builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32); builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32);
spv::Block& block_pre_8in16 = *builder_->getBuildPoint(); SpirvBuilder::IfBuilder if_8in16(is_8in16_or_8in32,
assert_false(block_pre_8in16.isTerminated()); spv::SelectionControlMaskNone, *builder_);
spv::Block& block_8in16 = builder_->makeNewBlock(); spv::Id swapped_8in16;
spv::Block& block_8in16_merge = builder_->makeNewBlock();
builder_->createSelectionMerge(&block_8in16_merge,
spv::SelectionControlMaskNone);
builder_->createConditionalBranch(is_8in16_or_8in32, &block_8in16,
&block_8in16_merge);
builder_->setBuildPoint(&block_8in16);
spv::Id swapped_8in16 = builder_->createBinOp(
spv::OpBitwiseOr, type,
builder_->createBinOp(
spv::OpBitwiseAnd, type,
builder_->createBinOp(spv::OpShiftRightLogical, type, value,
const_uint_8_typed),
const_uint_00ff00ff_typed),
builder_->createBinOp(
spv::OpShiftLeftLogical, type,
builder_->createBinOp(spv::OpBitwiseAnd, type, value,
const_uint_00ff00ff_typed),
const_uint_8_typed));
builder_->createBranch(&block_8in16_merge);
builder_->setBuildPoint(&block_8in16_merge);
{ {
std::unique_ptr<spv::Instruction> phi_op = swapped_8in16 = builder_->createBinOp(
std::make_unique<spv::Instruction>(builder_->getUniqueId(), type, spv::OpBitwiseOr, type,
spv::OpPhi); builder_->createBinOp(
phi_op->addIdOperand(swapped_8in16); spv::OpBitwiseAnd, type,
phi_op->addIdOperand(block_8in16.getId()); builder_->createBinOp(spv::OpShiftRightLogical, type, value,
phi_op->addIdOperand(value); const_uint_8_typed),
phi_op->addIdOperand(block_pre_8in16.getId()); const_uint_00ff00ff_typed),
value = phi_op->getResultId(); builder_->createBinOp(
builder_->getBuildPoint()->addInstruction(std::move(phi_op)); spv::OpShiftLeftLogical, type,
builder_->createBinOp(spv::OpBitwiseAnd, type, value,
const_uint_00ff00ff_typed),
const_uint_8_typed));
} }
if_8in16.makeEndIf();
value = if_8in16.createMergePhi(swapped_8in16, value);
// 16-in-32 or another half of 8-in-32 (doing 16-in-32 swap). // 16-in-32 or another half of 8-in-32 (doing 16-in-32 swap).
spv::Id is_16in32 = builder_->createBinOp( spv::Id is_16in32 = builder_->createBinOp(
@ -2850,46 +2922,75 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) {
static_cast<unsigned int>(xenos::Endian::k16in32))); static_cast<unsigned int>(xenos::Endian::k16in32)));
spv::Id is_8in32_or_16in32 = spv::Id is_8in32_or_16in32 =
builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32); builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32);
spv::Block& block_pre_16in32 = *builder_->getBuildPoint(); SpirvBuilder::IfBuilder if_16in32(is_8in32_or_16in32,
spv::Block& block_16in32 = builder_->makeNewBlock(); spv::SelectionControlMaskNone, *builder_);
spv::Block& block_16in32_merge = builder_->makeNewBlock(); spv::Id swapped_16in32;
builder_->createSelectionMerge(&block_16in32_merge,
spv::SelectionControlMaskNone);
builder_->createConditionalBranch(is_8in32_or_16in32, &block_16in32,
&block_16in32_merge);
builder_->setBuildPoint(&block_16in32);
spv::Id swapped_16in32 = builder_->createQuadOp(
spv::OpBitFieldInsert, type,
builder_->createBinOp(spv::OpShiftRightLogical, type, value,
const_uint_16_typed),
value, builder_->makeIntConstant(16), builder_->makeIntConstant(16));
builder_->createBranch(&block_16in32_merge);
builder_->setBuildPoint(&block_16in32_merge);
{ {
std::unique_ptr<spv::Instruction> phi_op = swapped_16in32 = builder_->createQuadOp(
std::make_unique<spv::Instruction>(builder_->getUniqueId(), type, spv::OpBitFieldInsert, type,
spv::OpPhi); builder_->createBinOp(spv::OpShiftRightLogical, type, value,
phi_op->addIdOperand(swapped_16in32); const_uint_16_typed),
phi_op->addIdOperand(block_16in32.getId()); value, builder_->makeIntConstant(16), builder_->makeIntConstant(16));
phi_op->addIdOperand(value);
phi_op->addIdOperand(block_pre_16in32.getId());
value = phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(std::move(phi_op));
} }
if_16in32.makeEndIf();
value = if_16in32.createMergePhi(swapped_16in32, value);
return value; return value;
} }
spv::Id SpirvShaderTranslator::EndianSwap128Uint4(spv::Id value,
spv::Id endian) {
// Change 8-in-64 and 8-in-128 to 8-in-32, and then swap within 32 bits.
spv::Id is_8in64 = builder_->createBinOp(
spv::OpIEqual, type_bool_, endian,
builder_->makeUintConstant(
static_cast<unsigned int>(xenos::Endian128::k8in64)));
uint_vector_temp_.clear();
uint_vector_temp_.push_back(1);
uint_vector_temp_.push_back(0);
uint_vector_temp_.push_back(3);
uint_vector_temp_.push_back(2);
value = builder_->createTriOp(
spv::OpSelect, type_uint4_, is_8in64,
builder_->createRvalueSwizzle(spv::NoPrecision, type_uint4_, value,
uint_vector_temp_),
value);
spv::Id is_8in128 = builder_->createBinOp(
spv::OpIEqual, type_bool_, endian,
builder_->makeUintConstant(
static_cast<unsigned int>(xenos::Endian128::k8in128)));
uint_vector_temp_.clear();
uint_vector_temp_.push_back(3);
uint_vector_temp_.push_back(2);
uint_vector_temp_.push_back(1);
uint_vector_temp_.push_back(0);
value = builder_->createTriOp(
spv::OpSelect, type_uint4_, is_8in128,
builder_->createRvalueSwizzle(spv::NoPrecision, type_uint4_, value,
uint_vector_temp_),
value);
endian = builder_->createTriOp(
spv::OpSelect, type_uint_,
builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in64, is_8in128),
builder_->makeUintConstant(
static_cast<unsigned int>(xenos::Endian128::k8in32)),
endian);
return EndianSwap32Uint(value, endian);
}
spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
spv::Id address_dwords_int) { spv::Id address_dwords_int) {
spv::Block& head_block = *builder_->getBuildPoint();
assert_false(head_block.isTerminated());
spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3 spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3
? spv::StorageClassStorageBuffer ? spv::StorageClassStorageBuffer
: spv::StorageClassUniform; : spv::StorageClassUniform;
uint32_t buffer_count_log2 = GetSharedMemoryStorageBufferCountLog2();
if (!buffer_count_log2) { uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2();
if (!binding_count_log2) {
// Single binding - load directly. // Single binding - load directly.
id_vector_temp_.clear(); id_vector_temp_.clear();
// The only SSBO struct member. // The only SSBO struct member.
@ -2903,8 +3004,10 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
// The memory is split into multiple bindings - check which binding to load // The memory is split into multiple bindings - check which binding to load
// from. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the // from. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the
// array with the variable itself because it needs VK_EXT_descriptor_indexing. // array with the variable itself because it needs non-uniform storage buffer
uint32_t binding_address_bits = (29 - 2) - buffer_count_log2; // indexing.
uint32_t binding_address_bits = (29 - 2) - binding_count_log2;
spv::Id binding_index = builder_->createBinOp( spv::Id binding_index = builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_, spv::OpShiftRightLogical, type_uint_,
builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int), builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int),
@ -2913,51 +3016,119 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
spv::OpBitwiseAnd, type_int_, address_dwords_int, spv::OpBitwiseAnd, type_int_, address_dwords_int,
builder_->makeIntConstant( builder_->makeIntConstant(
int((uint32_t(1) << binding_address_bits) - 1))); int((uint32_t(1) << binding_address_bits) - 1)));
uint32_t buffer_count = 1 << buffer_count_log2;
spv::Block* switch_case_blocks[512 / 128]; auto value_phi_op = std::make_unique<spv::Instruction>(
for (uint32_t i = 0; i < buffer_count; ++i) { builder_->getUniqueId(), type_uint_, spv::OpPhi);
switch_case_blocks[i] = &builder_->makeNewBlock(); // Zero if out of bounds.
} value_phi_op->addIdOperand(const_uint_0_);
spv::Block& switch_merge_block = builder_->makeNewBlock(); value_phi_op->addIdOperand(builder_->getBuildPoint()->getId());
spv::Id value_phi_result = builder_->getUniqueId();
std::unique_ptr<spv::Instruction> value_phi_op = SpirvBuilder::SwitchBuilder binding_switch(
std::make_unique<spv::Instruction>(value_phi_result, type_uint_, binding_index, spv::SelectionControlDontFlattenMask, *builder_);
spv::OpPhi); uint32_t binding_count = uint32_t(1) << binding_count_log2;
builder_->createSelectionMerge(&switch_merge_block,
spv::SelectionControlDontFlattenMask); id_vector_temp_.clear();
{ id_vector_temp_.push_back(spv::NoResult);
std::unique_ptr<spv::Instruction> switch_op = // The only SSBO struct member.
std::make_unique<spv::Instruction>(spv::OpSwitch); id_vector_temp_.push_back(const_int_0_);
switch_op->addIdOperand(binding_index); id_vector_temp_.push_back(binding_address);
// Highest binding index is the default case.
switch_op->addIdOperand(switch_case_blocks[buffer_count - 1]->getId()); for (uint32_t i = 0; i < binding_count; ++i) {
switch_case_blocks[buffer_count - 1]->addPredecessor(&head_block); binding_switch.makeBeginCase(i);
for (uint32_t i = 0; i < buffer_count - 1; ++i) { id_vector_temp_[0] = builder_->makeIntConstant(int(i));
switch_op->addImmediateOperand(int(i));
switch_op->addIdOperand(switch_case_blocks[i]->getId());
switch_case_blocks[i]->addPredecessor(&head_block);
}
builder_->getBuildPoint()->addInstruction(std::move(switch_op));
}
for (uint32_t i = 0; i < buffer_count; ++i) {
builder_->setBuildPoint(switch_case_blocks[i]);
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(int(i)));
// The only SSBO struct member.
id_vector_temp_.push_back(const_int_0_);
id_vector_temp_.push_back(binding_address);
value_phi_op->addIdOperand(builder_->createLoad( value_phi_op->addIdOperand(builder_->createLoad(
builder_->createAccessChain(storage_class, buffers_shared_memory_, builder_->createAccessChain(storage_class, buffers_shared_memory_,
id_vector_temp_), id_vector_temp_),
spv::NoPrecision)); spv::NoPrecision));
value_phi_op->addIdOperand(switch_case_blocks[i]->getId()); value_phi_op->addIdOperand(builder_->getBuildPoint()->getId());
builder_->createBranch(&switch_merge_block);
} }
builder_->setBuildPoint(&switch_merge_block);
binding_switch.makeEndSwitch();
spv::Id value_phi_result = value_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(std::move(value_phi_op)); builder_->getBuildPoint()->addInstruction(std::move(value_phi_op));
return value_phi_result; return value_phi_result;
} }
void SpirvShaderTranslator::StoreUint32ToSharedMemory(
spv::Id value, spv::Id address_dwords_int, spv::Id replace_mask) {
spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3
? spv::StorageClassStorageBuffer
: spv::StorageClassUniform;
spv::Id keep_mask = spv::NoResult;
if (replace_mask != spv::NoResult) {
keep_mask = builder_->createUnaryOp(spv::OpNot, type_uint_, replace_mask);
value = builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, value,
replace_mask);
}
auto store = [&](spv::Id pointer) {
if (replace_mask != spv::NoResult) {
// Don't touch the other bits in the buffer, just modify the needed bits
// in the most up to date uint32 at the address.
spv::Id const_scope_device = builder_->makeUintConstant(
static_cast<unsigned int>(spv::ScopeDevice));
spv::Id const_semantics_relaxed = const_uint_0_;
builder_->createQuadOp(spv::OpAtomicAnd, type_uint_, pointer,
const_scope_device, const_semantics_relaxed,
keep_mask);
builder_->createQuadOp(spv::OpAtomicOr, type_uint_, pointer,
const_scope_device, const_semantics_relaxed,
value);
} else {
builder_->createStore(value, pointer);
}
};
uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2();
if (!binding_count_log2) {
// Single binding - store directly.
id_vector_temp_.clear();
// The only SSBO struct member.
id_vector_temp_.push_back(const_int_0_);
id_vector_temp_.push_back(address_dwords_int);
store(builder_->createAccessChain(storage_class, buffers_shared_memory_,
id_vector_temp_));
return;
}
// The memory is split into multiple bindings - check which binding to store
// to. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the
// array with the variable itself because it needs non-uniform storage buffer
// indexing.
uint32_t binding_address_bits = (29 - 2) - binding_count_log2;
spv::Id binding_index = builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_,
builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int),
builder_->makeUintConstant(binding_address_bits));
spv::Id binding_address = builder_->createBinOp(
spv::OpBitwiseAnd, type_int_, address_dwords_int,
builder_->makeIntConstant(
int((uint32_t(1) << binding_address_bits) - 1)));
SpirvBuilder::SwitchBuilder binding_switch(
binding_index, spv::SelectionControlDontFlattenMask, *builder_);
uint32_t binding_count = uint32_t(1) << binding_count_log2;
id_vector_temp_.clear();
id_vector_temp_.push_back(spv::NoResult);
// The only SSBO struct member.
id_vector_temp_.push_back(const_int_0_);
id_vector_temp_.push_back(binding_address);
for (uint32_t i = 0; i < binding_count; ++i) {
binding_switch.makeBeginCase(i);
id_vector_temp_[0] = builder_->makeIntConstant(int(i));
store(builder_->createAccessChain(storage_class, buffers_shared_memory_,
id_vector_temp_));
}
binding_switch.makeEndSwitch();
}
spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma, spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma,
bool gamma_pre_saturated) { bool gamma_pre_saturated) {
spv::Id value_type = builder_->getTypeId(gamma); spv::Id value_type = builder_->getTypeId(gamma);

View File

@ -323,17 +323,28 @@ class SpirvShaderTranslator : public ShaderTranslator {
explicit Features( explicit Features(
const ui::vulkan::VulkanProvider::DeviceInfo& device_info); const ui::vulkan::VulkanProvider::DeviceInfo& device_info);
explicit Features(bool all = false); explicit Features(bool all = false);
unsigned int spirv_version; unsigned int spirv_version;
uint32_t max_storage_buffer_range; uint32_t max_storage_buffer_range;
bool full_draw_index_uint32;
bool vertex_pipeline_stores_and_atomics;
bool fragment_stores_and_atomics;
bool clip_distance; bool clip_distance;
bool cull_distance; bool cull_distance;
bool demote_to_helper_invocation;
bool fragment_shader_sample_interlock;
bool full_draw_index_uint32;
bool image_view_format_swizzle; bool image_view_format_swizzle;
bool signed_zero_inf_nan_preserve_float32; bool signed_zero_inf_nan_preserve_float32;
bool denorm_flush_to_zero_float32; bool denorm_flush_to_zero_float32;
bool rounding_mode_rte_float32; bool rounding_mode_rte_float32;
bool fragment_shader_sample_interlock;
bool demote_to_helper_invocation;
}; };
SpirvShaderTranslator(const Features& features, SpirvShaderTranslator(const Features& features,
@ -424,6 +435,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
void ProcessLoopEndInstruction( void ProcessLoopEndInstruction(
const ParsedLoopEndInstruction& instr) override; const ParsedLoopEndInstruction& instr) override;
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
void ProcessAllocInstruction(const ParsedAllocInstruction& instr,
uint8_t export_eM) override;
void ProcessVertexFetchInstruction( void ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) override; const ParsedVertexFetchInstruction& instr) override;
@ -470,6 +483,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
Shader::IsHostVertexShaderTypeDomain( Shader::IsHostVertexShaderTypeDomain(
GetSpirvShaderModification().vertex.host_vertex_shader_type); GetSpirvShaderModification().vertex.host_vertex_shader_type);
} }
bool IsSpirvComputeShader() const {
return is_vertex_shader() &&
GetSpirvShaderModification().vertex.host_vertex_shader_type ==
Shader::HostVertexShaderType::kMemExportCompute;
}
bool IsExecutionModeEarlyFragmentTests() const { bool IsExecutionModeEarlyFragmentTests() const {
return is_pixel_shader() && return is_pixel_shader() &&
@ -567,24 +585,48 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs, spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs,
spv::Id operand_1_abs); spv::Id operand_1_abs);
// Conditionally discard the current fragment. Changes the build point. // Conditionally discard the current fragment. Changes the build point.
void KillPixel(spv::Id condition); void KillPixel(spv::Id condition,
uint8_t memexport_eM_potentially_written_before);
// Return type is a xe::bit_count(result.GetUsedResultComponents())-component // Return type is a xe::bit_count(result.GetUsedResultComponents())-component
// float vector or a single float, depending on whether it's a reduction // float vector or a single float, depending on whether it's a reduction
// instruction (check getTypeId of the result), or returns spv::NoResult if // instruction (check getTypeId of the result), or returns spv::NoResult if
// nothing to store. // nothing to store.
spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr, spv::Id ProcessVectorAluOperation(
bool& predicate_written); const ParsedAluInstruction& instr,
uint8_t memexport_eM_potentially_written_before, bool& predicate_written);
// Returns a float value to write to the previous scalar register and to the // Returns a float value to write to the previous scalar register and to the
// destination. If the return value is ps itself (in the retain_prev case), // destination. If the return value is ps itself (in the retain_prev case),
// returns spv::NoResult (handled as a special case, so if it's retain_prev, // returns spv::NoResult (handled as a special case, so if it's retain_prev,
// but don't need to write to anywhere, no OpLoad(ps) will be done). // but don't need to write to anywhere, no OpLoad(ps) will be done).
spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr, spv::Id ProcessScalarAluOperation(
bool& predicate_written); const ParsedAluInstruction& instr,
uint8_t memexport_eM_potentially_written_before, bool& predicate_written);
// Perform endian swap of a uint scalar or vector. // Perform endian swap of a uint scalar or vector.
spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian); spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian);
// Perform endian swap of a uint4 vector.
spv::Id EndianSwap128Uint4(spv::Id value, spv::Id endian);
spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int); spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int);
// If `replace_mask` is provided, the bits specified in the mask will be
// replaced with those from the value via OpAtomicAnd/Or.
// Bits of `value` not in `replace_mask` will be ignored.
void StoreUint32ToSharedMemory(spv::Id value, spv::Id address_dwords_int,
spv::Id replace_mask = spv::NoResult);
bool IsMemoryExportSupported() const {
if (is_pixel_shader()) {
return features_.fragment_stores_and_atomics;
}
return features_.vertex_pipeline_stores_and_atomics ||
IsSpirvComputeShader();
}
bool IsMemoryExportUsed() const {
return current_shader().memexport_eM_written() && IsMemoryExportSupported();
}
void ExportToMemory(uint8_t export_eM);
// The source may be a floating-point scalar or a vector. // The source may be a floating-point scalar or a vector.
spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated); spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated);
@ -605,7 +647,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
void SampleTexture(spv::Builder::TextureParameters& texture_parameters, void SampleTexture(spv::Builder::TextureParameters& texture_parameters,
spv::ImageOperandsMask image_operands_mask, spv::ImageOperandsMask image_operands_mask,
spv::Id image_unsigned, spv::Id image_signed, spv::Id image_unsigned, spv::Id image_signed,
spv::Id sampler, spv::Id is_all_signed, spv::Id sampler, spv::Id is_any_unsigned,
spv::Id is_any_signed, spv::Id& result_unsigned_out, spv::Id is_any_signed, spv::Id& result_unsigned_out,
spv::Id& result_signed_out, spv::Id& result_signed_out,
spv::Id lerp_factor = spv::NoResult, spv::Id lerp_factor = spv::NoResult,
@ -872,6 +914,21 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id var_main_tfetch_gradients_v_; spv::Id var_main_tfetch_gradients_v_;
// float4[register_count()]. // float4[register_count()].
spv::Id var_main_registers_; spv::Id var_main_registers_;
// Memory export variables are created only when needed.
// float4.
spv::Id var_main_memexport_address_;
// Each is float4.
spv::Id var_main_memexport_data_[ucode::kMaxMemExportElementCount];
// Bit field of which eM# elements have been written so far by the invocation
// since the last memory write - uint.
spv::Id var_main_memexport_data_written_;
// If memory export is disabled in certain invocations or (if emulating some
// primitive types without a geometry shader) at specific guest vertex loop
// iterations because the translated shader is executed multiple times for the
// same guest vertex or pixel, this contains whether memory export is allowed
// in the current execution of the translated code.
// bool.
spv::Id main_memexport_allowed_;
// VS only - float3 (special exports). // VS only - float3 (special exports).
spv::Id var_main_point_size_edge_flag_kill_vertex_; spv::Id var_main_point_size_edge_flag_kill_vertex_;
// PS, only when needed - bool. // PS, only when needed - bool.

View File

@ -39,31 +39,23 @@ spv::Id SpirvShaderTranslator::ZeroIfAnyOperandIsZero(spv::Id value,
const_float_vectors_0_[num_components - 1], value); const_float_vectors_0_[num_components - 1], value);
} }
void SpirvShaderTranslator::KillPixel(spv::Id condition) { void SpirvShaderTranslator::KillPixel(
// Same calls as in spv::Builder::If. spv::Id condition, uint8_t memexport_eM_potentially_written_before) {
spv::Function& function = builder_->getBuildPoint()->getParent(); SpirvBuilder::IfBuilder kill_if(condition, spv::SelectionControlMaskNone,
spv::Block* kill_block = new spv::Block(builder_->getUniqueId(), function); *builder_);
spv::Block* merge_block = new spv::Block(builder_->getUniqueId(), function); {
spv::Block& header_block = *builder_->getBuildPoint(); // Perform outstanding memory exports before the invocation becomes inactive
// and storage writes are disabled.
function.addBlock(kill_block); ExportToMemory(memexport_eM_potentially_written_before);
builder_->setBuildPoint(kill_block); if (var_main_kill_pixel_ != spv::NoResult) {
// Kill without influencing the control flow in the translated shader. builder_->createStore(builder_->makeBoolConstant(true),
if (var_main_kill_pixel_ != spv::NoResult) { var_main_kill_pixel_);
builder_->createStore(builder_->makeBoolConstant(true), }
var_main_kill_pixel_); if (features_.demote_to_helper_invocation) {
builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT);
}
} }
if (features_.demote_to_helper_invocation) { kill_if.makeEndIf();
builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT);
}
builder_->createBranch(merge_block);
builder_->setBuildPoint(&header_block);
builder_->createSelectionMerge(merge_block, spv::SelectionControlMaskNone);
builder_->createConditionalBranch(condition, kill_block, merge_block);
function.addBlock(merge_block);
builder_->setBuildPoint(merge_block);
} }
void SpirvShaderTranslator::ProcessAluInstruction( void SpirvShaderTranslator::ProcessAluInstruction(
@ -89,12 +81,12 @@ void SpirvShaderTranslator::ProcessAluInstruction(
// Whether the instruction has changed the predicate, and it needs to be // Whether the instruction has changed the predicate, and it needs to be
// checked again later. // checked again later.
bool predicate_written_vector = false; bool predicate_written_vector = false;
spv::Id vector_result = spv::Id vector_result = ProcessVectorAluOperation(
ProcessVectorAluOperation(instr, predicate_written_vector); instr, memexport_eM_potentially_written_before, predicate_written_vector);
bool predicate_written_scalar = false; bool predicate_written_scalar = false;
spv::Id scalar_result = spv::Id scalar_result = ProcessScalarAluOperation(
ProcessScalarAluOperation(instr, predicate_written_scalar); instr, memexport_eM_potentially_written_before, predicate_written_scalar);
if (scalar_result != spv::NoResult) { if (scalar_result != spv::NoResult) {
EnsureBuildPointAvailable(); EnsureBuildPointAvailable();
builder_->createStore(scalar_result, var_main_previous_scalar_); builder_->createStore(scalar_result, var_main_previous_scalar_);
@ -118,7 +110,8 @@ void SpirvShaderTranslator::ProcessAluInstruction(
} }
spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
const ParsedAluInstruction& instr, bool& predicate_written) { const ParsedAluInstruction& instr,
uint8_t memexport_eM_potentially_written_before, bool& predicate_written) {
predicate_written = false; predicate_written = false;
uint32_t used_result_components = uint32_t used_result_components =
@ -564,7 +557,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
spv::Id ma_z_result[4] = {}, ma_yx_result[4] = {}; spv::Id ma_z_result[4] = {}, ma_yx_result[4] = {};
// Check if the major axis is Z (abs(z) >= abs(x) && abs(z) >= abs(y)). // Check if the major axis is Z (abs(z) >= abs(x) && abs(z) >= abs(y)).
spv::Builder::If ma_z_if( SpirvBuilder::IfBuilder ma_z_if(
builder_->createBinOp( builder_->createBinOp(
spv::OpLogicalAnd, type_bool_, spv::OpLogicalAnd, type_bool_,
builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_,
@ -596,14 +589,13 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
} }
} }
} }
spv::Block& ma_z_end_block = *builder_->getBuildPoint();
ma_z_if.makeBeginElse(); ma_z_if.makeBeginElse();
{ {
spv::Id ma_y_result[4] = {}, ma_x_result[4] = {}; spv::Id ma_y_result[4] = {}, ma_x_result[4] = {};
// The major axis is not Z - create an inner conditional to check if the // The major axis is not Z - create an inner conditional to check if the
// major axis is Y (abs(y) >= abs(x)). // major axis is Y (abs(y) >= abs(x)).
spv::Builder::If ma_y_if( SpirvBuilder::IfBuilder ma_y_if(
builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_,
operand_abs[1], operand_abs[0]), operand_abs[1], operand_abs[0]),
spv::SelectionControlMaskNone, *builder_); spv::SelectionControlMaskNone, *builder_);
@ -629,7 +621,6 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
} }
} }
} }
spv::Block& ma_y_end_block = *builder_->getBuildPoint();
ma_y_if.makeBeginElse(); ma_y_if.makeBeginElse();
{ {
// The major axis is X. // The major axis is X.
@ -654,7 +645,6 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
} }
} }
} }
spv::Block& ma_x_end_block = *builder_->getBuildPoint();
ma_y_if.makeEndIf(); ma_y_if.makeEndIf();
// The major axis is Y or X - choose the options of the result from Y // The major axis is Y or X - choose the options of the result from Y
@ -663,18 +653,10 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
if (!(used_result_components & (1 << i))) { if (!(used_result_components & (1 << i))) {
continue; continue;
} }
std::unique_ptr<spv::Instruction> phi_op = ma_yx_result[i] =
std::make_unique<spv::Instruction>(builder_->getUniqueId(), ma_y_if.createMergePhi(ma_y_result[i], ma_x_result[i]);
type_float_, spv::OpPhi);
phi_op->addIdOperand(ma_y_result[i]);
phi_op->addIdOperand(ma_y_end_block.getId());
phi_op->addIdOperand(ma_x_result[i]);
phi_op->addIdOperand(ma_x_end_block.getId());
ma_yx_result[i] = phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(std::move(phi_op));
} }
} }
spv::Block& ma_yx_end_block = *builder_->getBuildPoint();
ma_z_if.makeEndIf(); ma_z_if.makeEndIf();
// Choose the result options from Z and YX cases. // Choose the result options from Z and YX cases.
@ -683,15 +665,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
if (!(used_result_components & (1 << i))) { if (!(used_result_components & (1 << i))) {
continue; continue;
} }
std::unique_ptr<spv::Instruction> phi_op = id_vector_temp_.push_back(
std::make_unique<spv::Instruction>(builder_->getUniqueId(), ma_z_if.createMergePhi(ma_z_result[i], ma_yx_result[i]));
type_float_, spv::OpPhi);
phi_op->addIdOperand(ma_z_result[i]);
phi_op->addIdOperand(ma_z_end_block.getId());
phi_op->addIdOperand(ma_yx_result[i]);
phi_op->addIdOperand(ma_yx_end_block.getId());
id_vector_temp_.push_back(phi_op->getResultId());
builder_->getBuildPoint()->addInstruction(std::move(phi_op));
} }
assert_true(id_vector_temp_.size() == used_result_component_count); assert_true(id_vector_temp_.size() == used_result_component_count);
if (used_result_components & 0b0100) { if (used_result_components & 0b0100) {
@ -799,14 +774,16 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
case ucode::AluVectorOpcode::kKillGt: case ucode::AluVectorOpcode::kKillGt:
case ucode::AluVectorOpcode::kKillGe: case ucode::AluVectorOpcode::kKillGe:
case ucode::AluVectorOpcode::kKillNe: { case ucode::AluVectorOpcode::kKillNe: {
KillPixel(builder_->createUnaryOp( KillPixel(
spv::OpAny, type_bool_, builder_->createUnaryOp(
builder_->createBinOp( spv::OpAny, type_bool_,
spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_, builder_->createBinOp(
GetOperandComponents(operand_storage[0], instr.vector_operands[0], spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_,
0b1111), GetOperandComponents(operand_storage[0],
GetOperandComponents(operand_storage[1], instr.vector_operands[1], instr.vector_operands[0], 0b1111),
0b1111)))); GetOperandComponents(operand_storage[1],
instr.vector_operands[1], 0b1111))),
memexport_eM_potentially_written_before);
return const_float_0_; return const_float_0_;
} }
@ -892,7 +869,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
} }
spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
const ParsedAluInstruction& instr, bool& predicate_written) { const ParsedAluInstruction& instr,
uint8_t memexport_eM_potentially_written_before, bool& predicate_written) {
predicate_written = false; predicate_written = false;
spv::Id operand_storage[2] = {}; spv::Id operand_storage[2] = {};
@ -1044,10 +1022,9 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
spv::OpLogicalAnd, type_bool_, condition, spv::OpLogicalAnd, type_bool_, condition,
builder_->createBinOp(spv::OpFOrdGreaterThan, type_bool_, b, builder_->createBinOp(spv::OpFOrdGreaterThan, type_bool_, b,
const_float_0_)); const_float_0_));
spv::Block& pre_multiply_if_block = *builder_->getBuildPoint(); SpirvBuilder::IfBuilder multiply_if(
condition, spv::SelectionControlMaskNone, *builder_);
spv::Id product; spv::Id product;
spv::Builder::If multiply_if(condition, spv::SelectionControlMaskNone,
*builder_);
{ {
// Multiplication case. // Multiplication case.
spv::Id a = instr.scalar_operands[0].GetComponent(0) != spv::Id a = instr.scalar_operands[0].GetComponent(0) !=
@ -1061,21 +1038,9 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
product = ZeroIfAnyOperandIsZero( product = ZeroIfAnyOperandIsZero(
product, GetAbsoluteOperand(a, instr.scalar_operands[0]), ps_abs); product, GetAbsoluteOperand(a, instr.scalar_operands[0]), ps_abs);
} }
spv::Block& multiply_end_block = *builder_->getBuildPoint();
multiply_if.makeEndIf(); multiply_if.makeEndIf();
// Merge - choose between the product and -FLT_MAX. // Merge - choose between the product and -FLT_MAX.
{ return multiply_if.createMergePhi(product, const_float_max_neg);
std::unique_ptr<spv::Instruction> phi_op =
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
type_float_, spv::OpPhi);
phi_op->addIdOperand(product);
phi_op->addIdOperand(multiply_end_block.getId());
phi_op->addIdOperand(const_float_max_neg);
phi_op->addIdOperand(pre_multiply_if_block.getId());
spv::Id phi_result = phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(std::move(phi_op));
return phi_result;
}
} }
case ucode::AluScalarOpcode::kMaxs: case ucode::AluScalarOpcode::kMaxs:
@ -1300,12 +1265,13 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
case ucode::AluScalarOpcode::kKillsNe: case ucode::AluScalarOpcode::kKillsNe:
case ucode::AluScalarOpcode::kKillsOne: { case ucode::AluScalarOpcode::kKillsOne: {
KillPixel(builder_->createBinOp( KillPixel(builder_->createBinOp(
spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_,
GetOperandComponents(operand_storage[0], instr.scalar_operands[0], GetOperandComponents(operand_storage[0],
0b0001), instr.scalar_operands[0], 0b0001),
instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne
? const_float_1_ ? const_float_1_
: const_float_0_)); : const_float_0_),
memexport_eM_potentially_written_before);
return const_float_0_; return const_float_0_;
} }

View File

@ -1145,31 +1145,18 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
z_coordinate_ref = builder_->createNoContractionBinOp( z_coordinate_ref = builder_->createNoContractionBinOp(
spv::OpFAdd, type_float_, z_coordinate_ref, z_offset); spv::OpFAdd, type_float_, z_coordinate_ref, z_offset);
} }
spv::Block& block_dimension_head = *builder_->getBuildPoint();
spv::Block& block_dimension_merge = builder_->makeNewBlock();
spv::Block& block_dimension_3d = builder_->makeNewBlock();
builder_->createSelectionMerge(&block_dimension_merge,
spv::SelectionControlDontFlattenMask);
assert_true(data_is_3d != spv::NoResult); assert_true(data_is_3d != spv::NoResult);
builder_->createConditionalBranch(data_is_3d, &block_dimension_3d, SpirvBuilder::IfBuilder if_data_is_3d(
&block_dimension_merge); data_is_3d, spv::SelectionControlDontFlattenMask, *builder_);
builder_->setBuildPoint(&block_dimension_3d); spv::Id z_3d;
assert_true(z_size != spv::NoResult);
spv::Id z_3d = builder_->createNoContractionBinOp(
spv::OpFDiv, type_float_, z_coordinate_ref, z_size);
builder_->createBranch(&block_dimension_merge);
builder_->setBuildPoint(&block_dimension_merge);
{ {
std::unique_ptr<spv::Instruction> z_phi_op = assert_true(z_size != spv::NoResult);
std::make_unique<spv::Instruction>(builder_->getUniqueId(), z_3d = builder_->createNoContractionBinOp(spv::OpFDiv, type_float_,
type_float_, spv::OpPhi); z_coordinate_ref, z_size);
z_phi_op->addIdOperand(z_3d);
z_phi_op->addIdOperand(block_dimension_3d.getId());
z_phi_op->addIdOperand(z_coordinate_ref);
z_phi_op->addIdOperand(block_dimension_head.getId());
z_coordinate_ref = z_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(std::move(z_phi_op));
} }
if_data_is_3d.makeEndIf();
z_coordinate_ref =
if_data_is_3d.createMergePhi(z_3d, z_coordinate_ref);
} else { } else {
// Denormalize the Z coordinate for a stacked texture, and apply the // Denormalize the Z coordinate for a stacked texture, and apply the
// offset. // offset.
@ -1394,63 +1381,39 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
// OpSampledImage must be in the same block as where its result is used. // OpSampledImage must be in the same block as where its result is used.
if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) {
// Check if the texture is 3D or stacked. // Check if the texture is 3D or stacked.
spv::Block& block_dimension_head = *builder_->getBuildPoint();
spv::Block& block_dimension_3d_start = builder_->makeNewBlock();
spv::Block& block_dimension_stacked_start = builder_->makeNewBlock();
spv::Block& block_dimension_merge = builder_->makeNewBlock();
builder_->createSelectionMerge(&block_dimension_merge,
spv::SelectionControlDontFlattenMask);
assert_true(data_is_3d != spv::NoResult); assert_true(data_is_3d != spv::NoResult);
builder_->createConditionalBranch(data_is_3d, SpirvBuilder::IfBuilder if_data_is_3d(
&block_dimension_3d_start, data_is_3d, spv::SelectionControlDontFlattenMask, *builder_);
&block_dimension_stacked_start); spv::Id lod_3d;
// 3D.
builder_->setBuildPoint(&block_dimension_3d_start);
id_vector_temp_.clear();
for (uint32_t i = 0; i < 3; ++i) {
id_vector_temp_.push_back(coordinates[i]);
}
texture_parameters.coords =
builder_->createCompositeConstruct(type_float3_, id_vector_temp_);
spv::Id lod_3d = QueryTextureLod(texture_parameters,
image_3d_unsigned, image_3d_signed,
sampler, swizzled_signs_all_signed);
// Get the actual build point for phi.
spv::Block& block_dimension_3d_end = *builder_->getBuildPoint();
builder_->createBranch(&block_dimension_merge);
// 2D stacked.
builder_->setBuildPoint(&block_dimension_stacked_start);
id_vector_temp_.clear();
for (uint32_t i = 0; i < 2; ++i) {
id_vector_temp_.push_back(coordinates[i]);
}
texture_parameters.coords =
builder_->createCompositeConstruct(type_float2_, id_vector_temp_);
spv::Id lod_stacked = QueryTextureLod(
texture_parameters, image_2d_array_or_cube_unsigned,
image_2d_array_or_cube_signed, sampler,
swizzled_signs_all_signed);
// Get the actual build point for phi.
spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint();
builder_->createBranch(&block_dimension_merge);
// Choose between the 3D and the stacked result based on the actual
// data dimensionality.
builder_->setBuildPoint(&block_dimension_merge);
{ {
std::unique_ptr<spv::Instruction> dimension_phi_op = // 3D.
std::make_unique<spv::Instruction>(builder_->getUniqueId(), id_vector_temp_.clear();
type_float_, spv::OpPhi); for (uint32_t i = 0; i < 3; ++i) {
dimension_phi_op->addIdOperand(lod_3d); id_vector_temp_.push_back(coordinates[i]);
dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); }
dimension_phi_op->addIdOperand(lod_stacked); texture_parameters.coords = builder_->createCompositeConstruct(
dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); type_float3_, id_vector_temp_);
result[0] = dimension_phi_op->getResultId(); lod_3d = QueryTextureLod(texture_parameters, image_3d_unsigned,
builder_->getBuildPoint()->addInstruction( image_3d_signed, sampler,
std::move(dimension_phi_op)); swizzled_signs_all_signed);
} }
if_data_is_3d.makeBeginElse();
spv::Id lod_stacked;
{
// 2D stacked.
id_vector_temp_.clear();
for (uint32_t i = 0; i < 2; ++i) {
id_vector_temp_.push_back(coordinates[i]);
}
texture_parameters.coords = builder_->createCompositeConstruct(
type_float2_, id_vector_temp_);
lod_stacked = QueryTextureLod(texture_parameters,
image_2d_array_or_cube_unsigned,
image_2d_array_or_cube_signed,
sampler, swizzled_signs_all_signed);
}
if_data_is_3d.makeEndIf();
result[0] = if_data_is_3d.createMergePhi(lod_3d, lod_stacked);
} else { } else {
uint32_t lod_query_coordinate_component_count = uint32_t lod_query_coordinate_component_count =
instr.dimension == xenos::FetchOpDimension::kCube ? 3 : 2; instr.dimension == xenos::FetchOpDimension::kCube ? 3 : 2;
@ -1512,6 +1475,8 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
} }
} }
} }
spv::Id is_any_unsigned = builder_->createUnaryOp(
spv::OpLogicalNot, type_bool_, is_all_signed);
// Load the fetch constant word 4, needed unconditionally for LOD // Load the fetch constant word 4, needed unconditionally for LOD
// biasing, for result exponent biasing, and conditionally for stacked // biasing, for result exponent biasing, and conditionally for stacked
@ -1765,273 +1730,247 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
// component, 2 gradient components, two fetches if the Z axis is // component, 2 gradient components, two fetches if the Z axis is
// linear-filtered). // linear-filtered).
spv::Block& block_dimension_head = *builder_->getBuildPoint();
spv::Block& block_dimension_3d_start = builder_->makeNewBlock();
spv::Block& block_dimension_stacked_start = builder_->makeNewBlock();
spv::Block& block_dimension_merge = builder_->makeNewBlock();
builder_->createSelectionMerge(&block_dimension_merge,
spv::SelectionControlDontFlattenMask);
assert_true(data_is_3d != spv::NoResult); assert_true(data_is_3d != spv::NoResult);
builder_->createConditionalBranch(data_is_3d, SpirvBuilder::IfBuilder if_data_is_3d(
&block_dimension_3d_start, data_is_3d, spv::SelectionControlDontFlattenMask, *builder_);
&block_dimension_stacked_start);
// 3D.
builder_->setBuildPoint(&block_dimension_3d_start);
if (use_computed_lod) {
texture_parameters.gradX = gradients_h;
texture_parameters.gradY = gradients_v;
}
id_vector_temp_.clear();
for (uint32_t i = 0; i < 3; ++i) {
id_vector_temp_.push_back(coordinates[i]);
}
texture_parameters.coords =
builder_->createCompositeConstruct(type_float3_, id_vector_temp_);
spv::Id sample_result_unsigned_3d, sample_result_signed_3d; spv::Id sample_result_unsigned_3d, sample_result_signed_3d;
SampleTexture(texture_parameters, image_operands_mask, {
image_3d_unsigned, image_3d_signed, sampler, // 3D.
is_all_signed, is_any_signed, sample_result_unsigned_3d, if (use_computed_lod) {
sample_result_signed_3d); texture_parameters.gradX = gradients_h;
// Get the actual build point after the SampleTexture call for phi. texture_parameters.gradY = gradients_v;
spv::Block& block_dimension_3d_end = *builder_->getBuildPoint();
builder_->createBranch(&block_dimension_merge);
// 2D stacked.
builder_->setBuildPoint(&block_dimension_stacked_start);
if (use_computed_lod) {
// Extract 2D gradients for stacked textures which are 2D arrays.
uint_vector_temp_.clear();
uint_vector_temp_.push_back(0);
uint_vector_temp_.push_back(1);
texture_parameters.gradX = builder_->createRvalueSwizzle(
spv::NoPrecision, type_float2_, gradients_h, uint_vector_temp_);
texture_parameters.gradY = builder_->createRvalueSwizzle(
spv::NoPrecision, type_float2_, gradients_v, uint_vector_temp_);
}
// Check if linear filtering is needed.
bool vol_mag_filter_is_fetch_const =
instr.attributes.vol_mag_filter ==
xenos::TextureFilter::kUseFetchConst;
bool vol_min_filter_is_fetch_const =
instr.attributes.vol_min_filter ==
xenos::TextureFilter::kUseFetchConst;
bool vol_mag_filter_is_linear =
instr.attributes.vol_mag_filter == xenos::TextureFilter::kLinear;
bool vol_min_filter_is_linear =
instr.attributes.vol_min_filter == xenos::TextureFilter::kLinear;
spv::Id vol_filter_is_linear = spv::NoResult;
if (use_computed_lod &&
(vol_mag_filter_is_fetch_const || vol_min_filter_is_fetch_const ||
vol_mag_filter_is_linear != vol_min_filter_is_linear)) {
// Check if minifying along layers (derivative > 1 along any axis).
spv::Id layer_max_gradient = builder_->createBinBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NMax,
builder_->createCompositeExtract(gradients_h, type_float_, 2),
builder_->createCompositeExtract(gradients_v, type_float_, 2));
if (!instr.attributes.unnormalized_coordinates) {
// Denormalize the gradient if provided as normalized.
assert_true(size[2] != spv::NoResult);
layer_max_gradient = builder_->createNoContractionBinOp(
spv::OpFMul, type_float_, layer_max_gradient, size[2]);
} }
// For NaN, considering that magnification is being done. id_vector_temp_.clear();
spv::Id is_minifying_z = builder_->createBinOp( for (uint32_t i = 0; i < 3; ++i) {
spv::OpFOrdLessThan, type_bool_, layer_max_gradient, id_vector_temp_.push_back(coordinates[i]);
builder_->makeFloatConstant(1.0f));
// Choose what filter is actually used, the minification or the
// magnification one.
spv::Id vol_mag_filter_is_linear_loaded =
vol_mag_filter_is_fetch_const
? builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_,
fetch_constant_word_4,
builder_->makeUintConstant(UINT32_C(1) << 0)),
const_uint_0_)
: builder_->makeBoolConstant(vol_mag_filter_is_linear);
spv::Id vol_min_filter_is_linear_loaded =
vol_min_filter_is_fetch_const
? builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_,
fetch_constant_word_4,
builder_->makeUintConstant(UINT32_C(1) << 1)),
const_uint_0_)
: builder_->makeBoolConstant(vol_min_filter_is_linear);
vol_filter_is_linear =
builder_->createTriOp(spv::OpSelect, type_bool_, is_minifying_z,
vol_min_filter_is_linear_loaded,
vol_mag_filter_is_linear_loaded);
} else {
// No gradients, or using the same filter overrides for magnifying
// and minifying. Assume always magnifying if no gradients (LOD 0,
// always <= 0). LOD is within 2D layers, not between them (unlike
// in 3D textures, which have mips with depth reduced), so it
// shouldn't have effect on filtering between layers.
if (vol_mag_filter_is_fetch_const) {
vol_filter_is_linear = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_, fetch_constant_word_4,
builder_->makeUintConstant(UINT32_C(1) << 0)),
const_uint_0_);
} }
texture_parameters.coords = builder_->createCompositeConstruct(
type_float3_, id_vector_temp_);
SampleTexture(texture_parameters, image_operands_mask,
image_3d_unsigned, image_3d_signed, sampler,
is_any_unsigned, is_any_signed,
sample_result_unsigned_3d, sample_result_signed_3d);
} }
spv::Id layer_coordinate = coordinates[2]; if_data_is_3d.makeBeginElse();
// Linear filtering may be needed either based on a dynamic condition
// (the filtering mode is taken from the fetch constant, or it's
// different for magnification and minification), or on a static one
// (with gradients - specified in the instruction for both
// magnification and minification as linear, without gradients -
// specified for magnification as linear).
// If the filter is linear, subtract 0.5 from the Z coordinate of the
// first layer in filtering because 0.5 is in the middle of it.
if (vol_filter_is_linear != spv::NoResult) {
layer_coordinate = builder_->createTriOp(
spv::OpSelect, type_float_, vol_filter_is_linear,
builder_->createNoContractionBinOp(
spv::OpFSub, type_float_, layer_coordinate,
builder_->makeFloatConstant(0.5f)),
layer_coordinate);
} else if (vol_mag_filter_is_linear) {
layer_coordinate = builder_->createNoContractionBinOp(
spv::OpFSub, type_float_, layer_coordinate,
builder_->makeFloatConstant(0.5f));
}
// Sample the first layer, needed regardless of whether filtering is
// needed.
// Floor the array layer (Vulkan does rounding to nearest or + 0.5 and
// floor even for the layer index, but on the Xenos, addressing is
// similar to that of 3D textures). This is needed for both point and
// linear filtering (with linear, 0.5 was subtracted previously).
spv::Id layer_0_coordinate = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor,
layer_coordinate);
id_vector_temp_.clear();
id_vector_temp_.push_back(coordinates[0]);
id_vector_temp_.push_back(coordinates[1]);
id_vector_temp_.push_back(layer_0_coordinate);
texture_parameters.coords =
builder_->createCompositeConstruct(type_float3_, id_vector_temp_);
spv::Id sample_result_unsigned_stacked, sample_result_signed_stacked; spv::Id sample_result_unsigned_stacked, sample_result_signed_stacked;
SampleTexture(texture_parameters, image_operands_mask, {
image_2d_array_or_cube_unsigned, // 2D stacked.
image_2d_array_or_cube_signed, sampler, is_all_signed, if (use_computed_lod) {
is_any_signed, sample_result_unsigned_stacked, // Extract 2D gradients for stacked textures which are 2D arrays.
sample_result_signed_stacked); uint_vector_temp_.clear();
// Sample the second layer if linear filtering is potentially needed uint_vector_temp_.push_back(0);
// (conditionally or unconditionally, depending on whether the filter uint_vector_temp_.push_back(1);
// needs to be chosen at runtime), and filter. texture_parameters.gradX =
if (vol_filter_is_linear != spv::NoResult || builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
vol_mag_filter_is_linear) { gradients_h, uint_vector_temp_);
spv::Block& block_z_head = *builder_->getBuildPoint(); texture_parameters.gradY =
spv::Block& block_z_linear = (vol_filter_is_linear != spv::NoResult) builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
? builder_->makeNewBlock() gradients_v, uint_vector_temp_);
: block_z_head;
spv::Block& block_z_merge = (vol_filter_is_linear != spv::NoResult)
? builder_->makeNewBlock()
: block_z_head;
if (vol_filter_is_linear != spv::NoResult) {
builder_->createSelectionMerge(
&block_z_merge, spv::SelectionControlDontFlattenMask);
builder_->createConditionalBranch(
vol_filter_is_linear, &block_z_linear, &block_z_merge);
builder_->setBuildPoint(&block_z_linear);
} }
spv::Id layer_1_coordinate = builder_->createBinOp( // Check if linear filtering is needed.
spv::OpFAdd, type_float_, layer_0_coordinate, bool vol_mag_filter_is_fetch_const =
builder_->makeFloatConstant(1.0f)); instr.attributes.vol_mag_filter ==
xenos::TextureFilter::kUseFetchConst;
bool vol_min_filter_is_fetch_const =
instr.attributes.vol_min_filter ==
xenos::TextureFilter::kUseFetchConst;
bool vol_mag_filter_is_linear = instr.attributes.vol_mag_filter ==
xenos::TextureFilter::kLinear;
bool vol_min_filter_is_linear = instr.attributes.vol_min_filter ==
xenos::TextureFilter::kLinear;
spv::Id vol_filter_is_linear = spv::NoResult;
if (use_computed_lod &&
(vol_mag_filter_is_fetch_const ||
vol_min_filter_is_fetch_const ||
vol_mag_filter_is_linear != vol_min_filter_is_linear)) {
// Check if minifying along layers (derivative > 1 along any
// axis).
spv::Id layer_max_gradient = builder_->createBinBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NMax,
builder_->createCompositeExtract(gradients_h, type_float_, 2),
builder_->createCompositeExtract(gradients_v, type_float_,
2));
if (!instr.attributes.unnormalized_coordinates) {
// Denormalize the gradient if provided as normalized.
assert_true(size[2] != spv::NoResult);
layer_max_gradient = builder_->createNoContractionBinOp(
spv::OpFMul, type_float_, layer_max_gradient, size[2]);
}
// For NaN, considering that magnification is being done.
spv::Id is_minifying_z = builder_->createBinOp(
spv::OpFOrdLessThan, type_bool_, layer_max_gradient,
builder_->makeFloatConstant(1.0f));
// Choose what filter is actually used, the minification or the
// magnification one.
spv::Id vol_mag_filter_is_linear_loaded =
vol_mag_filter_is_fetch_const
? builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_,
fetch_constant_word_4,
builder_->makeUintConstant(UINT32_C(1) << 0)),
const_uint_0_)
: builder_->makeBoolConstant(vol_mag_filter_is_linear);
spv::Id vol_min_filter_is_linear_loaded =
vol_min_filter_is_fetch_const
? builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_,
fetch_constant_word_4,
builder_->makeUintConstant(UINT32_C(1) << 1)),
const_uint_0_)
: builder_->makeBoolConstant(vol_min_filter_is_linear);
vol_filter_is_linear = builder_->createTriOp(
spv::OpSelect, type_bool_, is_minifying_z,
vol_min_filter_is_linear_loaded,
vol_mag_filter_is_linear_loaded);
} else {
// No gradients, or using the same filter overrides for magnifying
// and minifying. Assume always magnifying if no gradients (LOD 0,
// always <= 0). LOD is within 2D layers, not between them (unlike
// in 3D textures, which have mips with depth reduced), so it
// shouldn't have effect on filtering between layers.
if (vol_mag_filter_is_fetch_const) {
vol_filter_is_linear = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_, fetch_constant_word_4,
builder_->makeUintConstant(UINT32_C(1) << 0)),
const_uint_0_);
}
}
spv::Id layer_coordinate = coordinates[2];
// Linear filtering may be needed either based on a dynamic
// condition (the filtering mode is taken from the fetch constant,
// or it's different for magnification and minification), or on a
// static one (with gradients - specified in the instruction for
// both magnification and minification as linear, without
// gradients - specified for magnification as linear).
// If the filter is linear, subtract 0.5 from the Z coordinate of
// the first layer in filtering because 0.5 is in the middle of it.
if (vol_filter_is_linear != spv::NoResult) {
layer_coordinate = builder_->createTriOp(
spv::OpSelect, type_float_, vol_filter_is_linear,
builder_->createNoContractionBinOp(
spv::OpFSub, type_float_, layer_coordinate,
builder_->makeFloatConstant(0.5f)),
layer_coordinate);
} else if (vol_mag_filter_is_linear) {
layer_coordinate = builder_->createNoContractionBinOp(
spv::OpFSub, type_float_, layer_coordinate,
builder_->makeFloatConstant(0.5f));
}
// Sample the first layer, needed regardless of whether filtering is
// needed.
// Floor the array layer (Vulkan does rounding to nearest or + 0.5
// and floor even for the layer index, but on the Xenos, addressing
// is similar to that of 3D textures). This is needed for both point
// and linear filtering (with linear, 0.5 was subtracted
// previously).
spv::Id layer_0_coordinate = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor,
layer_coordinate);
id_vector_temp_.clear(); id_vector_temp_.clear();
id_vector_temp_.push_back(coordinates[0]); id_vector_temp_.push_back(coordinates[0]);
id_vector_temp_.push_back(coordinates[1]); id_vector_temp_.push_back(coordinates[1]);
id_vector_temp_.push_back(layer_1_coordinate); id_vector_temp_.push_back(layer_0_coordinate);
texture_parameters.coords = builder_->createCompositeConstruct( texture_parameters.coords = builder_->createCompositeConstruct(
type_float3_, id_vector_temp_); type_float3_, id_vector_temp_);
spv::Id layer_lerp_factor = builder_->createUnaryBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450Fract,
layer_coordinate);
spv::Id sample_result_unsigned_stacked_filtered;
spv::Id sample_result_signed_stacked_filtered;
SampleTexture( SampleTexture(
texture_parameters, image_operands_mask, texture_parameters, image_operands_mask,
image_2d_array_or_cube_unsigned, image_2d_array_or_cube_signed, image_2d_array_or_cube_unsigned, image_2d_array_or_cube_signed,
sampler, is_all_signed, is_any_signed, sampler, is_any_unsigned, is_any_signed,
sample_result_unsigned_stacked_filtered,
sample_result_signed_stacked_filtered, layer_lerp_factor,
sample_result_unsigned_stacked, sample_result_signed_stacked); sample_result_unsigned_stacked, sample_result_signed_stacked);
if (vol_filter_is_linear != spv::NoResult) { // Sample the second layer if linear filtering is potentially needed
// Get the actual build point after the SampleTexture call for // (conditionally or unconditionally, depending on whether the
// phi. // filter needs to be chosen at runtime), and filter.
spv::Block& block_z_linear_end = *builder_->getBuildPoint(); if (vol_filter_is_linear != spv::NoResult ||
builder_->createBranch(&block_z_merge); vol_mag_filter_is_linear) {
builder_->setBuildPoint(&block_z_merge); spv::Block& block_z_head = *builder_->getBuildPoint();
{ spv::Block& block_z_linear =
std::unique_ptr<spv::Instruction> filter_phi_op = (vol_filter_is_linear != spv::NoResult)
std::make_unique<spv::Instruction>( ? builder_->makeNewBlock()
builder_->getUniqueId(), type_float4_, spv::OpPhi); : block_z_head;
filter_phi_op->addIdOperand( spv::Block& block_z_merge =
sample_result_unsigned_stacked_filtered); (vol_filter_is_linear != spv::NoResult)
filter_phi_op->addIdOperand(block_z_linear_end.getId()); ? builder_->makeNewBlock()
filter_phi_op->addIdOperand(sample_result_unsigned_stacked); : block_z_head;
filter_phi_op->addIdOperand(block_z_head.getId()); if (vol_filter_is_linear != spv::NoResult) {
sample_result_unsigned_stacked = filter_phi_op->getResultId(); builder_->createSelectionMerge(
builder_->getBuildPoint()->addInstruction( &block_z_merge, spv::SelectionControlDontFlattenMask);
std::move(filter_phi_op)); builder_->createConditionalBranch(
vol_filter_is_linear, &block_z_linear, &block_z_merge);
builder_->setBuildPoint(&block_z_linear);
} }
{ spv::Id layer_1_coordinate = builder_->createBinOp(
std::unique_ptr<spv::Instruction> filter_phi_op = spv::OpFAdd, type_float_, layer_0_coordinate,
std::make_unique<spv::Instruction>( builder_->makeFloatConstant(1.0f));
builder_->getUniqueId(), type_float4_, spv::OpPhi); id_vector_temp_.clear();
filter_phi_op->addIdOperand( id_vector_temp_.push_back(coordinates[0]);
sample_result_signed_stacked_filtered); id_vector_temp_.push_back(coordinates[1]);
filter_phi_op->addIdOperand(block_z_linear_end.getId()); id_vector_temp_.push_back(layer_1_coordinate);
filter_phi_op->addIdOperand(sample_result_signed_stacked); texture_parameters.coords = builder_->createCompositeConstruct(
filter_phi_op->addIdOperand(block_z_head.getId()); type_float3_, id_vector_temp_);
sample_result_signed_stacked = filter_phi_op->getResultId(); spv::Id layer_lerp_factor = builder_->createUnaryBuiltinCall(
builder_->getBuildPoint()->addInstruction( type_float_, ext_inst_glsl_std_450_, GLSLstd450Fract,
std::move(filter_phi_op)); layer_coordinate);
spv::Id sample_result_unsigned_stacked_filtered;
spv::Id sample_result_signed_stacked_filtered;
SampleTexture(
texture_parameters, image_operands_mask,
image_2d_array_or_cube_unsigned,
image_2d_array_or_cube_signed, sampler, is_any_unsigned,
is_any_signed, sample_result_unsigned_stacked_filtered,
sample_result_signed_stacked_filtered, layer_lerp_factor,
sample_result_unsigned_stacked, sample_result_signed_stacked);
if (vol_filter_is_linear != spv::NoResult) {
// Get the actual build point after the SampleTexture call for
// phi.
spv::Block& block_z_linear_end = *builder_->getBuildPoint();
builder_->createBranch(&block_z_merge);
builder_->setBuildPoint(&block_z_merge);
{
std::unique_ptr<spv::Instruction> filter_phi_op =
std::make_unique<spv::Instruction>(
builder_->getUniqueId(), type_float4_, spv::OpPhi);
filter_phi_op->addIdOperand(
sample_result_unsigned_stacked_filtered);
filter_phi_op->addIdOperand(block_z_linear_end.getId());
filter_phi_op->addIdOperand(sample_result_unsigned_stacked);
filter_phi_op->addIdOperand(block_z_head.getId());
sample_result_unsigned_stacked = filter_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(filter_phi_op));
}
{
std::unique_ptr<spv::Instruction> filter_phi_op =
std::make_unique<spv::Instruction>(
builder_->getUniqueId(), type_float4_, spv::OpPhi);
filter_phi_op->addIdOperand(
sample_result_signed_stacked_filtered);
filter_phi_op->addIdOperand(block_z_linear_end.getId());
filter_phi_op->addIdOperand(sample_result_signed_stacked);
filter_phi_op->addIdOperand(block_z_head.getId());
sample_result_signed_stacked = filter_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(filter_phi_op));
}
} else {
sample_result_unsigned_stacked =
sample_result_unsigned_stacked_filtered;
sample_result_signed_stacked =
sample_result_signed_stacked_filtered;
} }
} else {
sample_result_unsigned_stacked =
sample_result_unsigned_stacked_filtered;
sample_result_signed_stacked =
sample_result_signed_stacked_filtered;
} }
} }
// Get the actual build point for phi. if_data_is_3d.makeEndIf();
spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint();
builder_->createBranch(&block_dimension_merge);
// Choose between the 3D and the stacked result based on the actual sample_result_unsigned = if_data_is_3d.createMergePhi(
// data dimensionality. sample_result_unsigned_3d, sample_result_unsigned_stacked);
builder_->setBuildPoint(&block_dimension_merge); sample_result_signed = if_data_is_3d.createMergePhi(
{ sample_result_signed_3d, sample_result_signed_stacked);
std::unique_ptr<spv::Instruction> dimension_phi_op =
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
type_float4_, spv::OpPhi);
dimension_phi_op->addIdOperand(sample_result_unsigned_3d);
dimension_phi_op->addIdOperand(block_dimension_3d_end.getId());
dimension_phi_op->addIdOperand(sample_result_unsigned_stacked);
dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId());
sample_result_unsigned = dimension_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(dimension_phi_op));
}
{
std::unique_ptr<spv::Instruction> dimension_phi_op =
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
type_float4_, spv::OpPhi);
dimension_phi_op->addIdOperand(sample_result_signed_3d);
dimension_phi_op->addIdOperand(block_dimension_3d_end.getId());
dimension_phi_op->addIdOperand(sample_result_signed_stacked);
dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId());
sample_result_signed = dimension_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(dimension_phi_op));
}
} else { } else {
if (use_computed_lod) { if (use_computed_lod) {
texture_parameters.gradX = gradients_h; texture_parameters.gradX = gradients_h;
@ -2045,7 +1984,7 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
builder_->createCompositeConstruct(type_float3_, id_vector_temp_); builder_->createCompositeConstruct(type_float3_, id_vector_temp_);
SampleTexture(texture_parameters, image_operands_mask, SampleTexture(texture_parameters, image_operands_mask,
image_2d_array_or_cube_unsigned, image_2d_array_or_cube_unsigned,
image_2d_array_or_cube_signed, sampler, is_all_signed, image_2d_array_or_cube_signed, sampler, is_any_unsigned,
is_any_signed, sample_result_unsigned, is_any_signed, sample_result_unsigned,
sample_result_signed); sample_result_signed);
} }
@ -2095,26 +2034,20 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
spv::OpBitwiseAnd, type_uint_, swizzle_word, spv::OpBitwiseAnd, type_uint_, swizzle_word,
builder_->makeUintConstant(swizzle_bit_0_value << 2)), builder_->makeUintConstant(swizzle_bit_0_value << 2)),
const_uint_0_); const_uint_0_);
spv::Block& block_swizzle_head = *builder_->getBuildPoint(); SpirvBuilder::IfBuilder if_swizzle_constant(
spv::Block& block_swizzle_constant = builder_->makeNewBlock(); swizzle_bit_2, spv::SelectionControlDontFlattenMask, *builder_);
spv::Block& block_swizzle_component = builder_->makeNewBlock(); spv::Id swizzle_result_constant;
spv::Block& block_swizzle_merge = builder_->makeNewBlock(); {
builder_->createSelectionMerge( // Constant values.
&block_swizzle_merge, spv::SelectionControlDontFlattenMask); // Bit 0 - 0 or 1.
builder_->createConditionalBranch(swizzle_bit_2, swizzle_result_constant = builder_->createTriOp(
&block_swizzle_constant, spv::OpSelect, type_float_, swizzle_bit_0, const_float_1,
&block_swizzle_component); const_float_0_);
// Constant values. }
builder_->setBuildPoint(&block_swizzle_constant); if_swizzle_constant.makeBeginElse();
// Bit 0 - 0 or 1.
spv::Id swizzle_result_constant =
builder_->createTriOp(spv::OpSelect, type_float_, swizzle_bit_0,
const_float_1, const_float_0_);
builder_->createBranch(&block_swizzle_merge);
// Fetched components.
spv::Id swizzle_result_component; spv::Id swizzle_result_component;
{ {
builder_->setBuildPoint(&block_swizzle_component); // Fetched components.
// Select whether the result is signed or unsigned (or biased or // Select whether the result is signed or unsigned (or biased or
// gamma-corrected) based on the post-swizzle signedness. // gamma-corrected) based on the post-swizzle signedness.
spv::Id swizzle_sample_result = builder_->createTriOp( spv::Id swizzle_sample_result = builder_->createTriOp(
@ -2146,22 +2079,11 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
swizzle_result_component = builder_->createTriOp( swizzle_result_component = builder_->createTriOp(
spv::OpSelect, type_float_, swizzle_bit_1, swizzle_z_or_w, spv::OpSelect, type_float_, swizzle_bit_1, swizzle_z_or_w,
swizzle_x_or_y); swizzle_x_or_y);
builder_->createBranch(&block_swizzle_merge);
} }
if_swizzle_constant.makeEndIf();
// Select between the constants and the fetched components. // Select between the constants and the fetched components.
builder_->setBuildPoint(&block_swizzle_merge); result[result_component_index] = if_swizzle_constant.createMergePhi(
{ swizzle_result_constant, swizzle_result_component);
std::unique_ptr<spv::Instruction> swizzle_phi_op =
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
type_float_, spv::OpPhi);
swizzle_phi_op->addIdOperand(swizzle_result_constant);
swizzle_phi_op->addIdOperand(block_swizzle_constant.getId());
swizzle_phi_op->addIdOperand(swizzle_result_component);
swizzle_phi_op->addIdOperand(block_swizzle_component.getId());
result[result_component_index] = swizzle_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(swizzle_phi_op));
}
} }
} }
@ -2441,58 +2363,43 @@ size_t SpirvShaderTranslator::FindOrAddSamplerBinding(
void SpirvShaderTranslator::SampleTexture( void SpirvShaderTranslator::SampleTexture(
spv::Builder::TextureParameters& texture_parameters, spv::Builder::TextureParameters& texture_parameters,
spv::ImageOperandsMask image_operands_mask, spv::Id image_unsigned, spv::ImageOperandsMask image_operands_mask, spv::Id image_unsigned,
spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed, spv::Id image_signed, spv::Id sampler, spv::Id is_any_unsigned,
spv::Id is_any_signed, spv::Id& result_unsigned_out, spv::Id is_any_signed, spv::Id& result_unsigned_out,
spv::Id& result_signed_out, spv::Id lerp_factor, spv::Id& result_signed_out, spv::Id lerp_factor,
spv::Id lerp_first_unsigned, spv::Id lerp_first_signed) { spv::Id lerp_first_unsigned, spv::Id lerp_first_signed) {
for (uint32_t i = 0; i < 2; ++i) { for (uint32_t i = 0; i < 2; ++i) {
spv::Block& block_sign_head = *builder_->getBuildPoint(); SpirvBuilder::IfBuilder sign_if(i ? is_any_signed : is_any_unsigned,
spv::Block& block_sign = builder_->makeNewBlock(); spv::SelectionControlDontFlattenMask,
spv::Block& block_sign_merge = builder_->makeNewBlock(); *builder_);
builder_->createSelectionMerge(&block_sign_merge, spv::Id sign_result;
spv::SelectionControlDontFlattenMask); {
// Unsigned (i == 0) - if there are any non-signed components. spv::Id image = i ? image_signed : image_unsigned;
// Signed (i == 1) - if there are any signed components. // OpSampledImage must be in the same block as where its result is used.
builder_->createConditionalBranch(i ? is_any_signed : is_all_signed, texture_parameters.sampler = builder_->createBinOp(
i ? &block_sign : &block_sign_merge, spv::OpSampledImage,
i ? &block_sign_merge : &block_sign); builder_->makeSampledImageType(builder_->getTypeId(image)), image,
builder_->setBuildPoint(&block_sign); sampler);
spv::Id image = i ? image_signed : image_unsigned; sign_result = builder_->createTextureCall(
// OpSampledImage must be in the same block as where its result is used. spv::NoPrecision, type_float4_, false, false, false, false, false,
texture_parameters.sampler = builder_->createBinOp( texture_parameters, image_operands_mask);
spv::OpSampledImage, if (lerp_factor != spv::NoResult) {
builder_->makeSampledImageType(builder_->getTypeId(image)), image, spv::Id lerp_first = i ? lerp_first_signed : lerp_first_unsigned;
sampler); if (lerp_first != spv::NoResult) {
spv::Id result = builder_->createTextureCall( spv::Id lerp_difference = builder_->createNoContractionBinOp(
spv::NoPrecision, type_float4_, false, false, false, false, false, spv::OpVectorTimesScalar, type_float4_,
texture_parameters, image_operands_mask); builder_->createNoContractionBinOp(spv::OpFSub, type_float4_,
if (lerp_factor != spv::NoResult) { sign_result, lerp_first),
spv::Id lerp_first = i ? lerp_first_signed : lerp_first_unsigned; lerp_factor);
if (lerp_first != spv::NoResult) { sign_result = builder_->createNoContractionBinOp(
spv::Id lerp_difference = builder_->createNoContractionBinOp( spv::OpFAdd, type_float4_, sign_result, lerp_difference);
spv::OpVectorTimesScalar, type_float4_, }
builder_->createNoContractionBinOp(spv::OpFSub, type_float4_,
result, lerp_first),
lerp_factor);
result = builder_->createNoContractionBinOp(spv::OpFAdd, type_float4_,
result, lerp_difference);
} }
} }
builder_->createBranch(&block_sign_merge); sign_if.makeEndIf();
builder_->setBuildPoint(&block_sign_merge); // This may overwrite the first lerp endpoint for the sign (such usage of
{ // this function is allowed).
std::unique_ptr<spv::Instruction> phi_op = (i ? result_signed_out : result_unsigned_out) =
std::make_unique<spv::Instruction>(builder_->getUniqueId(), sign_if.createMergePhi(sign_result, const_float4_0_);
type_float4_, spv::OpPhi);
phi_op->addIdOperand(result);
phi_op->addIdOperand(block_sign.getId());
phi_op->addIdOperand(const_float4_0_);
phi_op->addIdOperand(block_sign_head.getId());
// This may overwrite the first lerp endpoint for the sign (such usage of
// this function is allowed).
(i ? result_signed_out : result_unsigned_out) = phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(std::move(phi_op));
}
} }
} }
@ -2500,48 +2407,33 @@ spv::Id SpirvShaderTranslator::QueryTextureLod(
spv::Builder::TextureParameters& texture_parameters, spv::Id image_unsigned, spv::Builder::TextureParameters& texture_parameters, spv::Id image_unsigned,
spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed) { spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed) {
// OpSampledImage must be in the same block as where its result is used. // OpSampledImage must be in the same block as where its result is used.
spv::Block& block_sign_head = *builder_->getBuildPoint(); SpirvBuilder::IfBuilder if_signed(
spv::Block& block_sign_signed = builder_->makeNewBlock(); is_all_signed, spv::SelectionControlDontFlattenMask, *builder_);
spv::Block& block_sign_unsigned = builder_->makeNewBlock(); spv::Id lod_signed;
spv::Block& block_sign_merge = builder_->makeNewBlock();
builder_->createSelectionMerge(&block_sign_merge,
spv::SelectionControlDontFlattenMask);
builder_->createConditionalBranch(is_all_signed, &block_sign_signed,
&block_sign_unsigned);
builder_->setBuildPoint(&block_sign_signed);
texture_parameters.sampler = builder_->createBinOp(
spv::OpSampledImage,
builder_->makeSampledImageType(builder_->getTypeId(image_signed)),
image_signed, sampler);
spv::Id lod_signed = builder_->createCompositeExtract(
builder_->createTextureQueryCall(spv::OpImageQueryLod, texture_parameters,
false),
type_float_, 1);
builder_->createBranch(&block_sign_merge);
builder_->setBuildPoint(&block_sign_unsigned);
texture_parameters.sampler = builder_->createBinOp(
spv::OpSampledImage,
builder_->makeSampledImageType(builder_->getTypeId(image_unsigned)),
image_unsigned, sampler);
spv::Id lod_unsigned = builder_->createCompositeExtract(
builder_->createTextureQueryCall(spv::OpImageQueryLod, texture_parameters,
false),
type_float_, 1);
builder_->createBranch(&block_sign_merge);
builder_->setBuildPoint(&block_sign_merge);
spv::Id result;
{ {
std::unique_ptr<spv::Instruction> sign_phi_op = texture_parameters.sampler = builder_->createBinOp(
std::make_unique<spv::Instruction>(builder_->getUniqueId(), type_float_, spv::OpSampledImage,
spv::OpPhi); builder_->makeSampledImageType(builder_->getTypeId(image_signed)),
sign_phi_op->addIdOperand(lod_signed); image_signed, sampler);
sign_phi_op->addIdOperand(block_sign_signed.getId()); lod_signed = builder_->createCompositeExtract(
sign_phi_op->addIdOperand(lod_unsigned); builder_->createTextureQueryCall(spv::OpImageQueryLod,
sign_phi_op->addIdOperand(block_sign_unsigned.getId()); texture_parameters, false),
result = sign_phi_op->getResultId(); type_float_, 1);
builder_->getBuildPoint()->addInstruction(std::move(sign_phi_op));
} }
return result; if_signed.makeBeginElse();
spv::Id lod_unsigned;
{
texture_parameters.sampler = builder_->createBinOp(
spv::OpSampledImage,
builder_->makeSampledImageType(builder_->getTypeId(image_unsigned)),
image_unsigned, sampler);
lod_unsigned = builder_->createCompositeExtract(
builder_->createTextureQueryCall(spv::OpImageQueryLod,
texture_parameters, false),
type_float_, 1);
}
if_signed.makeEndIf();
return if_signed.createMergePhi(lod_signed, lod_unsigned);
} }
} // namespace gpu } // namespace gpu

View File

@ -0,0 +1,950 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv_shader_translator.h"
#include <array>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <utility>
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/gpu/ucode.h"
namespace xe {
namespace gpu {
void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
if (!export_eM) {
return;
}
assert_zero(export_eM & ~current_shader().memexport_eM_written());
if (!IsMemoryExportSupported()) {
return;
}
// Check if memory export is allowed in this guest shader invocation.
std::optional<SpirvBuilder::IfBuilder> if_memexport_allowed;
if (main_memexport_allowed_ != spv::NoResult) {
if_memexport_allowed.emplace(main_memexport_allowed_,
spv::SelectionControlDontFlattenMask,
*builder_);
}
// If the pixel was killed (but the actual killing on the SPIR-V side has not
// been performed yet because the device doesn't support demotion to helper
// invocation that doesn't interfere with control flow), the current
// invocation is not considered active anymore.
std::optional<SpirvBuilder::IfBuilder> if_pixel_not_killed;
if (var_main_kill_pixel_ != spv::NoResult) {
if_pixel_not_killed.emplace(
builder_->createUnaryOp(
spv::OpLogicalNot, type_bool_,
builder_->createLoad(var_main_kill_pixel_, spv::NoPrecision)),
spv::SelectionControlDontFlattenMask, *builder_);
}
// Check if the address with the correct sign and exponent was written, and
// that the index doesn't overflow the mantissa bits.
// all((eA_vector >> uvec4(30, 23, 23, 23)) == uvec4(0x1, 0x96, 0x96, 0x96))
spv::Id eA_vector = builder_->createUnaryOp(
spv::OpBitcast, type_uint4_,
builder_->createLoad(var_main_memexport_address_, spv::NoPrecision));
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeUintConstant(30));
id_vector_temp_.push_back(builder_->makeUintConstant(23));
id_vector_temp_.push_back(id_vector_temp_.back());
id_vector_temp_.push_back(id_vector_temp_.back());
spv::Id address_validation_shift =
builder_->makeCompositeConstant(type_uint4_, id_vector_temp_);
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeUintConstant(0x1));
id_vector_temp_.push_back(builder_->makeUintConstant(0x96));
id_vector_temp_.push_back(id_vector_temp_.back());
id_vector_temp_.push_back(id_vector_temp_.back());
spv::Id address_validation_value =
builder_->makeCompositeConstant(type_uint4_, id_vector_temp_);
SpirvBuilder::IfBuilder if_address_valid(
builder_->createUnaryOp(
spv::OpAll, type_bool_,
builder_->createBinOp(
spv::OpIEqual, type_bool4_,
builder_->createBinOp(spv::OpShiftRightLogical, type_uint4_,
eA_vector, address_validation_shift),
address_validation_value)),
spv::SelectionControlDontFlattenMask, *builder_, 2, 1);
using EMIdArray = std::array<spv::Id, ucode::kMaxMemExportElementCount>;
auto for_each_eM = [&](std::function<void(uint32_t eM_index)> fn) {
uint8_t eM_remaining = export_eM;
uint32_t eM_index;
while (xe::bit_scan_forward(eM_remaining, &eM_index)) {
eM_remaining &= ~(uint8_t(1) << eM_index);
fn(eM_index);
}
};
// Load the original eM.
EMIdArray eM_original;
for_each_eM([&](uint32_t eM_index) {
eM_original[eM_index] = builder_->createLoad(
var_main_memexport_data_[eM_index], spv::NoPrecision);
});
// Swap red and blue if needed.
spv::Id format_info =
builder_->createCompositeExtract(eA_vector, type_uint_, 2);
spv::Id swap_red_blue = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info,
builder_->makeUintConstant(uint32_t(1) << 19)),
const_uint_0_);
EMIdArray eM_swapped;
uint_vector_temp_.clear();
uint_vector_temp_.push_back(2);
uint_vector_temp_.push_back(1);
uint_vector_temp_.push_back(0);
uint_vector_temp_.push_back(3);
for_each_eM([&](uint32_t eM_index) {
eM_swapped[eM_index] = builder_->createTriOp(
spv::OpSelect, type_float4_, swap_red_blue,
builder_->createRvalueSwizzle(spv::NoPrecision, type_float4_,
eM_original[eM_index], uint_vector_temp_),
eM_original[eM_index]);
});
// Extract the numeric format.
spv::Id is_signed = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info,
builder_->makeUintConstant(uint32_t(1) << 16)),
const_uint_0_);
spv::Id is_norm = builder_->createBinOp(
spv::OpIEqual, type_bool_,
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info,
builder_->makeUintConstant(uint32_t(1) << 17)),
const_uint_0_);
// Perform format packing.
auto flush_nan = [&](const EMIdArray& eM) -> EMIdArray {
EMIdArray eM_flushed;
for_each_eM([&](uint32_t eM_index) {
spv::Id element_unflushed = eM[eM_index];
unsigned int component_count =
builder_->getNumComponents(element_unflushed);
eM_flushed[eM_index] = builder_->createTriOp(
spv::OpSelect, type_float_vectors_[component_count - 1],
builder_->createUnaryOp(spv::OpIsNan,
type_bool_vectors_[component_count - 1],
element_unflushed),
const_float_vectors_0_[component_count - 1], element_unflushed);
});
return eM_flushed;
};
auto make_float_constant_vectors =
[&](float value) -> std::array<spv::Id, 4> {
std::array<spv::Id, 4> const_vectors;
const_vectors[0] = builder_->makeFloatConstant(value);
id_vector_temp_.clear();
id_vector_temp_.push_back(const_vectors[0]);
for (unsigned int component_count_minus_1 = 1; component_count_minus_1 < 4;
++component_count_minus_1) {
id_vector_temp_.push_back(const_vectors[0]);
const_vectors[component_count_minus_1] = builder_->makeCompositeConstant(
type_float_vectors_[component_count_minus_1], id_vector_temp_);
}
return const_vectors;
};
std::array<spv::Id, 4> const_float_vectors_minus_1 =
make_float_constant_vectors(-1.0f);
std::array<spv::Id, 4> const_float_vectors_minus_0_5 =
make_float_constant_vectors(-0.5f);
std::array<spv::Id, 4> const_float_vectors_0_5 =
make_float_constant_vectors(0.5f);
// The widths must be without holes (R, RG, RGB, RGBA), and expecting the
// widths to add up to the size of the stored texel (8, 16 or 32 bits), as the
// unused upper bits will contain junk from the sign extension of X if the
// number is signed.
auto pack_8_16_32 = [&](std::array<uint32_t, 4> widths) -> EMIdArray {
unsigned int component_count;
std::array<uint32_t, 4> offsets{};
for (component_count = 0; component_count < widths.size();
++component_count) {
if (!widths[component_count]) {
break;
}
// Only formats for which max + 0.5 can be represented exactly.
assert(widths[component_count] <= 23);
if (component_count) {
offsets[component_count] =
offsets[component_count - 1] + widths[component_count - 1];
}
}
assert_not_zero(component_count);
// Extract the needed components.
EMIdArray eM_unflushed = eM_swapped;
if (component_count < 4) {
if (component_count == 1) {
for_each_eM([&](uint32_t eM_index) {
eM_unflushed[eM_index] = builder_->createCompositeExtract(
eM_unflushed[eM_index], type_float_, 0);
});
} else {
uint_vector_temp_.clear();
for (unsigned int component_index = 0;
component_index < component_count; ++component_index) {
uint_vector_temp_.push_back(component_index);
}
for_each_eM([&](uint32_t eM_index) {
eM_unflushed[eM_index] = builder_->createRvalueSwizzle(
spv::NoPrecision, type_float_vectors_[component_count - 1],
eM_unflushed[eM_index], uint_vector_temp_);
});
}
}
// Flush NaNs.
EMIdArray eM_flushed = flush_nan(eM_unflushed);
// Convert to integers.
SpirvBuilder::IfBuilder if_signed(
is_signed, spv::SelectionControlDontFlattenMask, *builder_);
EMIdArray eM_signed;
{
// Signed.
SpirvBuilder::IfBuilder if_norm(
is_norm, spv::SelectionControlDontFlattenMask, *builder_);
EMIdArray eM_norm;
{
// Signed normalized.
id_vector_temp_.clear();
for (unsigned int component_index = 0;
component_index < component_count; ++component_index) {
id_vector_temp_.push_back(builder_->makeFloatConstant(
float((uint32_t(1) << (widths[component_index] - 1)) - 1)));
}
spv::Id const_max_value =
component_count > 1
? builder_->makeCompositeConstant(
type_float_vectors_[component_count - 1], id_vector_temp_)
: id_vector_temp_.front();
for_each_eM([&](uint32_t eM_index) {
eM_norm[eM_index] = builder_->createNoContractionBinOp(
spv::OpFMul, type_float_vectors_[component_count - 1],
builder_->createTriBuiltinCall(
type_float_vectors_[component_count - 1],
ext_inst_glsl_std_450_, GLSLstd450FClamp,
eM_flushed[eM_index],
const_float_vectors_minus_1[component_count - 1],
const_float_vectors_1_[component_count - 1]),
const_max_value);
});
}
if_norm.makeEndIf();
// All phi instructions must be in the beginning of the block.
for_each_eM([&](uint32_t eM_index) {
eM_signed[eM_index] =
if_norm.createMergePhi(eM_norm[eM_index], eM_flushed[eM_index]);
});
// Convert to signed integer, adding plus/minus 0.5 before truncating
// according to the Direct3D format conversion rules.
for_each_eM([&](uint32_t eM_index) {
eM_signed[eM_index] = builder_->createUnaryOp(
spv::OpBitcast, type_uint_vectors_[component_count - 1],
builder_->createUnaryOp(
spv::OpConvertFToS, type_int_vectors_[component_count - 1],
builder_->createNoContractionBinOp(
spv::OpFAdd, type_float_vectors_[component_count - 1],
eM_signed[eM_index],
builder_->createTriOp(
spv::OpSelect, type_float_vectors_[component_count - 1],
builder_->createBinOp(
spv::OpFOrdLessThan,
type_bool_vectors_[component_count - 1],
eM_signed[eM_index],
const_float_vectors_0_[component_count - 1]),
const_float_vectors_minus_0_5[component_count - 1],
const_float_vectors_0_5[component_count - 1]))));
});
}
if_signed.makeBeginElse();
EMIdArray eM_unsigned;
{
SpirvBuilder::IfBuilder if_norm(
is_norm, spv::SelectionControlDontFlattenMask, *builder_);
EMIdArray eM_norm;
{
// Unsigned normalized.
id_vector_temp_.clear();
for (unsigned int component_index = 0;
component_index < component_count; ++component_index) {
id_vector_temp_.push_back(builder_->makeFloatConstant(
float((uint32_t(1) << widths[component_index]) - 1)));
}
spv::Id const_max_value =
component_count > 1
? builder_->makeCompositeConstant(
type_float_vectors_[component_count - 1], id_vector_temp_)
: id_vector_temp_.front();
for_each_eM([&](uint32_t eM_index) {
eM_norm[eM_index] = builder_->createNoContractionBinOp(
spv::OpFMul, type_float_vectors_[component_count - 1],
builder_->createTriBuiltinCall(
type_float_vectors_[component_count - 1],
ext_inst_glsl_std_450_, GLSLstd450FClamp,
eM_flushed[eM_index],
const_float_vectors_0_[component_count - 1],
const_float_vectors_1_[component_count - 1]),
const_max_value);
});
}
if_norm.makeEndIf();
// All phi instructions must be in the beginning of the block.
for_each_eM([&](uint32_t eM_index) {
eM_unsigned[eM_index] =
if_norm.createMergePhi(eM_norm[eM_index], eM_flushed[eM_index]);
});
// Convert to unsigned integer, adding 0.5 before truncating according to
// the Direct3D format conversion rules.
for_each_eM([&](uint32_t eM_index) {
eM_unsigned[eM_index] = builder_->createUnaryOp(
spv::OpConvertFToU, type_uint_vectors_[component_count - 1],
builder_->createNoContractionBinOp(
spv::OpFAdd, type_float_vectors_[component_count - 1],
eM_unsigned[eM_index],
const_float_vectors_0_5[component_count - 1]));
});
}
if_signed.makeEndIf();
EMIdArray eM_unpacked;
for_each_eM([&](uint32_t eM_index) {
eM_unpacked[eM_index] =
if_signed.createMergePhi(eM_signed[eM_index], eM_unsigned[eM_index]);
});
// Pack into a 32-bit value, and pad to a 4-component vector for the phi.
EMIdArray eM_packed;
for_each_eM([&](uint32_t eM_index) {
spv::Id element_unpacked = eM_unpacked[eM_index];
eM_packed[eM_index] = component_count > 1
? builder_->createCompositeExtract(
element_unpacked, type_uint_, 0)
: element_unpacked;
for (unsigned int component_index = 1; component_index < component_count;
++component_index) {
eM_packed[eM_index] = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, eM_packed[eM_index],
builder_->createCompositeExtract(element_unpacked, type_uint_,
component_index),
builder_->makeUintConstant(offsets[component_index]),
builder_->makeUintConstant(widths[component_index]));
}
id_vector_temp_.clear();
id_vector_temp_.resize(4, const_uint_0_);
id_vector_temp_.front() = eM_packed[eM_index];
eM_packed[eM_index] =
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
});
return eM_packed;
};
SpirvBuilder::SwitchBuilder format_switch(
builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, format_info,
builder_->makeUintConstant(8),
builder_->makeUintConstant(6)),
spv::SelectionControlDontFlattenMask, *builder_);
struct FormatCase {
EMIdArray eM_packed;
uint32_t element_bytes_log2;
spv::Id phi_parent;
};
std::vector<FormatCase> format_cases;
// Must be called at the end of the switch case segment for the correct phi
// parent.
auto add_format_case = [&](const EMIdArray& eM_packed,
uint32_t element_bytes_log2) {
FormatCase& format_case = format_cases.emplace_back();
format_case.eM_packed = eM_packed;
format_case.element_bytes_log2 = element_bytes_log2;
format_case.phi_parent = builder_->getBuildPoint()->getId();
};
// k_8, k_8_A, k_8_B
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_8));
// TODO(Triang3l): Investigate how input should be treated for k_8_A, k_8_B.
format_switch.addCurrentCaseLiteral(
static_cast<unsigned int>(xenos::ColorFormat::k_8_A));
format_switch.addCurrentCaseLiteral(
static_cast<unsigned int>(xenos::ColorFormat::k_8_B));
add_format_case(pack_8_16_32({8}), 0);
// k_1_5_5_5
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_1_5_5_5));
add_format_case(pack_8_16_32({5, 5, 5, 1}), 1);
// k_5_6_5
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_5_6_5));
add_format_case(pack_8_16_32({5, 6, 5}), 1);
// k_6_5_5
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_6_5_5));
add_format_case(pack_8_16_32({5, 5, 6}), 1);
// k_8_8_8_8, k_8_8_8_8_A, k_8_8_8_8_AS_16_16_16_16
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_8_8_8_8));
// TODO(Triang3l): Investigate how input should be treated for k_8_8_8_8_A.
format_switch.addCurrentCaseLiteral(
static_cast<unsigned int>(xenos::ColorFormat::k_8_8_8_8_A));
format_switch.addCurrentCaseLiteral(
static_cast<unsigned int>(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16));
add_format_case(pack_8_16_32({8, 8, 8, 8}), 2);
// k_2_10_10_10, k_2_10_10_10_AS_16_16_16_16
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_2_10_10_10));
format_switch.addCurrentCaseLiteral(static_cast<unsigned int>(
xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16));
add_format_case(pack_8_16_32({10, 10, 10, 2}), 2);
// k_8_8
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_8_8));
add_format_case(pack_8_16_32({8, 8}), 1);
// k_4_4_4_4
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_4_4_4_4));
add_format_case(pack_8_16_32({4, 4, 4, 4}), 1);
// k_10_11_11, k_10_11_11_AS_16_16_16_16
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_10_11_11));
format_switch.addCurrentCaseLiteral(
static_cast<unsigned int>(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16));
add_format_case(pack_8_16_32({11, 11, 10}), 2);
// k_11_11_10, k_11_11_10_AS_16_16_16_16
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_11_11_10));
format_switch.addCurrentCaseLiteral(
static_cast<unsigned int>(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16));
add_format_case(pack_8_16_32({10, 11, 11}), 2);
// k_16
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_16));
add_format_case(pack_8_16_32({16}), 1);
// k_16_16
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_16_16));
add_format_case(pack_8_16_32({16, 16}), 2);
// k_16_16_16_16
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_16_16_16_16));
{
// Flush NaNs.
EMIdArray fixed16_flushed = flush_nan(eM_swapped);
// Convert to integers.
SpirvBuilder::IfBuilder if_signed(
is_signed, spv::SelectionControlDontFlattenMask, *builder_);
EMIdArray fixed16_signed;
{
// Signed.
SpirvBuilder::IfBuilder if_norm(
is_norm, spv::SelectionControlDontFlattenMask, *builder_);
EMIdArray fixed16_norm;
{
// Signed normalized.
id_vector_temp_.clear();
id_vector_temp_.resize(4, builder_->makeFloatConstant(
float((uint32_t(1) << (16 - 1)) - 1)));
spv::Id const_snorm16_max_value =
builder_->makeCompositeConstant(type_float4_, id_vector_temp_);
for_each_eM([&](uint32_t eM_index) {
fixed16_norm[eM_index] = builder_->createNoContractionBinOp(
spv::OpFMul, type_float4_,
builder_->createTriBuiltinCall(
type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp,
fixed16_flushed[eM_index], const_float_vectors_minus_1[3],
const_float4_1_),
const_snorm16_max_value);
});
}
if_norm.makeEndIf();
// All phi instructions must be in the beginning of the block.
for_each_eM([&](uint32_t eM_index) {
fixed16_signed[eM_index] = if_norm.createMergePhi(
fixed16_norm[eM_index], fixed16_flushed[eM_index]);
});
// Convert to signed integer, adding plus/minus 0.5 before truncating
// according to the Direct3D format conversion rules.
for_each_eM([&](uint32_t eM_index) {
fixed16_signed[eM_index] = builder_->createUnaryOp(
spv::OpBitcast, type_uint4_,
builder_->createUnaryOp(
spv::OpConvertFToS, type_int4_,
builder_->createNoContractionBinOp(
spv::OpFAdd, type_float4_, fixed16_signed[eM_index],
builder_->createTriOp(
spv::OpSelect, type_float4_,
builder_->createBinOp(spv::OpFOrdLessThan, type_bool4_,
fixed16_signed[eM_index],
const_float4_0_),
const_float_vectors_minus_0_5[3],
const_float_vectors_0_5[3]))));
});
}
if_signed.makeBeginElse();
EMIdArray fixed16_unsigned;
{
// Unsigned.
SpirvBuilder::IfBuilder if_norm(
is_norm, spv::SelectionControlDontFlattenMask, *builder_);
EMIdArray fixed16_norm;
{
// Unsigned normalized.
id_vector_temp_.clear();
id_vector_temp_.resize(
4, builder_->makeFloatConstant(float((uint32_t(1) << 16) - 1)));
spv::Id const_unorm16_max_value =
builder_->makeCompositeConstant(type_float4_, id_vector_temp_);
for_each_eM([&](uint32_t eM_index) {
fixed16_norm[eM_index] = builder_->createNoContractionBinOp(
spv::OpFMul, type_float4_,
builder_->createTriBuiltinCall(
type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp,
fixed16_flushed[eM_index], const_float4_0_, const_float4_1_),
const_unorm16_max_value);
});
}
if_norm.makeEndIf();
// All phi instructions must be in the beginning of the block.
for_each_eM([&](uint32_t eM_index) {
fixed16_unsigned[eM_index] = if_norm.createMergePhi(
fixed16_norm[eM_index], fixed16_flushed[eM_index]);
});
// Convert to unsigned integer, adding 0.5 before truncating according to
// the Direct3D format conversion rules.
for_each_eM([&](uint32_t eM_index) {
fixed16_unsigned[eM_index] = builder_->createUnaryOp(
spv::OpConvertFToU, type_uint4_,
builder_->createNoContractionBinOp(spv::OpFAdd, type_float4_,
fixed16_unsigned[eM_index],
const_float_vectors_0_5[3]));
});
}
if_signed.makeEndIf();
EMIdArray fixed16_unpacked;
for_each_eM([&](uint32_t eM_index) {
fixed16_unpacked[eM_index] = if_signed.createMergePhi(
fixed16_signed[eM_index], fixed16_unsigned[eM_index]);
});
// Pack into two 32-bit values, and pad to a 4-component vector for the phi.
EMIdArray fixed16_packed;
spv::Id const_uint_16 = builder_->makeUintConstant(16);
for_each_eM([&](uint32_t eM_index) {
spv::Id fixed16_element_unpacked = fixed16_unpacked[eM_index];
id_vector_temp_.clear();
for (uint32_t component_index = 0; component_index < 2;
++component_index) {
id_vector_temp_.push_back(builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_,
builder_->createCompositeExtract(fixed16_element_unpacked,
type_uint_, 2 * component_index),
builder_->createCompositeExtract(
fixed16_element_unpacked, type_uint_, 2 * component_index + 1),
const_uint_16, const_uint_16));
}
for (uint32_t component_index = 2; component_index < 4;
++component_index) {
id_vector_temp_.push_back(const_uint_0_);
}
fixed16_packed[eM_index] =
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
});
add_format_case(fixed16_packed, 3);
}
// TODO(Triang3l): Use the extended range float16 conversion.
// k_16_FLOAT
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_16_FLOAT));
{
EMIdArray format_packed_16_float;
for_each_eM([&](uint32_t eM_index) {
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->createCompositeExtract(
eM_swapped[eM_index], type_float_, 0));
id_vector_temp_.push_back(const_float_0_);
spv::Id format_packed_16_float_x = builder_->createUnaryBuiltinCall(
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
builder_->createCompositeConstruct(type_float2_, id_vector_temp_));
id_vector_temp_.clear();
id_vector_temp_.resize(4, const_uint_0_);
id_vector_temp_.front() = format_packed_16_float_x;
format_packed_16_float[eM_index] =
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
});
add_format_case(format_packed_16_float, 1);
}
// k_16_16_FLOAT
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_16_16_FLOAT));
{
EMIdArray format_packed_16_16_float;
for_each_eM([&](uint32_t eM_index) {
uint_vector_temp_.clear();
uint_vector_temp_.push_back(0);
uint_vector_temp_.push_back(1);
spv::Id format_packed_16_16_float_xy = builder_->createUnaryBuiltinCall(
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
eM_swapped[eM_index],
uint_vector_temp_));
id_vector_temp_.clear();
id_vector_temp_.resize(4, const_uint_0_);
id_vector_temp_.front() = format_packed_16_16_float_xy;
format_packed_16_16_float[eM_index] =
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
});
add_format_case(format_packed_16_16_float, 2);
}
// k_16_16_16_16_FLOAT
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_16_16_16_16_FLOAT));
{
EMIdArray format_packed_16_16_16_16_float;
for_each_eM([&](uint32_t eM_index) {
spv::Id format_packed_16_16_16_16_float_xy_zw[2];
for (uint32_t component_index = 0; component_index < 2;
++component_index) {
uint_vector_temp_.clear();
uint_vector_temp_.push_back(2 * component_index);
uint_vector_temp_.push_back(2 * component_index + 1);
format_packed_16_16_16_16_float_xy_zw[component_index] =
builder_->createUnaryBuiltinCall(
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
eM_swapped[eM_index],
uint_vector_temp_));
}
id_vector_temp_.clear();
id_vector_temp_.push_back(format_packed_16_16_16_16_float_xy_zw[0]);
id_vector_temp_.push_back(format_packed_16_16_16_16_float_xy_zw[1]);
id_vector_temp_.push_back(const_uint_0_);
id_vector_temp_.push_back(const_uint_0_);
format_packed_16_16_16_16_float[eM_index] =
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
});
add_format_case(format_packed_16_16_16_16_float, 3);
}
// k_32_FLOAT
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_32_FLOAT));
{
EMIdArray format_packed_32_float;
for_each_eM([&](uint32_t eM_index) {
format_packed_32_float[eM_index] = builder_->createUnaryOp(
spv::OpBitcast, type_uint4_, eM_swapped[eM_index]);
});
add_format_case(format_packed_32_float, 2);
}
// k_32_32_FLOAT
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_32_32_FLOAT));
{
EMIdArray format_packed_32_32_float;
for_each_eM([&](uint32_t eM_index) {
format_packed_32_32_float[eM_index] = builder_->createUnaryOp(
spv::OpBitcast, type_uint4_, eM_swapped[eM_index]);
});
add_format_case(format_packed_32_32_float, 3);
}
// k_32_32_32_32_FLOAT
format_switch.makeBeginCase(
static_cast<unsigned int>(xenos::ColorFormat::k_32_32_32_32_FLOAT));
{
EMIdArray format_packed_32_32_32_32_float;
for_each_eM([&](uint32_t eM_index) {
format_packed_32_32_32_32_float[eM_index] = builder_->createUnaryOp(
spv::OpBitcast, type_uint4_, eM_swapped[eM_index]);
});
add_format_case(format_packed_32_32_32_32_float, 4);
}
format_switch.makeEndSwitch();
// Select the result and the element size based on the format.
// Phi must be the first instructions in a block.
EMIdArray eM_packed;
for_each_eM([&](uint32_t eM_index) {
auto eM_packed_phi = std::make_unique<spv::Instruction>(
builder_->getUniqueId(), type_uint4_, spv::OpPhi);
// Default case for an invalid format.
eM_packed_phi->addIdOperand(const_uint4_0_);
eM_packed_phi->addIdOperand(format_switch.getDefaultPhiParent());
for (const FormatCase& format_case : format_cases) {
eM_packed_phi->addIdOperand(format_case.eM_packed[eM_index]);
eM_packed_phi->addIdOperand(format_case.phi_parent);
}
eM_packed[eM_index] = eM_packed_phi->getResultId();
builder_->getBuildPoint()->addInstruction(std::move(eM_packed_phi));
});
spv::Id element_bytes_log2;
{
auto element_bytes_log2_phi = std::make_unique<spv::Instruction>(
builder_->getUniqueId(), type_uint_, spv::OpPhi);
// Default case for an invalid format (doesn't enter any element size
// conditional, skipped).
element_bytes_log2_phi->addIdOperand(builder_->makeUintConstant(5));
element_bytes_log2_phi->addIdOperand(format_switch.getDefaultPhiParent());
for (const FormatCase& format_case : format_cases) {
element_bytes_log2_phi->addIdOperand(
builder_->makeUintConstant(format_case.element_bytes_log2));
element_bytes_log2_phi->addIdOperand(format_case.phi_parent);
}
element_bytes_log2 = element_bytes_log2_phi->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(element_bytes_log2_phi));
}
// Endian-swap.
spv::Id endian =
builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, format_info,
const_uint_0_, builder_->makeUintConstant(3));
for_each_eM([&](uint32_t eM_index) {
eM_packed[eM_index] = EndianSwap128Uint4(eM_packed[eM_index], endian);
});
// Load the index of eM0 in the stream.
spv::Id eM0_index = builder_->createTriOp(
spv::OpBitFieldUExtract, type_uint_,
builder_->createCompositeExtract(eA_vector, type_uint_, 1), const_uint_0_,
builder_->makeUintConstant(23));
// Check how many elements starting from eM0 are within the bounds of the
// stream, and from the eM# that were written, exclude the out-of-bounds ones.
// The index can't be negative, and the index and the count are limited to 23
// bits, so it's safe to use 32-bit signed subtraction and clamping to get the
// remaining eM# count.
spv::Id eM_indices_to_store = builder_->createTriOp(
spv::OpBitFieldUExtract, type_uint_,
builder_->createLoad(var_main_memexport_data_written_, spv::NoPrecision),
const_uint_0_,
builder_->createUnaryOp(
spv::OpBitcast, type_uint_,
builder_->createTriBuiltinCall(
type_int_, ext_inst_glsl_std_450_, GLSLstd450SClamp,
builder_->createBinOp(
spv::OpISub, type_int_,
builder_->createUnaryOp(
spv::OpBitcast, type_int_,
builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_,
builder_->createCompositeExtract(
eA_vector, type_uint_, 3),
const_uint_0_,
builder_->makeUintConstant(23))),
builder_->createUnaryOp(spv::OpBitcast, type_int_,
eM0_index)),
const_int_0_,
builder_->makeIntConstant(ucode::kMaxMemExportElementCount))));
// Get the eM0 address in bytes.
// Left-shift the stream base address by 2 to both convert it from dwords to
// bytes and drop the upper bits.
spv::Id const_uint_2 = builder_->makeUintConstant(2);
spv::Id eM0_address_bytes = builder_->createBinOp(
spv::OpIAdd, type_uint_,
builder_->createBinOp(
spv::OpShiftLeftLogical, type_uint_,
builder_->createCompositeExtract(eA_vector, type_uint_, 0),
const_uint_2),
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, eM0_index,
element_bytes_log2));
// Store based on the element size.
auto store_needed_eM = [&](std::function<void(uint32_t eM_index)> fn) {
for_each_eM([&](uint32_t eM_index) {
SpirvBuilder::IfBuilder if_eM_needed(
builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
eM_indices_to_store,
builder_->makeUintConstant(1u << eM_index)),
const_uint_0_),
spv::SelectionControlDontFlattenMask, *builder_, 2, 1);
fn(eM_index);
if_eM_needed.makeEndIf();
});
};
SpirvBuilder::SwitchBuilder element_size_switch(
element_bytes_log2, spv::SelectionControlDontFlattenMask, *builder_);
element_size_switch.makeBeginCase(0);
{
store_needed_eM([&](uint32_t eM_index) {
spv::Id element_address_bytes =
eM_index != 0 ? builder_->createBinOp(
spv::OpIAdd, type_uint_, eM0_address_bytes,
builder_->makeUintConstant(eM_index))
: eM0_address_bytes;
// replace_shift = 8 * (element_address_bytes & 3)
spv::Id replace_shift = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, const_uint_0_,
element_address_bytes, builder_->makeUintConstant(3), const_uint_2);
StoreUint32ToSharedMemory(
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_,
builder_->createCompositeExtract(
eM_packed[eM_index], type_uint_, 0),
replace_shift),
builder_->createUnaryOp(
spv::OpBitcast, type_int_,
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_,
element_address_bytes, const_uint_2)),
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_,
builder_->makeUintConstant(0xFFu),
replace_shift));
});
}
element_size_switch.makeBeginCase(1);
{
spv::Id const_uint_1 = builder_->makeUintConstant(1);
spv::Id eM0_address_words = builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_1);
store_needed_eM([&](uint32_t eM_index) {
spv::Id element_address_words =
eM_index != 0 ? builder_->createBinOp(
spv::OpIAdd, type_uint_, eM0_address_words,
builder_->makeUintConstant(eM_index))
: eM0_address_words;
// replace_shift = 16 * (element_address_words & 1)
spv::Id replace_shift = builder_->createQuadOp(
spv::OpBitFieldInsert, type_uint_, const_uint_0_,
element_address_words, builder_->makeUintConstant(4), const_uint_1);
StoreUint32ToSharedMemory(
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_,
builder_->createCompositeExtract(
eM_packed[eM_index], type_uint_, 0),
replace_shift),
builder_->createUnaryOp(
spv::OpBitcast, type_int_,
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_,
element_address_words, const_uint_1)),
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_,
builder_->makeUintConstant(0xFFFFu),
replace_shift));
});
}
element_size_switch.makeBeginCase(2);
{
spv::Id eM0_address_dwords = builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2);
store_needed_eM([&](uint32_t eM_index) {
StoreUint32ToSharedMemory(
builder_->createCompositeExtract(eM_packed[eM_index], type_uint_, 0),
builder_->createUnaryOp(
spv::OpBitcast, type_int_,
eM_index != 0 ? builder_->createBinOp(
spv::OpIAdd, type_uint_, eM0_address_dwords,
builder_->makeUintConstant(eM_index))
: eM0_address_dwords));
});
}
element_size_switch.makeBeginCase(3);
{
spv::Id eM0_address_dwords = builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2);
store_needed_eM([&](uint32_t eM_index) {
spv::Id element_value = eM_packed[eM_index];
spv::Id element_address_dwords_int = builder_->createUnaryOp(
spv::OpBitcast, type_int_,
eM_index != 0 ? builder_->createBinOp(
spv::OpIAdd, type_uint_, eM0_address_dwords,
builder_->makeUintConstant(2 * eM_index))
: eM0_address_dwords);
StoreUint32ToSharedMemory(
builder_->createCompositeExtract(element_value, type_uint_, 0),
element_address_dwords_int);
StoreUint32ToSharedMemory(
builder_->createCompositeExtract(element_value, type_uint_, 1),
builder_->createBinOp(spv::OpIAdd, type_int_,
element_address_dwords_int,
builder_->makeIntConstant(1)));
});
}
element_size_switch.makeBeginCase(4);
{
spv::Id eM0_address_dwords = builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2);
store_needed_eM([&](uint32_t eM_index) {
spv::Id element_value = eM_packed[eM_index];
spv::Id element_address_dwords_int = builder_->createUnaryOp(
spv::OpBitcast, type_int_,
eM_index != 0 ? builder_->createBinOp(
spv::OpIAdd, type_uint_, eM0_address_dwords,
builder_->makeUintConstant(4 * eM_index))
: eM0_address_dwords);
StoreUint32ToSharedMemory(
builder_->createCompositeExtract(element_value, type_uint_, 0),
element_address_dwords_int);
for (uint32_t element_dword_index = 1; element_dword_index < 4;
++element_dword_index) {
StoreUint32ToSharedMemory(
builder_->createCompositeExtract(element_value, type_uint_,
element_dword_index),
builder_->createBinOp(spv::OpIAdd, type_int_,
element_address_dwords_int,
builder_->makeIntConstant(
static_cast<int>(element_dword_index))));
}
});
}
element_size_switch.makeEndSwitch();
// Close the conditionals for whether memory export is allowed in this
// invocation.
if_address_valid.makeEndIf();
if (if_pixel_not_killed.has_value()) {
if_pixel_not_killed->makeEndIf();
}
if (if_memexport_allowed.has_value()) {
if_memexport_allowed->makeEndIf();
}
}
} // namespace gpu
} // namespace xe

File diff suppressed because it is too large Load Diff

View File

@ -330,8 +330,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
uint32_t index_bit = UINT32_C(1) << index; uint32_t index_bit = UINT32_C(1) << index;
textures_remaining = xe::clear_lowest_bit(textures_remaining); textures_remaining = xe::clear_lowest_bit(textures_remaining);
TextureBinding& binding = texture_bindings_[index]; TextureBinding& binding = texture_bindings_[index];
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(index);
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6);
TextureKey old_key = binding.key; TextureKey old_key = binding.key;
uint8_t old_swizzled_signs = binding.swizzled_signs; uint8_t old_swizzled_signs = binding.swizzled_signs;
BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs); BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs);

View File

@ -19,6 +19,7 @@
#include "xenia/base/filesystem.h" #include "xenia/base/filesystem.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/platform.h" #include "xenia/base/platform.h"
#include "xenia/base/string.h" #include "xenia/base/string.h"
#include "xenia/base/system.h" #include "xenia/base/system.h"
@ -354,9 +355,10 @@ void TraceViewer::DrawPacketDisassemblerUI() {
ImGui::NextColumn(); ImGui::NextColumn();
if (!register_info || if (!register_info ||
register_info->type == RegisterInfo::Type::kDword) { register_info->type == RegisterInfo::Type::kDword) {
ImGui::Text("%.8X", action.register_write.value.u32); ImGui::Text("%.8X", action.register_write.value);
} else { } else {
ImGui::Text("%8f", action.register_write.value.f32); ImGui::Text("%8f", xe::memory::Reinterpret<float>(
action.register_write.value));
} }
ImGui::Columns(1); ImGui::Columns(1);
break; break;
@ -706,10 +708,8 @@ void TraceViewer::DrawTextureInfo(
const Shader::TextureBinding& texture_binding) { const Shader::TextureBinding& texture_binding) {
auto& regs = *graphics_system_->register_file(); auto& regs = *graphics_system_->register_file();
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + xenos::xe_gpu_texture_fetch_t fetch =
texture_binding.fetch_constant * 6; regs.GetTextureFetch(texture_binding.fetch_constant);
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]);
auto& fetch = group->texture_fetch;
if (fetch.type != xenos::FetchConstantType::kTexture && if (fetch.type != xenos::FetchConstantType::kTexture &&
(!cvars::gpu_allow_invalid_fetch_constants || (!cvars::gpu_allow_invalid_fetch_constants ||
fetch.type != xenos::FetchConstantType::kInvalidTexture)) { fetch.type != xenos::FetchConstantType::kInvalidTexture)) {
@ -777,9 +777,9 @@ void TraceViewer::DrawFailedTextureInfo(
void TraceViewer::DrawVertexFetcher(Shader* shader, void TraceViewer::DrawVertexFetcher(Shader* shader,
const Shader::VertexBinding& vertex_binding, const Shader::VertexBinding& vertex_binding,
const xe_gpu_vertex_fetch_t* fetch) { const xe_gpu_vertex_fetch_t& fetch) {
const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2); const uint8_t* addr = memory_->TranslatePhysical(fetch.address << 2);
uint32_t vertex_count = fetch->size / vertex_binding.stride_words; uint32_t vertex_count = fetch.size / vertex_binding.stride_words;
int column_count = 0; int column_count = 0;
for (const auto& attrib : vertex_binding.attributes) { for (const auto& attrib : vertex_binding.attributes) {
switch (attrib.fetch_instr.attributes.data_format) { switch (attrib.fetch_instr.attributes.data_format) {
@ -880,7 +880,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
#define LOADEL(type, wo) \ #define LOADEL(type, wo) \
GpuSwap(xe::load<type>(vstart + \ GpuSwap(xe::load<type>(vstart + \
(attrib.fetch_instr.attributes.offset + wo) * 4), \ (attrib.fetch_instr.attributes.offset + wo) * 4), \
fetch->endian) fetch.endian)
switch (attrib.fetch_instr.attributes.data_format) { switch (attrib.fetch_instr.attributes.data_format) {
case xenos::VertexFormat::k_32: case xenos::VertexFormat::k_32:
ImGui::Text("%.8X", LOADEL(uint32_t, 0)); ImGui::Text("%.8X", LOADEL(uint32_t, 0));
@ -1062,7 +1062,7 @@ void ProgressBar(float frac, float width, float height = 0,
if (height == 0) { if (height == 0) {
height = ImGui::GetTextLineHeightWithSpacing(); height = ImGui::GetTextLineHeightWithSpacing();
} }
frac = xe::saturate_unsigned(frac); frac = xe::saturate(frac);
auto pos = ImGui::GetCursorScreenPos(); auto pos = ImGui::GetCursorScreenPos();
auto col = ImGui::ColorConvertFloat4ToU32(color); auto col = ImGui::ColorConvertFloat4ToU32(color);
@ -1180,7 +1180,7 @@ void TraceViewer::DrawStateUI() {
} }
auto enable_mode = auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7);
switch (enable_mode) { switch (enable_mode) {
case ModeControl::kIgnore: case ModeControl::kIgnore:
@ -1202,7 +1202,7 @@ void TraceViewer::DrawStateUI() {
break; break;
} }
case ModeControl::kCopy: { case ModeControl::kCopy: {
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(), ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(),
copy_dest_base); copy_dest_base);
break; break;
@ -1213,9 +1213,9 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Viewport State:"); ImGui::BulletText("Viewport State:");
if (true) { if (true) {
ImGui::TreePush((const void*)0); ImGui::TreePush((const void*)0);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
if ((pa_su_sc_mode_cntl >> 16) & 1) { if ((pa_su_sc_mode_cntl >> 16) & 1) {
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET];
int16_t window_offset_x = window_offset & 0x7FFF; int16_t window_offset_x = window_offset & 0x7FFF;
int16_t window_offset_y = (window_offset >> 16) & 0x7FFF; int16_t window_offset_y = (window_offset >> 16) & 0x7FFF;
if (window_offset_x & 0x4000) { if (window_offset_x & 0x4000) {
@ -1229,8 +1229,8 @@ void TraceViewer::DrawStateUI() {
} else { } else {
ImGui::BulletText("Window Offset: disabled"); ImGui::BulletText("Window Offset: disabled");
} }
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL];
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR];
ImGui::BulletText( ImGui::BulletText(
"Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF, "Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF,
(window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF, (window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF,
@ -1238,7 +1238,7 @@ void TraceViewer::DrawStateUI() {
(window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF), (window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF),
((window_scissor_br >> 16) & 0x7FFF) - ((window_scissor_br >> 16) & 0x7FFF) -
((window_scissor_tl >> 16) & 0x7FFF)); ((window_scissor_tl >> 16) & 0x7FFF));
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF; uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF;
uint32_t surface_pitch = surface_info & 0x3FFF; uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = (surface_info >> 16) & 0x3; auto surface_msaa = (surface_info >> 16) & 0x3;
@ -1250,7 +1250,7 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Surface Pitch: %d", surface_pitch); ImGui::BulletText("Surface Pitch: %d", surface_pitch);
ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz); ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz);
ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]); ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]);
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL];
bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0;
bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0; bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0;
bool vport_yscale_enable = (vte_control & (1 << 2)) > 0; bool vport_yscale_enable = (vte_control & (1 << 2)) > 0;
@ -1265,14 +1265,20 @@ void TraceViewer::DrawStateUI() {
} }
ImGui::BulletText( ImGui::BulletText(
"Viewport Offset: %f, %f, %f", "Viewport Offset: %f, %f, %f",
vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0, vport_xoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET)
vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0, : 0.0f,
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0); vport_yoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f,
vport_zoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET)
: 0.0f);
ImGui::BulletText( ImGui::BulletText(
"Viewport Scale: %f, %f, %f", "Viewport Scale: %f, %f, %f",
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1, vport_xscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE)
vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1, : 1.0f,
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1); vport_yscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f,
vport_zscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE)
: 1.0f);
if (!vport_xscale_enable) { if (!vport_xscale_enable) {
ImGui::PopStyleColor(); ImGui::PopStyleColor();
} }
@ -1282,7 +1288,7 @@ void TraceViewer::DrawStateUI() {
((vte_control >> 8) & 0x1) ? "y/w0" : "y", ((vte_control >> 8) & 0x1) ? "y/w0" : "y",
((vte_control >> 9) & 0x1) ? "z/w0" : "z", ((vte_control >> 9) & 0x1) ? "z/w0" : "z",
((vte_control >> 10) & 0x1) ? "w0" : "1/w0"); ((vte_control >> 10) & 0x1) ? "w0" : "1/w0");
uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL];
bool clip_enabled = ((clip_control >> 17) & 0x1) == 0; bool clip_enabled = ((clip_control >> 17) & 0x1) == 0;
bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1; bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1;
ImGui::BulletText("Clip Enabled: %s, DX Clip: %s", ImGui::BulletText("Clip Enabled: %s, DX Clip: %s",
@ -1294,11 +1300,9 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Rasterizer State:"); ImGui::BulletText("Rasterizer State:");
if (true) { if (true) {
ImGui::TreePush((const void*)0); ImGui::TreePush((const void*)0);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
uint32_t pa_sc_screen_scissor_tl = uint32_t pa_sc_screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL];
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32; uint32_t pa_sc_screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR];
uint32_t pa_sc_screen_scissor_br =
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32;
if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) { if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) {
int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF; int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF;
int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF; int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF;
@ -1353,7 +1357,7 @@ void TraceViewer::DrawStateUI() {
} }
ImGui::Columns(1); ImGui::Columns(1);
auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
uint32_t surface_pitch = rb_surface_info & 0x3FFF; uint32_t surface_pitch = rb_surface_info & 0x3FFF;
auto surface_msaa = auto surface_msaa =
static_cast<xenos::MsaaSamples>((rb_surface_info >> 16) & 0x3); static_cast<xenos::MsaaSamples>((rb_surface_info >> 16) & 0x3);
@ -1362,39 +1366,39 @@ void TraceViewer::DrawStateUI() {
if (enable_mode != ModeControl::kDepth) { if (enable_mode != ModeControl::kDepth) {
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32; uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL];
if ((color_control & 0x8) != 0) { if ((color_control & 0x8) != 0) {
ImGui::BulletText("Alpha Test: %s %.2f", ImGui::BulletText("Alpha Test: %s %.2f",
kCompareFuncNames[color_control & 0x7], kCompareFuncNames[color_control & 0x7],
regs[XE_GPU_REG_RB_ALPHA_REF].f32); regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF));
} else { } else {
ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored); ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored);
ImGui::BulletText("Alpha Test: disabled"); ImGui::BulletText("Alpha Test: disabled");
ImGui::PopStyleColor(); ImGui::PopStyleColor();
} }
auto blend_color = ImVec4(regs[XE_GPU_REG_RB_BLEND_RED].f32, auto blend_color = ImVec4(regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs[XE_GPU_REG_RB_BLEND_GREEN].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs[XE_GPU_REG_RB_BLEND_BLUE].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32); regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA));
ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x, ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x,
blend_color.y, blend_color.z, blend_color.w); blend_color.y, blend_color.z, blend_color.w);
ImGui::SameLine(); ImGui::SameLine();
// TODO small_height (was true) parameter was removed // TODO small_height (was true) parameter was removed
ImGui::ColorButton(nullptr, blend_color); ImGui::ColorButton(nullptr, blend_color);
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
uint32_t color_info[4] = { uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR_INFO],
regs[XE_GPU_REG_RB_COLOR1_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO],
regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR2_INFO],
regs[XE_GPU_REG_RB_COLOR3_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO],
}; };
uint32_t rb_blendcontrol[4] = { uint32_t rb_blendcontrol[4] = {
regs[XE_GPU_REG_RB_BLENDCONTROL0].u32, regs[XE_GPU_REG_RB_BLENDCONTROL0],
regs[XE_GPU_REG_RB_BLENDCONTROL1].u32, regs[XE_GPU_REG_RB_BLENDCONTROL1],
regs[XE_GPU_REG_RB_BLENDCONTROL2].u32, regs[XE_GPU_REG_RB_BLENDCONTROL2],
regs[XE_GPU_REG_RB_BLENDCONTROL3].u32, regs[XE_GPU_REG_RB_BLENDCONTROL3],
}; };
ImGui::Columns(2); ImGui::Columns(2);
for (int i = 0; i < xe::countof(color_info); ++i) { for (int i = 0; i < xe::countof(color_info); ++i) {
@ -1503,9 +1507,9 @@ void TraceViewer::DrawStateUI() {
} }
if (ImGui::CollapsingHeader("Depth/Stencil Target")) { if (ImGui::CollapsingHeader("Depth/Stencil Target")) {
auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL];
auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK];
auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO];
bool uses_depth = bool uses_depth =
(rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004); (rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004);
uint32_t stencil_ref = (rb_stencilrefmask & 0xFF); uint32_t stencil_ref = (rb_stencilrefmask & 0xFF);
@ -1689,10 +1693,9 @@ void TraceViewer::DrawStateUI() {
draw_info.index_buffer_size, draw_info.index_buffer_size,
kIndexFormatNames[int(draw_info.index_format)], kIndexFormatNames[int(draw_info.index_format)],
kEndiannessNames[int(draw_info.index_endianness)]); kEndiannessNames[int(draw_info.index_endianness)]);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
if (pa_su_sc_mode_cntl & (1 << 21)) { if (pa_su_sc_mode_cntl & (1 << 21)) {
uint32_t reset_index = uint32_t reset_index = regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX];
regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
if (draw_info.index_format == xenos::IndexFormat::kInt16) { if (draw_info.index_format == xenos::IndexFormat::kInt16) {
ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF); ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF);
} else { } else {
@ -1752,30 +1755,16 @@ void TraceViewer::DrawStateUI() {
auto shader = command_processor->active_vertex_shader(); auto shader = command_processor->active_vertex_shader();
if (shader) { if (shader) {
for (const auto& vertex_binding : shader->vertex_bindings()) { for (const auto& vertex_binding : shader->vertex_bindings()) {
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + xe_gpu_vertex_fetch_t fetch =
(vertex_binding.fetch_constant / 3) * 6; regs.GetVertexFetch(vertex_binding.fetch_constant);
const auto group = assert_true(fetch.endian == xenos::Endian::k8in32);
reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
const xe_gpu_vertex_fetch_t* fetch = nullptr;
switch (vertex_binding.fetch_constant % 3) {
case 0:
fetch = &group->vertex_fetch_0;
break;
case 1:
fetch = &group->vertex_fetch_1;
break;
case 2:
fetch = &group->vertex_fetch_2;
break;
}
assert_true(fetch->endian == xenos::Endian::k8in32);
char tree_root_id[32]; char tree_root_id[32];
sprintf(tree_root_id, "#vertices_root_%d", sprintf(tree_root_id, "#vertices_root_%d",
vertex_binding.fetch_constant); vertex_binding.fetch_constant);
if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s", if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s",
vertex_binding.fetch_constant, fetch->address << 2, vertex_binding.fetch_constant, fetch.address << 2,
fetch->size * 4, fetch.size * 4,
kEndiannessNames[int(fetch->endian)])) { kEndiannessNames[int(fetch.endian)])) {
ImGui::BeginChild("#vertices", ImVec2(0, 300)); ImGui::BeginChild("#vertices", ImVec2(0, 300));
DrawVertexFetcher(shader, vertex_binding, fetch); DrawVertexFetcher(shader, vertex_binding, fetch);
ImGui::EndChild(); ImGui::EndChild();
@ -1823,7 +1812,7 @@ void TraceViewer::DrawStateUI() {
ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6, ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6,
(i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6); (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6);
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32); ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn(); ImGui::NextColumn();
} }
ImGui::Columns(1); ImGui::Columns(1);
@ -1834,8 +1823,9 @@ void TraceViewer::DrawStateUI() {
i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) { i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) {
ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4); ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4);
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::Text("%f, %f, %f, %f", regs[i + 0].f32, regs[i + 1].f32, ImGui::Text("%f, %f, %f, %f", regs.Get<float>(i + 0),
regs[i + 2].f32, regs[i + 3].f32); regs.Get<float>(i + 1), regs.Get<float>(i + 2),
regs.Get<float>(i + 3));
ImGui::NextColumn(); ImGui::NextColumn();
} }
ImGui::Columns(1); ImGui::Columns(1);
@ -1848,7 +1838,7 @@ void TraceViewer::DrawStateUI() {
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32, (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32,
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31); (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31);
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32); ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn(); ImGui::NextColumn();
} }
ImGui::Columns(1); ImGui::Columns(1);
@ -1859,7 +1849,7 @@ void TraceViewer::DrawStateUI() {
i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) { i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) {
ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00); ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00);
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32); ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn(); ImGui::NextColumn();
} }
ImGui::Columns(1); ImGui::Columns(1);

View File

@ -123,7 +123,7 @@ class TraceViewer : public xe::ui::WindowedApp {
void DrawVertexFetcher(Shader* shader, void DrawVertexFetcher(Shader* shader,
const Shader::VertexBinding& vertex_binding, const Shader::VertexBinding& vertex_binding,
const xenos::xe_gpu_vertex_fetch_t* fetch); const xenos::xe_gpu_vertex_fetch_t& fetch);
TraceViewerWindowListener window_listener_; TraceViewerWindowListener window_listener_;

View File

@ -2177,6 +2177,11 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
return IssueCopy(); return IssueCopy();
} }
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
GetVulkanProvider().device_info();
memexport_ranges_.clear();
// Vertex shader analysis. // Vertex shader analysis.
auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader()); auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
if (!vertex_shader) { if (!vertex_shader) {
@ -2184,7 +2189,14 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
return false; return false;
} }
pipeline_cache_->AnalyzeShaderUcode(*vertex_shader); pipeline_cache_->AnalyzeShaderUcode(*vertex_shader);
bool memexport_used_vertex = vertex_shader->memexport_eM_written() != 0; // TODO(Triang3l): If the shader uses memory export, but
// vertexPipelineStoresAndAtomics is not supported, convert the vertex shader
// to a compute shader and dispatch it after the draw if the draw doesn't use
// tessellation.
if (vertex_shader->memexport_eM_written() != 0 &&
device_info.vertexPipelineStoresAndAtomics) {
draw_util::AddMemExportRanges(regs, *vertex_shader, memexport_ranges_);
}
// Pixel shader analysis. // Pixel shader analysis.
bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs);
@ -2207,12 +2219,15 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
} else { } else {
// Disabling pixel shader for this case is also required by the pipeline // Disabling pixel shader for this case is also required by the pipeline
// cache. // cache.
if (!memexport_used_vertex) { if (memexport_ranges_.empty()) {
// This draw has no effect. // This draw has no effect.
return true; return true;
} }
} }
// TODO(Triang3l): Memory export. if (pixel_shader && pixel_shader->memexport_eM_written() != 0 &&
device_info.fragmentStoresAndAtomics) {
draw_util::AddMemExportRanges(regs, *pixel_shader, memexport_ranges_);
}
uint32_t ps_param_gen_pos = UINT32_MAX; uint32_t ps_param_gen_pos = UINT32_MAX;
uint32_t interpolator_mask = uint32_t interpolator_mask =
@ -2428,9 +2443,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
current_guest_graphics_pipeline_layout_ = pipeline_layout; current_guest_graphics_pipeline_layout_ = pipeline_layout;
} }
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
GetVulkanProvider().device_info();
bool host_render_targets_used = render_target_cache_->GetPath() == bool host_render_targets_used = render_target_cache_->GetPath() ==
RenderTargetCache::Path::kHostRenderTargets; RenderTargetCache::Path::kHostRenderTargets;
@ -2503,8 +2515,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
(uint64_t(1) << (vfetch_index & 63))) { (uint64_t(1) << (vfetch_index & 63))) {
continue; continue;
} }
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>( xenos::xe_gpu_vertex_fetch_t vfetch_constant =
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); regs.GetVertexFetch(vfetch_index);
switch (vfetch_constant.type) { switch (vfetch_constant.type) {
case xenos::FetchConstantType::kVertex: case xenos::FetchConstantType::kVertex:
break; break;
@ -2537,9 +2549,39 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
<< (vfetch_index & 63); << (vfetch_index & 63);
} }
// Synchronize the memory pages backing memory scatter export streams, and
// calculate the range that includes the streams for the buffer barrier.
uint32_t memexport_extent_start = UINT32_MAX, memexport_extent_end = 0;
for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) {
uint32_t memexport_range_base_bytes = memexport_range.base_address_dwords
<< 2;
if (!shared_memory_->RequestRange(memexport_range_base_bytes,
memexport_range.size_bytes)) {
XELOGE(
"Failed to request memexport stream at 0x{:08X} (size {}) in the "
"shared memory",
memexport_range_base_bytes, memexport_range.size_bytes);
return false;
}
memexport_extent_start =
std::min(memexport_extent_start, memexport_range_base_bytes);
memexport_extent_end =
std::max(memexport_extent_end,
memexport_range_base_bytes + memexport_range.size_bytes);
}
// Insert the shared memory barrier if needed. // Insert the shared memory barrier if needed.
// TODO(Triang3l): Memory export. // TODO(Triang3l): Find some PM4 command that can be used for indication of
shared_memory_->Use(VulkanSharedMemory::Usage::kRead); // when memexports should be awaited instead of inserting the barrier in Use
// every time if memory export was done in the previous draw?
if (memexport_extent_start < memexport_extent_end) {
shared_memory_->Use(
VulkanSharedMemory::Usage::kGuestDrawReadWrite,
std::make_pair(memexport_extent_start,
memexport_extent_end - memexport_extent_start));
} else {
shared_memory_->Use(VulkanSharedMemory::Usage::kRead);
}
// After all commands that may dispatch, copy or insert barriers, submit the // After all commands that may dispatch, copy or insert barriers, submit the
// barriers (may end the render pass), and (re)enter the render pass before // barriers (may end the render pass), and (re)enter the render pass before
@ -2584,6 +2626,12 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0); primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0);
} }
// Invalidate textures in memexported memory and watch for changes.
for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) {
shared_memory_->RangeWrittenByGpu(memexport_range.base_address_dwords << 2,
memexport_range.size_bytes, false);
}
return true; return true;
} }
@ -3306,10 +3354,10 @@ void VulkanCommandProcessor::UpdateDynamicState(
// Blend constants. // Blend constants.
float blend_constants[] = { float blend_constants[] = {
regs[XE_GPU_REG_RB_BLEND_RED].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs[XE_GPU_REG_RB_BLEND_GREEN].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs[XE_GPU_REG_RB_BLEND_BLUE].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
}; };
dynamic_blend_constants_update_needed_ |= dynamic_blend_constants_update_needed_ |=
std::memcmp(dynamic_blend_constants_, blend_constants, std::memcmp(dynamic_blend_constants_, blend_constants,
@ -3455,7 +3503,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
const RegisterFile& regs = *register_file_; const RegisterFile& regs = *register_file_;
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>(); auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>(); auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>(); auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>(); auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>(); auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
@ -3463,7 +3511,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF); regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>(); auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>(); auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); auto vgt_indx_offset = regs.Get<int32_t>(XE_GPU_REG_VGT_INDX_OFFSET);
bool edram_fragment_shader_interlock = bool edram_fragment_shader_interlock =
render_target_cache_->GetPath() == render_target_cache_->GetPath() ==
@ -3776,7 +3824,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
system_constants_.edram_rt_format_flags[i] = format_flags; system_constants_.edram_rt_format_flags[i] = format_flags;
uint32_t blend_factors_ops = uint32_t blend_factors_ops =
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
dirty |= system_constants_.edram_rt_blend_factors_ops[i] != dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
blend_factors_ops; blend_factors_ops;
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
@ -3805,22 +3853,22 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
if (primitive_polygonal) { if (primitive_polygonal) {
if (pa_su_sc_mode_cntl.poly_offset_front_enable) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset = poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
} }
if (pa_su_sc_mode_cntl.poly_offset_back_enable) { if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
poly_offset_back_scale = poly_offset_back_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
poly_offset_back_offset = poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
} }
} else { } else {
if (pa_su_sc_mode_cntl.poly_offset_para_enable) { if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset = poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset; poly_offset_back_offset = poly_offset_front_offset;
} }
@ -3883,21 +3931,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
} }
dirty |= system_constants_.edram_blend_constant[0] != dirty |= system_constants_.edram_blend_constant[0] !=
regs[XE_GPU_REG_RB_BLEND_RED].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
system_constants_.edram_blend_constant[0] = system_constants_.edram_blend_constant[0] =
regs[XE_GPU_REG_RB_BLEND_RED].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
dirty |= system_constants_.edram_blend_constant[1] != dirty |= system_constants_.edram_blend_constant[1] !=
regs[XE_GPU_REG_RB_BLEND_GREEN].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
system_constants_.edram_blend_constant[1] = system_constants_.edram_blend_constant[1] =
regs[XE_GPU_REG_RB_BLEND_GREEN].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
dirty |= system_constants_.edram_blend_constant[2] != dirty |= system_constants_.edram_blend_constant[2] !=
regs[XE_GPU_REG_RB_BLEND_BLUE].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
system_constants_.edram_blend_constant[2] = system_constants_.edram_blend_constant[2] =
regs[XE_GPU_REG_RB_BLEND_BLUE].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
dirty |= system_constants_.edram_blend_constant[3] != dirty |= system_constants_.edram_blend_constant[3] !=
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
system_constants_.edram_blend_constant[3] = system_constants_.edram_blend_constant[3] =
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
} }
if (dirty) { if (dirty) {
@ -3924,10 +3972,10 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
// These are the constant base addresses/ranges for shaders. // These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now cause nothing seems to differ on the Xbox // We have these hardcoded right now cause nothing seems to differ on the Xbox
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
// Check if the float constant layout is still the same and get the counts. // Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& float_constant_map_vertex = const Shader::ConstantRegisterMap& float_constant_map_vertex =
vertex_shader->constant_register_map(); vertex_shader->constant_register_map();
@ -4022,8 +4070,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index); float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping, std::memcpy(mapping,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)] (float_constant_index << 2)],
.f32,
sizeof(float) * 4); sizeof(float) * 4);
mapping += sizeof(float) * 4; mapping += sizeof(float) * 4;
} }
@ -4054,8 +4101,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index); float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping, std::memcpy(mapping,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)] (float_constant_index << 2)],
.f32,
sizeof(float) * 4); sizeof(float) * 4);
mapping += sizeof(float) * 4; mapping += sizeof(float) * 4;
} }
@ -4076,7 +4122,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
return false; return false;
} }
buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize); buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize);
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
kBoolLoopConstantsSize); kBoolLoopConstantsSize);
current_constant_buffers_up_to_date_ |= current_constant_buffers_up_to_date_ |=
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop; UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop;
@ -4094,7 +4140,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
return false; return false;
} }
buffer_info.range = VkDeviceSize(kFetchConstantsSize); buffer_info.range = VkDeviceSize(kFetchConstantsSize);
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0],
kFetchConstantsSize); kFetchConstantsSize);
current_constant_buffers_up_to_date_ |= current_constant_buffers_up_to_date_ |=
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch; UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch;

View File

@ -744,6 +744,9 @@ class VulkanCommandProcessor final : public CommandProcessor {
// System shader constants. // System shader constants.
SpirvShaderTranslator::SystemConstants system_constants_; SpirvShaderTranslator::SystemConstants system_constants_;
// Temporary storage for memexport stream constants used in the draw.
std::vector<draw_util::MemExportRange> memexport_ranges_;
}; };
} // namespace vulkan } // namespace vulkan

View File

@ -718,7 +718,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
[common_blend_rt_index]), [common_blend_rt_index]),
(((normalized_color_mask & (((normalized_color_mask &
~(uint32_t(0b1111) << (4 * common_blend_rt_index))) ~(uint32_t(0b1111) << (4 * common_blend_rt_index)))
? regs[XE_GPU_REG_RB_COLOR_MASK].u32 ? regs[XE_GPU_REG_RB_COLOR_MASK]
: normalized_color_mask) >> : normalized_color_mask) >>
(4 * common_blend_rt_index)) & (4 * common_blend_rt_index)) &
0b1111, 0b1111,

View File

@ -4156,21 +4156,16 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
builder.createAccessChain(spv::StorageClassPushConstant, builder.createAccessChain(spv::StorageClassPushConstant,
push_constants, id_vector_temp), push_constants, id_vector_temp),
spv::NoPrecision); spv::NoPrecision);
spv::Id stencil_sample_passed = builder.createBinOp( SpirvBuilder::IfBuilder stencil_kill_if(
spv::OpINotEqual, type_bool, builder.createBinOp(
builder.createBinOp(spv::OpBitwiseAnd, type_uint, packed, spv::OpIEqual, type_bool,
stencil_mask_constant), builder.createBinOp(spv::OpBitwiseAnd, type_uint, packed,
builder.makeUintConstant(0)); stencil_mask_constant),
spv::Block& stencil_bit_kill_block = builder.makeNewBlock(); builder.makeUintConstant(0)),
spv::Block& stencil_bit_merge_block = builder.makeNewBlock(); spv::SelectionControlMaskNone, builder);
builder.createSelectionMerge(&stencil_bit_merge_block,
spv::SelectionControlMaskNone);
builder.createConditionalBranch(stencil_sample_passed,
&stencil_bit_merge_block,
&stencil_bit_kill_block);
builder.setBuildPoint(&stencil_bit_kill_block);
builder.createNoResultOp(spv::OpKill); builder.createNoResultOp(spv::OpKill);
builder.setBuildPoint(&stencil_bit_merge_block); // OpKill terminates the block.
stencil_kill_if.makeEndIf(false);
} }
} break; } break;
} }

View File

@ -612,8 +612,8 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView(
VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters( VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters(
const VulkanShader::SamplerBinding& binding) const { const VulkanShader::SamplerBinding& binding) const {
const auto& regs = register_file(); const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch =
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); regs.GetTextureFetch(binding.fetch_constant);
SamplerParameters parameters; SamplerParameters parameters;
@ -875,8 +875,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture(
uint32_t& width_scaled_out, uint32_t& height_scaled_out, uint32_t& width_scaled_out, uint32_t& height_scaled_out,
xenos::TextureFormat& format_out) { xenos::TextureFormat& format_out) {
const auto& regs = register_file(); const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
TextureKey key; TextureKey key;
BindingInfoFromFetchConstant(fetch, key, nullptr); BindingInfoFromFetchConstant(fetch, key, nullptr);
if (!key.is_valid || key.base_page == 0 || if (!key.is_valid || key.base_page == 0 ||

View File

@ -8,6 +8,7 @@
*/ */
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/base/memory.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -22,7 +23,7 @@ namespace xenos {
float PWLGammaToLinear(float gamma) { float PWLGammaToLinear(float gamma) {
// Not found in game executables, so just using the logic similar to that in // Not found in game executables, so just using the logic similar to that in
// the Source Engine. // the Source Engine.
gamma = xe::saturate_unsigned(gamma); gamma = xe::saturate(gamma);
float scale, offset; float scale, offset;
// While the compiled code for linear to gamma conversion uses `vcmpgtfp // While the compiled code for linear to gamma conversion uses `vcmpgtfp
// constant, value` comparison (constant > value, or value < constant), it's // constant, value` comparison (constant > value, or value < constant), it's
@ -63,7 +64,7 @@ float PWLGammaToLinear(float gamma) {
} }
float LinearToPWLGamma(float linear) { float LinearToPWLGamma(float linear) {
linear = xe::saturate_unsigned(linear); linear = xe::saturate(linear);
float scale, offset; float scale, offset;
// While the compiled code uses `vcmpgtfp constant, value` comparison // While the compiled code uses `vcmpgtfp constant, value` comparison
// (constant > value, or value < constant), it's preferable to use `value >= // (constant > value, or value < constant), it's preferable to use `value >=
@ -114,8 +115,8 @@ float Float7e3To32(uint32_t f10) {
exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
mantissa = (mantissa << mantissa_lzcnt) & 0x7F; mantissa = (mantissa << mantissa_lzcnt) & 0x7F;
} }
uint32_t f32 = ((exponent + 124) << 23) | (mantissa << 3); return xe::memory::Reinterpret<float>(
return *reinterpret_cast<const float*>(&f32); uint32_t(((exponent + 124) << 23) | (mantissa << 3)));
} }
// Based on CFloat24 from d3dref9.dll and the 6e4 code from: // Based on CFloat24 from d3dref9.dll and the 6e4 code from:
@ -127,7 +128,7 @@ uint32_t Float32To20e4(float f32, bool round_to_nearest_even) noexcept {
// Positive only, and not -0 or NaN. // Positive only, and not -0 or NaN.
return 0; return 0;
} }
uint32_t f32u32 = *reinterpret_cast<const uint32_t*>(&f32); auto f32u32 = xe::memory::Reinterpret<uint32_t>(f32);
if (f32u32 >= 0x3FFFFFF8) { if (f32u32 >= 0x3FFFFFF8) {
// Saturate. // Saturate.
return 0xFFFFFF; return 0xFFFFFF;
@ -161,8 +162,8 @@ float Float20e4To32(uint32_t f24) noexcept {
exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF; mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF;
} }
uint32_t f32 = ((exponent + 112) << 23) | (mantissa << 3); return xe::memory::Reinterpret<float>(
return *reinterpret_cast<const float*>(&f32); uint32_t(((exponent + 112) << 23) | (mantissa << 3)));
} }
const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) { const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) {
@ -241,4 +242,4 @@ const char* GetPrimitiveTypeEnglishDescription(xenos::PrimitiveType prim_type) {
} }
} // namespace xenos } // namespace xenos
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -12,6 +12,7 @@
#include <algorithm> #include <algorithm>
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/ui/graphics_util.h" #include "xenia/ui/graphics_util.h"
#include "xenia/ui/presenter.h" #include "xenia/ui/presenter.h"
@ -67,24 +68,19 @@ bool ImmediateDrawer::ScissorToRenderTarget(const ImmediateDraw& immediate_draw,
} }
float render_target_width_float = float(render_target_width); float render_target_width_float = float(render_target_width);
float render_target_height_float = float(render_target_height); float render_target_height_float = float(render_target_height);
// Scale to render target coordinates, drop NaNs (by doing // Scale to render target coordinates, drop NaNs, and clamp to the render
// std::max(0.0f, variable) in this argument order), and clamp to the render
// target size, below which the values are representable as 16p8 fixed-point. // target size, below which the values are representable as 16p8 fixed-point.
float scale_x = render_target_width / coordinate_space_width(); float scale_x = render_target_width / coordinate_space_width();
float scale_y = render_target_height / coordinate_space_height(); float scale_y = render_target_height / coordinate_space_height();
float x0_float = float x0_float = xe::clamp_float(immediate_draw.scissor_left * scale_x, 0.0f,
std::min(render_target_width_float, render_target_width_float);
std::max(0.0f, immediate_draw.scissor_left * scale_x)); float y0_float = xe::clamp_float(immediate_draw.scissor_top * scale_y, 0.0f,
float y0_float = render_target_height_float);
std::min(render_target_height_float,
std::max(0.0f, immediate_draw.scissor_top * scale_y));
// Also make sure the size is non-negative. // Also make sure the size is non-negative.
float x1_float = float x1_float = xe::clamp_float(immediate_draw.scissor_right * scale_x,
std::min(render_target_width_float, x0_float, render_target_width_float);
std::max(x0_float, immediate_draw.scissor_right * scale_x)); float y1_float = xe::clamp_float(immediate_draw.scissor_bottom * scale_y,
float y1_float = y0_float, render_target_height_float);
std::min(render_target_height_float,
std::max(y0_float, immediate_draw.scissor_bottom * scale_y));
// Top-left - include .5 (0.128 treated as 0 covered, 0.129 as 0 not covered). // Top-left - include .5 (0.128 treated as 0 covered, 0.129 as 0 not covered).
int32_t x0 = (FloatToD3D11Fixed16p8(x0_float) + 127) >> 8; int32_t x0 = (FloatToD3D11Fixed16p8(x0_float) + 127) >> 8;
int32_t y0 = (FloatToD3D11Fixed16p8(y0_float) + 127) >> 8; int32_t y0 = (FloatToD3D11Fixed16p8(y0_float) + 127) >> 8;

View File

@ -153,16 +153,16 @@ bool AndroidWindow::OnActivitySurfaceMotionEvent(jobject event) {
// with out-of-bounds coordinates), when moving the mouse outside the // with out-of-bounds coordinates), when moving the mouse outside the
// View, or when starting moving the mouse when the pointer was previously // View, or when starting moving the mouse when the pointer was previously
// outside the View in some cases. // outside the View in some cases.
int32_t mouse_x = int32_t( int32_t mouse_x =
std::min(float(GetActualPhysicalWidth()), int32_t(xe::clamp_float(jni_env->CallFloatMethod(
std::max(0.0f, jni_env->CallFloatMethod( event, jni_ids.motion_event_get_x, 0),
event, jni_ids.motion_event_get_x, 0))) + 0.0f, float(GetActualPhysicalWidth())) +
0.5f); 0.5f);
int32_t mouse_y = int32_t( int32_t mouse_y =
std::min(float(GetActualPhysicalHeight()), int32_t(xe::clamp_float(jni_env->CallFloatMethod(
std::max(0.0f, jni_env->CallFloatMethod( event, jni_ids.motion_event_get_y, 0),
event, jni_ids.motion_event_get_y, 0))) + 0.0f, float(GetActualPhysicalHeight())) +
0.5f); 0.5f);
static const MouseEvent::Button kMouseEventButtons[] = { static const MouseEvent::Button kMouseEventButtons[] = {
MouseEvent::Button::kLeft, MouseEvent::Button::kRight, MouseEvent::Button::kLeft, MouseEvent::Button::kRight,
MouseEvent::Button::kMiddle, MouseEvent::Button::kX1, MouseEvent::Button::kMiddle, MouseEvent::Button::kX1,